Example #1
0
def pgroupby(df, groups, f, **kwargs):
    '''# mirror groupby order (group then agg)
    replace:
        results = df.groupby(['col1','col2']).apply(f)
    with:
        results = df.pgroupby(['col1','col2'], f)
    '''
    # Materialize the (key, sub-frame) pairs in groupby order.
    pairs = list(df.groupby(groups))
    keys = [k for k, _ in pairs]
    pieces = [g for _, g in pairs]
    # Apply f to every sub-frame in parallel via pmap.
    mapped = pmap(f, pieces, **kwargs)
    # Restore the group keys as index levels and stitch the results
    # back into a single frame.
    level_names = [groups] if isinstance(groups, str) else groups
    keyed = [pd.concat({key: result}, names=level_names)
             for key, result in zip(keys, mapped)]
    return pd.concat(keyed)
Example #2
0
            # NOTE(review): this excerpt begins inside an exception handler;
            # the enclosing try/def (presumably process()) is not visible.
            import traceback
            log.writeln("decoder raised exception: %s" %
                        "".join(traceback.format_exception(*sys.exc_info())))
            # Tolerate a handful of decoder failures, but re-raise once
            # five have accumulated across the whole run.
            global decoder_errors
            decoder_errors += 1
            if decoder_errors >= 5:
                raise
            else:
                return
        # 1-best translation (score vector, word sequence) from the decoder.
        bestv, best = decoder.get_nbest(goal, 1)[0]
        log.write("done decoding\n")

        # Collect hypotheses that will be used for learning
        sent.hyps = get_hyps(sent, goal, thedecoder.weights)
        log.write("done rescoring\n")

        return sent

    # Map process() over the input sentences — via MPI pmap when parallel,
    # otherwise as a lazy generator on this single process.
    if opts.parallel:
        outsents = parallel.pmap(process, insents, tag=0, verbose=1)
    else:
        outsents = (process(sent) for sent in insents)

    # Only the master rank (or the sole process) consumes results and
    # writes visualization output; failed sentences (falsy) are skipped.
    if not opts.parallel or parallel.rank == parallel.master:
        bleu_comps = svector.Vector()
        for outsent in outsents:
            if outsent:
                for hyp in outsent.hyps:
                    visualization_output(output_file, outsent, hyp)
            output_file.flush()
Example #3
0
        # NOTE(review): excerpt begins mid-function; `outputs` is built
        # outside this view — presumably the decoder's n-best list.
        (bestv, best) = outputs[0]

        # Optionally record the source-/target-side trees of the Viterbi
        # derivation, one "<sent id> ||| <tree>" line per sentence.
        if french_parse_file:
            french_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().french_tree()))
            french_parse_file.flush()
        if english_parse_file:
            english_parse_file.write("%s ||| %s\n" % (sent.id, goal.viterbi_deriv().english_tree()))
            english_parse_file.flush()

        if log.level >= 1:
            # Force a collection first so the reported memory figure is
            # not inflated by garbage from decoding.
            gc.collect()
            log.write("  done decoding, memory=%s\n" % monitor.memory())
            log.write("  features: %s; %s\n" % (bestv, thedecoder.weights.dot(bestv)))

        sent.ewords = best
        return sent

    # Decode all input sentences — via MPI pmap when parallel, otherwise
    # lazily on this process.
    if opts.parallel:
        outsents = parallel.pmap(process, insents, tag=0, verbose=1)
    else:
        outsents = (process(sent) for sent in insents)

    # Master writes exactly one line per input; a failed sentence (None)
    # produces an empty line so output stays aligned with input.
    if not opts.parallel or parallel.rank == parallel.master:
        for outsent in outsents:
            if outsent is None:
                output_file.write("\n")
            else:
                output_file.write("%s\n" % " ".join(outsent.ewords))
            output_file.flush()
Example #4
0
            # NOTE(review): excerpt begins mid-block; only the master rank
            # (or the sole process) persists the learned weights.
            if not opts.parallel or parallel.rank == parallel.master:
                outweightfile.write("%s\n" % outweights)
                outweightfile.flush()

        # Share the master's weights with every MPI rank.
        if opts.parallel:
            outweights = MPI.COMM_WORLD.bcast(outweights, root=parallel.master)

        # Process heldout data

        # Non-master ranks temporarily swap in the broadcast weights for
        # heldout decoding; saveweights presumably restores them later —
        # the restore is not visible in this excerpt.
        if not opts.parallel or parallel.rank != parallel.master:
            saveweights = thedecoder.weights
            thedecoder.weights = outweights

        # Decode the heldout set, in parallel when requested.
        if opts.parallel:
            outsents = parallel.pmap(process_heldout,
                                     heldoutsents,
                                     tag=0,
                                     verbose=1)
        else:
            outsents = (process_heldout(sent) for sent in heldoutsents)

        # Master writes heldout translations and accumulates per-sentence
        # score components; an empty line marks a decoder failure so the
        # output stays aligned with the input.
        if not opts.parallel or parallel.rank == parallel.master:
            heldout_score_comps = svector.Vector()
            for outsent in outsents:
                if outsent:
                    output_file.write("%s\n" % " ".join(outsent.ewords))
                    heldout_score_comps += outsent.score_comps
                else:
                    output_file.write("\n")  # dummy output for decoder failure
                output_file.flush()

        # NOTE(review): excerpt ends at this dangling condition; its body
        # lies outside the visible chunk.
        if not opts.parallel or parallel.rank == parallel.master:
Example #5
0
    # One pass over the training data per epoch: loop forever when
    # requested (itertools.count), otherwise run exactly one epoch.
    # NOTE(review): xrange and the tuple-parameter lambda below are
    # Python 2-only syntax.
    if loop_forever:
        iterations = itertools.count()
    else:
        iterations = xrange(1)

    for iteration in iterations:
        log.writeln("epoch %d" % iteration)

        # Process training data

        # Shuffle only on the master so that, presumably, pmap then
        # distributes the same order to all ranks — TODO confirm pmap
        # scatters from master.
        if shuffle_sentences and (not opts.parallel or parallel.rank == parallel.master):
            random.shuffle(trainsents)

        # Decode each (index, sentence) pair, carrying the index so
        # results can be restored to input order below.
        if opts.parallel:
            outsents = parallel.pmap(lambda (si, sent): (si, process(sent)), trainsents, tag=0, verbose=1)
            if parallel.rank == parallel.master:
                outsents = list(outsents)
        else:
            outsents = [(si, process(sent)) for (si, sent) in trainsents]

        if not opts.parallel or parallel.rank == parallel.master:
            # Sort by the carried index — pmap results may arrive out of
            # input order.
            outsents.sort()
            train_score_comps = svector.Vector()
            for _, outsent in outsents:
                if outsent:
                    output_file.write("%s\n" % " ".join(outsent.ewords))
                    train_score_comps += outsent.score_comps
                else:
                    output_file.write("\n")  # dummy output for decoder failure