コード例 #1
0
ファイル: reproduction.py プロジェクト: rgeorgi/intent
def filter(ef: ExperimentFiles, overwrite = False):
    """
    Produce the filtered corpus file for each language of the experiment.

    A language is skipped when its filtered file already exists, unless
    ``overwrite`` is true. When ``USE_CONDOR`` is false the filtering is
    done in-process through ``filter_corpus``; otherwise a ``filter``
    command line is handed to ``condorify`` and, once every language has
    been submitted, the function waits on condor (sending a notification
    if ``condor_email`` is set).

    :param ef: experiment file layout providing ``langs`` and the
               original/filtered paths for each language.
    :param overwrite: when true, filter again even if the filtered file
                      is already on disk.
    """
    did_filter = False

    for lang in ef.langs:
        source_path = ef.orig(lang)
        target_path = ef.filtered(lang)

        # Existing output is kept unless the caller asked to overwrite.
        if os.path.exists(target_path) and not overwrite:
            continue

        # Announce the step once, just before the first language is handled.
        if not did_filter:
            REPRO_LOG.log(NORM_LEVEL, "Filtering ODIN data.")

        if USE_CONDOR:
            REPRO_LOG.log(NORM_LEVEL, "Filtering {}...".format(lang))
            condor_args = [
                'filter',
                '--require-aln',
                '--require-lang',
                '--require-gloss',
                '--require-trans',
                source_path,
                target_path,
            ]
            condorify(condor_args, ef._filter_dir(condor=True), ef.filtered(lang, True))
        else:
            filter_corpus(
                [source_path],
                target_path,
                require_aln=True,
                require_lang=True,
                require_gloss=True,
                require_trans=True,
            )

        did_filter = True

    # With condor, the submitted tasks have to finish before
    # this step can be considered done.
    if USE_CONDOR and did_filter:
        if not condor_email:
            condor.condor_wait()
        else:
            condor.condor_wait_notify('Filtering of languages performed.', condor_email, "Filtration Done")

    if did_filter:
        REPRO_LOG.log(NORM_LEVEL, "Filtration complete.")
コード例 #2
0
ファイル: run_dependencies.py プロジェクト: rgeorgi/intent
# 1) Filter the data
# -------------------------------------------
# Every language whose filtered file is missing gets filtered, either
# through a condor job or directly in this process. filtration_done
# records that at least one language needed the work.
filtration_done = False
for lang in ef.langs:
    orig_f = ef.get_original_file(lang)
    filtered_f = ef.get_filtered_file(lang)

    # Filtered output already on disk: nothing to do for this language.
    if os.path.exists(filtered_f):
        continue

    filtration_done = True
    if not USE_CONDOR:
        filter_corpus([orig_f], filtered_f,
                      require_lang=True,
                      require_gloss=True,
                      require_trans=True,
                      require_aln=True)
    else:
        model_prefix, name = ef.get_condor_filter(lang)
        run_cmd([p3path, intent_script, 'filter',
                 '--require-aln', '--require-gloss', '--require-trans', '--require-lang',
                 orig_f, filtered_f],
                model_prefix, name, False)

# Only when condor was used and something was actually submitted is the
# wait/notify helper invoked.
if USE_CONDOR and filtration_done:
    condor_wait_notify("Data has been filtered.", email_address, "CONDOR: Filtration complete.")


# -------------------------------------------
# 2) Enriched data
# -------------------------------------------
# Set to True as soon as one language is found without an enriched file.
enrichment_done = False
for lang in ef.langs:
    filtered_f = ef.get_filtered_file(lang)
    enriched_f = ef.get_enriched_file(lang)

    # Only languages whose enriched file is not yet on disk are flagged.
    if not os.path.exists(enriched_f):
        enrichment_done = True
        # NOTE(review): this excerpt stops here; the enrichment call that
        # consumes filtered_f is presumably made below - confirm.
コード例 #3
0
ファイル: intent.py プロジェクト: rgeorgi/intent
# ENRICH: every parsed option is forwarded as a keyword argument.
if args.subcommand == CMD_ENRICH:
    enrich(**vars(args))

# STATS: report on the given files, treated as xigt, with filenames shown.
elif args.subcommand == CMD_STATS:
    stats_inputs = flatten_list(args.FILE)
    igt_stats(stats_inputs, type='xigt', show_filename=True)

# SPLIT: positional train/dev/test arguments plus the keyword options.
elif args.subcommand == CMD_SPLIT:
    split_inputs = flatten_list(args.FILE)
    split_corpus(split_inputs, args.train, args.dev, args.test,
                 prefix=args.prefix,
                 overwrite=args.overwrite,
                 nfold=args.nfold)

# FILTER: input and output are looked up by their argument-name constants;
# the full option namespace is forwarded as keywords as well.
elif args.subcommand == CMD_FILTER:
    filter_inputs = flatten_list(getattr(args, ARG_INFILE))
    filter_output = getattr(args, ARG_OUTFILE)
    filter_corpus(filter_inputs, filter_output, **vars(args))

# EXTRACT: the flattened file list goes in as input_filelist, next to all
# other parsed options.
elif args.subcommand == CMD_EXTRACT:
    extract_inputs = flatten_list(args.FILE)
    extract_from_xigt(input_filelist=extract_inputs, **vars(args))

# EVAL: map the command-line option names onto the evaluator's keywords.
elif args.subcommand == CMD_EVAL:
    eval_inputs = flatten_list(args.FILE)
    eval_options = {
        'eval_alignment': args.alignment,
        'eval_ds': args.ds_projection,
        'eval_posproj': args.pos_projection,
        'classifier_path': args.classifier,
        'classifier_feats': args.classifier_feats,
        'eval_tagger': args.pos_tagger,
        'gold_tagmap': args.tagmap_gold,
        'trans_tagmap': args.tagmap_trans,
        'outpath': args.output,
    }
    evaluate_intent(eval_inputs, **eval_options)