Code example #1
File: gather.py Project: eipiplusun/irit-stac
def main(args):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    if args.skip_training:
        tdir = latest_tmp()
    else:
        tdir = current_tmp()
        extract_features(TRAINING_CORPUS, tdir, strip_mode=args.strip_mode)

    if TEST_CORPUS is not None:
        vocab_path = fp.join(tdir,
                             (fp.basename(TRAINING_CORPUS) +
                              '.relations.sparse.vocab'))
        extract_features(TEST_CORPUS, tdir,
                         vocab_path=vocab_path,
                         strip_mode=args.strip_mode)

    with open(os.path.join(tdir, "versions-gather.txt"), "w") as stream:
        call(["pip", "freeze"], stdout=stream)

    if not args.skip_training:
        latest_dir = latest_tmp()
        force_symlink(fp.basename(tdir), latest_dir)
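All of the snippets on this page lean on the same small set of imports and project-local helpers. A minimal sketch of the assumed preamble (the constants such as TRAINING_CORPUS, TEST_CORPUS, LEX_DIR or ANNOTATORS, and helpers such as latest_tmp, current_tmp or force_symlink, come from each project's own modules):

import os
import sys
from os import path as fp
from subprocess import call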
Code example #2
File: gather.py Project: popescuv/irit-stac
def extract_features(corpus, output_dir, vocab_path=None, strip_mode=None):
    """Extract features for a corpus, dump the instances.

    Run feature extraction for a particular corpus; and store the
    results in the output directory. Output file name will be
    computed from the corpus file name.

    This triggers two distinct processes, for pairs of EDUs then for
    single EDUs.

    Parameters
    ----------
    corpus: filepath
        Selected corpus
    output_dir: filepath
        Folder where instances will be dumped
    vocab_path: filepath
        Vocabulary to load for feature extraction (needed if extracting
        test data; must ensure we have the same vocab in test as we'd
        have in training)
    strip_mode: one of {'head', 'broadcast', 'custom'}
        Method to strip CDUs
    """
    # TODO: perhaps we could just directly invoke the appropriate
    # educe module here instead of going through the command line?
    cmd = [
        "stac-learning", "extract", corpus, LEX_DIR, output_dir, "--anno",
        ANNOTATORS
    ]
    if vocab_path is not None:
        cmd.extend(['--vocabulary', vocab_path])
    if strip_mode is not None:
        cmd.extend(['--strip-mode', strip_mode])
    call(cmd)
    call(cmd + ["--single"])
Code example #3
File: parse.py Project: eipiplusun/irit-stac
def _graph(lconf, log):
    "Visualise the parses"

    corpus_dir = minicorpus_path(lconf, result=True)
    cmd = ["stac-util", "graph", corpus_dir,
           "--output", corpus_dir]
    call(cmd, stderr=log)
Code example #4
def main(args):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    if args.skip_training:
        tdir = latest_tmp()
    else:
        tdir = current_tmp()
        extract_features(TRAINING_CORPUS, tdir, args.coarse,
                         args.fix_pseudo_rels)
    if TEST_CORPUS is not None:
        train_path = fp.join(tdir, fp.basename(TRAINING_CORPUS))
        label_path = train_path + '.relations.sparse'
        vocab_path = label_path + '.vocab'
        extract_features(TEST_CORPUS, tdir, args.coarse,
                         args.fix_pseudo_rels,
                         vocab_path=vocab_path,
                         label_path=label_path)
    with open(os.path.join(tdir, "versions-gather.txt"), "w") as stream:
        call(["pip", "freeze"], stdout=stream)
    if not args.skip_training:
        latest_dir = latest_tmp()
        force_symlink(fp.basename(tdir), latest_dir)
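The vocab and label paths above follow a plain suffix convention on the corpus basename; a small illustration with assumed values:

from os import path as fp

TRAINING_CORPUS = 'data/rst-dt/TRAINING'  # assumed value
tdir = 'TMP/2016-05-19T1200'              # assumed value

train_path = fp.join(tdir, fp.basename(TRAINING_CORPUS))
label_path = train_path + '.relations.sparse'
vocab_path = label_path + '.vocab'
print(label_path)  # TMP/2016-05-19T1200/TRAINING.relations.sparse
print(vocab_path)  # TMP/2016-05-19T1200/TRAINING.relations.sparse.vocab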
Code example #5
File: gather.py Project: kowey/irit-rst-dt
def extract_features(corpus, output_dir,
                     vocab_path=None,
                     label_path=None):
    """
    Run feature extraction for a particular corpus; and store the
    results in the output directory. Output file name will be
    computed from the corpus file name

    :type: corpus: filepath

    :param: vocab_path: vocabulary to load for feature extraction
    (needed if extracting test data; must ensure we have the same
    vocab in test as we'd have in training)
    """
    # TODO: perhaps we could just directly invoke the appropriate
    # educe module here instead of going through the command line?
    cmd = ["rst-dt-learning", "extract",
           corpus,
           PTB_DIR,
           output_dir,
           '--feature_set', FEATURE_SET]
    if vocab_path is not None:
        cmd.extend(['--vocabulary', vocab_path])
    if label_path is not None:
        cmd.extend(['--labels', label_path])
    call(cmd)
Code example #6
File: parse.py Project: popescuv/irit-stac
def _graph(lconf, log):
    "Visualise the parses"

    corpus_dir = minicorpus_path(lconf, result=True)
    cmd = ["stac-util", "graph", corpus_dir,
           "--output", corpus_dir]
    call(cmd, stderr=log)
Code example #7
def extract_features(corpus, output_dir, coarse, fix_pseudo_rels,
                     vocab_path=None,
                     label_path=None):
    """Extract instances from a corpus, store them in files.

    Run feature extraction for a particular corpus and store the
    results in the output directory. Output file name will be
    computed from the corpus file name.

    Parameters
    ----------
    corpus: filepath
        Path to the corpus.
    output_dir: filepath
        Path to the output folder.
    coarse: boolean
        If True, use coarse-grained relation labels.
    fix_pseudo_rels: boolean
        If True, rewrite pseudo-relations to improve consistency (WIP).
    vocab_path: filepath
        Path to a fixed vocabulary mapping, for feature extraction
        (needed if extracting test data: the same vocabulary should be
        used in train and test).
    label_path: filepath
        Path to a list of labels.
    """
    # TODO: perhaps we could just directly invoke the appropriate
    # educe module here instead of going through the command line?
    cmd = [
        "rst-dt-learning", "extract",
        corpus,
        PTB_DIR,  # TODO make this optional and exclusive from CoreNLP
        output_dir,
        '--feature_set', FEATURE_SET,
    ]
    # NEW 2016-05-19 rewrite pseudo-relations
    if fix_pseudo_rels:
        cmd.extend([
            '--fix_pseudo_rels'
        ])
    # NEW 2016-05-03 use coarse- or fine-grained relation labels
    # NB "coarse" was the previous default
    if coarse:
        cmd.extend([
            '--coarse'
        ])
    if CORENLP_OUT_DIR is not None:
        cmd.extend([
            '--corenlp_out_dir', CORENLP_OUT_DIR,
        ])
    if LECSIE_DATA_DIR is not None:
        cmd.extend([
            '--lecsie_data_dir', LECSIE_DATA_DIR,
        ])
    if vocab_path is not None:
        cmd.extend(['--vocabulary', vocab_path])
    if label_path is not None:
        cmd.extend(['--labels', label_path])
    call(cmd)
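A hedged usage sketch matching the call sites in Code example #4 (paths assumed):

# Hypothetical call, mirroring the training branch of `main` above
extract_features('data/rst-dt/TRAINING', 'TMP/2016-05-19T1200',
                 coarse=True, fix_pseudo_rels=False)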
Code example #8
File: features.py Project: padenis/irit-rst-dt
def main(_):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    call(["rst-dt-learning", "features"])
Code example #9
def _feature_extraction(lconf, log):
    """
    Extract features from our input glozz file
    """
    corpus_dir = minicorpus_path(lconf)
    vocab_path = lconf.mpack_paths(test_data=False)[3]
    cmd = [
        "stac-learning", "extract", "--parsing", "--vocab", vocab_path,
        corpus_dir,
        lconf.abspath(LEX_DIR), lconf.tmp_dir
    ]
    call(cmd, stderr=log)
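The magic index [3] above assumes mpack_paths returns a tuple of multipack file paths with the vocabulary in fourth position; a sketch of that assumption (names hypothetical, tuple assumed to have exactly four elements):

# Assumed layout of the tuple returned by lconf.mpack_paths
edu_input_path, pairings_path, features_path, vocab_path = \
    lconf.mpack_paths(test_data=False)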
Code example #10
File: parse.py Project: eipiplusun/irit-stac
def _resource_extraction(lconf, log):
    """
    Using a previously predicted dialogue act model,
    guess dialogue acts for all the EDUs
    """
    corpus_dir = minicorpus_path(lconf)
    cmd = ["stac-learning", "resource-nps",
           corpus_dir,
           lconf.abspath(LEX_DIR),
           "--output",
           resource_np_path(lconf)]
    call(cmd, stderr=log)
Code example #11
def _resource_extraction(lconf, log):
    """
    Using a previously predicted dialogue act model,
    guess dialogue acts for all the EDUs
    """
    corpus_dir = minicorpus_path(lconf)
    cmd = [
        "stac-learning", "resource-nps", corpus_dir,
        lconf.abspath(LEX_DIR), "--output",
        resource_np_path(lconf)
    ]
    call(cmd, stderr=log)
Code example #12
File: parse.py Project: eipiplusun/irit-stac
def _feature_extraction(lconf, log):
    """
    Extract features from our input glozz file
    """
    corpus_dir = minicorpus_path(lconf)
    vocab_path = lconf.mpack_paths(test_data=False)[3]
    cmd = ["stac-learning", "extract",
           "--parsing",
           "--vocab", vocab_path,
           corpus_dir,
           lconf.abspath(LEX_DIR),
           lconf.tmp_dir]
    call(cmd, stderr=log)
Code example #13
File: gather.py Project: padenis/irit-rst-dt
def main(_):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    tdir = current_tmp()
    call(["rst-dt-learning", "extract", TRAINING_CORPUS, PTB_DIR, tdir,
          '--feature_set', FEATURE_SET])
    with open(os.path.join(tdir, "versions-gather.txt"), "w") as stream:
        call(["pip", "freeze"], stdout=stream)
    latest_dir = latest_tmp()
    force_symlink(os.path.basename(tdir), latest_dir)
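current_tmp, latest_tmp and force_symlink recur throughout these gather mains; a hypothetical sketch of their contract (the real helpers live in each project's local modules):

import os
from datetime import datetime
from os import path as fp

TMP_DIR = 'TMP'  # assumed workspace location

def current_tmp():
    # Hypothetical sketch: a fresh timestamped dir for this gather run.
    tdir = fp.join(TMP_DIR, datetime.now().strftime('%Y-%m-%dT%H%M'))
    if not fp.exists(tdir):
        os.makedirs(tdir)
    return tdir

def latest_tmp():
    # Hypothetical sketch: the 'latest' symlink that force_symlink
    # repoints at the end of each gather run.
    return fp.join(TMP_DIR, 'latest')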
Code example #14
def _create_snapshot_dir(data_dir):
    """
    Instantiate a snapshot dir and return its path
    """

    bname = fp.basename(os.readlink(data_dir))
    snap_dir = fp.join(SNAPSHOTS, bname)
    if not fp.exists(snap_dir):
        os.makedirs(snap_dir)
        link_files(data_dir, snap_dir)
        force_symlink(bname, latest_snap())
    with open(fp.join(snap_dir, "versions-model.txt"), "w") as stream:
        call(["pip", "freeze"], stdout=stream)
    return snap_dir
Code example #15
File: model.py Project: moreymat/irit-stac
def _create_snapshot_dir(data_dir):
    """
    Instantiate a snapshot dir and return its path
    """

    bname = fp.basename(os.readlink(data_dir))
    snap_dir = fp.join(SNAPSHOTS, bname)
    if not fp.exists(snap_dir):
        os.makedirs(snap_dir)
        link_files(data_dir, snap_dir)
        force_symlink(bname, latest_snap())
    with open(fp.join(snap_dir, "versions-model.txt"), "w") as stream:
        call(["pip", "freeze"], stdout=stream)
    return snap_dir
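force_symlink is used here and in the gather mains as an ln -sf equivalent; a minimal sketch of that helper (an assumption, not the projects' actual code):

import os

def force_symlink(source, link_name):
    # Hypothetical sketch: replace link_name if it already exists,
    # then point it at source, like `ln -sf`.
    if os.path.islink(link_name) or os.path.exists(link_name):
        os.remove(link_name)
    os.symlink(source, link_name)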
Code example #16
File: evaluate.py Project: phimit/irit-rst-dt
def _mk_report(parent_dir, lconf, idx_file):
    "Generate reports for scores"
    score_prefix = _score_file_path_prefix(parent_dir, lconf)
    json_file = score_prefix + ".json"
    pretty_file = score_prefix + ".txt"

    with open(pretty_file, "w") as pretty_stream:
        call(["attelo", "report",
              idx_file,
              "--json", json_file],
             stdout=pretty_stream)

    print("Scores summarised in %s" % pretty_file,
          file=sys.stderr)
Code example #17
File: count.py Project: popescuv/irit-stac
def main(args):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    corpora = [TRAINING_CORPUS]
    odir = get_output_dir(args)
    for corpus in corpora:
        ofilename = fp.join(odir, fp.basename(corpus) + ".txt")
        with open(ofilename, 'w') as ofile:
            call(["stac-util", "count", corpus, "--annotator", ANNOTATORS],
                 stdout=ofile)
    announce_output_dir(odir)
Code example #18
File: count.py Project: eipiplusun/irit-stac
def main(args):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    corpora = [TRAINING_CORPUS]
    odir = get_output_dir(args)
    for corpus in corpora:
        ofilename = fp.join(odir, fp.basename(corpus) + ".txt")
        with open(ofilename, 'w') as ofile:
            call(["stac-util", "count", corpus,
                  "--annotator", ANNOTATORS],
                 stdout=ofile)
    announce_output_dir(odir)
Code example #19
File: evaluate.py Project: kowey/attelo
def prepare_dirs(runcfg, data_dir):
    """
    Return eval and scratch directory paths
    """
    eval_prefix = fp.join(data_dir, "eval")
    scratch_prefix = fp.join(data_dir, "scratch")

    eval_current = eval_prefix + '-current'
    scratch_current = scratch_prefix + '-current'
    stage = runcfg.stage

    if (runcfg.mode == 'resume' or stage in [ClusterStage.main,
                                             ClusterStage.combined_models,
                                             ClusterStage.end]):
        if not fp.exists(eval_current) or not fp.exists(scratch_current):
            sys.exit("No currently running evaluation to resume!")
        else:
            eval_dir = fp.realpath(eval_current)
            scratch_dir = fp.realpath(scratch_current)
            # in case there are any new data files to link
            _link_data_files(data_dir, eval_dir)
            return eval_dir, scratch_dir
    else:
        eval_actual_old = fp.realpath(eval_current)
        scratch_actual_old = fp.realpath(scratch_current)
        tstamp = timestamp()
        if _create_tstamped_dir(eval_prefix, tstamp):
            eval_dir = eval_prefix + '-' + tstamp
            scratch_dir = scratch_prefix + '-' + tstamp
            _create_tstamped_dir(scratch_prefix, tstamp)
            _link_data_files(data_dir, eval_dir)
            if runcfg.stage == 'jumpstart':
                _link_fold_files(eval_actual_old, eval_dir)
                _link_model_files(scratch_actual_old, scratch_dir)
        else:
            sys.exit("Try again in one minute")

        with open(fp.join(eval_dir, "versions-evaluate.txt"), "w") as stream:
            call(["pip", "freeze"], stdout=stream)

        return eval_dir, scratch_dir
Code example #20
def prepare_dirs(runcfg, data_dir):
    """
    Return eval and scratch directory paths
    """
    eval_prefix = fp.join(data_dir, "eval")
    scratch_prefix = fp.join(data_dir, "scratch")

    eval_current = eval_prefix + '-current'
    scratch_current = scratch_prefix + '-current'
    stage = runcfg.stage

    if (runcfg.mode == 'resume' or stage in [
            ClusterStage.main, ClusterStage.combined_models, ClusterStage.end
    ]):
        if not fp.exists(eval_current) or not fp.exists(scratch_current):
            sys.exit("No currently running evaluation to resume!")
        else:
            eval_dir = fp.realpath(eval_current)
            scratch_dir = fp.realpath(scratch_current)
            # in case there are any new data files to link
            _link_data_files(data_dir, eval_dir)
            return eval_dir, scratch_dir
    else:
        eval_actual_old = fp.realpath(eval_current)
        scratch_actual_old = fp.realpath(scratch_current)
        tstamp = timestamp()
        if _create_tstamped_dir(eval_prefix, tstamp):
            eval_dir = eval_prefix + '-' + tstamp
            scratch_dir = scratch_prefix + '-' + tstamp
            _create_tstamped_dir(scratch_prefix, tstamp)
            _link_data_files(data_dir, eval_dir)
            if runcfg.stage == 'jumpstart':
                _link_fold_files(eval_actual_old, eval_dir)
                _link_model_files(scratch_actual_old, scratch_dir)
        else:
            sys.exit("Try again in one minute")

        with open(fp.join(eval_dir, "versions-evaluate.txt"), "w") as stream:
            call(["pip", "freeze"], stdout=stream)

        return eval_dir, scratch_dir
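The "Try again in one minute" exit suggests timestamp is minute-granular and _create_tstamped_dir declines to recreate an existing directory; a hypothetical sketch of both helpers:

import os
from datetime import datetime
from os import path as fp

def timestamp():
    # Hypothetical: minute-granularity stamp, hence the one-minute retry.
    return datetime.now().strftime('%Y-%m-%dT%H%M')

def _create_tstamped_dir(prefix, tstamp):
    # Hypothetical sketch: create `<prefix>-<tstamp>` and report whether
    # this run created it (False means another run in the same minute
    # got there first).
    tdir = prefix + '-' + tstamp
    if fp.exists(tdir):
        return False
    os.makedirs(tdir)
    return True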
Code example #21
File: evaluate.py Project: padenis/irit-rst-dt
def _create_eval_dirs(args, data_dir, jumpstart):
    """
    Return eval and scratch directory paths
    """

    eval_current = fp.join(data_dir, "eval-current")
    scratch_current = fp.join(data_dir, "scratch-current")
    stage = args_to_stage(args)

    if args.resume or stage in [ClusterStage.main,
                                ClusterStage.combined_models,
                                ClusterStage.end]:
        if not fp.exists(eval_current) or not fp.exists(scratch_current):
            sys.exit("No currently running evaluation to resume!")
        else:
            return eval_current, scratch_current
    else:
        tstamp = "TEST" if _DEBUG else timestamp()
        eval_dir = fp.join(data_dir, "eval-" + tstamp)
        if not fp.exists(eval_dir):
            os.makedirs(eval_dir)
            _link_data_files(data_dir, eval_dir)
            force_symlink(fp.basename(eval_dir), eval_current)
        elif not _DEBUG:
            sys.exit("Try again in one minute")

        scratch_dir = fp.join(data_dir, "scratch-" + tstamp)
        if not fp.exists(scratch_dir):
            os.makedirs(scratch_dir)
            if jumpstart:
                _link_model_files(scratch_current, scratch_dir)
            force_symlink(fp.basename(scratch_dir), scratch_current)

        with open(fp.join(eval_dir, "versions-evaluate.txt"), "w") as stream:
            call(["pip", "freeze"], stdout=stream)

        return eval_dir, scratch_dir
Code example #22
File: gather.py Project: eipiplusun/irit-stac
def extract_features(corpus, output_dir,
                     vocab_path=None, strip_mode=None):
    """Extract features for a corpus, dump the instances.

    Run feature extraction for a particular corpus; and store the
    results in the output directory. Output file name will be
    computed from the corpus file name.

    This triggers two distinct processes, for pairs of EDUs then for
    single EDUs.

    Parameters
    ----------
    corpus: filepath
        Selected corpus
    output_dir: filepath
        Folder where instances will be dumped
    vocab_path: filepath
        Vocabulary to load for feature extraction (needed if extracting
        test data; must ensure we have the same vocab in test as we'd
        have in training)
    strip_mode: one of {'head', 'broadcast', 'custom'}
        Method to strip CDUs
    """
    # TODO: perhaps we could just directly invoke the appropriate
    # educe module here instead of going through the command line?
    cmd = ["stac-learning", "extract",
           corpus,
           LEX_DIR,
           output_dir,
           "--anno", ANNOTATORS]
    if vocab_path is not None:
        cmd.extend(['--vocabulary', vocab_path])
    if strip_mode is not None:
        cmd.extend(['--strip-mode', strip_mode])
    call(cmd)
    call(cmd + ["--single"])
Code example #23
File: evaluate.py Project: phimit/irit-rst-dt
def main(args):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`
    """
    data_dir = latest_tmp()
    if not os.path.exists(data_dir):
        _exit_ungathered()
    eval_dir, scratch_dir = _create_eval_dirs(args, data_dir)

    with open(os.path.join(eval_dir, "versions.txt"), "w") as stream:
        call(["pip", "freeze"], stdout=stream)

    for corpus in TRAINING_CORPORA:
        dataset = os.path.basename(corpus)
        fold_file = os.path.join(eval_dir,
                                 "folds-%s.json" % dataset)
        lconf = LoopConfig(eval_dir=eval_dir,
                           scratch_dir=scratch_dir,
                           fold_file=fold_file,
                           dataset=dataset)
        _do_corpus(lconf)
Code example #24
File: pipeline.py Project: eipiplusun/irit-stac
def pyt(self, script, *args, **kwargs):
    "call python on one of our scripts"
    abs_script = self.abspath(script)
    cmd = ["python", abs_script] + list(args)
    call(cmd, **kwargs)
Code example #25
File: pipeline.py Project: popescuv/irit-stac
def pyt(self, script, *args, **kwargs):
    "call python on one of our scripts"
    abs_script = self.abspath(script)
    cmd = ["python", abs_script] + list(args)
    call(cmd, **kwargs)
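A hedged usage sketch of pyt from a call site (script name, arguments and log handle all hypothetical):

# Hypothetical usage, assuming `lconf` is the pipeline object that
# defines `pyt` above:
with open('segment.log', 'w') as log:
    lconf.pyt('scripts/segment.py', '--input', 'game.soclog',
              stderr=log)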