Ejemplo n.º 1
0
def _create_tstamped_dir(prefix, suffix):
    """
    Create the directory `prefix`-`suffix` and repoint the
    `prefix`-'current' / `prefix`-'previous' links.

    Given a path prefix (eg. 'foo/bar') and a new suffix (eg. 'quux'),
    the desired path is 'foo/bar-quux'.

    If the desired path already exists, nothing is touched and False
    is returned.  Otherwise:

    1. The directory is created at the desired path
    2. If prefix-'current' exists, prefix-'previous' is linked (via
       `force_symlink`) to the basename of whatever prefix-'current'
       resolves to
    3. prefix-'current' is linked to the basename of the new directory
    4. True is returned
    """
    target_dir = prefix + '-' + suffix
    if fp.exists(target_dir):
        return False

    current_link = prefix + '-current'
    previous_link = prefix + '-previous'

    os.makedirs(target_dir)
    if fp.exists(current_link):
        # resolve 'current' *before* it gets overwritten below, so that
        # 'previous' ends up naming the directory we are replacing
        replaced_dir = fp.realpath(current_link)
        force_symlink(fp.basename(replaced_dir), previous_link)
    # link targets are basenames, i.e. relative to the links' own directory
    force_symlink(fp.basename(target_dir), current_link)
    return True
Ejemplo n.º 2
0
def _create_tstamped_dir(prefix, suffix):
    """
    Create the directory `prefix`-`suffix` and repoint the
    `prefix`-'current' / `prefix`-'previous' links.

    Given a path prefix (eg. 'foo/bar') and a new suffix (eg. 'quux'),
    the desired path is 'foo/bar-quux'.

    If the desired path already exists, nothing is touched and False
    is returned.  Otherwise:

    1. The directory is created at the desired path
    2. If prefix-'current' exists, prefix-'previous' is linked (via
       `force_symlink`) to the basename of whatever prefix-'current'
       resolves to
    3. prefix-'current' is linked to the basename of the new directory
    4. True is returned
    """
    target_dir = prefix + '-' + suffix
    if fp.exists(target_dir):
        return False

    current_link = prefix + '-current'
    previous_link = prefix + '-previous'

    os.makedirs(target_dir)
    if fp.exists(current_link):
        # resolve 'current' *before* it gets overwritten below, so that
        # 'previous' ends up naming the directory we are replacing
        replaced_dir = fp.realpath(current_link)
        force_symlink(fp.basename(replaced_dir), previous_link)
    # link targets are basenames, i.e. relative to the links' own directory
    force_symlink(fp.basename(target_dir), current_link)
    return True
Ejemplo n.º 3
0
def _create_eval_dirs(args, data_dir):
    """
    Return eval and scratch directory paths (as a pair)

    If `args.resume` is set, the pair is `data_dir`/eval-current and
    `data_dir`/scratch-current; the program exits via `sys.exit` if
    either of them does not exist.

    Otherwise the pair is `data_dir`/eval-<tstamp> and
    `data_dir`/scratch-<tstamp>, where <tstamp> is "TEST" when `_DEBUG`
    is set and `timestamp()` otherwise.  Directories that do not exist
    yet are created and the matching '-current' link is pointed at them
    with `force_symlink`.  Outside of debug mode, an already existing
    eval directory makes the program exit.
    """
    current_eval = fp.join(data_dir, "eval-current")
    current_scratch = fp.join(data_dir, "scratch-current")

    if args.resume:
        resumable = fp.exists(current_eval) and fp.exists(current_scratch)
        if not resumable:
            sys.exit("No currently running evaluation to resume!")
        return current_eval, current_scratch

    if _DEBUG:
        stamp = "TEST"
    else:
        stamp = timestamp()

    eval_dir = fp.join(data_dir, "eval-" + stamp)
    if fp.exists(eval_dir):
        # in debug mode the fixed "TEST" directory is simply reused
        if not _DEBUG:
            sys.exit("Try again in literally one second")
    else:
        os.makedirs(eval_dir)
        _link_data_files(data_dir, eval_dir)
        force_symlink(fp.basename(eval_dir), current_eval)

    scratch_dir = fp.join(data_dir, "scratch-" + stamp)
    scratch_is_new = not fp.exists(scratch_dir)
    if scratch_is_new:
        os.makedirs(scratch_dir)
        force_symlink(fp.basename(scratch_dir), current_scratch)

    return eval_dir, scratch_dir
Ejemplo n.º 4
0
def main(args):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`

    Works in `current_tmp()` and extracts features for TRAINING_CORPUS,
    unless `args.skip_training` is set, in which case `latest_tmp()` is
    reused as is.  If TEST_CORPUS is defined, its features are extracted
    too, using the vocabulary file named after the training corpus.
    A `pip freeze` call is then issued with its stdout directed to
    versions-gather.txt in the working directory, and (when training was
    not skipped) the `latest_tmp()` link is pointed at that directory.
    """
    if not args.skip_training:
        tdir = current_tmp()
        extract_features(TRAINING_CORPUS, tdir, strip_mode=args.strip_mode)
    else:
        tdir = latest_tmp()

    if TEST_CORPUS is not None:
        # the test features must share the vocabulary built from training
        vocab_name = fp.basename(TRAINING_CORPUS) + '.relations.sparse.vocab'
        vocab_path = fp.join(tdir, vocab_name)
        extract_features(TEST_CORPUS, tdir,
                         vocab_path=vocab_path,
                         strip_mode=args.strip_mode)

    versions_path = os.path.join(tdir, "versions-gather.txt")
    with open(versions_path, "w") as versions_file:
        call(["pip", "freeze"], stdout=versions_file)

    if not args.skip_training:
        link_path = latest_tmp()
        force_symlink(fp.basename(tdir), link_path)
Ejemplo n.º 5
0
def main(args):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`

    Works in `current_tmp()` and extracts features for TRAINING_CORPUS,
    unless `args.skip_training` is set, in which case `latest_tmp()` is
    reused as is.  If TEST_CORPUS is defined, its features are extracted
    too, using the label and vocabulary files named after the training
    corpus.  A `pip freeze` call is then issued with its stdout directed
    to versions-gather.txt in the working directory, and (when training
    was not skipped) the `latest_tmp()` link is pointed at that directory.
    """
    if not args.skip_training:
        tdir = current_tmp()
        extract_features(TRAINING_CORPUS, tdir, args.coarse,
                         args.fix_pseudo_rels)
    else:
        tdir = latest_tmp()

    if TEST_CORPUS is not None:
        # both files are derived from the training corpus basename:
        # <tdir>/<corpus>.relations.sparse and the same with '.vocab'
        training_stem = fp.join(tdir, fp.basename(TRAINING_CORPUS))
        labels = training_stem + '.relations.sparse'
        vocab = labels + '.vocab'
        extract_features(TEST_CORPUS, tdir, args.coarse,
                         args.fix_pseudo_rels,
                         vocab_path=vocab,
                         label_path=labels)

    versions_path = os.path.join(tdir, "versions-gather.txt")
    with open(versions_path, "w") as versions_file:
        call(["pip", "freeze"], stdout=versions_file)

    if not args.skip_training:
        link_path = latest_tmp()
        force_symlink(fp.basename(tdir), link_path)
Ejemplo n.º 6
0
def _create_snapshot_dir(data_dir):
    """
    Instantiate a snapshot dir and return its path

    `data_dir` is read with `os.readlink`, so it has to be a symbolic
    link; the snapshot is named after the basename of the link target
    and lives under SNAPSHOTS.  A snapshot that does not exist yet is
    created, filled through `link_files`, and `latest_snap()` is pointed
    at it.  In all cases a `pip freeze` call is issued with its stdout
    directed to versions-model.txt inside the snapshot.
    """
    link_target = os.readlink(data_dir)
    snapshot_name = fp.basename(link_target)
    snapshot_path = fp.join(SNAPSHOTS, snapshot_name)

    is_new_snapshot = not fp.exists(snapshot_path)
    if is_new_snapshot:
        os.makedirs(snapshot_path)
        link_files(data_dir, snapshot_path)
        force_symlink(snapshot_name, latest_snap())

    # (re)written on every call, even for a pre-existing snapshot
    versions_path = fp.join(snapshot_path, "versions-model.txt")
    with open(versions_path, "w") as versions_file:
        call(["pip", "freeze"], stdout=versions_file)
    return snapshot_path
Ejemplo n.º 7
0
def _create_snapshot_dir(data_dir):
    """
    Instantiate a snapshot dir and return its path

    `data_dir` is read with `os.readlink`, so it has to be a symbolic
    link; the snapshot is named after the basename of the link target
    and lives under SNAPSHOTS.  A snapshot that does not exist yet is
    created, filled through `link_files`, and `latest_snap()` is pointed
    at it.  In all cases a `pip freeze` call is issued with its stdout
    directed to versions-model.txt inside the snapshot.
    """
    link_target = os.readlink(data_dir)
    snapshot_name = fp.basename(link_target)
    snapshot_path = fp.join(SNAPSHOTS, snapshot_name)

    is_new_snapshot = not fp.exists(snapshot_path)
    if is_new_snapshot:
        os.makedirs(snapshot_path)
        link_files(data_dir, snapshot_path)
        force_symlink(snapshot_name, latest_snap())

    # (re)written on every call, even for a pre-existing snapshot
    versions_path = fp.join(snapshot_path, "versions-model.txt")
    with open(versions_path, "w") as versions_file:
        call(["pip", "freeze"], stdout=versions_file)
    return snapshot_path
Ejemplo n.º 8
0
def main(_):
    """
    Subcommand main.

    You shouldn't need to call this yourself if you're using
    `config_argparser`

    Runs the `rst-dt-learning extract` command on TRAINING_CORPUS and
    PTB_DIR with `current_tmp()` as output directory, issues a
    `pip freeze` call with its stdout directed to versions-gather.txt in
    that directory, then points the `latest_tmp()` link at it.
    The (ignored) argument is the parsed command line.
    """
    tdir = current_tmp()

    extract_cmd = ["rst-dt-learning", "extract",
                   TRAINING_CORPUS, PTB_DIR, tdir,
                   '--feature_set', FEATURE_SET]
    call(extract_cmd)

    versions_path = os.path.join(tdir, "versions-gather.txt")
    with open(versions_path, "w") as versions_file:
        call(["pip", "freeze"], stdout=versions_file)

    link_path = latest_tmp()
    force_symlink(os.path.basename(tdir), link_path)
Ejemplo n.º 9
0
def _format_decoder_output(lconf, log):
    """
    Convert decoder output to Glozz (for visualisation really)
    and copy it to resultcorpus

    The result document directory is created first; the unannotated,
    parsed and pos-tagged stages are then linked from the minicorpus
    into its result counterpart.  For every evaluation configuration in
    `lconf.evaluations`, the 'simple-da' units are linked under the name
    given by `parsed_bname` and the parser/parse-to-glozz script is run
    through `lconf.pyt`, with `log` as its stderr.
    """
    makedirs(minicorpus_doc_path(lconf, result=True))

    # unannotated
    unannotated_src = unannotated_dir_path(lconf)
    unannotated_tgt = unannotated_dir_path(lconf, result=True)
    force_symlink(unannotated_src, unannotated_tgt)

    # parsed, postagged
    for stage in ("parsed", "pos-tagged"):
        stage_src = minicorpus_stage_path(lconf, stage)
        stage_tgt = minicorpus_stage_path(lconf, stage, result=True)
        force_symlink(stage_src, stage_tgt)

    for econf in lconf.evaluations:
        # units/foo
        units_src = minicorpus_stage_path(lconf, "units")
        units_tgt = minicorpus_stage_path(lconf, "units", result=True)
        makedirs(units_tgt)
        link_src = fp.join(units_src, 'simple-da')
        link_tgt = fp.join(units_tgt, parsed_bname(lconf, econf))
        force_symlink(link_src, link_tgt)

        # discourse
        lconf.pyt("parser/parse-to-glozz",
                  minicorpus_path(lconf),
                  attelo_result_path(lconf, econf),
                  minicorpus_path(lconf, result=True),
                  stderr=log)
Ejemplo n.º 10
0
def _format_decoder_output(lconf, log):
    """
    Convert decoder output to Glozz (for visualisation really)
    and copy it to resultcorpus

    The result document directory is created first; the unannotated,
    parsed and pos-tagged stages are then linked from the minicorpus
    into its result counterpart.  For every evaluation configuration in
    `lconf.evaluations`, the 'simple-da' units are linked under the name
    given by `parsed_bname` and the parser/parse-to-glozz script is run
    through `lconf.pyt`, with `log` as its stderr.
    """
    makedirs(minicorpus_doc_path(lconf, result=True))

    # unannotated
    unannotated_src = unannotated_dir_path(lconf)
    unannotated_tgt = unannotated_dir_path(lconf, result=True)
    force_symlink(unannotated_src, unannotated_tgt)

    # parsed, postagged
    for stage in ("parsed", "pos-tagged"):
        stage_src = minicorpus_stage_path(lconf, stage)
        stage_tgt = minicorpus_stage_path(lconf, stage, result=True)
        force_symlink(stage_src, stage_tgt)

    for econf in lconf.evaluations:
        # units/foo
        units_src = minicorpus_stage_path(lconf, "units")
        units_tgt = minicorpus_stage_path(lconf, "units", result=True)
        makedirs(units_tgt)
        link_src = fp.join(units_src, 'simple-da')
        link_tgt = fp.join(units_tgt, parsed_bname(lconf, econf))
        force_symlink(link_src, link_tgt)

        # discourse
        lconf.pyt("parser/parse-to-glozz",
                  minicorpus_path(lconf),
                  attelo_result_path(lconf, econf),
                  minicorpus_path(lconf, result=True),
                  stderr=log)
Ejemplo n.º 11
0
def _create_eval_dirs(args, data_dir, jumpstart):
    """
    Return eval and scratch directory paths (as a pair)

    If `args.resume` is set, or the stage computed by `args_to_stage`
    is one of ClusterStage.main, ClusterStage.combined_models or
    ClusterStage.end, the pair is `data_dir`/eval-current and
    `data_dir`/scratch-current; the program exits via `sys.exit` if
    either of them does not exist.

    Otherwise the pair is `data_dir`/eval-<tstamp> and
    `data_dir`/scratch-<tstamp>, where <tstamp> is "TEST" when `_DEBUG`
    is set and `timestamp()` otherwise.  Directories that do not exist
    yet are created and the matching '-current' link is pointed at them
    with `force_symlink`; with `jumpstart`, `_link_model_files` is called
    on the old scratch-current and the new scratch directory first.
    Outside of debug mode, an already existing eval directory makes the
    program exit.  A `pip freeze` call is finally issued with its stdout
    directed to versions-evaluate.txt in the eval directory.
    """
    current_eval = fp.join(data_dir, "eval-current")
    current_scratch = fp.join(data_dir, "scratch-current")
    stage = args_to_stage(args)

    reuse_current = args.resume or stage in (ClusterStage.main,
                                             ClusterStage.combined_models,
                                             ClusterStage.end)
    if reuse_current:
        have_both = fp.exists(current_eval) and fp.exists(current_scratch)
        if not have_both:
            sys.exit("No currently running evaluation to resume!")
        return current_eval, current_scratch

    if _DEBUG:
        stamp = "TEST"
    else:
        stamp = timestamp()

    eval_dir = fp.join(data_dir, "eval-" + stamp)
    if fp.exists(eval_dir):
        # in debug mode the fixed "TEST" directory is simply reused
        if not _DEBUG:
            sys.exit("Try again in one minute")
    else:
        os.makedirs(eval_dir)
        _link_data_files(data_dir, eval_dir)
        force_symlink(fp.basename(eval_dir), current_eval)

    scratch_dir = fp.join(data_dir, "scratch-" + stamp)
    scratch_is_new = not fp.exists(scratch_dir)
    if scratch_is_new:
        os.makedirs(scratch_dir)
        if jumpstart:
            # must happen while scratch-current still names the old
            # scratch directory, i.e. before the link is replaced below
            _link_model_files(current_scratch, scratch_dir)
        force_symlink(fp.basename(scratch_dir), current_scratch)

    versions_path = fp.join(eval_dir, "versions-evaluate.txt")
    with open(versions_path, "w") as versions_file:
        call(["pip", "freeze"], stdout=versions_file)

    return eval_dir, scratch_dir