Esempio n. 1
0
def _init_corpus(hconf):
    """Set up the data for an evaluation run, creating folds when absent.

    What happens depends on ``hconf.runcfg.stage``:

    * ``None`` (standalone run): the multipack is always loaded; the folds
      are read from ``hconf.fold_file`` when that file exists and are
      created through ``hconf.create_folds`` otherwise.
    * ``ClusterStage.start``: folds are created only when the fold file is
      missing; nothing is returned either way.
    * any other stage: the multipack and the existing fold file are both
      loaded.

    :rtype: DataConfig or None
    """
    folds_on_disk = fp.exists(hconf.fold_file)
    skip_notice = ('Skipping generation of fold files '
                   '(must have been jumpstarted)')
    stage = hconf.runcfg.stage

    if stage is None:
        # standalone mode: the datapack is needed by the later stages,
        # so it is loaded whether or not the folds already exist
        multipack = _load_harness_multipack(hconf)
        if not folds_on_disk:
            folds = hconf.create_folds(multipack)
        else:
            print(skip_notice, file=sys.stderr)
            folds = load_fold_dict(hconf.fold_file)
        return DataConfig(pack=multipack, folds=folds)

    if stage == ClusterStage.start:
        if not folds_on_disk:
            multipack = _load_harness_multipack(hconf)
            hconf.create_folds(multipack)
        else:
            # --start with a fold file already in place: the datapacks
            # were only wanted for fold generation, so skip reading them
            print(skip_notice, file=sys.stderr)
        return None

    # every remaining stage relies on a fold file produced earlier
    multipack = _load_harness_multipack(hconf)
    return DataConfig(pack=multipack,
                      folds=load_fold_dict(hconf.fold_file))
Esempio n. 2
0
def _init_corpus(hconf):
    """Set up the data for an evaluation run, creating folds when absent.

    What happens depends on ``hconf.runcfg.stage``:

    * ``None`` (standalone run): the multipack is always loaded; the folds
      are read from ``hconf.fold_file`` when that file exists and are
      created through ``hconf.create_folds`` otherwise.
    * ``ClusterStage.start``: folds are created only when the fold file is
      missing; nothing is returned either way.
    * any other stage: the multipack and the existing fold file are both
      loaded.

    :rtype: DataConfig or None
    """
    folds_on_disk = fp.exists(hconf.fold_file)
    skip_notice = ('Skipping generation of fold files '
                   '(must have been jumpstarted)')
    stage = hconf.runcfg.stage

    if stage is None:
        # standalone mode: the datapack is needed by the later stages,
        # so it is loaded whether or not the folds already exist
        multipack = _load_harness_multipack(hconf)
        if not folds_on_disk:
            folds = hconf.create_folds(multipack)
        else:
            print(skip_notice, file=sys.stderr)
            folds = load_fold_dict(hconf.fold_file)
        return DataConfig(pack=multipack, folds=folds)

    if stage == ClusterStage.start:
        if not folds_on_disk:
            multipack = _load_harness_multipack(hconf)
            hconf.create_folds(multipack)
        else:
            # --start with a fold file already in place: the datapacks
            # were only wanted for fold generation, so skip reading them
            print(skip_notice, file=sys.stderr)
        return None

    # every remaining stage relies on a fold file produced earlier
    multipack = _load_harness_multipack(hconf)
    return DataConfig(pack=multipack,
                      folds=load_fold_dict(hconf.fold_file))
Esempio n. 3
0
def _do_corpus(lconf):
    """Run the evaluation on a corpus.

    Prints the corpus banner to stderr, loads the data pack, and then runs
    whichever of the fold-generation, per-fold, combined-model and
    global-report steps ``_is_standalone_or`` enables for ``lconf``.
    """
    print(_corpus_banner(lconf), file=sys.stderr)

    edus_path = edu_input_path(lconf)
    if not os.path.exists(edus_path):
        exit_ungathered()

    # the stripped feature file is only considered for the start and end
    # stages, and only when it is actually present on disk
    use_stripped = False
    if lconf.stage in (ClusterStage.end, ClusterStage.start):
        use_stripped = fp.exists(features_path(lconf, stripped=True))

    pairings = pairings_path(lconf)
    features = features_path(lconf, stripped=use_stripped)
    dpack = load_data_pack(edus_path, pairings, features, verbose=True)

    if _is_standalone_or(lconf, ClusterStage.start):
        _generate_fold_file(lconf, dpack)

    # the fold file is read back from disk regardless of the stage
    fold_dict = load_fold_dict(lconf.fold_file)
    dconf = DataConfig(pack=dpack, folds=fold_dict)

    if _is_standalone_or(lconf, ClusterStage.main):
        if lconf.folds is not None:
            # an explicit fold selection takes precedence
            selected = lconf.folds
        else:
            # otherwise run every distinct fold in the fold dictionary
            selected = frozenset(dconf.folds.values())
        for fold in selected:
            _do_fold(lconf, dconf, fold)

    if _is_standalone_or(lconf, ClusterStage.combined_models):
        mk_combined_models(lconf, dconf)

    if _is_standalone_or(lconf, ClusterStage.end):
        mk_global_report(lconf, dconf)
Esempio n. 4
0
 def create_folds(self, mpack):
     """
     Build the fold dictionary, save it to ``self.fold_file``, return it.

     When ``FIXED_FOLD_FILE`` is set, the folds are loaded from that file;
     otherwise they are produced by ``make_n_fold(mpack, 10, rng)`` with
     a generator obtained from ``mk_rng()`` (presumably a 10-fold split).
     """
     if FIXED_FOLD_FILE is not None:
         # a predefined fold assignment takes precedence over a fresh one
         folds = load_fold_dict(FIXED_FOLD_FILE)
     else:
         generator = mk_rng()
         folds = make_n_fold(mpack, 10, generator)
     save_fold_dict(folds, self.fold_file)
     return folds
Esempio n. 5
0
 def create_folds(self, mpack):
     """
     Build the fold dictionary, save it to ``self.fold_file``, return it.

     When ``FIXED_FOLD_FILE`` is set, the folds are loaded from that file;
     otherwise they are produced by ``make_n_fold(mpack, 10, rng)`` with
     a generator obtained from ``mk_rng()`` (presumably a 10-fold split).
     """
     if FIXED_FOLD_FILE is not None:
         # a predefined fold assignment takes precedence over a fresh one
         folds = load_fold_dict(FIXED_FOLD_FILE)
     else:
         generator = mk_rng()
         folds = make_n_fold(mpack, 10, generator)
     save_fold_dict(folds, self.fold_file)
     return folds