Example #1
def label_learner_dp_perc():
    "return a keyed instance of perceptron learner"
    return Keyed(
        'dp-perc',
        SklearnLabelClassifier(
            Perceptron(n_iter=LOCAL_N_ITER,
                       verbose=VERBOSE,
                       average=LOCAL_AVG,
                       use_prob=LOCAL_USE_PROB)))
Example #2
def label_learner_dp_pa():
    "return a keyed instance of passive aggressive learner"
    return Keyed(
        'dp-pa',
        SklearnLabelClassifier(
            PassiveAggressive(C=LOCAL_C,
                              n_iter=LOCAL_N_ITER,
                              verbose=VERBOSE,
                              average=LOCAL_AVG,
                              use_prob=LOCAL_USE_PROB)))
Example #3
File: tests.py Project: moreymat/attelo
def test_postlabel_parser(self):
    learners = LEARNERS + [
        Team(attach=StructuredPerceptron(
            MST_DECODER, n_iter=3, average=True, use_prob=False),
             label=SklearnLabelClassifier(LogisticRegression())),
    ]
    for lrn, dcd in itr.product(learners, DECODERS):
        parser = PostlabelPipeline(learner_attach=lrn.attach,
                                   learner_label=lrn.label,
                                   decoder=dcd)
        self._test_parser(parser)
Example #4
File: tests.py Project: moreymat/attelo
def test_intra_parsers(self):
    'test all intra/inter parsers on a dpack'
    learner_intra = Team(
        attach=SklearnAttachClassifier(LogisticRegression()),
        label=SklearnLabelClassifier(LogisticRegression()))
    learner_inter = Team(
        attach=SklearnAttachClassifier(LogisticRegression()),
        label=SklearnLabelClassifier(LogisticRegression()))
    # note: these are chosen a bit randomly
    p_intra = JointPipeline(learner_attach=learner_intra.attach,
                            learner_label=learner_intra.label,
                            decoder=MST_DECODER)
    p_inter = PostlabelPipeline(learner_attach=learner_inter.attach,
                                learner_label=learner_inter.label,
                                decoder=MST_DECODER)
    parsers = [
        mk_p(IntraInterPair(intra=p_intra, inter=p_inter))
        for mk_p in [SentOnlyParser, SoftParser, HeadToHeadParser]
    ]
    for parser in parsers:
        self._test_parser(parser)
Example #5
def test_postlabel_parser(self):
    learners = LEARNERS + [
        Team(attach=StructuredPerceptron(MST_DECODER,
                                         LOCAL_PERC_ARGS),
             label=SklearnLabelClassifier(LogisticRegression())),
    ]
    for l, d in itr.product(learners, DECODERS):
        parser = PostlabelPipeline(learner_attach=l.attach,
                                   learner_label=l.label,
                                   decoder=d)
        self._test_parser(parser)
Example #6
def label_learner_dp_pa():
    "return a keyed instance of passive aggressive learner"
    return Keyed('dp-pa',
                 SklearnLabelClassifier(PassiveAggressive(LOCAL_PA_ARGS)))
Example #7
def label_learner_dp_perc():
    "return a keyed instance of perceptron learner"
    return Keyed('dp-perc',
                 SklearnLabelClassifier(Perceptron(LOCAL_PERC_ARGS)))
Example #8
def label_learner_pa():
    "return a keyed instance of passive aggressive learner"
    learner = sk.PassiveAggressiveClassifier(n_iter=LOCAL_PA_ARGS.iterations)
    return Keyed('pa', SklearnLabelClassifier(learner))
Example #9
def label_learner_perc():
    "return a keyed instance of perceptron learner"
    learner = sk.Perceptron(n_iter=LOCAL_PERC_ARGS.iterations)
    return Keyed('perc', SklearnLabelClassifier(learner))
Example #10
File: local.py Project: jrmyp/irit-stac
def label_learner_rndforest():
    "return a keyed instance of decision tree learner"
    return Keyed('rndforest', SklearnLabelClassifier(RandomForestClassifier()))
Example #11
File: local.py Project: jrmyp/irit-stac
def label_learner_dectree():
    "return a keyed instance of decision tree learner"
    return Keyed('dectree', SklearnLabelClassifier(DecisionTreeClassifier()))
Example #12
def label_learner_pa():
    "return a keyed instance of passive aggressive learner"
    learner = sk.PassiveAggressiveClassifier(C=LOCAL_C,
                                             n_iter=LOCAL_N_ITER,
                                             class_weight=LOCAL_CLASS_WEIGHT)
    return Keyed('pa', SklearnLabelClassifier(learner))
Example #13
def label_learner_perc():
    "return a keyed instance of perceptron learner"
    learner = sk.Perceptron(n_iter=LOCAL_N_ITER,
                            class_weight=LOCAL_CLASS_WEIGHT)
    return Keyed('perc', SklearnLabelClassifier(learner))
Example #14
def label_learner_rndforest():
    "return a keyed instance of decision tree learner"
    return Keyed('rndforest',
                 SklearnLabelClassifier(RandomForestClassifier(
                     n_estimators=100, n_jobs=1)))
Example #15
# load the data
mpack = load_multipack(PREFIX + '.edus',
                       PREFIX + '.pairings',
                       PREFIX + '.features.sparse',
                       PREFIX + '.features.sparse.vocab',
                       verbose=True)

# divide the dataset into folds
num_folds = min((10, len(mpack)))
fold_dict = make_n_fold(mpack, num_folds, mk_rng())

# select a decoder and a learner team
decoder = MstDecoder(root_strategy=MstRootStrategy.fake_root)
learners = Team(attach=SklearnAttachClassifier(LogisticRegression()),
                label=SklearnLabelClassifier(LogisticRegression()))

# put them together as a parser
parser = JointPipeline(learner_attach=learners.attach,
                       learner_label=learners.label,
                       decoder=decoder)

# run cross-fold evaluation
scores = []
for fold in range(num_folds):
    print(">>> doing fold ", fold + 1, file=sys.stderr)
    print("training ... ", file=sys.stderr)
    # learn a model for the training data for this fold
    train_packs = select_training(mpack, fold_dict, fold).values()
    parser.fit(train_packs, [x.target for x in train_packs])
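    # --- hedged continuation sketch, not part of the original excerpt ---
    # The excerpt stops after fitting; a typical continuation would score the
    # held-out data for this fold.  This assumes attelo.fold also provides
    # select_testing (mirroring select_training used above) and that attelo
    # parsers expose a transform() method returning predictions for a datapack.
    print("testing ... ", file=sys.stderr)
    test_packs = select_testing(mpack, fold_dict, fold).values()
    for dpack in test_packs:
        predicted = parser.transform(dpack)
        # append whatever per-fold evaluation you need to `scores` here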
Example #16
class TinyHarness(Harness):
    """Example harness that runs on the example data
    """
    _maxent_a = Keyed('maxent', SklearnAttachClassifier(LogisticRegression()))
    _maxent_l = Keyed('maxent', SklearnLabelClassifier(LogisticRegression()))
    _maxent = LearnerConfig(attach=_maxent_a, label=_maxent_l)
    _decoder1 = MstDecoder(root_strategy=MstRootStrategy.fake_root)
    _decoder2 = LocallyGreedy()
    _parser1 = Keyed(
        "mst-j",
        JointPipeline(_maxent.attach.payload, _maxent.label.payload,
                      _decoder1))
    _parser2 = Keyed(
        "greedy-p",
        PostlabelPipeline(_maxent.attach.payload, _maxent.label.payload,
                          _decoder2))
    _evaluations = [
        EvaluationConfig(key="maxent-mst-j",
                         settings=Keyed('j', None),
                         learner=_maxent,
                         parser=_parser1),
        EvaluationConfig(key="maxent-greedy-p",
                         settings=Keyed('p', None),
                         learner=_maxent,
                         parser=_parser2)
    ]

    def __init__(self):
        self._datadir = mkdtemp()
        for cpath in glob.glob('doc/example-corpus/*'):
            shutil.copy(cpath, self._datadir)
        super(TinyHarness, self).__init__('tiny', None)

    def run(self):
        """Run the evaluation
        """
        runcfg = RuntimeConfig.empty()
        eval_dir, scratch_dir = prepare_dirs(runcfg, self._datadir)
        self.load(runcfg, eval_dir, scratch_dir)
        evaluate_corpus(self)

    @property
    def evaluations(self):
        return self._evaluations

    @property
    def test_evaluation(self):
        return None

    def create_folds(self, mpack):
        return attelo.fold.make_n_fold(mpack, 2, None)

    def mpack_paths(self, _, stripped=False):
        """Return a dict of paths needed to read a datapack.

        The 2nd argument denoted by '_' is test_data, which is unused in
        this example.
        """
        core_path = fp.join(self._datadir, 'tiny')
        return {
            'edu_input': core_path + '.edus',
            'pairings': core_path + '.pairings',
            'features': core_path + '.features.sparse',
            'vocab': core_path + '.features.sparse.vocab'
        }

    def _model_basename(self, rconf, mtype, ext):
        "Basic filename for a model"

        if 'attach' in mtype:
            rsubconf = rconf.attach
        else:
            rsubconf = rconf.label

        template = '{dataset}.{learner}.{task}.{ext}'
        return template.format(dataset=self.dataset,
                               learner=rsubconf.key,
                               task=mtype,
                               ext=ext)

    def model_paths(self, rconf, fold, parser):
        if fold is None:
            parent_dir = self.combined_dir_path()
        else:
            parent_dir = self.fold_dir_path(fold)

        def _eval_model_path(mtype):
            "Model for a given loop/eval config and fold"
            bname = self._model_basename(rconf, mtype, 'model')
            return fp.join(parent_dir, bname)

        return {
            'attach': _eval_model_path("attach"),
            'label': _eval_model_path("relate")
        }
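A minimal way to exercise the harness above, sketched under the assumption that nothing beyond the class as defined here is needed:

if __name__ == '__main__':
    # copy the example corpus into a temp dir, then run both configured
    # evaluations end to end
    TinyHarness().run()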
Example #17
File: local.py Project: jrmyp/irit-stac
def label_learner_maxent():
    "return a keyed instance of maxent learner"
    return Keyed('maxent', SklearnLabelClassifier(LogisticRegression()))
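For reference, Keyed simply pairs a short string identifier with a payload, as Example #16 shows via .key and .payload; a small illustrative sketch (the variable names below are hypothetical):

keyed = label_learner_maxent()
print(keyed.key)            # 'maxent'
classifier = keyed.payload  # the wrapped SklearnLabelClassifier to train with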