Ejemplo n.º 1
0
 def _extract_discr(mpaths):
     """Extract discriminating features from trained models.

     :param mpaths: dict with 'attach' and 'label' keys mapping to
         paths of serialized (joblib) model files
     :return: whatever ``discriminating_features`` returns for the
         loaded models (top ``_top_n`` features)

     NOTE(review): relies on module globals ``dconf`` (data config
     with a ``pack`` mapping) and ``_top_n`` — confirm they are set
     before this is called.
     """
     # dict.values() is a view in Python 3 and is not subscriptable;
     # next(iter(...)) picks an arbitrary first pack under both 2 and 3
     dpack0 = next(iter(dconf.pack.values()))
     labels = dpack0.labels
     vocab = dpack0.vocab
     models = Team(attach=mpaths['attach'],
                   label=mpaths['label']).fmap(joblib.load)
     return discriminating_features(models, labels, vocab, _top_n)
Ejemplo n.º 2
0
 def test_postlabel_parser(self):
     """Exercise PostlabelPipeline with every learner/decoder pairing."""
     perc_attach = StructuredPerceptron(MST_DECODER, n_iter=3,
                                        average=True, use_prob=False)
     extra_team = Team(attach=perc_attach,
                       label=SklearnLabelClassifier(LogisticRegression()))
     all_learners = LEARNERS + [extra_team]
     for team in all_learners:
         for decoder in DECODERS:
             self._test_parser(
                 PostlabelPipeline(learner_attach=team.attach,
                                   learner_label=team.label,
                                   decoder=decoder))
Ejemplo n.º 3
0
 def test_intra_parsers(self):
     """Run every intra/inter parser variant over a dpack."""
     # each pipeline gets its own pair of freshly constructed classifiers
     team_intra = Team(
         attach=SklearnAttachClassifier(LogisticRegression()),
         label=SklearnLabelClassifier(LogisticRegression()))
     team_inter = Team(
         attach=SklearnAttachClassifier(LogisticRegression()),
         label=SklearnLabelClassifier(LogisticRegression()))
     # note: these are chosen a bit randomly
     p_intra = JointPipeline(learner_attach=team_intra.attach,
                             learner_label=team_intra.label,
                             decoder=MST_DECODER)
     p_inter = PostlabelPipeline(learner_attach=team_inter.attach,
                                 learner_label=team_inter.label,
                                 decoder=MST_DECODER)
     parsers = []
     for make_parser in (SentOnlyParser, SoftParser, HeadToHeadParser):
         parsers.append(
             make_parser(IntraInterPair(intra=p_intra, inter=p_inter)))
     for parser in parsers:
         self._test_parser(parser)
Ejemplo n.º 4
0
 def test_postlabel_parser(self):
     """Run PostlabelPipeline over every learner/decoder combination."""
     perceptron_team = Team(
         attach=StructuredPerceptron(MST_DECODER, LOCAL_PERC_ARGS),
         label=SklearnLabelClassifier(LogisticRegression()))
     for learner, decoder in itr.product(LEARNERS + [perceptron_team],
                                         DECODERS):
         self._test_parser(
             PostlabelPipeline(learner_attach=learner.attach,
                               learner_label=learner.label,
                               decoder=decoder))
Ejemplo n.º 5
0
    os.makedirs(TMP_OUTPUT)

# load the data
# NOTE(review): PREFIX is defined earlier in the file (not visible here);
# the four sibling data files are expected to share this path prefix
mpack = load_multipack(PREFIX + '.edus',
                       PREFIX + '.pairings',
                       PREFIX + '.features.sparse',
                       PREFIX + '.features.sparse.vocab',
                       verbose=True)

# divide the dataset into folds
# cap the fold count at the number of documents so no fold is empty
num_folds = min((10, len(mpack)))
fold_dict = make_n_fold(mpack, num_folds, mk_rng())

# select a decoder and a learner team
# MST decoding with an artificial root node; one attachment classifier
# and one labelling classifier, both logistic regressions
decoder = MstDecoder(root_strategy=MstRootStrategy.fake_root)
learners = Team(attach=SklearnAttachClassifier(LogisticRegression()),
                label=SklearnLabelClassifier(LogisticRegression()))

# put them together as a parser
# JointPipeline predicts attachment and label in a single pass
parser = JointPipeline(learner_attach=learners.attach,
                       learner_label=learners.label,
                       decoder=decoder)

# run cross-fold evaluation
# collects one score entry per fold (loop follows below)
scores = []
for fold in range(num_folds):
    print(">>> doing fold ", fold + 1, file=sys.stderr)
    print("training ... ", file=sys.stderr)
    # learn a model for the training data for this fold
    train_packs = select_training(mpack, fold_dict, fold).values()
    parser.fit(train_packs, [x.target for x in train_packs])