Ejemplo n.º 1
0
def _mk_basic_intras(klearner, kconf):
    """Build intra/inter parsers that reuse one core parser at both levels.

    Each core parser obtained from `klearner` is paired with itself and the
    pair is combined with `kconf`; the combined parsers are returned as a
    list, in the order the core parsers were produced.
    """
    combined = []
    for core_parser in _core_parsers(klearner):
        # the same parser serves as both members of the pair
        same_pair = IntraInterPair(core_parser, core_parser)
        combined.append(combine_intra(same_pair, kconf))
    return combined
Ejemplo n.º 2
0
def _mk_dorc_intras(klearner, kconf):
    """Build intra/inter parsers pairing a core parser with a document oracle.

    The intra member comes from `klearner` (core parsers requested with
    `unique_real_root=False`), the inter member from `ORACLE`. Pairs are
    formed positionally and then combined with `kconf`, with the intra
    parser as primary.
    """
    learner_parsers = _core_parsers(klearner, unique_real_root=False)
    oracle_parsers = _core_parsers(ORACLE)
    # first build every pair, then combine them
    pairs = []
    for intra_parser, inter_parser in zip(learner_parsers, oracle_parsers):
        pairs.append(IntraInterPair(intra=intra_parser, inter=inter_parser))
    return [combine_intra(pair, kconf, primary='intra') for pair in pairs]
Ejemplo n.º 3
0
def _mk_sorc_intras(klearner, kconf):
    """Build intra/inter parsers pairing a sentence oracle with a core parser.

    The intra member comes from `ORACLE`, the inter member from `klearner`.
    Pairs are formed positionally and then combined with `kconf`, with the
    inter parser as primary.
    """
    oracle_parsers = _core_parsers(ORACLE)
    learner_parsers = _core_parsers(klearner)
    # first build every pair, then combine them
    pairs = []
    for intra_parser, inter_parser in zip(oracle_parsers, learner_parsers):
        pairs.append(IntraInterPair(intra=intra_parser, inter=inter_parser))
    return [combine_intra(pair, kconf, primary='inter') for pair in pairs]
Ejemplo n.º 4
0
def _mk_basic_intras(klearner, kconf):
    """Build intra/inter parsers where both members come from `klearner`.

    The intra parsers are requested with `unique_real_root=False`, i.e. they
    are explicitly allowed to have more than one real root (needed for the
    Eisner decoder, maybe for other decoders too); the inter parsers use the
    default setting. Pairs are formed positionally, then combined with
    `kconf`.
    """
    intra_parsers = _core_parsers(klearner, unique_real_root=False)
    inter_parsers = _core_parsers(klearner)
    # first build every pair, then combine them
    pairs = []
    for intra_parser, inter_parser in zip(intra_parsers, inter_parsers):
        pairs.append(IntraInterPair(intra=intra_parser, inter=inter_parser))
    return [combine_intra(pair, kconf) for pair in pairs]
Ejemplo n.º 5
0
def _mk_last_intras(klearner, kconf):
    """Build intra/inter parsers using the "last" baseline for intra.

    `kconf` is re-keyed by combining 'last' with the original configuration
    (its payload is kept). The intra member of every pair is a single
    configuration built with `mk_post` around `decoder_last()`; the inter
    member is each core parser of `klearner` in turn. The inter parser is
    primary.
    """
    last_key = combined_key('last', kconf)
    last_kconf = Keyed(key=last_key, payload=kconf.payload)
    econf_last = mk_post(klearner, decoder_last())

    combined = []
    for inter_parser in _core_parsers(klearner):
        pair = IntraInterPair(intra=econf_last, inter=inter_parser)
        combined.append(combine_intra(pair, last_kconf, primary='inter'))
    return combined
Ejemplo n.º 6
0
def _mk_last_intras(klearner, kconf):
    """Build parsers using "last" for intra and a core decoder for inter.

    Returns an empty list unless both the attach and the label learners of
    `klearner` report `can_predict_proba`. Otherwise `kconf` is re-keyed by
    combining 'last' with the original configuration (payload kept), the
    intra member is built with `mk_joint` around `decoder_last()`, and it is
    paired with each core parser of `klearner` as the inter member. The
    inter parser is primary.
    """
    attach_learner = klearner.attach
    if not (attach_learner.payload.can_predict_proba and
            klearner.label.payload.can_predict_proba):
        return []

    last_key = combined_key('last', kconf)
    last_kconf = Keyed(key=last_key, payload=kconf.payload)
    econf_last = mk_joint(klearner, decoder_last())
    # first build every pair, then combine them
    pairs = []
    for inter_parser in _core_parsers(klearner):
        pairs.append(IntraInterPair(intra=econf_last, inter=inter_parser))
    return [combine_intra(pair, last_kconf, primary='inter')
            for pair in pairs]
Ejemplo n.º 7
0
def _evaluations():
    """Enumerate the evaluations we want to run.

    Two families of parsers are collected:

    * one-step (global) parsers: the core parsers of every local learner
      and of every structured learner instantiated with a non-probabilistic
      Eisner decoder;
    * two-step parsers: (intra, inter) pairs of core parsers, each combined
      with every configuration in `_INTRA_INTER_CONFIGS`.

    Returns a list of the collected items for which `_is_junk` is false.
    """
    res = []

    # == one-step (global) parsers ==
    learners = []
    learners.extend(_LOCAL_LEARNERS)
    # current structured learners don't do probs, hence non-prob decoders
    nonprob_eisner = EisnerDecoder(use_prob=False)
    # each structured learner is a callable taking the decoder
    learners.extend(l(nonprob_eisner) for l in _STRUCTURED_LEARNERS)
    # MST is disabled by default, as it does not output projective trees
    # nonprob_mst = MstDecoder(MstRootStrategy.fake_root, False)
    # learners.extend(l(nonprob_mst) for l in _STRUCTURED_LEARNERS)
    # lazily flatten the core parsers of all learners; consumed right below
    global_parsers = itr.chain.from_iterable(_core_parsers(l)
                                             for l in learners)
    res.extend(global_parsers)

    # == two-step parsers: intra then inter-sentential ==
    ii_learners = []  # (intra, inter) learners
    # every local learner except the oracle, as two independent deep copies
    ii_learners.extend((copy.deepcopy(klearner), copy.deepcopy(klearner))
                       for klearner in _LOCAL_LEARNERS
                       if klearner != ORACLE)
    # keep pointer to intra and inter oracles
    ii_oracles = (copy.deepcopy(ORACLE), ORACLE_INTER)
    ii_learners.append(ii_oracles)
    # structured learners, cf. supra; intra and inter each get their own
    # decoder instance
    intra_nonprob_eisner = EisnerDecoder(use_prob=False,
                                         unique_real_root=True)
    inter_nonprob_eisner = EisnerDecoder(use_prob=False,
                                         unique_real_root=True)
    ii_learners.extend((copy.deepcopy(l)(intra_nonprob_eisner),
                        copy.deepcopy(l)(inter_nonprob_eisner))
                       for l in _STRUCTURED_LEARNERS)
    # couples of learners with either sentence- or document-level oracle
    # sentence oracle: the intra learner is replaced by the intra oracle;
    # pairs already present in ii_learners are skipped
    sorc_ii_learners = [
        (ii_oracles[0], inter_lnr) for intra_lnr, inter_lnr in ii_learners
        if (ii_oracles[0], inter_lnr) not in ii_learners
    ]
    # document oracle: the inter learner is replaced by the inter oracle;
    # pairs already present in ii_learners are skipped
    dorc_ii_learners = [
        (intra_lnr, ii_oracles[1]) for intra_lnr, inter_lnr in ii_learners
        if (intra_lnr, ii_oracles[1]) not in ii_learners
    ]
    # enumerate pairs of (intra, inter) parsers
    ii_pairs = []
    for intra_lnr, inter_lnr in itr.chain(ii_learners,
                                          sorc_ii_learners,
                                          dorc_ii_learners):
        # NEW intra parsers are explicitly authorized (in fact, expected)
        # to have more than one real root ; this is necessary for the
        # Eisner decoder and probably others, with "hard" strategies
        # NOTE(review): the call below nevertheless passes
        # unique_real_root=True for the intra parsers too, which seems at
        # odds with the comment above -- confirm which one is intended.
        # TODO add unique_real_root to hyperparameters in grid search
        # intra and inter core parsers are paired positionally
        ii_pairs.extend(IntraInterPair(intra=x, inter=y) for x, y in
                        zip(_core_parsers(intra_lnr, unique_real_root=True),
                            _core_parsers(inter_lnr, unique_real_root=True)))
    # cross-product: pairs of parsers x intra-/inter- configs
    # the inter parser is primary when the intra parser's settings flag it
    # as an oracle, otherwise the intra parser is primary
    ii_parsers = [combine_intra(p, kconf,
                                primary=('inter' if p.intra.settings.oracle
                                         else 'intra'),
                                verbose=_VERBOSE_INTRA_INTER)
                  for p, kconf
                  in itr.product(ii_pairs, _INTRA_INTER_CONFIGS)]
    res.extend(ii_parsers)

    # drop the configurations flagged by _is_junk
    return [x for x in res if not _is_junk(x)]