Ejemplo n.º 1
0
def full_model_accuracy():
    """Train on the full training split and score head accuracy on dev.

    A classifier is built from every parse in the training file. Each
    development parse then has its heads cleared and re-predicted by
    running the parser with that classifier choosing the actions.

    Returns:
        The number of development entries whose predicted head equals the
        original head, divided by the total number of development entries.
    """
    # fit the classifier on every training parse
    classifier = depparse.Classifier(
        depparse.read_conllu("UD_English-EWT/en_ewt-ud-train.conllu"))

    # evaluate on the development split
    n_words = 0
    n_matched = 0
    dev_parses = depparse.read_conllu("UD_English-EWT/en_ewt-ud-dev.conllu")
    for sentence in dev_parses:
        n_words += len(sentence)

        # keep the original heads aside while wiping them from the sentence
        gold_heads = clear_heads(sentence)

        # let the classifier pick the parser actions
        depparse.parse(sentence, classifier)

        # tally the entries whose head was recovered
        n_matched += sum(
            1 for word, gold in zip(sentence, gold_heads)
            if word.head == gold)

    return n_matched / n_words
Ejemplo n.º 2
0
def test_parse():
    """Check that a hand-written action sequence rebuilds a known parse.

    One specific training sentence is selected, its heads are cleared, and
    the parser is driven by a fixed list of actions. The heads it produces
    must equal the ones that were cleared.
    """
    # The sentence under test (index 352 of the training file):
    #
    # # sent_id = weblog-blogspot.com_alaindewitt_20040929103700_ENG_20040929_103700-0026
    # # text = The future president joined the Guard in May 1968.
    # 1    The    the    DET    DT    Definite=Def|PronType=Art    3    det    3:det    _
    # 2    future    future    ADJ    JJ    Degree=Pos    3    amod    3:amod    _
    # 3    president    president    NOUN    NN    Number=Sing    4    nsubj    4:nsubj    _
    # 4    joined    join    VERB    VBD    Mood=Ind|Tense=Past|VerbForm=Fin    0    root    0:root    _
    # 5    the    the    DET    DT    Definite=Def|PronType=Art    6    det    6:det    _
    # 6    Guard    Guard    PROPN    NNP    Number=Sing    4    obj    4:obj    _
    # 7    in    in    ADP    IN    _    8    case    8:case    _
    # 8    May    May    PROPN    NNP    Number=Sing    4    obl    4:obl:in    _
    # 9    1968    1968    NUM    CD    NumType=Card    8    nummod    8:nummod    SpaceAfter=No
    # 10    .    .    PUNCT    .    _    4    punct    4:punct    _
    training = depparse.read_conllu("UD_English-EWT/en_ewt-ud-train.conllu")
    (deps,) = itertools.islice(training, 352, 353)

    # wipe the heads, keeping the originals for the final comparison
    expected_heads = clear_heads(deps)

    # the oracle list of actions for this sentence, in order
    shift = Action.SHIFT
    left_arc = Action.LEFT_ARC
    right_arc = Action.RIGHT_ARC
    gold_actions = [
        shift, shift, shift,
        left_arc, left_arc,
        shift,
        left_arc,
        shift, shift,
        left_arc, right_arc,
        shift, shift,
        left_arc,
        shift,
        right_arc, right_arc,
        shift,
        right_arc,
    ]

    # run the parser, feeding it the actions above
    depparse.parse(deps, IterActions(gold_actions))

    # the parser must have restored exactly the original heads
    restored_heads = [dep.head for dep in deps]
    assert restored_heads == expected_heads
Ejemplo n.º 3
0
def test_oracle():
    """Check the oracle's individual decisions and its full action list.

    A specific training sentence is used. The oracle is first queried
    directly with hand-built stack/queue configurations, then a fresh
    oracle is passed to the parser and the actions it recorded are
    compared against the expected sequence.
    """
    # consider a specific sentence from the training data

    # # sent_id = answers-20111108085734AATXy0E_ans-0004
    # # text = Plaster of Paris does two things
    # 1    Plaster    plaster    NOUN    NN    Number=Sing    4    nsubj    4:nsubj    _
    # 2    of    of    ADP    IN    _    3    case    3:case    _
    # 3    Paris    Paris    PROPN    NNP    Number=Sing    1    nmod    1:nmod:of    _
    # 4    does    do    VERB    VBZ    Mood=Ind|Number=Sing|Person=3|Tense=Pres|VerbForm=Fin    0    root    0:root    _
    # 5    two    two    NUM    CD    NumType=Card    6    nummod    6:nummod    _
    # 6    things    thing    NOUN    NNS    Number=Plur    4    obj    4:obj    _
    parses = depparse.read_conllu("UD_English-EWT/en_ewt-ud-train.conllu")
    [deps] = itertools.islice(parses, 7475, 7476)

    # create an oracle for the sentence and try a few actions
    oracle = depparse.Oracle(deps)
    # shift on an empty stack
    assert oracle([], deps) == Action.SHIFT
    # shift on a stack with only one entry
    assert oracle(deps[:1], deps[1:]) == Action.SHIFT
    # shift because "Plaster" and "of" are not in a head-dependent relation
    assert oracle(deps[:2], deps[2:]) == Action.SHIFT
    # left-arc because "Paris" is the head of "of"
    assert oracle(deps[:3], deps[3:]) == Action.LEFT_ARC
    # right-arc because "Plaster" is the head of "Paris"
    assert oracle(deps[:1] + deps[2:3], deps[3:]) == Action.RIGHT_ARC

    # create a new oracle for the same sentence and extract all the actions
    oracle = depparse.Oracle(deps)
    depparse.parse(deps, oracle)

    assert oracle.actions == [
        Action.SHIFT,  #1
        Action.SHIFT,  #2
        Action.SHIFT,  #3
        Action.LEFT_ARC,  #4
        Action.RIGHT_ARC,  #5
        Action.SHIFT,  #6
        Action.LEFT_ARC,  #7
        Action.SHIFT,  #8
        Action.SHIFT,  #9
        Action.LEFT_ARC,  #10
        Action.RIGHT_ARC,  #11
    ]
Ejemplo n.º 4
0
def test_oracle_round_trip():
    """Verify that replaying an oracle's actions reproduces the heads.

    For each of the first 50 training parses (minus the ones listed as
    non-projective), the oracle's recorded actions are fed back into the
    parser after the heads are cleared. The resulting heads must equal the
    heads the parse started with.
    """
    # positions, within the first 50 parses, of the non-projective parses
    non_projective = {4, 21, 25, 31}

    training = depparse.read_conllu("UD_English-EWT/en_ewt-ud-train.conllu")
    for index, sentence in enumerate(itertools.islice(training, 50)):
        if index in non_projective:
            continue

        # snapshot the head of every word before anything is modified
        expected_heads = [word.head for word in sentence]

        # parse with the oracle so that it records its sequence of actions
        recorder = depparse.Oracle(sentence)
        depparse.parse(sentence, recorder)

        # wipe the heads so the replay has to rebuild them
        clear_heads(sentence)

        # replay the recorded actions one at a time
        replay = IterActions(recorder.actions)
        depparse.parse(sentence, replay)

        # the replay must have restored the original heads
        actual_heads = [word.head for word in sentence]
        assert actual_heads == expected_heads