Ejemplo n.º 1
0
def test_extract_nicknames_from_after_symbols():
    """Check that nicknames written right after each symbol are extracted.

    The sentence contains two SYMBOL placeholders, each followed by a
    descriptive phrase ("dense layers", "learning rate"); the test expects
    one pair per symbol, in sentence order.
    """
    sentence = "The architecture consists of SYMBOL dense layers trained with SYMBOL learning rate."
    # Keys coincide with the character offsets of the SYMBOL placeholders
    # in the sentence above.
    tex_by_offset = {29: "L_d", 62: r"\alpha"}
    tagged_tokens = [
        ("The", "DT"),
        ("architecture", "NN"),
        ("consists", "VBZ"),
        ("of", "IN"),
        ("SYMBOL", "NN"),
        ("dense", "JJ"),
        ("layers", "NNS"),
        ("trained", "VBN"),
        ("with", "IN"),
        ("SYMBOL", "NN"),
        ("learning", "NN"),
        ("rate", "NN"),
        (".", "."),
    ]
    # Split the (token, tag) pairs into two parallel tuples.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(sentence, tokens, pos, tex_by_offset)
    assert len(pairs) == 2

    expected = [
        ("L_d", "dense layers"),
        (r"\alpha", "learning rate"),
    ]
    for index, (term, definition) in enumerate(expected):
        pair = pairs[index]
        assert pair.term_text == term
        assert pair.definition_text == definition
Ejemplo n.º 2
0
def test_extract_nicknames_from_before_symbols():
    """Check that nicknames written right before each symbol are extracted.

    Both SYMBOL placeholders are preceded by a noun ("policy", "timestep");
    the test expects one pair per symbol, in sentence order.
    """
    sentence = "The agent acts with a policy SYMBOL in each timestep SYMBOL."
    # Keys coincide with the character offsets of the SYMBOL placeholders
    # in the sentence above.
    tex_by_offset = {29: r"\pi", 53: "t"}
    tagged_tokens = [
        ("The", "DT"),
        ("agent", "NN"),
        ("acts", "VBZ"),
        ("with", "IN"),
        ("a", "DT"),
        ("policy", "NN"),
        ("SYMBOL", "NN"),
        ("in", "IN"),
        ("each", "DT"),
        ("timestep", "NN"),
        ("SYMBOL", "NN"),
        (".", "."),
    ]
    # Split the (token, tag) pairs into two parallel tuples.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(sentence, tokens, pos, tex_by_offset)
    assert len(pairs) == 2

    expected = [
        (r"\pi", "policy"),
        ("t", "timestep"),
    ]
    for index, (term, definition) in enumerate(expected):
        pair = pairs[index]
        assert pair.term_text == term
        assert pair.definition_text == definition
Ejemplo n.º 3
0
def test_extract_nicknames_symbols_parentheses():
    """Check extraction when the symbol is wrapped in parentheses.

    The single SYMBOL placeholder appears as "(SYMBOL)" right after the
    noun "policy"; the test expects exactly one pair linking the two.
    """
    sentence = "The agent acts with policy (SYMBOL)."
    # Key coincides with the character offset of the SYMBOL placeholder
    # in the sentence above.
    tex_by_offset = {28: r"\pi"}
    tagged_tokens = [
        ("The", "DT"),
        ("agent", "NN"),
        ("acts", "VBZ"),
        ("with", "IN"),
        ("policy", "NN"),
        ("(", "-LRB-"),
        ("SYMBOL", "NN"),
        (")", "-RRB-"),
        (".", "."),
    ]
    # Split the (token, tag) pairs into two parallel tuples.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(sentence, tokens, pos, tex_by_offset)
    assert len(pairs) == 1

    only_pair = pairs[0]
    assert only_pair.term_text == r"\pi"
    assert only_pair.definition_text == "policy"
Ejemplo n.º 4
0
def test_extract_nicknames_symbols_filter():
    """Check that no pairs are produced when two symbols appear back to back.

    The sentence ends in "SYMBOL SYMBOL."; the test expects an empty result.
    """
    sentence = "The agent acts with SYMBOL SYMBOL."
    # Keys coincide with the character offsets of the SYMBOL placeholders
    # in the sentence above.
    tex_by_offset = {20: r"\pi", 27: "p"}
    tagged_tokens = [
        ("The", "DT"),
        ("agent", "NN"),
        ("acts", "VBZ"),
        ("with", "IN"),
        ("SYMBOL", "NN"),
        ("SYMBOL", "NN"),
        (".", "."),
    ]
    # Split the (token, tag) pairs into two parallel tuples.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(sentence, tokens, pos, tex_by_offset)
    assert len(pairs) == 0
Ejemplo n.º 5
0
def test_extract_nickname_for_th_index_pattern():
    """Check extraction for a symbol used as an ordinal ("SYMBOLth").

    The placeholder is fused with a "th" suffix and followed by the noun
    "timestep"; the test expects one pair linking the symbol to that noun.
    """
    sentence = "This process repeats for every SYMBOLth timestep."
    # Key coincides with the character offset of the SYMBOL placeholder
    # in the sentence above.
    tex_by_offset = {31: "k"}
    tagged_tokens = [
        ("This", "DT"),
        ("process", "NN"),
        # NOTE(review): "repeats" is tagged NNS rather than VBZ; presumably
        # this mirrors real tagger output -- confirm before changing.
        ("repeats", "NNS"),
        ("for", "IN"),
        ("every", "DT"),
        ("SYMBOLth", "JJ"),
        ("timestep", "NN"),
        (".", "."),
    ]
    # Split the (token, tag) pairs into two parallel tuples.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(sentence, tokens, pos, tex_by_offset)
    assert len(pairs) == 1

    only_pair = pairs[0]
    assert only_pair.term_text == "k"
    assert only_pair.definition_text == "timestep"
Ejemplo n.º 6
0
def test_extract_nicknames_symbols_separated_by_colon():
    """Check extraction when a colon separates the symbol from its nickname.

    The sentence contains "SYMBOL : policy"; the test expects one pair
    linking the symbol to "policy".
    """
    sentence = "The agent acts with SYMBOL : policy."
    # Key coincides with the character offset of the SYMBOL placeholder
    # in the sentence above.
    tex_by_offset = {20: r"\pi"}
    tagged_tokens = [
        ("The", "DT"),
        ("agent", "NN"),
        ("acts", "VBZ"),
        ("with", "IN"),
        ("SYMBOL", "NN"),
        (":", ":"),
        ("policy", "NN"),
        (".", "."),
    ]
    # Split the (token, tag) pairs into two parallel tuples.
    tokens, pos = zip(*tagged_tokens)

    pairs = get_symbol_nickname_pairs(sentence, tokens, pos, tex_by_offset)
    assert len(pairs) == 1

    only_pair = pairs[0]
    assert only_pair.term_text == r"\pi"
    assert only_pair.definition_text == "policy"