Esempio n. 1
0
# Extend the lexicon with the top-ranked single-word entries from `entities`.
# Each key without a space is paired as (value, key) so that sorting ranks by
# value first (presumably a frequency count -- confirm where `entities` is
# built); ties fall back to comparing the keys.
top = [(v, k) for k, v in entities.items() if " " not in k]
# Keep the highest-ranked entries; the cutoff is 40% of the current lexicon size.
top = sorted(top, reverse=True)[:int(len(lexicon) * 0.4)] # percentage
top = [k for v, k in top]
for ne in top:
    # Skip anything already recorded in `seen` (defined earlier in the script).
    if ne not in seen:
        lexicon.append(ne+" "+NE)
lexicon = sorted(lexicon)

# Write one lexicon entry per line. The file is read back by the test section
# below, so close it deterministically with a context manager instead of
# relying on garbage collection to flush the handle.
with open("brill-lexicon.txt", "w") as lexicon_file:
    lexicon_file.write("\n".join(lexicon))

#### TEST ##########################################################################################
# Create a Pattern Brill tagger and evaluate accuracy on the test data.

# 11) Load lexicon data (it is a lazy-loading object).
# Lexicon is defined outside this excerpt. All paths and the tag are assigned
# before any load() call; keep that order.
lexicon = Lexicon()
# Same file name that the training section above writes.
lexicon.path = "brill-lexicon.txt"
# Rule files: presumably produced by an earlier step not visible here -- confirm.
lexicon.lexical_rules.path = "brill-lexical.txt"
lexicon.contextual_rules.path = "brill-contextual.txt"
# NOTE(review): presumably the tag assigned to recognized named entities -- confirm.
lexicon.named_entities.tag = "NP"
# Call load() explicitly on the lexicon and on each of its components instead
# of leaving it to lazy loading (see the note above).
lexicon.load()
lexicon.lexical_rules.load()
lexicon.contextual_rules.load()
lexicon.named_entities.load()

# For testing with or without lexical and contextual rules:
#for i in reversed(range(len(lexicon.lexical_rules)-1)):
#    del lexicon.lexical_rules[i]
#for i in reversed(range(len(lexicon.contextual_rules)-1)):
#    del lexicon.contextual_rules[i]

# For random test data:
Esempio n. 2
0
# Extend the lexicon with the top-ranked single-word entries from `entities`.
# Each key without a space is paired as (value, key) so that sorting ranks by
# value first (presumably a frequency count -- confirm where `entities` is
# built); ties fall back to comparing the keys.
top = [(v, k) for k, v in entities.items() if " " not in k]
# Keep the highest-ranked entries; the cutoff is 40% of the current lexicon size.
top = sorted(top, reverse=True)[:int(len(lexicon) * 0.4)]  # percentage
top = [k for v, k in top]
for ne in top:
    # Skip anything already recorded in `seen` (defined earlier in the script).
    if ne not in seen:
        lexicon.append(ne + " " + NE)
lexicon = sorted(lexicon)

# Write one lexicon entry per line. The file is read back by the test section
# below, so close it deterministically with a context manager instead of
# relying on garbage collection to flush the handle.
with open("brill-lexicon.txt", "w") as lexicon_file:
    lexicon_file.write("\n".join(lexicon))

#### TEST ##########################################################################################
# Create a Pattern Brill tagger and evaluate accuracy on the test data.

# 11) Load lexicon data (it is a lazy-loading object).
# Lexicon is defined outside this excerpt. All paths and the tag are assigned
# before any load() call; keep that order.
lexicon = Lexicon()
# Same file name that the training section above writes.
lexicon.path = "brill-lexicon.txt"
# Rule files: presumably produced by an earlier step not visible here -- confirm.
lexicon.lexical_rules.path = "brill-lexical.txt"
lexicon.contextual_rules.path = "brill-contextual.txt"
# NOTE(review): presumably the tag assigned to recognized named entities -- confirm.
lexicon.named_entities.tag = "NP"
# Call load() explicitly on the lexicon and on each of its components instead
# of leaving it to lazy loading (see the note above).
lexicon.load()
lexicon.lexical_rules.load()
lexicon.contextual_rules.load()
lexicon.named_entities.load()

# For testing with or without lexical and contextual rules:
#for i in reversed(range(len(lexicon.lexical_rules)-1)):
#    del lexicon.lexical_rules[i]
#for i in reversed(range(len(lexicon.contextual_rules)-1)):
#    del lexicon.contextual_rules[i]

# For random test data: