# Rank the single-word entities (keys containing no space) by their value,
# highest first. Sorting (value, key) tuples means ties fall back to the key.
top = [(v, k) for k, v in entities.items() if " " not in k]
# Keep at most 40% of the current lexicon size.
top = sorted(top, reverse=True)[:int(len(lexicon) * 0.4)] # percentage
top = [k for v, k in top]
# Add each retained entity as a "<word> <NE>" entry, unless it is in `seen`.
for ne in top:
    if ne not in seen:
        lexicon.append(ne + " " + NE)
lexicon = sorted(lexicon)
# Write through a context manager so the file is flushed and closed before
# lexicon.load() is called on the same path below.
with open("brill-lexicon.txt", "w") as lexicon_file:
    lexicon_file.write("\n".join(lexicon))

#### TEST ##########################################################################################
# Create a Pattern Brill tagger and evaluate accuracy on the test data.

# 11) Load lexicon data (it is a lazy-loading object).
lexicon = Lexicon()
lexicon.path = "brill-lexicon.txt"
lexicon.lexical_rules.path = "brill-lexical.txt"
lexicon.contextual_rules.path = "brill-contextual.txt"
lexicon.named_entities.tag = "NP"
# Load everything explicitly up front.
lexicon.load()
lexicon.lexical_rules.load()
lexicon.contextual_rules.load()
lexicon.named_entities.load()

# For testing with or without lexical and contextual rules:
#for i in reversed(range(len(lexicon.lexical_rules)-1)):
#    del lexicon.lexical_rules[i]
#for i in reversed(range(len(lexicon.contextual_rules)-1)):
#    del lexicon.contextual_rules[i]

# For random test data:
# Rank the single-word entities (keys containing no space) by their value,
# highest first. Sorting (value, key) tuples means ties fall back to the key.
top = [(v, k) for k, v in entities.items() if " " not in k]
# Keep at most 40% of the current lexicon size.
top = sorted(top, reverse=True)[:int(len(lexicon) * 0.4)] # percentage
top = [k for v, k in top]
# Add each retained entity as a "<word> <NE>" entry, unless it is in `seen`.
for ne in top:
    if ne not in seen:
        lexicon.append(ne + " " + NE)
lexicon = sorted(lexicon)
# Write through a context manager so the file is flushed and closed before
# lexicon.load() is called on the same path below.
with open("brill-lexicon.txt", "w") as lexicon_file:
    lexicon_file.write("\n".join(lexicon))

#### TEST ##########################################################################################
# Create a Pattern Brill tagger and evaluate accuracy on the test data.

# 11) Load lexicon data (it is a lazy-loading object).
lexicon = Lexicon()
lexicon.path = "brill-lexicon.txt"
lexicon.lexical_rules.path = "brill-lexical.txt"
lexicon.contextual_rules.path = "brill-contextual.txt"
lexicon.named_entities.tag = "NP"
# Load everything explicitly up front.
lexicon.load()
lexicon.lexical_rules.load()
lexicon.contextual_rules.load()
lexicon.named_entities.load()

# For testing with or without lexical and contextual rules:
#for i in reversed(range(len(lexicon.lexical_rules)-1)):
#    del lexicon.lexical_rules[i]
#for i in reversed(range(len(lexicon.contextual_rules)-1)):
#    del lexicon.contextual_rules[i]

# For random test data: