# Assumes a project-local Trie (e.g. `from trie import Trie`) exposing
# insert(), a `root` node, per-node `children` dicts, and a '$' terminal
# key whose value is the complete word.
class Autocomplete(object):
    """Class-based autocomplete per class instructions.

    __init__() inserts the vocabulary words into the trie.
    autocomplete() traverses the trie and returns all words starting with
    the input string, limited by the number of max completions allowed by
    the user.
    """

    def __init__(self, vocabulary, max_completions=5):
        """Create a new trie instance and load the vocabulary into it."""
        self._trie = Trie()
        self.vocabulary = vocabulary
        self.max_completions = max_completions
        self.word_list = []
        for word in self.vocabulary:
            self._trie.insert(word)

    def autocomplete(self, start=None):
        """Return the words in the subtree rooted at the input string.

        The result is capped at max_completions entries.
        """
        self.word_list = []
        current = self._trie.root
        if start:
            for letter in start:
                if letter in current.children:
                    current = current.children[letter]
                else:
                    # No vocabulary word begins with this prefix.
                    return []
        self._autocomplete(current)
        return self.word_list[:self.max_completions]

    def _autocomplete(self, node):
        """Hidden method to traverse the trie and collect complete words."""
        for key, value in node.children.items():
            if key == '$':
                # '$' marks the end of a word; its value is the full word.
                if value not in self.word_list:
                    self.word_list.append(value)
            else:
                self._autocomplete(value)
        return self.word_list
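# A minimal usage sketch for the class above. The vocabulary and prefixes
# are made up; result order follows trie traversal (insertion) order, not
# alphabetical order.
words = ['trie', 'tree', 'treat', 'trend', 'trip']
ac = Autocomplete(words, max_completions=3)

print(ac.autocomplete('tre'))   # e.g. ['tree', 'treat', 'trend']
print(ac.autocomplete('tri'))   # e.g. ['trie', 'trip']
print(ac.autocomplete('xyz'))   # [] -- no word starts with this prefix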
import unittest

# Trie here is the longest-prefix-matching implementation under test
# (assumed project-local import, e.g. `from trie import Trie`).


class TestTrieTree(unittest.TestCase):

    def setUp(self):
        self.trie = Trie()

    def tearDown(self):
        del self.trie

    def test_sanity(self):
        self.trie['abc'] = '10'
        self.trie['abcd'] = '11'
        self.assertEqual(self.trie.match('abcdefghijk'), '11')
        self.assertEqual(self.trie.match('abcefg'), '10')

    def test_hole(self):
        self.trie['135'] = '10'
        self.trie['1351008'] = '100'
        self.assertEqual(self.trie.match('13510071234'), '10')
        self.assertEqual(self.trie.match('13510081234'), '100')

    def test_load(self):
        trie = Trie(mapping={'135': '100', '136': '101'})
        self.assertEqual(trie.match('13510000000'), '100')
        self.assertEqual(trie.match('13610000000'), '101')

    def test_only_one(self):
        self.trie['1'] = '0.02'
        self.trie['1358'] = '11'
        self.assertEqual(self.trie.match('135810001000'), '11')
        self.assertEqual(self.trie.match('135210001000'), '0.02')

    def test_modify_tree(self):
        self.trie['abc'] = '10'
        self.trie['abcd'] = '11'
        self.assertEqual(self.trie.match('abcdefghijk'), '11')
        self.assertEqual(self.trie.match('abcefg'), '10')
        self.trie['abc'] = '12'
        self.assertEqual(self.trie.match('abcdefghijk'), '11')
        self.assertEqual(self.trie.match('abcefg'), '12')
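# The tests above assert longest-prefix semantics: match() returns the
# value of the longest stored key that prefixes the query, the classic
# phone-prefix routing/rating use case. A standalone sketch of the same
# behavior, assuming the Trie under test:
rates = Trie(mapping={'1': '0.02', '1358': '11'})
rates['135'] = '10'

# '13581...' is prefixed by '1', '135', and '1358'; the longest key wins.
assert rates.match('135810001000') == '11'
# '1352...' falls back to the longest stored prefix, '135'.
assert rates.match('135210001000') == '10'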
import nltk
from stanfordcorenlp import StanfordCoreNLP

# Project-local modules, inferred from usage (import paths are assumptions):
# `utils` provides TRIGGERS, CAUSE_LABEL, and contains(); `CoreNLP`
# provides get_token_POS(); `Trie` provides search().
import utils
import CoreNLP
from trie import Trie


class SPO_Extractor(object):

    def __init__(self, args):
        self.args = args
        self.nlp = StanfordCoreNLP(args.corenlp_path)
        self.trieTree = Trie()

    # private helper
    def detect_index(self, token_list, trig_words):
        """Return [start, end] indices of trig_words in token_list, or [-1]."""
        # Slide a window of len(trig_words) tokens over the token list.
        for i in range(len(token_list) - len(trig_words) + 1):
            if token_list[i:i + len(trig_words)] == trig_words:
                return [i, i + len(trig_words) - 1]
        return [-1]

    def reform_dependency_patterns(self, sent, trig_str):
        """
        Reform a sentence into a syntactic dependency tag list with the
        CAUSE label inside.

        :type sent: string
        :type trig_str: string
        :rtype: list, list, list
        """
        trig_words = trig_str.split(' ')
        # reform the sent: dependency tag list, term list
        label_list, term_list = [], []
        token_list, dep_lookup = CoreNLP.get_token_POS(self.nlp, sent)
        cause_index = self.detect_index(token_list, trig_words)
        for i in range(len(token_list)):
            # Generalize the cause label; `i in cause_index` covers the
            # whole trigger span for one- and two-word triggers.
            if i in cause_index:
                if i == cause_index[0]:
                    label_list.append(utils.CAUSE_LABEL)
                    term_list.append(trig_str)
            else:
                label_list.append(dep_lookup[i])
                term_list.append(token_list[i])
        return label_list, term_list, cause_index

    def extract_spo_sentence(self, sent):
        """
        Extract SPO triples from a sentence.

        :type sent: string
        :rtype: list
        """
        res = []
        for trig in utils.TRIGGERS:
            index = utils.contains(sent, trig)
            if index > 0:
                trig_str = sent[index:index + len(trig)].strip()
                label_list, term_list, cause_index = self.reform_dependency_patterns(
                    sent, trig_str)
                # Deliver label_list to the trie tree to search; as used
                # here, a hit `ans` holds [subject offsets, cause offset,
                # object offset lists...], all relative to position i.
                for i in range(0, cause_index[0] - 1):
                    ans = self.trieTree.search(label_list[i:])
                    if ans:
                        sub = ' '.join([term_list[i + j] for j in ans[0]])
                        cause_label = term_list[i + ans[1]]
                        for objx in ans[2:]:
                            obj = ' '.join([term_list[i + j] for j in objx])
                            res.append([sub, cause_label, obj])
                        # greedy stop: keep the first (longest) subject match
                        break
                # avoid repeat detection, e.g. 'cause of' and 'cause'
                break
        return res

    def extract_spo_text(self, text):
        """Extract SPO triples from every sentence in a text."""
        res = []
        sentences = nltk.sent_tokenize(text)
        for sent in sentences:
            temp_res = self.extract_spo_sentence(sent)
            if temp_res:
                res += temp_res
        return res
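# Hypothetical driver for the extractor above. The argparse flag and the
# CoreNLP path are assumptions, and the dependency patterns would have to
# be inserted into extractor.trieTree before search() can return any hits.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--corenlp_path',
                    default='/path/to/stanford-corenlp')  # assumed flag name
args = parser.parse_args()

extractor = SPO_Extractor(args)
triples = extractor.extract_spo_text(
    'Heavy rainfall caused flooding in the valley.')
for sub, cause_label, obj in triples:
    print(sub, '--', cause_label, '-->', obj)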
def test_load(self):
    trie = Trie(mapping={'135': '100', '136': '101'},
                node_class=RedisNode)
    self.assertEqual(trie.match('13510000000'), '100')
    self.assertEqual(trie.match('13610000000'), '101')
def setUp(self):
    self.rds = redis.Redis()
    RedisNode.init_redis(self.rds, "RedisPrefix")
    self.trie = Trie(node_class=RedisNode)
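# A self-contained frame for the Redis-backed fragments above; the class
# name and the tearDown cleanup are assumptions about how RedisNode
# namespaces its keys under the prefix passed to init_redis.
import unittest

import redis


class TestRedisTrie(unittest.TestCase):  # hypothetical name

    def setUp(self):
        self.rds = redis.Redis()
        RedisNode.init_redis(self.rds, "RedisPrefix")
        self.trie = Trie(node_class=RedisNode)

    def tearDown(self):
        # Assumption: RedisNode stores its nodes under keys that start
        # with the init_redis prefix, so scan-and-delete cleans up.
        for key in self.rds.scan_iter("RedisPrefix*"):
            self.rds.delete(key)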