Example #1
0
    def __init__(self, vocabulary, max_completions=5):
        """
        Store the settings and load every vocabulary word into a new Trie.

        vocabulary is iterated once and each item is passed to the trie's
        insert(); max_completions is kept on the instance next to an empty
        word_list.
        """
        self._trie = Trie()
        self.vocabulary = vocabulary
        self.max_completions = max_completions
        self.word_list = []

        add_word = self._trie.insert
        for entry in self.vocabulary:
            add_word(entry)
Example #2
0
class Autocomplete(object):
    """
    Class based Autocomplete per class instructions.

    The constructor inserts every vocabulary word into the trie.

    autocomplete() traverses the trie and returns all words starting with the
    input string, limited by the number of max completions allowed by the user.
    """
    def __init__(self, vocabulary, max_completions=5):
        """
        Create a new trie instance and insert every word of vocabulary.

        vocabulary: iterable of words to make available for completion.
        max_completions: upper bound on how many words autocomplete() returns.
        """
        self._trie = Trie()
        self.vocabulary = vocabulary
        self.max_completions = max_completions
        self.word_list = []

        for word in self.vocabulary:
            self._trie.insert(word)

    def autocomplete(self, start=None):
        """
        Return a list of words in the subtree starting with the input
        string. The list will be limited by the number of max completions.

        When start is None or empty, the whole trie is searched. When no
        stored word begins with start, an empty list is returned, so the
        result is always a list. self.word_list is reset on every call and
        afterwards holds every match found (not truncated).
        """
        self.word_list = []
        current = self._trie.root
        if start:
            for letter in start:
                if letter not in current.children:
                    # Unknown prefix: nothing can match. Return a list rather
                    # than None so callers can iterate the result directly.
                    return []
                current = current.children[letter]
        self._autocomplete(current)
        return self.word_list[:self.max_completions]

    def _autocomplete(self, node):
        """
        Hidden method to traverse the trie.

        Walks every child of node depth-first. The value stored under the
        '$' key is appended to self.word_list (once); every other child is
        descended into. Always returns self.word_list.
        """
        for key, value in node.children.items():
            if key == '$':
                # Skip values that were already collected.
                if value not in self.word_list:
                    self.word_list.append(value)
            else:
                self._autocomplete(value)
        return self.word_list
Example #3
0
class TestTireTree(unittest.TestCase):
    """
    Exercise Trie item assignment, the mapping= constructor argument and
    match().

    Every expectation here has match() returning the value stored under the
    longest key that is a prefix of the queried string.
    """

    def setUp(self):
        """Start each test from a Trie built with no arguments."""
        self.trie = Trie()

    def tearDown(self):
        """Drop the fixture created by setUp."""
        del self.trie

    def _store(self, *pairs):
        """Assign each (key, value) pair to self.trie, in the order given."""
        for key, value in pairs:
            self.trie[key] = value

    def _expect(self, *pairs):
        """Assert self.trie.match(query) == expected for each pair, in order."""
        for query, expected in pairs:
            self.assertEqual(self.trie.match(query), expected)

    def test_sanity(self):
        """Of two nested keys, the longer one wins whenever it fits the query."""
        self._store(('abc', '10'), ('abcd', '11'))
        self._expect(('abcdefghijk', '11'), ('abcefg', '10'))

    def test_hole(self):
        """A query that diverges part-way to a longer key falls back to the shorter key."""
        self._store(('135', '10'), ('1351008', '100'))
        self._expect(('13510071234', '10'), ('13510081234', '100'))

    def test_load(self):
        """Entries supplied through mapping= can be matched."""
        trie = Trie(mapping={'135': '100', '136': '101'})
        for query, expected in (('13510000000', '100'),
                                ('13610000000', '101')):
            self.assertEqual(trie.match(query), expected)

    def test_only_one(self):
        """A single-character key still answers when no longer key fits."""
        self._store(('1', '0.02'), ('1358', '11'))
        self._expect(('135810001000', '11'), ('135210001000', '0.02'))

    def test_modify_tree(self):
        """Re-assigning a key changes its value and leaves the longer key alone."""
        self._store(('abc', '10'), ('abcd', '11'))
        self._expect(('abcdefghijk', '11'), ('abcefg', '10'))

        self._store(('abc', '12'))
        self._expect(('abcdefghijk', '11'), ('abcefg', '12'))
Example #4
0
class TestTireTree(unittest.TestCase):
    """
    Checks for Trie: item assignment, construction from mapping=, and match().

    In each case match() is expected to give back the value stored under the
    longest key that the query string starts with.
    """
    def setUp(self):
        """Build a no-argument Trie for the test about to run."""
        self.trie = Trie()

    def tearDown(self):
        """Remove the Trie created in setUp."""
        del self.trie

    def _fill(self, entries):
        """Write every (key, value) of entries into self.trie, preserving order."""
        for key, value in entries:
            self.trie[key] = value

    def _assert_matches(self, cases):
        """Check self.trie.match(query) against expected for each case in turn."""
        for query, expected in cases:
            self.assertEqual(self.trie.match(query), expected)

    def test_sanity(self):
        """Nested keys: the longer key answers when it is a prefix of the query."""
        self._fill([('abc', '10'), ('abcd', '11')])
        self._assert_matches([('abcdefghijk', '11'), ('abcefg', '10')])

    def test_hole(self):
        """Queries that leave the path to a longer key resolve to the shorter key."""
        self._fill([('135', '10'), ('1351008', '100')])
        self._assert_matches([('13510071234', '10'), ('13510081234', '100')])

    def test_load(self):
        """A Trie built from mapping= matches the supplied entries."""
        loaded = Trie(mapping={'135': '100', '136': '101'})
        self.assertEqual(loaded.match('13510000000'), '100')
        self.assertEqual(loaded.match('13610000000'), '101')

    def test_only_one(self):
        """A one-character key is used when the longer key does not fit."""
        self._fill([('1', '0.02'), ('1358', '11')])
        self._assert_matches([('135810001000', '11'), ('135210001000', '0.02')])

    def test_modify_tree(self):
        """Overwriting a key updates its value without disturbing the longer key."""
        self._fill([('abc', '10'), ('abcd', '11')])
        self._assert_matches([('abcdefghijk', '11'), ('abcefg', '10')])

        self._fill([('abc', '12')])
        self._assert_matches([('abcdefghijk', '11'), ('abcefg', '12')])
Example #5
0
class SPO_Extractor(object):
    """Extract [subject, cause-trigger, object] triples from text.

    Each sentence is tokenised through CoreNLP.get_token_POS, the trigger
    phrase is collapsed into a single utils.CAUSE_LABEL entry, and the
    resulting label sequence is looked up in self.trieTree.
    """

    def __init__(self, args):
        """Keep args, build the CoreNLP handle and create the pattern trie.

        :param args: object exposing a ``corenlp_path`` attribute
        """
        self.args = args
        self.nlp = StanfordCoreNLP(args.corenlp_path)
        self.trieTree = Trie()

    # private function
    def detect_index(self, token_list, trig_words):
        """Locate the first occurrence of trig_words inside token_list.

        :type token_list: list
        :type trig_words: list
        :rtype: list
        :return: ``[first, last]`` inclusive token positions of the match,
            or ``[-1]`` when the trigger does not occur
        """
        width = len(trig_words)
        if not width:
            # An empty trigger cannot be located.
            return [-1]
        # Visit every window, including the last one, so that a trigger
        # ending the sentence (e.g. a one-word trigger as the final token)
        # is still found.
        for i in range(len(token_list) - width + 1):
            if token_list[i:i + width] == trig_words:
                return [i, i + width - 1]
        return [-1]

    def reform_dependency_patterns(self, sent, trig_str):
        """Reform a sentence into a syntactic dependency tag list with the
        CAUSE label inside.

        All tokens covered by the trigger are collapsed into one entry:
        utils.CAUSE_LABEL in the label list and trig_str in the term list.

        :type sent: string
        :type trig_str: string
        :rtype: list,list,list
        :return: label list, term list, and the trigger span exactly as
            returned by detect_index (``[-1]`` when not found)
        """
        trig_words = trig_str.split(' ')
        token_list, dep_lookup = CoreNLP.get_token_POS(self.nlp, sent)
        cause_index = self.detect_index(token_list, trig_words)
        # cause_index holds only the two endpoints; compare against the whole
        # inclusive span so interior tokens of a 3+ word trigger are collapsed
        # too. For [-1] both bounds are -1 and no position matches.
        first, last = cause_index[0], cause_index[-1]

        # reform the sent: dependency tag list, term list
        label_list, term_list = [], []
        for i, token in enumerate(token_list):
            if first <= i <= last:
                # generalize cause label (emit once, at the span start)
                if i == first:
                    label_list.append(utils.CAUSE_LABEL)
                    term_list.append(trig_str)
            else:
                label_list.append(dep_lookup[i])
                term_list.append(token)
        return label_list, term_list, cause_index

    def extract_spo_sentence(self, sent):
        """Extract SPO triples from a sentence.

        Only the first entry of utils.TRIGGERS for which utils.contains()
        reports a position greater than 0 is processed.

        :type sent: string
        :rtype: list
        :return: list of ``[subject, cause_label, object]`` lists; empty when
            nothing matched
        """
        res = []
        for trig in utils.TRIGGERS:
            index = utils.contains(sent, trig)
            if index <= 0:
                continue
            trig_str = sent[index:index + len(trig)].strip()
            label_list, term_list, cause_index = self.reform_dependency_patterns(
                sent, trig_str)
            # deliver label_list to trie tree to search;
            for i in range(0, cause_index[0] - 1):
                ans = self.trieTree.search(label_list[i:])
                if ans:
                    # ans[0]: subject offsets, ans[1]: cause offset,
                    # ans[2:]: one offset list per object (all relative to i)
                    sub = ' '.join([term_list[i + j] for j in ans[0]])
                    cause_label = term_list[i + ans[1]]
                    for objx in ans[2:]:
                        obj = ' '.join([term_list[i + j] for j in objx])
                        res.append([sub, cause_label, obj])
                    # greedy stop, avoid shorter subject
                    break
            # avoid repeat detection, e.g. cause of and cause
            break
        return res

    def extract_spo_text(self, text):
        """Extract SPO triples from every sentence of text.

        :type text: string
        :rtype: list
        :return: the triples of all sentences, concatenated in sentence order
        """
        res = []
        for sent in nltk.sent_tokenize(text):
            res.extend(self.extract_spo_sentence(sent))
        return res
Example #6
0
 def __init__(self, args):
     """Keep args, build a StanfordCoreNLP object from args.corenlp_path and create a Trie."""
     self.args = args
     corenlp_path = args.corenlp_path
     self.nlp = StanfordCoreNLP(corenlp_path)
     self.trieTree = Trie()
Example #7
0
 def setUp(self):
     """Create a Trie with no arguments and store it as self.trie."""
     self.trie = Trie()
Example #8
0
 def test_load(self):
     """A Trie built from mapping= with node_class=RedisNode matches both supplied keys."""
     prefixes = {'135': '100', '136': '101'}
     trie = Trie(mapping=prefixes, node_class=RedisNode)
     for number, expected in (('13510000000', '100'),
                              ('13610000000', '101')):
         self.assertEqual(trie.match(number), expected)
Example #9
0
 def setUp(self):
     """
     Create a default redis.Redis() client, pass it with the "RedisPrefix"
     string to RedisNode.init_redis, then build a Trie using RedisNode.
     """
     client = redis.Redis()
     self.rds = client
     RedisNode.init_redis(client, "RedisPrefix")
     self.trie = Trie(node_class=RedisNode)
Example #10
0
 def test_load(self):
     """A Trie built from mapping= matches both supplied keys."""
     prefixes = {'135': '100', '136': '101'}
     trie = Trie(mapping=prefixes)
     for number, expected in (('13510000000', '100'),
                              ('13610000000', '101')):
         self.assertEqual(trie.match(number), expected)
Example #11
0
 def setUp(self):
     """Create a Trie with no arguments and store it as self.trie."""
     self.trie = Trie()
Example #12
0
 def test_load(self):
     """Load two prefixes through mapping= into a RedisNode-backed Trie and match each one."""
     loaded = Trie(node_class=RedisNode, mapping={'135': '100', '136': '101'})
     first = loaded.match('13510000000')
     self.assertEqual(first, '100')
     second = loaded.match('13610000000')
     self.assertEqual(second, '101')
Example #13
0
 def test_load(self):
     """Load two prefixes through mapping= and match each one."""
     loaded = Trie(mapping={'135': '100', '136': '101'})
     first = loaded.match('13510000000')
     self.assertEqual(first, '100')
     second = loaded.match('13610000000')
     self.assertEqual(second, '101')