Esempio n. 1
0
def add_words_to_nx_graph(graph, phrase):
    """Add every sentence of ``phrase`` to ``graph`` as a chain of nodes.

    Each sentence is split on whitespace and linked as
    ``Symbols.START -> word_1 -> ... -> word_n -> Symbols.END``.
    The START and END nodes are looked up with ``graph.find_node_by_key``
    and only added when that lookup returns a falsy value.

    Args:
        graph: object providing ``find_node_by_key(key)`` and
            ``add_node(key)``; the nodes it returns must provide
            ``add_neighbor(node)``.
        phrase: raw text handed to ``tokenize_sentences``.
    """
    # TODO: decide if 50 is the right choice here
    # NOTE(review): 50 is presumably a length limit for tokenize_sentences -- confirm.
    for sentence in tokenize_sentences(phrase, 50, transform_call=lambda s: s.lower()):
        words = sentence.split()

        # An empty or whitespace-only sentence has no head word; skip it
        # instead of failing on the first-word lookup.
        if not words:
            continue

        # gets "start" symbol
        start_node = graph.find_node_by_key(Symbols.START)
        if not start_node:
            start_node = graph.add_node(Symbols.START)

        # Chain the words: the first hangs off START, each later word
        # hangs off its predecessor.
        prev_node = start_node
        for word in words:
            node = graph.add_node(word)
            prev_node.add_neighbor(node)
            prev_node = node

        # adds "end" child to last word of phrase
        end_node = graph.find_node_by_key(Symbols.END)
        if not end_node:
            end_node = graph.add_node(Symbols.END)

        prev_node.add_neighbor(end_node)
Esempio n. 2
0
def add_words_to_nx_graph(graph, phrase):
    """Link the words of each sentence in ``phrase`` into ``graph``.

    For every sentence produced by ``tokenize_sentences`` the words are
    connected in order, with ``Symbols.START`` as the parent of the first
    word and ``Symbols.END`` as the child of the last one. START and END
    nodes are reused when ``graph.find_node_by_key`` finds them and are
    added otherwise.

    Args:
        graph: object providing ``find_node_by_key(key)`` and
            ``add_node(key)``; the nodes it returns must provide
            ``add_neighbor(node)``.
        phrase: raw text handed to ``tokenize_sentences``.
    """
    # TODO: decide if 50 is the right choice here
    # NOTE(review): 50 is presumably a length limit for tokenize_sentences -- confirm.
    for sentence in tokenize_sentences(phrase,
                                       50,
                                       transform_call=lambda s: s.lower()):
        words = sentence.split()

        # Nothing to link for an empty sentence; previously this raised
        # IndexError when taking the first word.
        if not words:
            continue

        # gets "start" symbol
        start_node = graph.find_node_by_key(Symbols.START)
        if not start_node:
            start_node = graph.add_node(Symbols.START)

        # Walk the sentence, attaching each word to the node before it
        # (START for the first word).
        prev_node = start_node
        for word in words:
            node = graph.add_node(word)
            prev_node.add_neighbor(node)
            prev_node = node

        # adds "end" child to last word of phrase
        end_node = graph.find_node_by_key(Symbols.END)
        if not end_node:
            end_node = graph.add_node(Symbols.END)

        prev_node.add_neighbor(end_node)
Esempio n. 3
0
 def ingest(self, phrase):
     """Feed the sentences of ``phrase`` into the 2nd-order chain.

     Each sentence is split on whitespace and terminated with
     ``Symbols.END``. The first two tokens are counted as a sentence
     head, and for every consecutive triple ``(w1, w2, w3)`` the count of
     ``w3`` following the pair ``(w1, w2)`` is incremented.

     Sentences with fewer than three tokens (END included) are skipped;
     the remaining sentences of ``phrase`` are still ingested.

     Args:
         phrase: raw text handed to ``tokenize_sentences``.
     """
     # NOTE(review): 50 is presumably a length limit for tokenize_sentences -- confirm.
     for sentence in tokenize_sentences(phrase, 50, lowercase=True):
         phrase_words = sentence.split()
         phrase_words.append(Symbols.END)

         # phrases under 3 are of no use to a 2nd-order chain; skip only
         # this sentence rather than abandoning the rest of the input
         if len(phrase_words) < 3:
             continue

         # grabs first 2 words of phrase
         self.__heads.increment((phrase_words[0], phrase_words[1]))

         # sliding window over consecutive word triples
         for w1, w2, w3 in zip(phrase_words, phrase_words[1:], phrase_words[2:]):
             w_pair = (w1, w2)
             if w_pair in self.__words:
                 trailing_words = self.__words[w_pair]
                 trailing_words[w3] = trailing_words.get(w3, 0) + 1
             else:
                 self.__words[w_pair] = {w3: 1}
Esempio n. 4
0
    def ingest(self, phrase):
        """Record the word transitions of every sentence in ``phrase``.

        Each sentence is split on whitespace and ``Symbols.END`` is
        appended. The leading pair of tokens is counted via
        ``self.__heads.increment``; then, for each consecutive triple
        ``(w1, w2, w3)``, the counter stored under ``(w1, w2)`` in
        ``self.__words`` is bumped for ``w3``.

        A sentence with fewer than three tokens (END included) is skipped
        without stopping ingestion of the sentences that follow it.

        Args:
            phrase: raw text handed to ``tokenize_sentences``.
        """
        # NOTE(review): 50 is presumably a length limit for tokenize_sentences -- confirm.
        for sentence in tokenize_sentences(phrase, 50, lowercase=True):
            phrase_words = sentence.split()
            phrase_words.append(Symbols.END)

            # phrases under 3 are of no use to a 2nd-order chain; move on
            # to the next sentence instead of returning early
            if len(phrase_words) < 3:
                continue

            # grabs first 2 words of phrase
            self.__heads.increment((phrase_words[0], phrase_words[1]))

            # sliding window over consecutive word triples
            for w1, w2, w3 in zip(phrase_words, phrase_words[1:], phrase_words[2:]):
                w_pair = (w1, w2)
                if w_pair in self.__words:
                    trailing_words = self.__words[w_pair]
                    trailing_words[w3] = trailing_words.get(w3, 0) + 1
                else:
                    self.__words[w_pair] = {w3: 1}