def createNetFromSentences(self, sentences):
    """Build the word-transition network from an iterable of sentences.

    Each sentence is filtered and tokenized; every distinct token becomes
    a node, and every pair of consecutive tokens becomes a directed edge.
    An edge's label holds the running occurrence count of that token pair,
    and its weight is 1.0 / count, so frequent transitions are "cheaper"
    for the A*-style search performed elsewhere.

    :param sentences: iterable of raw sentence strings
    """
    text_filter = TextFilter()
    for sentence in sentences:
        filtered_sentence = text_filter.filter_all(sentence)
        tokens = nltk.word_tokenize(filtered_sentence)
        # Add each distinct token once.  Use the token itself as the node
        # key (the original wrapped it in str(), which in Python 2 can
        # raise UnicodeEncodeError on unicode tokens or produce a key that
        # differs from the one used by has_node() and by the edges below,
        # leaving edges pointing at missing nodes).
        for token in set(tokens):
            if not self.gr.has_node(token):
                self.gr.add_node(token)
        # Connect every consecutive token pair.
        for previous, current in zip(tokens, tokens[1:]):
            edge = (previous, current)
            if not self.gr.has_edge(edge):
                # First sighting: start the occurrence counter at the
                # configured initial value, with full weight.
                self.gr.add_edge(edge, wt=1.0, label=START_OCCURRENCES_NUM)
            else:
                # The edge exists: bump the occurrence count kept in the
                # label and set the weight to its inverse, so the weight
                # decreases as the pair becomes more frequent.
                new_number_of_occurrences = self.gr.edge_label(edge) + 1
                self.gr.set_edge_label(edge, new_number_of_occurrences)
                self.gr.set_edge_weight(edge, wt=1.0 / new_number_of_occurrences)
from SentenceNetVisitor import SentenceNetVisitor from XMLReqManager import XMLReqManager from SentenceNetCreator import SentenceNetCreator from irutils.TextFilter import TextFilter s1 = SentenceNetCreator() n1 = s1.get_net() v1 = SentenceNetVisitor(n1, s1.get_edge_start_weight(), s1.get_start_occurrences_num()) xml_doc_handler = XMLReqManager('req_document.xsd', '2007 - eirene fun 7.xml') req_document = xml_doc_handler.get_requirements_text() terms_filter = TextFilter() for sent in req_document: filtered_sent = terms_filter.filter_all(sent) path1, path_weight1 = v1.search_A_star(filtered_sent) print 'now producing a random sentence according to the document learnt...' print v1.get_random_sentence('network', 100)