def generateMargChain(self, key_word):
    """Return a Margarine Chain, as a list of word ids, built around key_word.

    Inspired by the open source project Margarine, which uses a concept
    similar to the Markov chain. MargChains go both ways, starting from
    the keyword: one half is grown from the keyword towards the end of the
    sentence, the other from the keyword back towards the start, and the
    two halves are then joined.

    key_word is compared against Markov.q.second_wordID, so it is the id
    of the word the chain is built around.

    Returns None when key_word is falsy or when no Markov row has it as
    its second word. Otherwise returns a list of word ids: the reversed
    backward half followed by the forward half (which starts with the
    keyword itself).
    """
    eof_id = 1            # word id used as the start/end-of-sentence marker
    max_forward_len = 12  # cap on the length of the forward half

    if not key_word:
        return None

    base_margs = list(Markov.select(Markov.q.second_wordID == key_word))
    if not base_margs:
        return None

    # The base row is picked at random for now.
    base = random.choice(base_margs)

    # Forward chain (keyword -> end).
    second_word = base.second_word.id
    third_word = base.third_word.id
    second_half = [second_word]
    while third_word != eof_id and len(second_half) < max_forward_len:
        second_half.append(third_word)
        # Shift the window one word forward and look for rows continuing it.
        hits = list(Markov.select(
            AND(Markov.q.first_wordID == second_word,
                Markov.q.second_wordID == third_word)))
        if not hits:
            # No stored continuation: stop here instead of raising
            # IndexError while picking from an empty list.
            break
        choice = random.choice(hits)
        second_word = choice.second_word.id
        third_word = choice.third_word.id

    # Reverse chain (keyword -> start).
    # NOTE(review): unlike the forward pass this loop has no length cap;
    # it relies on eventually reaching the marker id or running out of rows.
    first_half = []
    first_word = base.first_word.id
    second_word = base.second_word.id
    while first_word != eof_id:
        first_half.append(first_word)
        # Look for rows whose last two words are the current leading pair.
        hits = list(Markov.select(
            AND(Markov.q.second_wordID == first_word,
                Markov.q.third_wordID == second_word)))
        if not hits:
            # No stored predecessor: keep what has been collected so far.
            break
        choice = random.choice(hits)
        first_word = choice.first_word.id
        second_word = choice.second_word.id

    # Merge the halves: the backward half was collected keyword-outwards,
    # so flip it before appending the forward half.
    first_half.reverse()
    first_half.extend(second_half)
    return first_half
def createMarkovChains(self, increment=False):
    """Store the chains produced by self._markovify() in the database.

    Every entry yielded by self._markovify() is unpacked into three items
    and written twice: once as a Markov row keyed by the items' ids, and
    once as a MarkovLex row keyed by the ids of the items' main_type.

    When a matching row already exists and increment == True, increment()
    is called on the first match. In every other case a new row is
    created; this includes the case where a match exists but increment is
    not True.
    """
    for triple in self._markovify():
        # Word-level chain first.
        word_a, word_b, word_c = [item.id for item in triple]
        word_query = AND(Markov.q.first_wordID == word_a,
                         Markov.q.second_wordID == word_b,
                         Markov.q.third_wordID == word_c)
        word_rows = list(Markov.select(word_query))
        if word_rows and increment == True:
            word_rows[0].increment()
        else:
            Markov(first_word=word_a, second_word=word_b, third_word=word_c)

        # Then the lexical-type chain for the same entry.
        lex_a, lex_b, lex_c = [item.main_type.id for item in triple]
        lex_query = AND(MarkovLex.q.first_lexID == lex_a,
                        MarkovLex.q.second_lexID == lex_b,
                        MarkovLex.q.third_lexID == lex_c)
        lex_rows = list(MarkovLex.select(lex_query))
        if lex_rows and increment == True:
            lex_rows[0].increment()
        else:
            MarkovLex(first_lex=lex_a, second_lex=lex_b, third_lex=lex_c)
def main():
    """Interactively summarise a web page or a text file and print the result.

    Asks whether the input is a URL (1) or a plain text file (2), loads the
    text, asks for the population size, analyses every sentence, writes the
    word and grammar graphs as dot files, and prints the summary selected
    from the generated population.

    Any option other than 1 or 2 prints a message and exits with status 2.
    """
    text = ""
    option = int(raw_input("1. URL, 2. plain text:"))
    if option == 1:
        if DEBUG:
            url = "http://www.cs.drexel.edu/~jpopyack/Kasparov.html"
        else:
            url = raw_input("Enter the URL: ")
        text = summartUtil.getHtmlText(url)
    elif option == 2:
        filename = raw_input("Enter the filename: ")
        fin = open(filename)
        try:
            # Each line keeps its own line ending and gets one more "\n"
            # appended, exactly as the original concatenation loop did.
            text = "".join([line + "\n" for line in fin])
        finally:
            # Close the file even if reading fails part-way through.
            fin.close()
    else:
        print("Please enter the right option")
        sys.exit(2)

    max_gen = int(raw_input("Enter number of population: "))
    sentences = summartUtil.getSentences(text)

    print("Analyzing text...")
    for sentence in sentences:
        analyze_sentence(sentence)

    # Generate a graph from the dot files with:
    #   dot -Tpng -o test.png test.dot
    outputDot("word.dot", word_dictionary)
    outputDot("grammer.dot", grammer_dictionary)

    markov = Markov(start_word, end_word, start_grammer, end_grammer)
    markov.set_original(text)
    markov.set_blackList(BLACK_LIST)

    print("Generating summary...")
    population = markov.GenerateSummary(max_gen)
    print(markov.select(population))
def testMarkovChainOccurence(self):
    # The chain whose second word is 名前 is expected to carry an
    # occurrence count of 3.
    word = Word.byAppeared_name('名前')
    hits = list(Markov.select(Markov.q.second_wordID == word.id))
    # Only the first matching row is inspected.
    check = hits[0]
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(check.occurence, 3)
def testMarkovChainDuplicates(self):
    # There should be exactly one chain row whose second word is 名前,
    # i.e. no duplicate rows for it.
    word = Word.byAppeared_name('名前')
    hits = list(Markov.select(Markov.q.second_wordID == word.id))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(len(hits), 1)