class Brain:
    """Juna's brain: records what it hears and tries to build a reply.

    The brain registers itself on the dispatcher for two events:
    'rcv' (handled by learn) and 'speak_request' (handled by
    getMessageString). Replies are emitted by calling the dispatcher
    with the 'speak' event.
    """

    def __init__(self, dispatcher):
        """
        Juna's brain. Handles messages and tries to make a reply.

        dispatcher -- event dispatcher; callbacks are attached with
                      ``+=`` and events are fired by calling it.
        """
        self.dispatcher = dispatcher
        # Callbacks
        self.dispatcher += Event('rcv', self.learn)
        self.dispatcher += Event('speak_request', self.getMessageString)
        self.parser = MessageParser()
        self.postprocessor = Postprocessor()
        # Module-level DEBUG switch; debug() only prints when it equals 1.
        self.DEBUG = DEBUG

    def learn(self, message, speaker=''):
        """
        The learning mechanism.

        At the moment it doesn't really learn per se; it only adds
        the parsed message to the database (lex entry, log entry and
        Markov chains).

        message -- raw text handed to the message parser.
        speaker -- accepted for the callback signature; currently unused.

        Returns None. Messages the parser cannot turn into a pseudo
        sentence are ignored.
        """
        # We take the message and convert it to a pseudo sentence
        pseudo_sentence = self.parser.parseSentence(message)
        if not pseudo_sentence:
            return None

        # Convert the pseudo sentence to a real sentence object
        sentence = Sentence()
        sentence.pseudo2real(pseudo_sentence, increment=True)

        # Add the lex entry (reuse the existing row when there is one)
        lex_string = sentence.lexString()
        try:
            lex = Lex.byEntry(lex_string)
        except SQLObjectNotFound:
            lex = Lex(increment=True, entry=lex_string)

        # Add the log entry
        log_string = sentence.logString()
        Log(entry=log_string, lex=lex.id)

        # Finally, the Markov table
        sentence.createMarkovChains(increment=True)

    def getMessageString(self, message_queue, my_queue, topic_queue):
        """
        This is the main algorithm to speak.

        message_queue -- texts heard recently (used only as a filter).
        my_queue      -- texts spoken recently (used only as a filter).
        topic_queue   -- keywords to build candidate sentences around.

        One candidate chain is generated per keyword, candidates whose
        readable text appears in either queue are dropped, the
        survivors are ranked by checkGrammar and the winner is
        post-processed and dispatched as a 'speak' event. Nothing is
        dispatched when no candidate survives. Returns None.
        """
        markov_candidates = []
        if topic_queue:
            # Make Marg chains from the keywords and convert them to
            # Sentences
            for keyword in topic_queue:
                marg_chain = self.generateMargChain(keyword)
                if marg_chain:
                    sentence = Sentence()
                    sentence.createFromIDs(marg_chain)
                    markov_candidates.append(sentence)
        if not markov_candidates:
            return

        # Filter out things we spoke/heard recently.
        candidate_texts = set(c.readable() for c in markov_candidates)
        recent_texts = set(message_queue) | set(my_queue)
        fresh_texts = list(candidate_texts - recent_texts)
        if not fresh_texts:
            return

        # fresh_texts is plain text, so we convert it back to our
        # Sentence objects. Texts the parser rejects are skipped, the
        # same way learn() ignores unparsable input.
        final_candidates = []
        for text in fresh_texts:
            pseudo_sentence = self.parser.parseSentence(text)
            if not pseudo_sentence:
                continue
            sentence = Sentence()
            sentence.pseudo2real(pseudo_sentence)
            final_candidates.append(sentence)
        if not final_candidates:
            return

        # Do the grammar check
        best_choice = self.checkGrammar(final_candidates)

        # Postprocess the output
        final_output = self.postprocessor.postProcess(best_choice.readable())
        self.debug('final_output:%s' % final_output)

        # Dispatch the final output
        self.dispatcher('speak', final_output)

    def checkGrammar(self, choice_list):
        """
        Use the MarkovLex chain to pick the sentence with the highest
        grammatical probability.

        choice_list -- non-empty list of sentences (each must support
                       len(), slicing and append()).

        A single candidate is returned as is. Candidates shorter than
        three words are not scored against the table; they get a fixed
        score of 5.0. Ties go to the earliest candidate. Raises
        ValueError when choice_list is empty.
        """
        if len(choice_list) == 1:
            return choice_list[0]

        anchor = None  # EOS word, looked up only if something needs scoring
        scores = []
        for choice in choice_list:
            if len(choice) < 3:
                scores.append(5.0)
                continue
            # We run it through the Markov check, on a copy terminated
            # with the EOS word so the original sentence is untouched.
            if anchor is None:
                anchor = Word.byAppeared_name('EOS')
            anchored = choice[:]
            anchored.append(anchor)
            scores.append(self._generateScore(anchored))

        # We take the best scoring sentence
        self.debug('scores:%s' % scores)
        return choice_list[scores.index(max(scores))]

    def _lexProbability(self, first, second, third):
        """
        Return occurence(first, second, third) divided by the summed
        occurence of every MarkovLex row starting with (first, second),
        or None when the exact trigram has no row.
        """
        trigram_rows = list(MarkovLex.select(
            AND(MarkovLex.q.first_lexID == first,
                MarkovLex.q.second_lexID == second,
                MarkovLex.q.third_lexID == third)))
        # Test the materialised list, not the lazy select result.
        if not trigram_rows:
            return None
        bigram_rows = MarkovLex.select(
            AND(MarkovLex.q.first_lexID == first,
                MarkovLex.q.second_lexID == second))
        total_occurences = sum(row.occurence for row in bigram_rows)
        return float(trigram_rows[0].occurence) / float(total_occurences)

    def _generateScore(self, sentence, score=None, position=0):
        """
        Score the sentence using the Markov lex chain.

        sentence -- sequence of words exposing ``main_type.id``.
        score    -- optional list of probabilities gathered so far; it
                    is extended in place. A fresh list is used when
                    omitted, so separate calls never share state.
        position -- index of the first word of the first trigram.

        Returns the average trigram probability, or 0 when a trigram
        is missing from the table or no trigram could be scored.
        """
        if score is None:
            score = []

        # Slide a three-word window until the third lex id is 1 (the
        # end marker) or the sentence runs out of words.
        while position + 2 < len(sentence):
            first, second, third = [
                sentence[position + offset].main_type.id
                for offset in range(3)
            ]
            prob = self._lexProbability(first, second, third)
            if prob is None:
                return 0
            score.append(prob)
            if third == 1:
                # We are done
                break
            # We continue the chain
            position += 1

        if not score:
            return 0
        return sum(score) / float(len(score))

    def generateMargChain(self, key_word):
        """
        Return a Margarine chain as a list of word ids.

        Inspired by the Open Source project Margarine, which uses a
        concept similar to the Markov chain. Marg chains go both ways,
        starting from the keyword.

        key_word -- id of the word the chain is built around.

        Returns None when key_word is falsy or never appears as the
        middle word of a Markov row. Word id 1 is treated as the
        boundary marker in both directions and is not included.
        """
        if not key_word:
            return None
        base_margs = list(Markov.select(Markov.q.second_wordID == key_word))
        if not base_margs:
            return None
        # base is random for now!!
        base = random.choice(base_margs)

        # Create the forward chain (keyword -> end)
        second_word = base.second_word.id
        third_word = base.third_word.id
        second_half = [second_word]
        # Loop until it hits the end marker (id 1) or 12 words
        while third_word != 1 and len(second_half) < 12:
            second_half.append(third_word)
            # Shift the window forward
            first_word, second_word = second_word, third_word
            hits = list(Markov.select(
                AND(Markov.q.first_wordID == first_word,
                    Markov.q.second_wordID == second_word)))
            if not hits:
                # Dead end: nothing follows this pair, stop here.
                break
            link = random.choice(hits)
            second_word = link.second_word.id
            third_word = link.third_word.id

        # Next the reverse chain (keyword -> start)
        # NOTE: unlike the forward walk, this loop has no length cap.
        first_half = []
        first_word = base.first_word.id
        second_word = base.second_word.id
        while first_word != 1:
            first_half.append(first_word)
            hits = list(Markov.select(
                AND(Markov.q.second_wordID == first_word,
                    Markov.q.third_wordID == second_word)))
            if not hits:
                # Dead end: nothing precedes this pair, stop here.
                break
            link = random.choice(hits)
            first_word = link.first_word.id
            second_word = link.second_word.id

        # Merge the halves together
        first_half.reverse()
        first_half.extend(second_half)
        return first_half

    def pickBestChoice(self, choice_list):
        """
        Return the most grammatically sound choice from the list.

        choice_list -- Markov rows exposing first_word, second_word and
                       third_word, each with ``main_type.id``.

        A single choice is returned as is; an empty list yields ''.
        A choice whose lex trigram has no MarkovLex row scores 0.0.
        Ties go to the earliest choice.
        """
        if len(choice_list) == 1:
            return choice_list[0]

        probabilities = []
        for choice in choice_list:
            prob = self._lexProbability(
                choice.first_word.main_type.id,
                choice.second_word.main_type.id,
                choice.third_word.main_type.id)
            probabilities.append(0.0 if prob is None else prob)

        if not probabilities:
            return ''
        return choice_list[probabilities.index(max(probabilities))]

    def debug(self, debug_string):
        """
        Output debug string (only when self.DEBUG equals 1).
        """
        if self.DEBUG == 1:
            # Single-argument form prints identically on Python 2.
            print(debug_string)
        return None