コード例 #1
0
ファイル: Brain.py プロジェクト: japanvik/Juna
class Brain:
    """Juna's brain. Handles messages and tries to make a reply.

    Incoming text is stored through ``learn``; replies are assembled in
    ``getMessageString`` by growing word chains around topic keywords and
    keeping the candidate that scores best against the ``MarkovLex`` table.
    """

    # Id that terminates a chain. The word-level walks stop on it (the
    # original comment calls it "EOF = id(1)") and the lex-level scoring
    # stops when a word's main_type id equals it.
    _BOUNDARY_ID = 1
    # Upper bound on the number of word ids collected by the forward walk.
    _MAX_FORWARD_LENGTH = 12

    def __init__(self, dispatcher):
        """Register the brain's callbacks on *dispatcher* and build helpers.

        dispatcher -- event dispatcher; it must support ``+= Event(...)``
                      and being called as ``dispatcher(name, payload)``.
        """
        self.dispatcher = dispatcher
        #Callbacks
        self.dispatcher += Event('rcv', self.learn)
        self.dispatcher += Event('speak_request', self.getMessageString)

        self.parser = MessageParser()
        self.postprocessor = Postprocessor()
        self.DEBUG = DEBUG

    def learn(self, message, speaker=''):
        """The learning mechanism.

        At the moment it does not really learn per se; it only records the
        message in the database (lex entry, log entry and Markov chains).

        message -- raw text handed over by the 'rcv' event.
        speaker -- accepted for the callback signature; not used here.

        Returns None in every case; messages the parser cannot turn into a
        pseudo sentence are ignored.
        """
        # We take the message and convert it to a pseudo sentence
        pseudo_sentence = self.parser.parseSentence(message)
        if not pseudo_sentence:
            return None
        #Convert the pseudo sentence to a real sentence object
        sentence = Sentence()
        sentence.pseudo2real(pseudo_sentence, increment=True)
        #add the lex entry, creating it on first sight
        lex_string = sentence.lexString()
        try:
            lex = Lex.byEntry(lex_string)
        except SQLObjectNotFound:
            lex = Lex(increment=True, entry=lex_string)

        #add the log entry
        log_string = sentence.logString()
        Log(entry=log_string, lex=lex.id)

        #Finally, the markov table
        sentence.createMarkovChains(increment=True)
        return None

    def getMessageString(self, message_queue, my_queue, topic_queue):
        """This is the main algo to speak.

        message_queue -- texts heard recently (iterable of strings).
        my_queue      -- texts spoken recently (iterable of strings).
        topic_queue   -- keywords to build candidate sentences around.

        When a usable candidate exists, the post-processed text is sent
        through the dispatcher as a 'speak' event. Returns None.
        """
        markov_candidates = []
        if topic_queue:
            #Make Marg Chains from the keywords and convert them to Sentences
            for keyword in topic_queue:
                marg_chain = self.generateMargChain(keyword)
                if marg_chain:
                    sentence = Sentence()
                    sentence.createFromIDs(marg_chain)
                    markov_candidates.append(sentence)
        if not markov_candidates:
            return None

        #Filter out things we spoke/heard recently.
        recent = set(message_queue) | set(my_queue)
        fresh_texts = list(
            set([x.readable() for x in markov_candidates]) - recent)
        if not fresh_texts:
            return None

        #fresh_texts is real text, so we convert it back to Sentence objects
        final_candidates = []
        for text in fresh_texts:
            pseudo_sentence = self.parser.parseSentence(text)
            if not pseudo_sentence:
                # learn() shows the parser can give a falsy result; such a
                # candidate cannot be rebuilt, so it is dropped.
                continue
            candidate = Sentence()
            candidate.pseudo2real(pseudo_sentence)
            final_candidates.append(candidate)
        if not final_candidates:
            return None

        #Do the grammar check
        best_choice = self.checkGrammar(final_candidates)
        #Postprocess the output
        final_output = self.postprocessor.postProcess(best_choice.readable())
        self.debug('final_output:%s' % final_output)
        #Dispatch the final output
        self.dispatcher('speak', final_output)
        return None

    def checkGrammar(self, choice_list):
        """Pick the sentence with the highest grammatical probability.

        choice_list -- non-empty list of sentence objects that support
                       ``len()`` and slicing.

        A single candidate is returned untouched. Candidates shorter than
        three words get a fixed score of 5.0, which is above the 0..1 range
        of the averaged probabilities, so they always win over scored ones.
        Longer candidates are scored with ``_generateScore`` after an 'EOS'
        word has been appended to a copy. The first best score wins ties.
        """
        if len(choice_list) == 1:
            return choice_list[0]
        scores = []
        anchor = None
        for choice in choice_list:
            if len(choice) < 3:
                scores.append(5.0)
                continue
            if anchor is None:
                # Looked up once, and only when a candidate needs scoring.
                anchor = Word.byAppeared_name('EOS')
            #We run it through the markov check
            padded = choice[:]
            padded.append(anchor)
            scores.append(self._generateScore(padded))
        self.debug('scores:%s' % (scores,))
        #We take the best scoring sentence
        return choice_list[scores.index(max(scores))]

    def _lexTransitionProbability(self, first, second, third):
        """Return P(third | first, second) from the MarkovLex table.

        The probability is the occurrence count of the (first, second,
        third) row divided by the summed occurrence of every row starting
        with (first, second). Returns None when the triple is not in the
        table or the summed occurrence is zero.
        """
        # Materialise the rows before testing for emptiness.
        # NOTE(review): truthiness of the lazy select result itself is not
        # a reliable "has rows" test -- confirm against SQLObject docs.
        exact = list(MarkovLex.select(
            AND(MarkovLex.q.first_lexID == first,
                MarkovLex.q.second_lexID == second,
                MarkovLex.q.third_lexID == third)))
        if not exact:
            return None
        siblings = MarkovLex.select(
            AND(MarkovLex.q.first_lexID == first,
                MarkovLex.q.second_lexID == second))
        total_occurences = sum([row.occurence for row in siblings])
        if not total_occurences:
            return None
        return float(exact[0].occurence) / float(total_occurences)

    def _generateScore(self, sentence, score=None, position=0):
        """Score the sentence using the MarkovLex chain.

        sentence -- sequence of words exposing ``main_type.id``.
        score    -- optional list to which the probabilities are appended;
                    a fresh list is used for every call when omitted, so
                    separate sentences never share probabilities.
        position -- index of the first word of the first triple to score.

        Returns the average of the collected probabilities once a triple
        ends on the boundary id, or when the sentence runs out of words.
        Returns 0 as soon as a triple is unknown to the table, or when
        nothing could be scored at all.
        """
        if score is None:
            score = []
        while position + 2 < len(sentence):
            first = sentence[position].main_type.id
            second = sentence[position + 1].main_type.id
            third = sentence[position + 2].main_type.id
            prob = self._lexTransitionProbability(first, second, third)
            if prob is None:
                return 0
            score.append(prob)
            if third == self._BOUNDARY_ID:
                break
            #We continue the chain
            position += 1
        if not score:
            return 0
        #We are done so we return the average score
        return sum(score) / float(len(score))

    def generateMargChain(self, key_word):
        """Return a Margarine Chain as a list of word ids.

        Inspired by the Open Source project Margarine, which uses a
        concept similar to the Markov Chain. MargChains go both ways,
        starting from the keyword.

        key_word -- word id used as the middle word of the seed triple.

        Returns None when *key_word* is falsy or no Markov row has it as
        its second word; otherwise the ids from sentence start to sentence
        end. A walk that reaches a pair with no continuation stops there.
        """
        if not key_word:
            return None
        base_margs = list(Markov.select(Markov.q.second_wordID == key_word))
        if not base_margs:
            return None

        #base is random for now!!
        base = random.choice(base_margs)

        # Create the forward chain (keyword -> end)
        second_word = base.second_word.id
        third_word = base.third_word.id
        second_half = [second_word]
        while (third_word != self._BOUNDARY_ID
               and len(second_half) < self._MAX_FORWARD_LENGTH):
            second_half.append(third_word)
            #swap things forward
            first_word, second_word = second_word, third_word
            hits = list(Markov.select(
                AND(Markov.q.first_wordID == first_word,
                    Markov.q.second_wordID == second_word)))
            if not hits:
                # Dead end: nothing was ever seen after this pair.
                break
            choice = random.choice(hits)
            second_word = choice.second_word.id
            third_word = choice.third_word.id

        # Next the reverse chain (keyword -> start).
        # Unlike the forward walk, this direction has no length cap.
        first_half = []
        first_word = base.first_word.id
        second_word = base.second_word.id
        while first_word != self._BOUNDARY_ID:
            first_half.append(first_word)
            hits = list(Markov.select(
                AND(Markov.q.second_wordID == first_word,
                    Markov.q.third_wordID == second_word)))
            if not hits:
                # Dead end: nothing was ever seen before this pair.
                break
            choice = random.choice(hits)
            first_word = choice.first_word.id
            second_word = choice.second_word.id

        #Merge the halves together
        first_half.reverse()
        first_half.extend(second_half)
        return first_half

    def pickBestChoice(self, choice_list):
        """Return the most grammatically sound choice from the list.

        choice_list -- Markov rows exposing ``first_word``, ``second_word``
                       and ``third_word`` (each with ``main_type.id``).

        A single choice is returned as is and an empty list yields ''.
        A choice whose lex triple is unknown to MarkovLex counts as
        probability 0.0. The first best probability wins ties.
        """
        if len(choice_list) == 1:
            return choice_list[0]
        probabilities = []
        for choice in choice_list:
            prob = self._lexTransitionProbability(
                choice.first_word.main_type.id,
                choice.second_word.main_type.id,
                choice.third_word.main_type.id)
            if prob is None:
                prob = 0.0
            probabilities.append(prob)
        if not probabilities:
            return ''
        return choice_list[probabilities.index(max(probabilities))]

    def debug(self, debug_string):
        """Output debug string when ``self.DEBUG`` equals 1.

        Always returns None.
        """
        if self.DEBUG == 1:
            # Single-argument call form prints the same on Python 2 and 3.
            print(debug_string)
        return None