Exemple #1
0
    def ask(self, question):
        """Generate replies to `question`, yielding one reply per sentence.

        If `question` equals the most recently stored input (both compared
        through clean_string), a single response picked from
        self.repetitiondb is yielded, with '----' replaced by the quoted
        previous output, and the generator ends without touching history.

        Otherwise the language is detected. When it differs from
        self.lastlang, the question is translated under both language
        assumptions and the candidate with the higher ratio_correct score
        replaces the question. If check_for_end(question) is true,
        self.running is set to False and nothing is yielded.

        Each sentence is offered to self.handlers in order; the first handler
        whose check() passes and whose answer() does not raise
        HALcannotHandle supplies the reply, otherwise a random entry of
        self.generic is used. The reply then goes through macro substitution
        and the optional semantic, name and translation steps.

        The speech output and the updates of self.previn / self.prevout only
        happen once the generator has been consumed to the end.
        """
        # Repetition: the user said the same thing as last time.
        if self.previn and clean_string(question) == clean_string(self.previn[0]):
            template = random.choice(self.repetitiondb)
            quoted_previous = "'%s'" % ' '.join(self.prevout[0])
            yield self.macro.subst(template.replace('----', quoted_previous))
            return

        lang = detect_lang(question, self.lastlang)
        if lang is None:
            lang = self.lastlang
        if lang != self.lastlang:
            # Translate under both assumptions and keep whichever scores
            # higher; max() keeps the first candidate on a tie.
            as_detected = translate(question, lang).encode('utf-8')
            as_previous = translate(question, self.lastlang).encode('utf-8')
            candidates = [
                (lang, ratio_correct(as_detected), as_detected),
                (self.lastlang, ratio_correct(as_previous), as_previous),
            ]
            lang, _, question = max(candidates, key=lambda cand: cand[1])

        is_foreign = lang != 'en'

        if question is None:
            return

        if check_for_end(question):
            self.running = False
            return

        answers = []
        for sentence in sentence_split(question):
            reply = None
            for handler in self.handlers:
                if not handler.check(sentence):
                    continue
                try:
                    reply = handler.answer(sentence)
                except HALcannotHandle:
                    # This handler declined; try the next one.
                    continue
                # First handler that answers without raising wins, even if
                # its answer is None (the generic fallback then applies).
                break
            if reply is None:
                reply = random.choice(self.generic)
            reply = self.macro.subst(reply)
            if self.semantics:
                # NOTE(review): this rebinds `lang`, which is used as the
                # translation argument below - confirm that is intended.
                lang, _, reply = HALtran.transform(reply)
            if self.rndname:
                reply = rndname(reply, self.user)
            if is_foreign:
                # Presumably translates the English reply back into the
                # user's language - verify against translate()'s signature.
                reply = translate(reply, 'en', lang)
            yield reply
            answers.append(reply)

        self.do_speech(' '.join(answers))
        self.previn.appendleft(question)
        self.prevout.appendleft(answers)
Exemple #2
0
def _wiki_section_key(page, head):
    """Return the key for a section: the page title, plus the heading if any."""
    if head is None:
        return page
    return '%s %s' % (page, head)


def wikipedia(key):
    """Return a short summary for `key` taken from an English Wikipedia page.

    Searches Bing for "<key> wikipedia", takes the first en.wikipedia.org
    article link found in the response, and fetches that article through
    cached_fetch_wiki (used here as returning None or a (title, text) pair).
    The text is split into sections at lines starting with '=', and
    most_relavent picks the section key that best matches `key`.

    Returns:
        At most the first two sentences of the first prose-looking line of
        the chosen section, with a trailing '.' added when the text does not
        already end in punctuation. May be an empty string when no suitable
        line exists. None when no Wikipedia link is found or the article
        text is unavailable.
    """
    # Line-leading characters treated as wiki markup rather than prose.
    markup_prefixes = '|<{}[]~!@#$%^&*()_+=-\\`/:;.,<>?'

    url = 'http://www.bing.com/search?q=%s' % quote_plus(key + ' wikipedia')
    response = urllib2.urlopen(url)
    try:
        bing = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()

    # Accept both http and https links; Wikipedia links are normally https.
    titles = re.findall(r'https?://en\.wikipedia\.org/wiki/([^"#]+?)"', bing)
    if not titles:
        return None

    fetched = cached_fetch_wiki(titles[0].replace('_', ' '))
    if fetched is None:
        return None
    page, text = fetched
    if text is None:
        return None

    # Split the article into sections keyed by "<page>" (the lead) or
    # "<page> <heading>"; a repeated heading overwrites the earlier section.
    pages = {}
    head = None
    body = []
    for line in text.split('\n'):
        if line and line[0] == '=':
            pages[_wiki_section_key(page, head)] = ''.join(body)
            head = line.replace('=', '').strip()
            body = []
        elif line and line[0] in markup_prefixes:
            continue
        else:
            body.append(line + '\n')
    pages[_wiki_section_key(page, head)] = ''.join(body)

    selected = most_relavent(pages.keys(), key)

    # First line that is non-empty, does not start with a space or markup
    # character, and does not end in ':' (likely a list introduction).
    non_prose_prefixes = ' ' + markup_prefixes
    first_line = ''
    for line in pages[selected].split('\n'):
        if not line or line[0] in non_prose_prefixes or line.endswith(':'):
            continue
        first_line = line
        break

    sentences = [s for s in sentence_split(clean_wikipedia(first_line))
                 if s and s[0] != '.']
    summary = ' '.join(sentences[:2]).strip()
    if summary and summary[-1] not in '!.,?:':
        summary += '.'
    return summary