def ask(self, question):
    """Answer *question* sentence by sentence, yielding each answer string.

    Side effects: records the question/answers in self.previn/self.prevout,
    speaks the joined answers via self.do_speech, and clears self.running
    when the user asks to end the session.
    """
    # Repetition guard: if the user repeats the previous question verbatim
    # (after normalisation), reply with a canned "you already asked" line.
    if self.previn and clean_string(question) == clean_string(self.previn[0]):
        reply = random.choice(self.repetitiondb)
        reply = reply.replace('----', "'%s'" % ' '.join(self.prevout[0]))
        yield self.macro.subst(reply)
        return

    answers = []

    # Detect the question's language, falling back to the previous one.
    lang = detect_lang(question, self.lastlang)
    if lang is None:
        lang = self.lastlang

    if lang != self.lastlang:
        # Ambiguous language switch: translate under both hypotheses and
        # keep whichever translation scores as more plausible English-side.
        tran = translate(question, lang).encode('utf-8')
        lasttran = translate(question, self.lastlang).encode('utf-8')
        candidates = [
            (lang, ratio_correct(tran), tran),
            (self.lastlang, ratio_correct(lasttran), lasttran),
        ]
        lang, prob, tran = max(candidates, key=lambda c: c[1])
        question = tran

    is_foreign = lang != 'en'

    if question is None:
        return

    # An explicit goodbye stops the main loop.
    if check_for_end(question):
        self.running = False
        return

    for sentence in sentence_split(question):
        # First handler that accepts the sentence and doesn't raise wins.
        answer = None
        for handler in self.handlers:
            if not handler.check(sentence):
                continue
            try:
                candidate = handler.answer(sentence)
            except HALcannotHandle:
                continue
            else:
                answer = candidate
                break
        if answer is None:
            # No handler could cope: fall back to a generic response.
            answer = random.choice(self.generic)

        answer = self.macro.subst(answer)
        if self.semantics:
            lang, inter, answer = HALtran.transform(answer)
        if self.rndname:
            answer = rndname(answer, self.user)
        if is_foreign:
            # Translate the English answer back into the user's language.
            answer = translate(answer, 'en', lang)

        yield answer
        answers.append(answer)

    self.do_speech(' '.join(answers))
    self.previn.appendleft(question)
    self.prevout.appendleft(answers)
def wikipedia(key):
    """Return a short (~two sentence) Wikipedia summary for *key*, or None.

    Strategy: ask Bing for "<key> wikipedia", take the first en.wikipedia.org
    hit, fetch and section the article, pick the section whose title is most
    relevant to *key*, and return its first usable line trimmed to at most
    two sentences with a closing period.
    """
    bing = urllib2.urlopen(
        'http://www.bing.com/search?q=%s' % quote_plus(key + ' wikipedia')).read()
    matches = re.findall('http://en.wikipedia.org/wiki/([^"#]+?)"', bing)
    if not matches:
        return None
    title = matches[0].replace('_', ' ')

    # Map "page [heading]" -> accumulated section text.
    pages = {}
    for page in [title]:
        head = None
        buf = ''
        fetched = cached_fetch_wiki(page)
        if fetched is None:
            continue
        page, body = fetched
        if body is None:
            continue
        for line in body.split('\n'):
            if line and line[0] == '=':
                # A "== Heading ==" line closes the previous section.
                if head is not None:
                    section_id = '%s %s' % (page, head)
                else:
                    section_id = page
                pages[section_id] = buf
                head = line.replace('=', '').strip()
                buf = ''
            elif line and line[0] in '|<{}[]~!@#$%^&*()_+=-\\`/:;.,<>?':
                # Skip wiki markup / template / table lines.
                continue
            else:
                buf += line + '\n'
        # Flush the final section.
        if head is not None:
            section_id = '%s %s' % (page, head)
        else:
            section_id = page
        pages[section_id] = buf

    if not pages:
        return None

    selected = most_relavent(pages.keys(), key)
    content = pages[selected]

    # First line that isn't blank, markup, or a trailing-colon lead-in.
    firstline = ''
    for line in content.split('\n'):
        if not line:
            continue
        if line[0] in ' |<{}[]~!@#$%^&*()_+=-\\`/:;.,<>?':
            continue
        if line[-1] in ':':
            continue
        firstline = line
        break

    sentences = sentence_split(clean_wikipedia(firstline))
    sentences = [s for s in sentences if s and s[0] != '.']
    summary = ' '.join(sentences[:2]).strip()
    if summary and summary[-1] not in '!.,?:':
        summary += '.'
    return summary