def get(self):
    wikipedia.set_lang(u'ru')
    try:
        page = wikipedia.page(u'Проект:Города_России/Списки_улиц/Казани')
        streets = []
        for link in page.links:
            nlink = unicode(link).encode('utf-8').strip().decode('utf-8')
            norm_name = normalize(nlink)
            try:
                street_info = StreetInfo.get_by_norm_name(norm_name)
                if not street_info:
                    street_info = StreetInfo()
                    street_page = wikipedia.page(nlink)
                    street_info.name = nlink
                    street_info.norm_name = norm_name
                    street_info.info = unicode(street_page.summary).encode('utf-8').strip()
                    street_info.images = [Image(url=x) for x in street_page.images]
                    street_info.city = u'Казань'.encode('utf-8').strip()
                    street_info.lang = u'ru'.encode('utf-8').strip()
                    street_info.put()
            except Exception as e:
                print nlink.encode('utf-8')
    except DeadlineExceededError:
        pass
    self.response.headers['Content-Type'] = "text/html; charset=utf-8"
    self.response.write(json.dumps({'success': True}))
def test_redirect_normalization(self):
    """Test that a page redirect loads correctly with or without a query normalization"""
    capital_party = wikipedia.page("Communist Party", auto_suggest=False)
    lower_party = wikipedia.page("communist Party", auto_suggest=False)

    self.assertIsInstance(capital_party, wikipedia.WikipediaPage)
    self.assertIsInstance(lower_party, wikipedia.WikipediaPage)
    self.assertEqual(capital_party.title, "Communist party")
    self.assertEqual(capital_party, lower_party)
def get(self):
    wikipedia.set_lang(u"ru")
    page = wikipedia.page(u"Проект:Города_России/Списки_улиц/Казани")
    streets = []
    for link in page.links:
        nlink = unicode(link).encode("utf-8").strip()
        try:
            street_page = wikipedia.page(nlink)
            streets.append({
                "name": nlink,
                "info": street_page.summary,
                "images": street_page.images,
                "city": u"Казань",
            })
        except Exception as e:
            print nlink
def wiki(bot, event, *args):
    """
    *Wikipedia:*
    Usage: /wiki <keywords to search for> <optional: sentences to display [defaults to 3]>
    Purpose: Get summary from Wikipedia on keywords.
    """
    from wikipedia import wikipedia, PageError, DisambiguationError

    yield from bot.send_typing(event.conv)

    def summary(self, sentences=3):
        if not getattr(self, '_summary', False):
            query_params = {
                'prop': 'extracts',
                'explaintext': '',
                'exintro': '',
            }
            query_params['exsentences'] = sentences
            if not getattr(self, 'title', None) is None:
                query_params['titles'] = self.title
            else:
                query_params['pageids'] = self.pageid

            request = wikipedia._wiki_request(query_params)
            self._summary = request['query']['pages'][self.pageid]['extract']
        return self._summary

    wikipedia.WikipediaPage.summary = summary

    try:
        sentences = 3
        try:
            if args[-1].isdigit():
                sentences = args[-1]
                args = args[:-1]
            page = wikipedia.page(' '.join(args))
        except DisambiguationError as e:
            page = wikipedia.page(wikipedia.search(e.options[0], results=1)[0])
        segments = [
            hangups.ChatMessageSegment(page.title, hangups.SegmentType.LINK,
                                       is_bold=True, link_target=page.url),
            hangups.ChatMessageSegment('\n', hangups.SegmentType.LINE_BREAK),
            hangups.ChatMessageSegment(page.summary(sentences=sentences))
        ]
        yield from bot.send_message_segments(event.conv, segments)
    except PageError:
        yield from bot.send_message(
            event.conv,
            "Couldn't find \"{}\". Try something else.".format(' '.join(args)))
def test_disambiguate(self):
    """Test that page raises an error when a disambiguation page is reached."""
    try:
        ram = wikipedia.page("Dodge Ram (disambiguation)", auto_suggest=False, redirect=False)
        error_raised = False
    except wikipedia.DisambiguationError as e:
        error_raised = True
        options = e.options

    self.assertTrue(error_raised)
    self.assertEqual(
        options,
        [
            u"Dodge Ramcharger",
            u"Dodge Ram Van",
            u"Dodge Mini Ram",
            u"Dodge Caravan C/V",
            u"Dodge Caravan C/V",
            u"Ram C/V",
            u"Dodge Ram 50",
            u"Dodge D-Series",
            u"Dodge Rampage",
            u"Ram (brand)",
        ],
    )
def main():
    _tmpb = get_biographies(CATEGORY, STARTTIME, ENDTIME)
    print(str(len(_tmpb)))
    if len(_tmpb) == 0:
        sys.exit()
    table_data = {}
    tableservice = table_service()
    requests.packages.urllib3.disable_warnings()
    for r in _tmpb:
        try:
            print(r)
            p = wikipedia.page(title=None, pageid=str(r['pageid']))
            _revs = [r for r in get_revisions(str(p.pageid)) if "delet" in str(r)]
            # table_data.update({str(r['pageid']): {'PAGEID': str(p.pageid), 'TOUCHED': str(p.touched), 'URL': str(p.url), 'TITLE': str(p.title)}})
            _task = create_task(str(DATASET_MARKER), str(r['timestamp']), str(p.pageid),
                                str(random.randint(100000, 99999999)), str(p.pageid),
                                str(p.title), _revs, str(p.url))
            print(_task)
            tableservice.insert_entity(AZURE_TABLE, _task)
        except Exception as e:
            print("Error: %s" % e)
            continue
def test_auto_suggest(self):
    """Test that auto_suggest properly corrects a typo."""
    # yum, butter.
    butterfly = wikipedia.page("butteryfly")

    self.assertEqual(butterfly.title, "Butterfly")
    self.assertEqual(butterfly.url, "http://en.wikipedia.org/wiki/Butterfly")
def test_outlinks(self):
    links = self.dao.fetch_outlinks("Paris", 15)
    all_links = (wiki.page("Paris")).links

    self.assertEqual(len(links), len(set(links)))
    for link in links:
        self.assertIn(link, all_links)
    self.assertEqual(len(links), 15)
def test_redirect_true(self):
    """Test that a page successfully redirects a query."""
    # no error should be raised if redirect is True
    mp = wikipedia.page("Menlo Park, New Jersey")

    self.assertEqual(mp.title, "Edison, New Jersey")
    self.assertEqual(mp.url, "http://en.wikipedia.org/wiki/Edison,_New_Jersey")
def test_find_icd_section_title(self):
    wikipedia_client: WikipediaClient = WikipediaClient("en")
    parsed_response_content: dict = wikipedia_client.search_title("ICD-10")
    icd_list_page_title: str = parsed_response_content["query"]["search"][0]["title"]
    icd_list_page_html: str = str(wikipedia.page(icd_list_page_title).html())
    disease_group_page_title: str = HtmlParser.find_icd_section_title(icd_list_page_html, "E10.3")
    self.assertEqual("ICD-10 Chapter IV: Endocrine, nutritional and metabolic diseases",
                     disease_group_page_title)
def title(query: str) -> str:
    wikipedia.set_lang('en')
    search = wikipedia.search(query)[0]
    return json.dumps(
        filterResult(
            wikipedia.page(
                search['title'])))
def tell_me_about(topic):
    try:
        ny = wikipedia.page(topic)
        res = str(ny.content[:500].encode('utf-8'))
        return res
    except Exception as e:
        print(e)
        return "Sorry sir"
def import_images():
    image_collection.remove(source='wiki')
    wikipedia.set_lang('ru')
    root_page = wikipedia.page('Экспонаты эрмитажа')
    for link in root_page.links:
        import_images_from_page(link)
def pageid(query: str) -> str:
    print(query)
    wikipedia.set_lang('en')
    search = wikipedia.search(query)[0]
    return json.dumps(
        filterResult(
            wikipedia.page(
                None, search['pageid'])))
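# The filterResult helper used by title() and pageid() above (and geo() further below)
# is not defined in these snippets. A minimal sketch of what such a helper could look
# like is given here; the function name comes from the snippets, but the selected
# fields are assumptions, not the project's actual implementation.
def filterResult(page):
    # Reduce a WikipediaPage to a small JSON-serializable dict.
    return {
        'pageid': page.pageid,
        'title': page.title,
        'url': page.url,
        'summary': page.summary,
    }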
def wiki(bot, event, *args):
    """
    **Wikipedia:**
    Usage: /wiki <keywords to search for> <optional: sentences to display [defaults to 3]>
    Purpose: Get summary from Wikipedia on keywords.
    """
    from wikipedia import wikipedia, PageError, DisambiguationError

    def summary(self, sentences=3):
        if not getattr(self, '_summary', False):
            query_params = {
                'prop': 'extracts',
                'explaintext': '',
                'exintro': '',
            }
            query_params['exsentences'] = sentences
            if not getattr(self, 'title', None) is None:
                query_params['titles'] = self.title
            else:
                query_params['pageids'] = self.pageid

            request = wikipedia._wiki_request(query_params)
            self._summary = request['query']['pages'][self.pageid]['extract']
        return self._summary

    wikipedia.WikipediaPage.summary = summary

    try:
        sentences = 3
        try:
            if args[-1].isdigit():
                sentences = args[-1]
                args = args[:-1]
            page = wikipedia.page(' '.join(args))
        except DisambiguationError as e:
            page = wikipedia.page(wikipedia.search(e.options[0], results=1)[0])
        segments = [
            hangups.ChatMessageSegment(page.title, hangups.SegmentType.LINK,
                                       is_bold=True, link_target=page.url),
            hangups.ChatMessageSegment('\n', hangups.SegmentType.LINE_BREAK),
            hangups.ChatMessageSegment(page.summary(sentences=sentences))]
        bot.send_message_segments(event.conv, segments)
    except PageError:
        bot.send_message(event.conv,
                         "Couldn't find \"{}\". Try something else.".format(' '.join(args)))
def test_find_disease_name_and_link(self):
    wikipedia_client: WikipediaClient = WikipediaClient("en")
    chapter_title: str = "ICD-10 Chapter IV: Endocrine, nutritional and metabolic diseases"
    parsed_response_content: dict = wikipedia_client.search_title(chapter_title)
    icd_disease_group_page_title: str = parsed_response_content["query"]["search"][0]["title"]
    icd_disease_group_page_html: str = str(wikipedia.page(icd_disease_group_page_title).html())
    link, title = HtmlParser.find_disease_name_and_link(icd_disease_group_page_html, "E10.3")
    self.assertEqual("/wiki/Diabetic_retinopathy", link)
    self.assertEqual("Diabetic retinopathy", title)
def _get_icd_chapter_article_page(self, title: str) -> str:
    if title in self.wikipedia_pages_cache:
        return self.wikipedia_pages_cache[title]
    result: str = str(wikipedia.page(title).html())
    self.wikipedia_pages_cache[title] = result
    if len(self.wikipedia_pages_cache) > 4:
        # Removes oldest page in cache
        self.wikipedia_pages_cache.popitem(False)
    return result
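# popitem(False) above evicts entries in insertion order, which requires an ordered
# mapping. A minimal sketch of the assumed cache setup follows; the holder class and
# initialization shown here are assumptions based on the snippet, not the project's
# actual code.
from collections import OrderedDict


class WikipediaPageCacheHolder:  # hypothetical owner of wikipedia_pages_cache
    def __init__(self) -> None:
        # OrderedDict.popitem(last=False) removes the oldest inserted entry,
        # giving the small FIFO cache used by _get_icd_chapter_article_page.
        self.wikipedia_pages_cache: "OrderedDict[str, str]" = OrderedDict()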
def test_disambiguate(self):
    """Test that page raises an error when a disambiguation page is reached."""
    try:
        ram = wikipedia.page("Dodge Ram (disambiguation)", auto_suggest=False, redirect=False)
        error_raised = False
    except wikipedia.DisambiguationError as e:
        error_raised = True
        options = e.options

    self.assertTrue(error_raised)
    self.assertEqual(options, [u'Dodge Ramcharger', u'Dodge Ram Van', u'Dodge Mini Ram',
                               u'Dodge Caravan C/V', u'Dodge Caravan C/V', u'Ram C/V',
                               u'Dodge Ram 50', u'Dodge D-Series', u'Dodge Rampage',
                               u'Ram (brand)'])
def wikiSearch(self, command):
    # Get what the user is trying to search for after the keywords
    reg_ex = re.search('tell me about (.*)', command)
    try:
        if reg_ex:
            topic = reg_ex.group(1)
            wikiResponse = wikipedia.page(topic)
            s.AIResponse(str(wikiResponse.content[:500].encode('utf-8')))
    except Exception as e:
        print(e)
async def w(self, ctx, *, sq: str):
    try:
        s = wikipedia.search(sq, results=1)
    except Exception:
        await ctx.send("I didn't find anything.")
    else:
        try:
            # search() returns a list of titles; load the top hit
            p = wikipedia.page(s[0])
        except Exception:
            await ctx.send("Hmm. Can you be a little more specific?")
        else:
            await ctx.send(p.url)
def import_images_from_page(title):
    print("Importing from [" + title + "]")
    try:
        p = wikipedia.page(title)
    except wikipedia.PageError as e:
        print("could not load the page: " + str(e))
        return

    query_params = {
        'generator': 'images',
        'gimlimit': 'max',
        'prop': 'imageinfo',
        'iiprop': 'url',
        'titles': p.title,
    }
    try:
        request = wikipedia._wiki_request(**query_params)
        image_keys = request['query']['pages'].keys()
        images = (request['query']['pages'][key] for key in image_keys)
        urls_and_desc = filter(
            lambda x: re.search(r'(?:jpg|jpeg)$', x[0].lower()),
            ((image['imageinfo'][0]['url'], image['imageinfo'][0]['descriptionurl'])
             for image in images if image.get('imageinfo'))
        )
    except (KeyError, URLError) as e:
        print("could not load page images: " + str(e))
        return

    processed = set()
    for item in urls_and_desc:
        if item[0] in processed:
            continue
        match = re.search(r'File:(.*?)(?:[0-9]{3})?\.(?:jpg|jpeg)$', unquote(item[1]))
        if match is None:
            continue
        file_title = re.sub(r'[_-]+', ' ', match.group(1)).strip()
        image = Image.create_from_dict({
            'title': file_title,
            'image_url': item[0],
            'description_url': item[1],
            'source': 'wiki',
        })
        image_collection.insert(image)
        processed.add(item[0])
def get(self):
    self.response.headers["Content-Type"] = "text/html; charset=utf-8"
    wikipedia.set_lang(u"ru")
    page = wikipedia.page(u"Проект:Города_России/Списки_улиц/Казани")
    text = page.content
    alphabet = re.split("\n\n\n==\s*...\s*==\n", text)
    byline = []
    for line in alphabet:
        byline.append(re.split("\n", line))
    byline.remove(byline[0])
    # byline[0]   - names with number
    # byline[1..] - names with corresponding letter
    for line in byline[0]:
        self.response.write(json.dumps(line))
def has_date(item):
    try:
        _page = wikipedia.page(title=None, pageid=item)
        _dt = datetime.strptime(_page.touched, '%Y-%m-%dT%H:%M:%SZ')
        if _dt.year >= 2015:
            _revs = get_revisions(_page.pageid, False)
            _tmpdt = datetime.strptime(_revs[0]['timestamp'], '%Y-%m-%dT%H:%M:%SZ')
            if _tmpdt.year >= 2015 and _tmpdt.year <= 2017:
                return _page
            else:
                return ""
        else:
            return ""
    except:
        return "Error"
def extract_actor_from_wikipedia(lastname, firstname):
    wikipedia.set_lang("fr")
    searchs = wikipedia.search(lastname + " " + firstname)
    for search in searchs:
        page = wikipedia.page(search)
        rc = {"links": list({"title": "wikipedia", "url": page.url})}
        if lastname in page.title and firstname in page.title:
            rc = dict({"links": [], "name": firstname + " " + lastname})
            for img in page.images:
                if img.endswith(".jpg"):
                    rc["photo"] = img

            save_domains = ["unifrance.org", "www.lefilmfrancais", "www.allocine.fr",
                            "catalogue.bnf.fr", "www.allmovie.com"]
            libs = ["UniFrance", "Le Film Francais", "Allocine", "La BNF", "All movie"]
            try:
                for ref in page.references:
                    domain = urlparse(ref).netloc
                    try:
                        idx = save_domains.index(domain)
                        rc["links"].append({"title": libs[idx], "url": ref})
                    except:
                        pass
            except:
                pass

            html: wikipedia.BeautifulSoup = wikipedia.BeautifulSoup(page.html(), "html5lib")
            # Look for the films section
            # for link in html.findAll('a', attrs={'href': wikipedia.re.compile("^http://")}):
            #     if "film" in link.text:
            #         pass

            rc["summary"] = page.summary
            rc["title"] = page.title
            rc["url"] = page.url
            return rc
    return None
def get_articles(lat, lon):
    """
    :type lat: str
    :type lon: str
    :return: list of dicts representing articles
    """
    # Use a really large radius, in case we are very far away from anywhere.
    # Results are sorted by distance and limited, so that works fine.
    radius = 20000  # Upper limit
    landmark_articles = wikilocation.articles(lat, lon, radius, 10, "landmark")
    # event_articles = wikilocation.articles(lat, lon, radius, 5, "event")
    if len(landmark_articles) == 0:
        OLD_STREET_ROUNDABOUT = ("51.525603", "-0.087558")
        lat, lon = OLD_STREET_ROUNDABOUT
        landmark_articles = wikilocation.articles(lat, lon, radius, 10, "landmark")

    # wikilocation_articles = event_articles + landmark_articles
    # wikilocation_articles = random.sample(wikilocation_articles, 5)
    # wikilocation_articles = _interleave(landmark_articles, event_articles)
    wikilocation_articles = landmark_articles
    wikilocation_articles = _remove_lists(wikilocation_articles)

    articles = []
    for wikilocation_article in wikilocation_articles:
        article = {}
        title = wikilocation_article["title"]
        article["title"] = title
        # first_sentence = wikipedia.summary(title, sentences=1)
        page = wikipedia.page(title)
        # article["first_sentence"] = first_sentence
        article["summary"] = page.summary
        article["image"] = "http://upload.wikimedia.org/wikipedia/commons/3/3c/Stonehenge2007_07_30.jpg"
        article["url"] = page.url
        articles.append(article)
    return articles
def get_articles(lat, lon):
    """
    :type lat: str
    :type lon: str
    :return: list of dicts representing articles
    """
    # Use a really large radius, in case we are very far away from anywhere.
    # Results are sorted by distance and limited, so that works fine.
    radius = 20000  # Upper limit
    landmark_articles = wikilocation.articles(lat, lon, radius, 10, "landmark")
    # event_articles = wikilocation.articles(lat, lon, radius, 5, "event")
    if len(landmark_articles) == 0:
        OLD_STREET_ROUNDABOUT = ("51.525603", "-0.087558")
        lat, lon = OLD_STREET_ROUNDABOUT
        landmark_articles = wikilocation.articles(lat, lon, radius, 10, "landmark")

    # wikilocation_articles = event_articles + landmark_articles
    # wikilocation_articles = random.sample(wikilocation_articles, 5)
    # wikilocation_articles = _interleave(landmark_articles, event_articles)
    wikilocation_articles = landmark_articles
    wikilocation_articles = _remove_lists(wikilocation_articles)

    articles = []
    for wikilocation_article in wikilocation_articles:
        article = {}
        title = wikilocation_article["title"]
        article["title"] = title
        # first_sentence = wikipedia.summary(title, sentences=1)
        page = wikipedia.page(title)
        # article["first_sentence"] = first_sentence
        article["summary"] = page.summary
        article["image"] = "http://upload.wikimedia.org/wikipedia/commons/3/3c/Stonehenge2007_07_30.jpg"
        article["url"] = page.url
        articles.append(article)
    return articles
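# _remove_lists() is referenced by get_articles() but not defined in these snippets.
# A plausible minimal sketch is given below; the filtering rule (dropping "List of ..."
# articles, which make poor point-of-interest summaries) is an assumption inferred from
# the name, not the project's actual implementation.
def _remove_lists(articles):
    # Keep only articles whose titles do not look like Wikipedia list pages.
    return [a for a in articles if not a["title"].startswith("List of")]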
def extract(self, request: ExtractorRequest) -> ExtractorResponse:
    try:
        # the last URL path segment should be our page name, e.g. "Patellar_dislocation"
        page_name = request.url.split("/")[-1]
        page = wikipedia.page(page_name)
        text = page.content
        meta = {
            "source": "wikipedia",
            "source_url": page.url,
            "title": page.title,
            "summary": page.summary,
            "images": page.images,
            "references": page.references,
        }
        # construct response
        response_meta = {**(request.meta or {}), **meta}
        response = ExtractorResponse(meta=response_meta, text=text or "")
    except Exception as e:
        msg = f"Error using wikipedia extractor: {str(e)}"
        log.error(msg)
        response = ExtractorResponse(error=msg)
    return response
def test_redirect_false(self):
    """Test that page raises an error on a redirect when redirect == False."""
    mp = lambda: wikipedia.page("Menlo Park, New Jersey", auto_suggest=False, redirect=False)

    self.assertRaises(wikipedia.RedirectError, mp)
def setUp(self):
    # one of the shortest wikipedia articles that includes images
    self.celtuce = wikipedia.page("Celtuce")
def test_missing(self):
    """Test that page raises a PageError for a nonexistent page."""
    # Callicarpa?
    purpleberry = lambda: wikipedia.page("purpleberry", auto_suggest=False)
    self.assertRaises(wikipedia.PageError, purpleberry)
def test_disambiguation_error_page_function(self):
    with self.assertRaises(Exception) as context:
        wikipedia.page("New York")

    self.assertFalse(
        "wikipedia.exceptions.DisambiguationError: \"{0}\" may refer to: \n{1}"
        in str(context.exception))
def setUp(self):
    # shortest wikipedia articles with images and sections
    self.celtuce = wikipedia.page("Celtuce")
    self.cyclone = wikipedia.page("Tropical Depression Ten (2005)")
    self.great_wall_of_china = wikipedia.page("Great Wall of China")
def test_from_page_id(self):
    """Test loading from a page id"""
    self.assertEqual(self.celtuce, wikipedia.page(pageid=1868108))
print 'layers down = ' + str(args.NumLayers)
print 'sentences in = ' + str(args.NumSentences)

if args.NumLayers < 0 or args.NumLayers > 10:
    print 'Too many or too few layers'
    exit()
if args.NumSentences < 1 or args.NumSentences > 10:
    print 'Too many or too few sentences'
    exit()

# randomly choose a page to start at from the list
pageName = choice(SOURCE_PAGE_NAMES)

# get the page
page = wikipedia.page(pageName)

# for each depth to traverse, randomly choose a link to go down
page = traverseDepth(page, args.NumLayers)

# randomly choose a section. Keep trying until you find one that has content
# (they may be empty). Don't try toooo many times
sectionTitle = ''
section = ''
remaining = len(page.sections)
while remaining > 0:
    print '.'
    sectionTitle = choice(page.sections)
    section = page.section(sectionTitle)
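# traverseDepth() is used above but not defined in this fragment. Going by the comment
# "for each depth to traverse, randomly choose a link to go down", a minimal sketch
# could look like the following; the exact behaviour (error handling, auto_suggest,
# skipping dead links) is an assumption.
def traverseDepth(page, depth):
    for _ in range(depth):
        # pick a random outgoing link and load that page
        page = wikipedia.page(choice(page.links))
    return page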
def get(self):
    return wikipedia_api.page(self._number)
def setUp(self):
    # shortest wikipedia articles with images and sections
    self.celtuce = wikipedia.page("Celtuce")
    self.cyclone = wikipedia.page("Tropical Depression Ten (2005)")
def test_something3(self):
    x = wikipedia.page('Cathode Rays', auto_suggest=False)
    print x.pageid
    print x.content.encode('utf-8')
print('\n')
print("Digging for company info...")
a = len(dive)
b = 0
c = []
for thing in dive:
    b = b + 1
    sys.stdout.write('\r')
    sys.stdout.write('%.0f%% complete' % (b / a * 100, ))
    sys.stdout.flush()
    thing = str(thing)
    # manually storing desired URL
    url1 = wikipedia.page(thing)
    url2 = url1.url
    c.append(url2)
    req = requests.get(url2)
    store = etree.fromstring(req.text)
    output = store.xpath('//*[@id="mw-content-text"]/div/table[1]/tbody/tr[11]')
    # print(output)

print('\n')
for thing in c:
    print(thing)

# '//*[@id="mw-content-text"]/div/table[1]/tbody/tr[17]/th'
# '//*[@id="mw-content-text"]/div/table[1]/tbody/tr[17]/td'
# '//*[@id="mw-content-text"]/div/table[1]/tbody/tr[11]/td/span/text()'
def test(): form = LoginForm() if form.validate_on_submit(): flash(form.openid.data, 'Question') text = form.openid.data.lower() data = form.openid.data.lower() data1 = form.openid.data text = text.split() negator = [ 'not', 'never', 'not possible', 'does not', 'abort', 'neither', 'nor', 'no', 'negative', 'negate' ] assertor = ['may be', 'can be', 'not sure', 'might', 'may'] preposition = [ 'have', 'is', 'are', 'about', 'above', 'across', 'after', 'against', 'along', 'among', 'around', 'at', 'before', 'behind', 'below', 'beneath', 'beside', 'between', 'by', 'down', 'during', 'except', 'for', 'from', 'front', 'inside', 'instead', 'into', 'like', 'near', 'of', 'off', 'on', 'onto', 'top', 'out', 'outside', 'over', 'past', 'since', 'through', 'to', 'toward', 'under', 'underneath', 'until', 'up', 'upon', 'with', 'within', 'without' ] wh = [ 'why', 'what', 'how', 'Who', 'whoever', 'whom', 'whomever', 'whose', 'which' ] pronoun = [ 'i', 'me', 'you', 'she', 'her', 'he', 'him', 'it', 'we', 'us', 'you', 'they', 'them', 'my', 'mine', 'your', 'yours', 'hers', 'his', 'its', 'yours', 'ours', 'theirs', 'myself', 'yourself', 'himself', 'herself', 'itself', 'all', 'another', 'any', 'anybody', 'anyone', 'anything', 'both', 'each', 'either', 'everybody', 'everyone', 'everything', 'few', 'many', 'neither', 'nobody', 'none', 'nothing', 'one', 'several', 'some', 'somebody', 'someone', 'something', 'this', 'that', 'these', 'those' ] # Removing Wh Question wh_q = '' for ser in text: inflag = 0 for w in wh: if w == ser: inflag = 1 if inflag == 0: wh_q = wh_q + ser + ' ' # Removing Prepostion wh_q = wh_q.split() prep_q = '' for ser in wh_q: inflag = 0 for prep in preposition: if ser == prep: inflag = 1 if inflag == 0: prep_q = prep_q + ser + ' ' # Removing Pronoun prep_q = prep_q.split() pro_q = '' for ser in prep_q: inflag = 0 for pro in pronoun: if ser == pro: inflag = 1 if inflag == 0: pro_q = pro_q + ser + ' ' text = pro_q text = text.split() data = pro_q.strip() flag = 0 answer = 0 wikiflag = 0 ans = 0 data = '' asser = 0 nege = 0 posi = 0 #Assertive Section for ser in text: inflag = 0 for ass in assertor: if ser == ass and flag == 0 or data.find( ass) != -1 and flag == 0: inflag = 1 asser = 1 flash('Assertive', 'Answer') flag = 1 if inflag == 0: data = data + ser + ' ' if asser == 1: data = data.strip() abc = models.Assertive.query.all() for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1 ) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1 if answer == 0: flash( 'Answer not in database... Lets search Wikipedia Database', 'Answer') wikiflag = 1 #return redirect ('http://www.lmgtfy.com/?q=' + data) else: finalans = a.answer flash(a.answer, 'Answer') #Negative Section if asser == 0: data = '' for ser in text: inflag = 0 for neg in negator: if ser == neg and flag == 0 or data.find( neg) != -1 and flag == 0: inflag = 1 nege = 1 flash('Negative', 'Answer') flag = 1 if inflag == 0: data = data + ser + ' ' if nege == 1: data = data.strip() abc = models.Negative.query.all() for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1 ) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1 if answer == 0: flash( 'Answer not in database... 
Lets search Wikipedia Database', 'Answer') wikiflag = 1 #return redirect ('http://www.lmgtfy.com/?q=' + data) else: finalans = a.answer flash(a.answer, 'Answer') #Postive Section if flag == 0: data = form.openid.data.lower() flash('Positive', 'Answer') abc = models.Positive.query.all() for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1 ) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1 if answer == 0: flash( 'Answer not in database... Lets search Wikipedia Database', 'Answer') wikiflag = 1 #return redirect ('http://www.lmgtfy.com/?q=' + data) else: finalans = a.answer flash(a.answer, 'Answer') #Wiki Section ans = 0 if wikiflag == 1: abc = models.Wikipedia.query.all() for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1 ) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1 if answer == 0: flash( 'Answer not in Wikipedia database... Lets search Wikipedia Internet', 'Answer') ny = wikipedia.search(data) if ny == []: return redirect('http://www.lmgtfy.com/?q=' + data1) else: try: ny1 = wikipedia.summary(data1, chars=0, auto_suggest=True, redirect=True, sentences=3) finalans = ny1 flash(ny1, 'Answer') ny2 = wikipedia.page(data1) flash('Source: ' + ny2.url, 'Answer') #u = models.Wikipedia(question=data, answer=ny1) #db.session.add(u) #db.session.commit() except Exception as inst: flash( 'Your question is either out of scope of very trival for me to answer', 'Answer') finalans = 'Your question is either out of scope of very trival for me to answer' else: finalans = a.answer flash(a.answer, 'Answer') display = '\n' s = models.Chats.query.all() for chat in reversed(s): flash('Question: ' + chat.question, 'Display') flash('Answer: ' + chat.answer, 'Display') flash('.', 'Display') u = models.Chats(question=data1, answer=finalans) db.session.add(u) db.session.commit() return redirect('/test') return render_template("index2.html", title='ChatterBot', form=form)
def test_redirect_with_normalization(self):
    """Test that a page redirect with a normalized query loads correctly"""
    the_party = wikipedia.page("communist Party", auto_suggest=False)
    self.assertIsInstance(the_party, wikipedia.WikipediaPage)
    self.assertEqual(the_party.title, "Communist party")
def wiki_response(request_text):
    try:
        txt = str(wikipedia.page(request_text).content[:1000])
    except Exception:
        # "Nothing was found for this query."
        txt = 'По данному запросу ничего не найдено.'
    return txt
def test(): form = LoginForm() if form.validate_on_submit(): flash(form.openid.data , 'Question') text = form.openid.data.lower() data = form.openid.data.lower() data1 = form.openid.data text = text.split() negator = ['not', 'never', 'not possible', 'does not', 'abort', 'neither', 'nor', 'no', 'negative', 'negate'] assertor = ['may be', 'can be', 'not sure', 'might', 'may'] preposition = ['have', 'is', 'are', 'about', 'above', 'across', 'after', 'against', 'along', 'among', 'around', 'at', 'before', 'behind', 'below', 'beneath', 'beside', 'between', 'by', 'down', 'during', 'except', 'for', 'from', 'front', 'inside', 'instead', 'into', 'like', 'near', 'of', 'off', 'on', 'onto', 'top', 'out', 'outside', 'over', 'past', 'since', 'through', 'to', 'toward', 'under', 'underneath', 'until', 'up', 'upon', 'with', 'within', 'without'] wh = ['why', 'what', 'how', 'Who', 'whoever', 'whom', 'whomever', 'whose', 'which'] pronoun = ['i', 'me', 'you', 'she', 'her', 'he', 'him', 'it', 'we', 'us', 'you', 'they', 'them', 'my', 'mine', 'your', 'yours', 'hers', 'his', 'its', 'yours', 'ours', 'theirs', 'myself', 'yourself', 'himself', 'herself', 'itself', 'all', 'another', 'any', 'anybody', 'anyone', 'anything', 'both', 'each', 'either', 'everybody', 'everyone', 'everything', 'few', 'many', 'neither', 'nobody', 'none', 'nothing', 'one', 'several', 'some', 'somebody', 'someone', 'something', 'this', 'that', 'these', 'those'] # Removing Wh Question wh_q='' for ser in text: inflag = 0 for w in wh: if w == ser: inflag = 1 if inflag == 0: wh_q = wh_q + ser + ' ' # Removing Prepostion wh_q = wh_q.split() prep_q = '' for ser in wh_q: inflag = 0 for prep in preposition: if ser == prep: inflag = 1 if inflag == 0: prep_q = prep_q + ser + ' ' # Removing Pronoun prep_q = prep_q.split() pro_q = '' for ser in prep_q: inflag = 0 for pro in pronoun: if ser == pro: inflag = 1 if inflag == 0: pro_q = pro_q + ser + ' ' text = pro_q text = text.split() data = pro_q.strip() flag = 0 answer = 0 wikiflag = 0 ans = 0 data = '' asser = 0 nege = 0 posi = 0 #Assertive Section for ser in text: inflag = 0 for ass in assertor: if ser == ass and flag == 0 or data.find(ass) != -1 and flag == 0: inflag = 1 asser = 1 flash('Assertive', 'Answer') flag=1 if inflag == 0: data = data + ser + ' ' if asser == 1: data = data.strip() abc = models.Assertive.query.all() for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1 if answer == 0: flash('Answer not in database... Lets search Wikipedia Database', 'Answer') wikiflag = 1 #return redirect ('http://www.lmgtfy.com/?q=' + data) else: finalans=a.answer flash(a.answer, 'Answer') #Negative Section if asser == 0: data = '' for ser in text: inflag = 0 for neg in negator: if ser == neg and flag == 0 or data.find(neg) != -1 and flag == 0: inflag = 1 nege = 1 flash('Negative', 'Answer') flag = 1 if inflag == 0: data = data + ser + ' ' if nege == 1: data = data.strip() abc = models.Negative.query.all() for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1 if answer == 0: flash('Answer not in database... 
Lets search Wikipedia Database', 'Answer') wikiflag = 1 #return redirect ('http://www.lmgtfy.com/?q=' + data) else: finalans=a.answer flash(a.answer, 'Answer') #Postive Section if flag == 0: data = form.openid.data.lower() flash('Positive', 'Answer') abc = models.Positive.query.all() for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1 if answer == 0: flash('Answer not in database... Lets search Wikipedia Database', 'Answer') wikiflag = 1 #return redirect ('http://www.lmgtfy.com/?q=' + data) else: finalans=a.answer flash(a.answer, 'Answer') #Wiki Section ans = 0 if wikiflag == 1: abc = models.Wikipedia.query.all() for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1 if answer == 0: flash('Answer not in Wikipedia database... Lets search Wikipedia Internet', 'Answer') ny = wikipedia.search(data) if ny == []: return redirect ('http://www.lmgtfy.com/?q=' + data1) else: try: ny1 = wikipedia.summary(data1, chars=0, auto_suggest=True, redirect=True, sentences=3) finalans=ny1 flash(ny1, 'Answer') ny2 = wikipedia.page(data1) flash('Source: '+ ny2.url, 'Answer') #u = models.Wikipedia(question=data, answer=ny1) #db.session.add(u) #db.session.commit() except Exception as inst: flash('Your question is either out of scope of very trival for me to answer', 'Answer') finalans = 'Your question is either out of scope of very trival for me to answer' else: finalans=a.answer flash(a.answer, 'Answer') display = '\n' s = models.Chats.query.all() for chat in reversed(s): flash('Question: ' + chat.question, 'Display') flash('Answer: ' + chat.answer , 'Display') flash('.', 'Display') u = models.Chats(question=data1, answer=finalans) db.session.add(u) db.session.commit() return redirect('/test') return render_template("index2.html", title = 'ChatterBot', form = form)
def test(): form = LoginForm() if form.validate_on_submit(): flash(form.openid.data , 'Question') text = form.openid.data.lower() data = form.openid.data.lower() # for processing of answer(data mining) data1 = form.openid.data # for finding verbs nouns adjectives and number text = text.split() # for finding positive negative and assertive # Finding Nouns tokenized = nltk.word_tokenize(data1) p = nltk.pos_tag(tokenized) flash(p, 'Answer') name = nltk.ne_chunk(p, binary=True) ent = re.findall(r'NE\s(.*?)/', str(name)) chunkGram = r"""Noun: {<NN\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) NNnoun = chunkParser.parse(p) ip_noun = re.findall(r'Noun\s(.*?)/', str(NNnoun)) #noun = re.findall(r'<NN\w?>*', str(p)) #print ent #nouns = '' #for n in ip_noun: # nouns = nouns + n + ' ' #flash ('Nouns: ' + str(nouns), 'Answer') flash ('Nouns list: ' + str(ip_noun), 'Answer') # Finding Verbs tokenized = nltk.word_tokenize(data1) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<VB\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) VBverb = chunkParser.parse(p) ip_verb = re.findall(r'Verb\s(.*?)/', str(VBverb)) #noun = re.findall(r'<NN\w?>*', str(p)) #print ent #verbs = '' #for v in ip_verb: # verbs = verbs + v + ' ' #flash ('Verbs: ' + str(verbs), 'Answer') flash ('Verb List: ' + str(ip_verb), 'Answer') # Finding Adjective tokenized = nltk.word_tokenize(data1) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<JJ\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) JJAdj = chunkParser.parse(p) ip_adj = re.findall(r'Verb\s(.*?)/', str(JJAdj)) #noun = re.findall(r'<NN\w?>*', str(p)) #print ent #adjs = '' #for a in ip_adj: # adjs = adjs + a + ' ' #flash ('Ajectives: ' + str(adjs), 'Answer') flash ('Adjective list: ' + str(ip_adj), 'Answer') # Finding Numbers tokenized = nltk.word_tokenize(data1) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Number: {<CD\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) CDNumber = chunkParser.parse(p) ip_number = re.findall(r'Number\s(.*?)/', str(CDNumber)) flash ('Number list: ' + str(ip_number), 'Answer') max_check = len(ip_noun) + len(ip_verb) + len(ip_adj) + len(ip_number) #counting the number of max hits # Similar Noun Form simi = models.Similar.query.all() count_n = len(ip_noun) max_n = 0 for noun_sim in ip_noun: for sim in simi: if sim.word1 == noun_sim: ip_noun.append(str(sim.word2)) ip_noun.append(str(sim.word3)) if sim.word2 == noun_sim: ip_noun.append(str(sim.word1)) ip_noun.append(str(sim.word3)) if sim.word3 == noun_sim: ip_noun.append(str(sim.word1)) ip_noun.append(str(sim.word2)) max_n = max_n + 1 if max_n >= count_n: break # Similar Verb Form simi = models.Similar.query.all() count_v = len(ip_verb) max_v = 0 for verb_sim in ip_verb: for sim in simi: if sim.word1 == verb_sim: ip_verb.append(str(sim.word2)) ip_verb.append(str(sim.word3)) if sim.word2 == verb_sim: ip_verb.append(str(sim.word1)) ip_verb.append(str(sim.word3)) if sim.word3 == verb_sim: ip_verb.append(str(sim.word1)) ip_verb.append(str(sim.word2)) max_v = max_v + 1 if max_v >= count_v: break # Similar Adjective Form simi = models.Similar.query.all() count_a = len(ip_adj) max_a = 0 for adj_sim in ip_adj: for sim in simi: if sim.word1 == adj_sim: ip_adj.append(str(sim.word2)) ip_adj.append(str(sim.word3)) if sim.word2 == adj_sim: ip_adj.append(str(sim.word1)) ip_adj.append(str(sim.word3)) if sim.word3 == adj_sim: ip_adj.append(str(sim.word1)) ip_adj.append(str(sim.word2)) max_a = 
max_a + 1 if max_a >= count_a: break #Printing the new appended list flash ('Nouns list: ' + str(ip_noun), 'Answer') flash ('Verb List: ' + str(ip_verb), 'Answer') flash ('Adjective list: ' + str(ip_adj), 'Answer') flash ('Number list: ' + str(ip_number), 'Answer') ip_total = ip_noun + ip_verb + ip_adj + ip_number ip_total = list(set(ip_total)) negator = ['not', 'never', 'not possible', 'does not', 'abort', 'neither', 'nor', 'negative', 'negate', 'can\'t', 'doesn\'t','can not','cant','doesnt','dont','don\'t'] assertor = ['may be', 'can be', 'not sure', 'might', 'may'] '''preposition = ['have', 'is', 'are', 'about', 'above', 'across', 'after', 'against', 'along', 'among', 'around', 'at', 'before', 'behind', 'below', 'beneath', 'beside', 'between', 'by', 'down', 'during', 'except', 'for', 'from', 'front', 'inside', 'instead', 'into', 'like', 'near', 'of', 'off', 'on', 'onto', 'top', 'out', 'outside', 'over', 'past', 'since', 'through', 'to', 'toward', 'under', 'underneath', 'until', 'up', 'upon', 'with', 'within', 'without'] wh = ['why', 'what', 'how', 'Who', 'whoever', 'whom', 'whomever', 'whose', 'which'] pronoun = ['i', 'me', 'you', 'she', 'her', 'he', 'him', 'it', 'we', 'us', 'you', 'they', 'them', 'my', 'mine', 'your', 'yours', 'hers', 'his', 'its', 'yours', 'ours', 'theirs', 'myself', 'yourself', 'himself', 'herself', 'itself', 'all', 'another', 'any', 'anybody', 'anyone', 'anything', 'both', 'each', 'either', 'everybody', 'everyone', 'everything', 'few', 'many', 'neither', 'nobody', 'none', 'nothing', 'one', 'several', 'some', 'somebody', 'someone', 'something', 'this', 'that', 'these', 'those'] # Removing Wh Question wh_q='' for ser in text: inflag = 0 for w in wh: if w == ser: inflag = 1 if inflag == 0: wh_q = wh_q + ser + ' ' # Removing Prepostion wh_q = wh_q.split() prep_q = '' for ser in wh_q: inflag = 0 for prep in preposition: if ser == prep: inflag = 1 if inflag == 0: prep_q = prep_q + ser + ' ' # Removing Pronoun prep_q = prep_q.split() pro_q = '' for ser in prep_q: inflag = 0 for pro in pronoun: if ser == pro: inflag = 1 if inflag == 0: pro_q = pro_q + ser + ' ' text = pro_q text = text.split() data = pro_q.strip() ''' flag = 0 answer = 0 wikiflag = 0 ans = 0 asser = 0 nege = 0 posi = 0 #Assertive Section for ser in text: for ass in assertor: if ser == ass and flag == 0 or data.find(ass) != -1 and flag == 0: asser = 1 flash('Assertive', 'Answer') flag=1 if asser == 1: display_ans = '' max_value = int(max_check * 0.8 + 0.5) # counting the no of hits abc = models.Positive.query.all() for a in abc: # Noun tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) ent = re.findall(r'NE\s(.*?)/', str(name)) chunkGram = r"""Noun: {<NN\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) NNnoun = chunkParser.parse(p) db_noun = re.findall(r'Noun\s(.*?)/', str(NNnoun)) # Verbs tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<VB\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) VBverb = chunkParser.parse(p) db_verb = re.findall(r'Verb\s(.*?)/', str(VBverb)) # Adjective tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<JJ\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) JJAdj = chunkParser.parse(p) db_adj = re.findall(r'Verb\s(.*?)/', str(JJAdj)) # Number tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram 
= r"""Number: {<CD\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) CDNumber = chunkParser.parse(p) db_number = re.findall(r'Number\s(.*?)/', str(CDNumber)) db_total = db_noun + db_adj + db_verb + db_number db_total = list(set(db_total)) count = 0 for ip in ip_total: for dbs in db_total: db_plural = re.escape(dbs) + 's?' ip_plural = re.escape(ip) + 's?' if re.match(db_plural, ip,flags=0|re.IGNORECASE): count = count + 1 if re.match(ip_plural,dbs,flags=0|re.IGNORECASE): count = count + 1 if ip == dbs: count = count - 1 if max_value < count: display_ans = a.answer max_value = count if display_ans == '': answer = 0 else: answer = 1 if answer == 0: flash('Answer not in database... Lets search Wikipedia Database', 'Answer') wikiflag = 1 else: extra = 'Please be more sure about the problem you are facing so that we can provide you with precise answers. According to me the most relevant solution to your problem is: ' display_ans = extra + '\n' + display_ans flash(display_ans, 'Answer') """for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1 if answer == 0: flash('Answer not in database... Lets search Wikipedia Database', 'Answer') wikiflag = 1 #return redirect ('http://www.lmgtfy.com/?q=' + data) else: finalans=a.answer flash(a.answer, 'Answer')""" #Negative Section if asser != 1: for ser in text: for neg in negator: if ser == neg and flag == 0 or data.find(neg) != -1 and flag == 0: nege = 1 flash('Negative', 'Answer') flag = 1 if nege == 1: display_ans = '' max_value = int(max_check * 0.8 + 0.5) # counting the no of hits abc = models.Negative.query.all() for a in abc: # Noun tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) ent = re.findall(r'NE\s(.*?)/', str(name)) chunkGram = r"""Noun: {<NN\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) NNnoun = chunkParser.parse(p) db_noun = re.findall(r'Noun\s(.*?)/', str(NNnoun)) # Verbs tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<VB\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) VBverb = chunkParser.parse(p) db_verb = re.findall(r'Verb\s(.*?)/', str(VBverb)) # Adjective tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<JJ\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) JJAdj = chunkParser.parse(p) db_adj = re.findall(r'Verb\s(.*?)/', str(JJAdj)) # Number tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Number: {<CD\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) CDNumber = chunkParser.parse(p) db_number = re.findall(r'Number\s(.*?)/', str(CDNumber)) db_total = db_noun + db_adj + db_verb + db_number db_total = list(set(db_total)) count = 0 for ip in ip_total: for dbs in db_total: db_plural = re.escape(dbs) + 's?' ip_plural = re.escape(ip) + 's?' if re.match(db_plural, ip,flags=0|re.IGNORECASE): count = count + 1 if re.match(ip_plural,dbs,flags=0|re.IGNORECASE): count = count + 1 if ip == dbs: count = count - 1 if max_value < count: display_ans = a.answer max_value = count if display_ans == '': answer = 0 else: answer = 1 if answer == 0: flash('Answer not in database... 
Lets search Wikipedia Database', 'Answer') wikiflag = 1 else: flash(display_ans, 'Answer') """for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1 if answer == 0: flash('Answer not in database... Lets search Wikipedia Database', 'Answer') wikiflag = 1 #return redirect ('http://www.lmgtfy.com/?q=' + data) else: finalans=a.answer flash(a.answer, 'Answer')""" #Postive Section if asser != 1 and nege != 1: if flag == 0: data = form.openid.data.lower() flash('Positive', 'Answer') flag = 1 display_ans = '' max_value = int(max_check * 0.8 + 0.5) # counting the no of hits abc = models.Positive.query.all() for a in abc: # Noun tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) ent = re.findall(r'NE\s(.*?)/', str(name)) chunkGram = r"""Noun: {<NN\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) NNnoun = chunkParser.parse(p) db_noun = re.findall(r'Noun\s(.*?)/', str(NNnoun)) # Verbs tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<VB\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) VBverb = chunkParser.parse(p) db_verb = re.findall(r'Verb\s(.*?)/', str(VBverb)) # Adjective tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<JJ\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) JJAdj = chunkParser.parse(p) db_adj = re.findall(r'Verb\s(.*?)/', str(JJAdj)) # Number tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Number: {<CD\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) CDNumber = chunkParser.parse(p) db_number = re.findall(r'Number\s(.*?)/', str(CDNumber)) db_total = db_noun + db_adj + db_verb + db_number db_total = list(set(db_total)) count = 0 for ip in ip_total: for dbs in db_total: db_plural = re.escape(dbs) + 's?' ip_plural = re.escape(ip) + 's?' if re.match(db_plural, ip,flags=0|re.IGNORECASE): count = count + 1 if re.match(ip_plural,dbs,flags=0|re.IGNORECASE): count = count + 1 if ip == dbs: count = count - 1 if max_value < count: display_ans = a.answer max_value = count if display_ans == '': answer = 0 else: answer = 1 if answer == 0: flash('Answer not in database... Lets search Wikipedia Database', 'Answer') wikiflag = 1 else: flash(display_ans, 'Answer') """abc = models.Positive.query.all() for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1 if answer == 0: flash('Answer not in database... 
Lets search Wikipedia Database', 'Answer') wikiflag = 1 #return redirect ('http://www.lmgtfy.com/?q=' + data) else: finalans=a.answer flash(a.answer, 'Answer')""" #Wiki Section ans = 0 if wikiflag == 1: display_ans = '' max_value = int(max_check * 0.8 + 0.5) # counting the no of hits abc = models.Wikipedia.query.all() for a in abc: # Noun tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) ent = re.findall(r'NE\s(.*?)/', str(name)) chunkGram = r"""Noun: {<NN\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) NNnoun = chunkParser.parse(p) db_noun = re.findall(r'Noun\s(.*?)/', str(NNnoun)) # Verbs tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<VB\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) VBverb = chunkParser.parse(p) db_verb = re.findall(r'Verb\s(.*?)/', str(VBverb)) # Adjective tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<JJ\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) JJAdj = chunkParser.parse(p) db_adj = re.findall(r'Verb\s(.*?)/', str(JJAdj)) # Number tokenized = nltk.word_tokenize(a.question) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Number: {<CD\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) CDNumber = chunkParser.parse(p) db_number = re.findall(r'Number\s(.*?)/', str(CDNumber)) db_total = db_noun + db_adj + db_verb + db_number db_total = list(set(db_total)) count = 0 for ip in ip_total: for dbs in db_total: db_plural = re.escape(dbs) + 's?' ip_plural = re.escape(ip) + 's?' if re.match(db_plural, ip,flags=0|re.IGNORECASE): count = count + 1 if re.match(ip_plural,dbs,flags=0|re.IGNORECASE): count = count + 1 if ip == dbs: count = count - 1 if max_value < count: display_ans = a.answer max_value = count if display_ans == '': answer = 0 else: answer = 1 """abc = models.Wikipedia.query.all() for a in abc: if (data.find(a.question.lower()) != -1 or a.question.lower().find(data) != -1) and len(data) >= 4: ans = 1 break if ans == 0: answer = 0 else: answer = 1""" if answer == 0: flash('Answer not in Wikipedia database... 
Lets search Wikipedia Internet', 'Answer') ny = wikipedia.search(data) if ny == []: return redirect ('http://www.lmgtfy.com/?q=' + data1) else: try: ny1 = wikipedia.summary(data1, chars=0, auto_suggest=True, redirect=True, sentences=3) max_value = int(max_check * 0.8 + 0.5) ip_wiki = ny1.encode('ascii','ignore') # Noun tokenized = nltk.word_tokenize(ip_wiki) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) ent = re.findall(r'NE\s(.*?)/', str(name)) chunkGram = r"""Noun: {<NN\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) NNnoun = chunkParser.parse(p) db_noun = re.findall(r'Noun\s(.*?)/', str(NNnoun)) # Verbs tokenized = nltk.word_tokenize(ip_wiki) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<VB\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) VBverb = chunkParser.parse(p) db_verb = re.findall(r'Verb\s(.*?)/', str(VBverb)) # Adjective tokenized = nltk.word_tokenize(ip_wiki) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Verb: {<JJ\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) JJAdj = chunkParser.parse(p) db_adj = re.findall(r'Verb\s(.*?)/', str(JJAdj)) # Number tokenized = nltk.word_tokenize(ip_wiki) p = nltk.pos_tag(tokenized) name = nltk.ne_chunk(p, binary=True) chunkGram = r"""Number: {<CD\w?>} """ chunkParser = nltk.RegexpParser(chunkGram) CDNumber = chunkParser.parse(p) db_number = re.findall(r'Number\s(.*?)/', str(CDNumber)) db_total = db_noun + db_adj + db_verb + db_number db_total = list(set(db_total)) count = 0 for ip in ip_total: for dbs in db_total: db_plural = re.escape(dbs) + 's?' ip_plural = re.escape(ip) + 's?' if re.match(db_plural, ip,flags=0|re.IGNORECASE): count = count + 1 if re.match(ip_plural,dbs,flags=0|re.IGNORECASE): count = count + 1 if ip == dbs: count = count - 1 if max_value <= count: display_ans = ny1 if display_ans == '': answer = 0 else: answer = 1 if answer == 0: flash('Answer not precise in wikipedia Interet', 'Answer') flash(ny1, 'Answer') wikiflag = 1 else: display_ans=ny1 flash(ny1, 'Answer') ny2 = wikipedia.page(data1) flash('Source: '+ ny2.url, 'Answer') #u = models.Wikipedia(question=data1, answer=ny1) #db.session.add(u) #db.session.commit() except Exception as inst: flash('Your question is either out of scope of very trival for me to answer ' + str(inst), 'Answer') display_ans = 'Your question is either out of scope of very trival for me to answer' else: flash(display_ans, 'Answer') #s = models.Chats.query.all() #for chat in reversed(s): #flash('Question: ' + chat.question, 'Display') #flash('Answer: ' + chat.answer , 'Display') #flash('.', 'Display') #u = models.Chats(question=data1, answer=display_ans) #db.session.add(u) #db.session.commit() return redirect('/test') return render_template("index2.html", title = 'ChatterBot', form = form)
def test_redirect_false(self):
    """Test that page raises an error on a redirect when redirect == False."""
    mp = lambda: wikipedia.page(
        "Menlo Park, New Jersey", auto_suggest=False, redirect=False)

    self.assertRaises(wikipedia.RedirectError, mp)
def test_something_else(self):
    print wikipedia.page('Menlo Park, New Jersey', auto_suggest=False)
    if isinstance(claims, dict):
        for super_type in claims.get('P31', []):
            super_type_value = super_type['mainsnak']['datavalue']['value']['numeric-id']
            if super_type_value in (571, 191067, 35760, 7725634):
                return True
            else:
                item_value = u'Q{}'.format(super_type_value)
                item_label = dictionary_lookup(item_value)
    else:
        print u'non-dict claims in {}'.format(wd_id)
except ValueError:
    return False


pgmatch = re.compile(r"gutenberg.org/(etext|ebooks|files)/(\d+)")

# this file will contain candidate pages. some of these will not be exactly what we want.
fname = './metadata/pg-wikipedia-candidates.txt'
with open(fname, 'w') as f:
    for result in embeds('Gutenberg', results=10000):
        wd_id = get_wikidata_id(result['title'])
        if is_book(wd_id):
            pg_ids = []
            page = wikipedia.page(pageid=result['pageid'])
            for match in pgmatch.findall(page.html()):
                if match[1] not in pg_ids:
                    pg_ids.append(match[1])
                    line = u'{}\t{}\t{}'.format(result['title'], wd_id, int(match[1]))
                    print line
                    f.write(line.encode('UTF-8'))
                    f.write('\r')
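# embeds() is called above but not defined in this fragment. It appears to yield pages
# that transclude a given template (here {{Gutenberg}}). A minimal sketch using the
# MediaWiki "embeddedin" list API is shown below; the endpoint wrapper and parameter
# choices are assumptions, not the original helper.
import requests


def embeds(template, results=500, lang='en'):
    """Yield dicts with 'title' and 'pageid' for pages embedding Template:<template>."""
    url = 'https://{}.wikipedia.org/w/api.php'.format(lang)
    params = {
        'action': 'query',
        'list': 'embeddedin',
        'eititle': 'Template:{}'.format(template),
        'eilimit': 500,
        'format': 'json',
    }
    yielded = 0
    while yielded < results:
        data = requests.get(url, params=params).json()
        for item in data['query']['embeddedin']:
            yield {'title': item['title'], 'pageid': item['pageid']}
            yielded += 1
            if yielded >= results:
                return
        if 'continue' not in data:
            return
        # follow the API continuation token to fetch the next batch
        params.update(data['continue'])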
def geo(lat: float, lon: float) -> str:
    wikipedia.set_lang('en')
    return json.dumps(
        filterResult(
            wikipedia.page(
                wikipedia.geosearch(lat, lon))))
def test_something(self):
    print wikipedia.page('Carl D. Anderson', auto_suggest=False)