def index(request):
    wiki = wikipediaapi.Wikipedia('ko')
    page_py = wiki.page('파이썬')
    print("Page - Exists: %s" % page_py.exists())
    print("Page - Title: %s" % page_py.title)
    print("Page - Summary: %s" % page_py.summary[0:100])

    wiki = wikipediaapi.Wikipedia(
        language='ko',
        extract_format=wikipediaapi.ExtractFormat.WIKI)
    p_wiki = wiki.page("파이썬")
    print(p_wiki.text)
    # explicit encoding so the Korean text also writes correctly on Windows
    with open("파이썬.txt", "w", encoding="utf-8") as f:
        f.write(p_wiki.text)

    questions = Question.objects.all()
    context = {
        'questions': questions,
    }
    return render(request, "app/index.html", context)
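# A minimal standalone sketch of the wikipediaapi calls used in index() above,
# runnable outside Django (the page title '파이썬' is the one from the view):
import wikipediaapi

wiki = wikipediaapi.Wikipedia('ko')
page = wiki.page('파이썬')
if page.exists():
    print(page.title)          # article title
    print(page.summary[:100])  # first 100 characters of the summary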
def run(self, dispatcher, tracker, domain):
    # what your action should do
    #request = requests.get('http://api.icndb.com/jokes/random').json()  # make an API call
    #joke = request['value']['joke']  # extract a joke from the returned JSON response
    clubname = tracker.get_slot("laligaclubs")

    # ----- wiki extraction -----
    out = "As a bot I am still learning. Can you rephrase your question?"
    if clubname is not None:
        clubname = clubname.lower()
        if "club" not in clubname:
            clubname = clubname + " club"
        wiki_wiki = wikipediaapi.Wikipedia('en')
        page_py = wiki_wiki.page(Wiki.search(clubname)[0])
        if page_py.exists():
            out = page_py.summary[0:300].split('\n')[0]
            # Get the history section
            historysection = page_py.section_by_title("History")
            out = out + ". REGARDING HISTORY: " + historysection.sections[0].text.split('\n')[0]
    # ----- end of wiki extraction -----
    dispatcher.utter_message(out)  # send the message back to the user
    #dispatcher.utter_template("action_clubhistory", tracker, image_url_club=page_py.images[0])
    return []
def parse_category(self, categories):
    wiki_car_categories = dict()
    category_objects = []
    for category in categories:
        for sub_category in categories[category]['category_names']:
            wiki_wiki = wikipediaapi.Wikipedia(language='de', timeout=None)
            if "Category" in sub_category:
                wiki_wiki = wikipediaapi.Wikipedia(language='en', timeout=None)
            page: WikipediaPage = wiki_wiki.page(sub_category)
            page_language: str = page.language
            page_py_category_members = page.categorymembers
            if category not in wiki_car_categories and len(page_py_category_members) > 0:
                wiki_car_categories[category] = dict()
                wiki_car_categories[category][page_language] = page_py_category_members
            # guard against a KeyError when the category was never created
            if category in wiki_car_categories and page_language not in wiki_car_categories[category]:
                wiki_car_categories[category][page_language] = page_py_category_members
            elif len(page_py_category_members) > 0:
                wiki_car_categories[category][page_language].update(page_py_category_members)
            else:
                print("Category: {} is empty".format(sub_category))
        category_objects.append(
            CarCategoryObject(
                category_name_de=categories[category]['de'],
                category_name_en=categories[category]['en'],
                category_short_eu=category,
            ))
    return wiki_car_categories, category_objects
def SearchInWiki(term):
    SearchResults = wikipedia.search(term)  # search the identified noun in wiki
    Results = str(SearchResults)
    ToSearch = SearchResults[0]  # if multiple pages match (e.g. NY city, NY state, NY uni for NY), pick the first result
    #print("this can mean " + Results)
    #print("I hope you mean to find " + ToSearch)
    MyText = ""  # default so the return below cannot raise NameError
    try:
        wiki_wiki = wikipediaapi.Wikipedia('en')
        wiki_html = wikipediaapi.Wikipedia(
            language='en',
            extract_format=wikipediaapi.ExtractFormat.WIKI  # using the Wikipedia API
        )
        page_py = wiki_wiki.page(ToSearch)
        if page_py.exists():
            #print("Found it")
            W_summary = wikipedia.summary(ToSearch)
            FullText = wiki_html.page(ToSearch)  # get all text on that page
            MyText = FullText.text
        else:
            print("Could not find a Wiki page by this title")
    except wikipedia.exceptions.DisambiguationError:
        print("Could mean many things, please be specific")  # handle disambiguation error
    return MyText
def inlinequery(update, context):
    """Handle the inline query."""
    query = update.inline_query.query
    output = re.search(r'^[a-zA-Z]+\Z', query)
    if output:
        wiki_wiki = wikipediaapi.Wikipedia('en')
    else:
        wiki_wiki = wikipediaapi.Wikipedia('fa')
    page_py = wiki_wiki.page(query)
    if page_py.exists():
        wikimsg = page_py.fullurl
        pagetitle = page_py.title
        results = [InlineQueryResultArticle(
            description="Searching for " + query + " in Wikipedia",
            id=uuid4(),
            title=pagetitle,
            input_message_content=InputTextMessageContent(
                message_text=wikimsg))
        ]
        update.inline_query.answer(results)
    else:
        results = [
            InlineQueryResultArticle(
                id=uuid4(),
                title="No results",
                input_message_content=InputTextMessageContent(query)
            )]
        update.inline_query.answer(results)
def beginn(data, intents):
    global lang
    results = None
    # load the user's data into JSON
    datas = json.loads(data)
    # load the NLU data into JSON
    intents = json.loads(intents)
    name = getSlotbyName("name", intents)
    if name is None:
        return False
    lang = intents["lang"]
    # set the language for wikipedia
    wiki_wiki = wikipediaapi.Wikipedia(lang)
    wikipedia.set_lang(lang)
    page_py = wiki_wiki.page(name)
    # try to generate an answer from wikipedia
    try:
        return wikipedia.summary(name, sentences=4).split("\n\n")[0]
    except Exception:
        pass
    # use the second wikipedia library if the first one is not working
    if page_py.exists():
        sentences = page_py.summary[0:-1].split(". ")
        answer = ""
        for x in range(3):
            try:
                answer += sentences[x] + ". "
            except IndexError:
                pass
        return answer.split("\n\n")[0]
    # try to search the English wikipedia for an answer
    try:
        wikipedia.set_lang("en")
        return wikipedia.summary(name, sentences=4).split("\n\n")[0]
    except Exception:
        pass
    # use the second wikipedia library if the first one is not working
    if page_py.exists():
        wiki_wiki = wikipediaapi.Wikipedia('en')
        page_py = wiki_wiki.page(name)
        sentences = page_py.summary[0:-1].split(". ")
        answer = ""
        for x in range(3):
            try:
                answer += sentences[x] + ". "
            except IndexError:
                pass
        return answer.split("\n\n")[0]
    return False
def echo(update, context):
    output = re.search(r'^[a-zA-Z]+\Z', update.message.text)
    if output:
        wiki_wiki = wikipediaapi.Wikipedia('en')
    else:
        wiki_wiki = wikipediaapi.Wikipedia('fa')
    page_py = wiki_wiki.page(update.message.text)
    if page_py.exists():
        wikimsg = page_py.fullurl
        update.message.reply_text(wikimsg)
    else:
        update.message.reply_text("Your search query had no results.")
def return_article(name):
    if isEnglish(name):
        wiki_wiki = wikipediaapi.Wikipedia('en')
        page_py = wiki_wiki.page(name)
        # check for an existing page
        if page_py.exists():  # if the page exists
            sections = return_sections(page_py.sections)
            if "may refer to:" in page_py.text:  # the request hits a disambiguation page
                print("REFER TO")
                return ("/refer", sections)
        # Special cases
        elif name == "/start":  # welcome message
            return ("/start", ["none"])
        elif name == "/lastStatistic":  # get statistic
            print("Last statistic case")
            return ("/lastStatistic", ["none"])
        elif name == "/allStatistic":  # get statistic
            print("All statistic case")
            return ("/allStatistic", ["none"])
        elif name == "/errorStatistic":  # get statistic
            print("Error statistic case")
            return ("/errorStatistic", ["none"])
        else:  # page not found
            return ("/pageNotFound", ["none"])
        return (page_py.text, sections)
    else:  # for the Russian language
        wiki_wiki = wikipediaapi.Wikipedia('ru')
        page_py = wiki_wiki.page(name)
        if page_py.exists():  # if the page exists
            sections = return_sections(page_py.sections)
        else:
            return ("/pageNotFound", ["none"])
        # print(page_py.sections[0])
        # print(page_py.text[:page_py.text.index("\n")])
        if "may refer to:" in page_py.text:
            print("REFER TO")
        return (page_py.text, sections)
def __init__(self):
    # select the language and the format
    self.wiki = wikipediaapi.Wikipedia(
        language='fr',
        extract_format=wikipediaapi.ExtractFormat.WIKI)
    # get the Google key
    self.key = GOOGLE_KEY
class Quiz():
    questionCount = 10
    subjectArea = "history"

    ## information sources
    # wikipedia for simplicity
    start_urls = ['https://www.wikipedia.org/']
    wiki_wiki = wikipediaapi.Wikipedia(
        language='en',
        extract_format=wikipediaapi.ExtractFormat.WIKI
    )
    np_arr = [questionCount, 2]

    def __init__(self):
        # iterate over the Google search results for the generated query
        for j in search(self.queryGenerator(), tld="co.in",
                        num=self.questionCount, stop=10, pause=2):
            pass

    def siteScraper(self):
        return 5

    def questionGenerator(self):
        return 5

    def queryGenerator(self):
        return "history general patton"


if __name__ == "__main__":
    app = Quiz()
def language_detection(self, question):
    lang = "en"
    if question != "":
        lang = detect(question)
    wikipedia.set_lang(lang)
    self.wiki = wikipediaapi.Wikipedia(language=lang)
    return lang
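# A quick sanity check for language_detection(); detect() is assumed to come
# from the langdetect package, which the method above appears to rely on:
from langdetect import detect

print(detect("Bonjour tout le monde"))  # expected: 'fr'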
def get_wiki_langs(url):
    url = urllib.parse.unquote(url)
    parsed = urllib.parse.urlparse(url)
    try:
        lang = parsed.netloc.split(".", maxsplit=1)[0]
    except (AttributeError, IndexError):
        log.exception(f"{parsed.netloc} is incorrect.")
        return None
    wiki = wikipediaapi.Wikipedia(
        language=lang, extract_format=wikipediaapi.ExtractFormat.HTML)
    try:
        page_name = parsed.path.rsplit("/", maxsplit=1)[-1]
    except (AttributeError, IndexError):
        log.exception(f"{parsed.path} is incorrect.")
        return None
    page = wiki.page(page_name)
    my_lang = [(lang, url)]
    try:
        langlinks = page.langlinks
        return list(
            zip(langlinks.keys(),
                [link.fullurl for link in langlinks.values()])) + my_lang
    except KeyError as e:
        log.warning(f"No languages for {url} ({e}).")
        return my_lang
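# Hypothetical call to get_wiki_langs(); the URL below is only an example:
pairs = get_wiki_langs("https://en.wikipedia.org/wiki/Python_(programming_language)")
if pairs:
    for lang, fullurl in pairs:
        print(lang, fullurl)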
def get_wiki_info(self, wiki_url):
    import wikipedia
    import wikipediaapi
    import urllib.parse as urlparse

    wiki_text = ""
    url_segments = wiki_url.rpartition('/')
    if "en.wikipedia.org" == url_segments[2]:
        # the URL carries no page title after the domain
        return wiki_text
    try:
        wikipedia.set_lang("en")
        wikipedia.set_rate_limiting(
            True, min_wait=datetime.timedelta(0, 0, 50000))
        title_path = url_segments[2]
        title = urlparse.unquote(title_path)
        title = title.replace("_", " ")
        wikiWiki = wikipediaapi.Wikipedia('en')
        wiki_page = wikiWiki.page(title)
        #contents += pagePy.summary
        #wiki_page = wikipedia.page(title)
        wiki_text = wiki_page.summary
    except (IndexError, wikipedia.exceptions.WikipediaException):
        pass
    finally:
        return wiki_text
def run(self, dispatcher, tracker, domain):
    # what your action should do
    # to be modified with NLTK to get the capacity if needed
    out = "Huh! Are you asking about a team's stadium? Can you help me by asking in another way? :)"
    stadium = tracker.get_slot("laligaclubs")
    if stadium is not None:
        stadium = stadium.lower()
        if "stadium" not in stadium:
            stadium = stadium + " stadium"
        wiki_wiki = wikipediaapi.Wikipedia('en')
        # prepare the output from the wiki summary
        page_py = wiki_wiki.page(Wiki.search(stadium)[0])
        if page_py.exists():
            number = random.randint(5890, 123123)
            lstofoption = [
                ". and the size of the stadium is " + str(number),
                ". the capacity is " + str(number),
                ". the stadium can take " + str(number),
            ]
            secure_random = random.SystemRandom()  # to be removed - just for now
            concat = secure_random.choice(lstofoption)
            out = page_py.summary[0:300].split('\n')[0]
            out = out + concat + " Person "
    dispatcher.utter_message(out)  # send the message back to the user
    #dispatcher.utter_message("Please click this image " + stadiumpage.images[0])
    return []
def inspect_wikipedia_tree(language="it", depth=2):
    wiki_wiki = wikipediaapi.Wikipedia(language)
    visited_pages = [(INFLUENZA[language], 1)]
    discovered_pages = {INFLUENZA[language]: 1}
    while len(visited_pages) != 0:
        tmp_page, counter = visited_pages.pop()
        if counter >= depth:
            continue
        if ":" in tmp_page:
            continue
        tmp_links = wiki_wiki.page(tmp_page).links
        for l in tmp_links.keys():
            if l not in discovered_pages:
                if counter < depth:
                    discovered_pages[l] = counter
                    visited_pages.append((l, counter + 1))
            else:
                counter_already = discovered_pages[l]
                if counter_already > counter:
                    discovered_pages[l] = counter
                    visited_pages.append([l, counter])
        print(len(discovered_pages), len(visited_pages))
    save_obj(discovered_pages, "{}-{}-influenza".format(language, depth))
    return discovered_pages
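# Assumed usage of inspect_wikipedia_tree(); INFLUENZA and save_obj() are
# defined elsewhere (e.g. INFLUENZA = {"it": "Influenza", "en": "Influenza"}):
pages = inspect_wikipedia_tree(language="it", depth=2)
print(len(pages), "pages discovered")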
def __init__(self, root_page, max_level, load_from_file, file_name,
             level_1_categories=None, level_2_categories=None,
             level_3_categories=None):
    print("Miner: starts")
    if load_from_file:
        print("Miner: loading data from JSON file...")
        self.data = load_JSON_data(file_name + ".json")
    else:
        self.root_page_title = root_page.lower()
        self.main_topic_categories = wikipediaapi.Wikipedia('en').page(
            self.root_page_title).categorymembers
        self.level_1_categories = level_1_categories
        self.level_2_categories = level_2_categories
        self.level_3_categories = level_3_categories
        self.data = None
        self.__init_data()
        print("Miner: mining data...")
        self.mine(max_level=max_level)
        print("Miner: saving data to JSON file...")
        save_JSON_data(self.data, file_name + ".json", beautify=True)
    print("Miner: finished")
def wikipediaContent(self):
    """
    wikipediaContent: Reads the content of a Wikipedia page and returns a
    dictionary of titles and their content.
    Input: null
    Return:
        content (dict): Dictionary whose keys are the section headers on the
        Wikipedia page and whose values are the content of each section.
    """
    content = {}
    toSearch = re.sub(".*/", "", self.url)
    wiki_wiki = wikipediaapi.Wikipedia('en')
    information = wiki_wiki.page(toSearch)
    content["Main"] = information.text.split("\n\n")[0]
    sectionText = information.sections
    for i in range(len(sectionText)):
        if (len(sectionText[i].text) > 3) and \
                (sectionText[i].title != "References") and \
                (sectionText[i].title != "See also") and \
                (sectionText[i].title != "External links"):
            content[sectionText[i].title] = sectionText[i].text
    return content
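# A small companion sketch: wikipediaContent() only reads the top level of
# page.sections, so nested subsections are skipped. Recursing through them
# looks like this (section titles indented by depth):
def print_sections(sections, level=0):
    for section in sections:
        print("%s %s" % ("*" * (level + 1), section.title))
        print_sections(section.sections, level + 1)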
def get_target_article(infile, outfile):
    items = []
    with open(infile) as fin:
        for line in fin:
            items.append(json.loads(line))
    # wiki_en = wikipediaapi.Wikipedia('en')
    wiki_de = wikipediaapi.Wikipedia('de')
    with open(outfile, "w") as fout:
        for item in tqdm(items):
            # if i < 155:
            #     continue
            if item["target_title"]:
                page = wiki_de.page(item["target_title"])
            elif item["answers"]:
                page = wiki_de.page(item["answers"])
                item["target_title"] = item["answers"]
            else:
                page = DummyPage()
            if page.exists():
                item["target_text"] = page.text
                item["categories"] = [x.title() for x in page.categories]
            else:
                item["target_text"] = "page does not exist"
                item["categories"] = "page does not exist"
            json.dump(item, fout, ensure_ascii=False)
            fout.write("\n")
            fout.flush()
def open_page():
    global supporting_characters, main_characters
    wiki = wikipediaapi.Wikipedia('en')
    mutcd = wiki.page('List of Game of Thrones characters')
    supporting_characters = mutcd.section_by_title(
        'Supporting characters').sections
    main_characters = mutcd.section_by_title('Main characters').sections
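# Sketch of reading the section titles collected by open_page(); it assumes
# open_page() has been called first so the two globals are populated:
open_page()
for character in main_characters:
    print(character.title)  # each entry is a WikipediaPageSection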
def download(directory, url):
    url = urllib.parse.unquote(url)
    parsed = urllib.parse.urlparse(url)
    try:
        lang = parsed.netloc.split(".", maxsplit=1)[0]
    except (AttributeError, IndexError):
        log.exception(f"{parsed.netloc} is incorrect.")
        return None
    path = os.path.join(directory, f"{lang}.html")
    if os.path.exists(path):
        log.warning(f"{path} already exists.")
        return None
    try:
        page_name = parsed.path.rsplit("/", maxsplit=1)[-1]
    except (AttributeError, IndexError):
        log.exception(f"{parsed.path} is incorrect.")
        return None
    wiki = wikipediaapi.Wikipedia(
        language=lang, extract_format=wikipediaapi.ExtractFormat.HTML)
    page = wiki.page(page_name)
    text = page.text
    page_size = len(text)
    if page_size:
        os.makedirs(directory, exist_ok=True)
        text = beautify_page(text, lang)
        log.info(f"Save to {path} {lang} {page_name} {page_size}.")
        with open(path, "w") as file:
            file.write(text)
    else:
        log.warning(f"Page {url} is empty. It has not been saved.")
    return text
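# Hypothetical call to download(); the directory and URL are examples only:
html = download("./pages", "https://en.wikipedia.org/wiki/Python_(programming_language)")
if html:
    print("saved", len(html), "characters")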
def searchmeth(request, query=None):
    if request.method == 'POST':
        name = request.POST.get('input_text', '')
        wiki_wiki = wikipediaapi.Wikipedia('en')
        page_py = wiki_wiki.page(name)
        # print("Page - Title: %s" % page_py.title)
        # Page - Title: Python (programming language)
        # data = page_py.summary
        categories = page_py.categories
        for title in sorted(categories.keys()):
            print("%s: %s" % (title, categories[title]))
        # word_counter = {}
        # for word in categories:
        #     if word in word_counter:
        #         word_counter[word] += 1
        #     else:
        #         word_counter[word] = 1
        # popular_words = sorted(word_counter, key=word_counter.get, reverse=True)
        # top_3 = popular_words[:3]
        # print(len(categories))
        my_dict = {'insert_me': categories}
        return render(request, "search/index.html", context=my_dict)
    else:
        return HttpResponseRedirect('/index/')
def search_for_definition_on_wikipedia(*args: tuple):
    """
    Search Wikipedia for a definition, then speak the results and open the links
    :param args: the search-query phrase
    """
    if not args[0]:
        return
    search_term = " ".join(args[0])

    # set the language (here, the language the assistant speaks)
    wiki = wikipediaapi.Wikipedia(assistant.speech_language)

    # look up the page for the query, read the summary, and open a link to the page for details
    wiki_page = wiki.page(search_term)
    try:
        if wiki_page.exists():
            play_voice_assistant_speech(translator.get("Here is what I found for {} on Wikipedia").format(search_term))
            webbrowser.get().open(wiki_page.fullurl)

            # have the assistant read the first two sentences of the Wikipedia summary
            # (multilingual support may be problematic); join them into one string
            play_voice_assistant_speech(". ".join(wiki_page.summary.split(".")[:2]))
        else:
            # open a search-engine link in the browser if nothing was found on Wikipedia
            play_voice_assistant_speech(translator.get(
                "Can't find {} on Wikipedia. But here is what I found on google").format(search_term))
            url = "https://google.com/search?q=" + search_term
            webbrowser.get().open(url)

    # since it is hard to predict every error, catch them all and report without stopping the program
    except Exception:
        play_voice_assistant_speech(translator.get("Seems like we have a trouble. See logs for more information"))
        traceback.print_exc()
        return
def generate_wiki_links(n=10, start_page='Python', language='ru'):
    wiki = wikipediaapi.Wikipedia(language)
    visited_pages = set()
    new_pages = set()
    start_page = wiki.page(start_page)
    assert start_page.exists()
    page = start_page
    while len(new_pages) < n:
        links = set(page.links.values())
        new_pages.update(links)
        # random.sample on a set is removed in Python 3.11, so sample a list
        page = random.sample(list(new_pages), 1)[0]
    new_pages = random.sample(list(new_pages), len(new_pages))
    result = []
    n_trials = len(new_pages) // n
    for i in range(n_trials):
        dask_res = compute(*map(get_wiki_url, new_pages[n * i:n * (i + 1)]))
        dask_res = list(filter(lambda url: url is not None, dask_res))
        result += dask_res
        if len(result) >= n:
            break
    return result[:n]
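# One possible get_wiki_url() helper for generate_wiki_links(), sketched with
# dask.delayed (an assumption -- the original helper is not shown here):
from dask import delayed

@delayed
def get_wiki_url(page):
    try:
        return page.fullurl  # may fail if the page has no resolvable URL
    except Exception:
        return None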
def check_wiki_page_exst(word):
    # check whether a page for the keyphrase exists and return a bool
    wiki_wiki = wikipediaapi.Wikipedia('en')
    page_check = wiki_wiki.page(word)
    status = page_check.exists()
    print(f"Page - Exists: {status} - {word}")
    return status
def wiki(request):
    wiki_wiki = wikipediaapi.Wikipedia('en')
    if 'search' in request.GET:
        search = request.GET['search']
        page = wiki_wiki.page(search)
        links = page.links
        availableLinks = []
        for title in sorted(links.keys()):
            availableLinks.append(title)
        if page.exists():
            # compute the word counts once instead of three times
            word_counts = wordCounter.wordCounter(page.summary[0:2000])
            context = {
                "pageExist": "Page Exists",
                "pageTitle": page.title,
                "pageSummary": page.summary[0:2000],
                "pageLinks": availableLinks,
                "wordValues": list(word_counts.values()),
                "wordKeys": list(word_counts.keys()),
                "wordCounter": json.dumps(word_counts),
            }
        else:
            context = {"pageExist": "Invalid Search"}
    else:
        context = {"pageExist": "Page Does not Exist"}
    return render(request, 'api/wiki.html', context)
def wikicategory(interest):
    symbols = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '(', ')', ',']
    stoplist = ['of', 'by', 'lists', 'from', 'articles', 'terms']
    wiki = wikipediaapi.Wikipedia('en')
    categorie = []
    noise = []
    l = wiki.page(interest)
    a = wiki.categories(l, clshow='!hidden')
    for k in a.keys():
        cat = k.replace("Category:", "")
        if len(cat.split()) <= 4 and cat != 'Disambiguation pages':
            categorie.append(cat)
    for s in symbols:
        for c in categorie:
            if s in c.lower():
                noise.append(c)
    for c in categorie:
        for s in stoplist:
            if s in c.lower().split():
                noise.append(c)
    noise = list(set(noise))
    for n in noise:
        categorie.remove(n)
    return categorie
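# Example call to wikicategory(); the interest string is just an assumption:
print(wikicategory("Machine learning"))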
def get_wiki(uni_title):
    wiki = wikipediaapi.Wikipedia('en')
    page = wiki.page(titlecase(uni_title))
    if page.exists():
        return {'url': page.fullurl, 'summary': page.summary[0:1000]}
    else:
        return {'summary': "Unable to find wikipedia page "}
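# Example call; titlecase() is assumed to come from the titlecase package:
info = get_wiki("university of oxford")
print(info.get('url'), info['summary'][:80])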
def wikisearch(text):  # e.g. searchkeylist = ['tell', 'me', 'something', 'about', 'india']
    start_time = time.time()
    key = ['what', 'who', 'is', 'tell', 'me', 'something', 'describe',
           'give', 'some', 'details', 'about', 'when', 'was', 'wiki',
           'wikipedia', 'the', 'according', 'to']
    searchkeylist = text.split()
    searchkey = text  # fallback in case every word is a stopword
    for x in searchkeylist:
        #print(x)
        if x in key:
            continue
        else:
            searchkey = x
            break
    print(searchkey)
    #try:
    wiki_wiki = wikipediaapi.Wikipedia('en')
    page_py = wiki_wiki.page(searchkey)
    webbrowser.open(page_py.fullurl)
    x = time.time() - start_time
    notification.notify(title="Virtual assistant",
                        message="we found this in " + str(x) + " seconds",
                        timeout=5)
    result = wikipedia.summary(searchkey, sentences=2)
    #print(result)
    #notification.notify(title="Virtual assistant", message="we found this", timeout=5)
    writeresults = wikipedia.page(searchkey).content
    handle = open('scratch.txt', mode='w', encoding='utf-8')
    handle.write(writeresults)
    handle.close()
    root = Tk()
    buttoncopy = Button(root, text='copy results',
                        command=lambda: os.startfile('scratch.txt'))
    speakclass.speakmethod(result)
    buttoncopy.pack()
    root.mainloop()
def wikifilter(keyword):
    wiki_wiki = wikipediaapi.Wikipedia('en')
    candidate = {}
    for key in keyword.keys():
        page_py = wiki_wiki.page(key)
        if page_py.exists():
            candidate[key] = keyword[key]
        else:
            # retry with the singular form of the keyphrase
            singles = singularize(key)
            page_py = wiki_wiki.page(singles)
            if page_py.exists():
                candidate[singles] = keyword[key]
    # print(candidate)
    final = {}
    redirect = {}
    relation = {}
    for ca in candidate:
        query = requests.get(
            r'https://en.wikipedia.org/w/api.php?action=query&titles={}&redirects&format=json'
            .format(ca))
        data = json.loads(query.text)
        PAGES = data["query"]["pages"]
        for v in PAGES.values():
            redirect[ca] = v["title"]
            relation[v["title"]] = ca
            final[v["title"]] = 0
    for ca in redirect.keys():
        final[redirect[ca]] = candidate[ca]
    # print(final)
    return relation, final
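# An equivalent, slightly safer form of the redirect lookup used in
# wikifilter(), passing params so requests handles the URL encoding
# (a sketch, not the original code):
import requests

def resolve_redirect(title):
    resp = requests.get(
        "https://en.wikipedia.org/w/api.php",
        params={"action": "query", "titles": title,
                "redirects": 1, "format": "json"})
    pages = resp.json()["query"]["pages"]
    return next(iter(pages.values()))["title"]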