def get(self): wikipedia.set_lang(u'ru') try: page = wikipedia.page(u'Проект:Города_России/Списки_улиц/Казани') streets = [] for link in page.links: nlink = unicode(link).encode('utf-8').strip().decode('utf-8') norm_name = normalize(nlink) try: street_info = StreetInfo.get_by_norm_name(norm_name) if not street_info: street_info = StreetInfo() street_page = wikipedia.page(nlink) street_info.name = nlink street_info.norm_name = norm_name street_info.info = unicode(street_page.summary).encode('utf-8').strip() street_info.images = [Image(url=x) for x in street_page.images] street_info.city = u'Казань'.encode('utf-8').strip() street_info.lang=u'ru'.encode('utf-8').strip() street_info.put() except Exception, e: print nlink.encode('utf-8') except DeadlineExceededError: pass self.response.headers['Content-Type'] = "text/html; charset=utf-8" self.response.write(json.dumps({'success':True}))
def test_change_through_multiple_langs(self):
    """set_lang must rewrite API_URL on every successive language switch.

    FIX: the five copy-pasted set_lang/assertEqual pairs collapsed into a
    single data-driven loop — same assertions, same order.
    """
    for lang in ('sv', 'de', 'ru', 'pl', 'pt'):
        wikipedia.set_lang(lang)
        self.assertEqual(wikipedia.API_URL,
                         'http://{0}.wikipedia.org/w/api.php'.format(lang))
def searchIntent(self, session: DialogSession):
    """Answer the user's spoken query with a three-sentence Wikipedia summary.

    Re-prompts on a missing/ambiguous/unmatched search term; reports an
    offline notice when no connectivity is available.
    """
    term = self._extractSearchWord(session)
    if not term:
        self._whatToSearch(session, 'whatToSearch')
        return

    wikipedia.set_lang(self.LanguageManager.activeLanguage)
    try:
        with Online():
            summary = wikipedia.summary(term, sentences=3)
    except OfflineError:
        self.endDialog(sessionId=session.sessionId, text=self.randomTalk('offline', skill='system'))
    except wikipedia.DisambiguationError as err:
        self.logWarning(msg=err)
        self._whatToSearch(session, 'ambiguous')
    except wikipedia.WikipediaException as err:
        self.logWarning(msg=err)
        self._whatToSearch(session, 'noMatch')
    except Exception as err:
        self.logWarning(msg=err, printStack=True)
    else:
        if summary:
            self.endDialog(sessionId=session.sessionId, text=summary)
        else:
            self._whatToSearch(session, 'noMatch')
def title(query: str) -> str:
    """Return filtered JSON for the top Wikipedia search hit of *query*.

    Raises IndexError when the search yields no results.
    """
    wikipedia.set_lang('en')
    results = wikipedia.search(query)
    # BUG FIX: wikipedia.search() returns a list of title *strings*, not
    # dicts — the old results[0]['title'] raised TypeError. Pass the title
    # string straight to page().
    return json.dumps(filterResult(wikipedia.page(results[0])))
def test_zh(self):
    """Smoke test: Chinese summary lookup routed through a local proxy.

    Integration test — requires a proxy listening on localhost:1080 and
    network access; it only prints, asserting nothing.
    """
    wikipedia.set_lang('zh')
    wikipedia.set_proxy({
        'http': 'http://localhost:1080',
        'https': 'https://localhost:1080'
    })
    # Accept-Language preference chain (simplified Chinese first).
    wikipedia.set_request_lang('zh-CN,zh;q=0.9,en;q=0.8,da;q=0.7')
    print(wikipedia.summary('流行性感冒'))
def import_images():
    """Re-import every image reachable from the Hermitage exhibits index page."""
    # Purge previously imported wiki records before re-crawling.
    image_collection.remove(source='wiki')
    wikipedia.set_lang('ru')
    index_page = wikipedia.page('Экспонаты эрмитажа')
    for linked_title in index_page.links:
        import_images_from_page(linked_title)
def pageid(query: str) -> str:
    """Return filtered JSON for the page matching the top search hit of *query*.

    Raises IndexError when the search yields no results.
    """
    print(query)
    wikipedia.set_lang('en')
    results = wikipedia.search(query)
    # BUG FIX: wikipedia.search() returns title *strings*; the old
    # results[0]['pageid'] indexed a str and raised TypeError. Resolve the
    # page by its title instead.
    return json.dumps(filterResult(wikipedia.page(results[0])))
def __init__(self, parent=None):
    """Set up the playback thread: fixed mp3 path in $HOME, a QMediaPlayer
    preloaded with that file, and the wikipedia client forced to Spanish.
    """
    super(SoundThread, self).__init__(parent)
    self.name = ""
    home = expanduser("~")
    # Fixed output/playback path — presumably written elsewhere by a TTS
    # step before play; TODO confirm against the rest of the class.
    self.filename = home + "/Curie.mp3"
    wikipedia.set_lang("es")
    self.player = QMediaPlayer(None, QMediaPlayer.StreamPlayback)
    media = QMediaContent(QUrl.fromLocalFile(self.filename))
    self.player.setMedia(media)
def test_set_lang_then_back_to_eng(self):
    """Switching to Spanish and then back to English must update API_URL both times."""
    for code in ('es', 'en'):
        wikipedia.set_lang(code)
        self.assertEqual(wikipedia.API_URL,
                         'http://{0}.wikipedia.org/w/api.php'.format(code))
def main():
    """Interactively pick a Wikipedia language, then pretty-print a page summary.

    Python 2 script (raw_input). The default language (English) is kept
    unless the user answers yes.
    """
    choice = ''
    known_language = ''
    # Re-prompt until a recognizable yes/no answer is given.
    while choice.lower() not in ['y', 'yes', 'n', 'no']:
        choice = raw_input('Default language - English. You wanna to change this? [y/n]: ')
    if choice.lower() in ['yes', 'y']:
        known_language = raw_input('You know your language abbreviation? [y/n]: ')
        if known_language.lower() in ['no', 'n']:
            lang_abbreviation = raw_input('Enter a native language naming: ')
            # presumably lists matching language codes — verify find_lang
            find_lang(lang_abbreviation)
        wikipedia.set_lang(raw_input('Enter your choice: '))
    name = raw_input('Page for search: ')
    MyPrettyPrinter().pprint(wikipedia.summary(name))
def searchIntent(self, session: DialogSession, **_kwargs):
    """Answer the user's query with a three-sentence Wikipedia summary.

    FIX: wikipedia.summary() raises DisambiguationError / WikipediaException
    on ambiguous or unmatched terms; the old body let those propagate and
    killed the intent. Handle them like the sibling searchIntent
    implementations do (re-prompt with 'ambiguous' / 'noMatch').
    """
    search = self._extractSearchWord(session)
    if not search:
        self._whatToSearch(session, 'whatToSearch')
        return

    wikipedia.set_lang(self.LanguageManager.activeLanguage)
    try:
        result = wikipedia.summary(search, sentences=3)
    except wikipedia.DisambiguationError as e:
        self.logWarning(msg=e)
        self._whatToSearch(session, 'ambiguous')
    except wikipedia.WikipediaException as e:
        self.logWarning(msg=e)
        self._whatToSearch(session, 'noMatch')
    else:
        if not result:
            self._whatToSearch(session, 'noMatch')
        else:
            self.endDialog(sessionId=session.sessionId, text=result)
def get(self):
    """Collect name/summary/images for every street linked from the Kazan
    street index page on Russian Wikipedia (Python 2 GAE handler).

    NOTE(review): `streets` is built but never written to the response —
    confirm whether output was intended here.
    """
    wikipedia.set_lang(u"ru")
    # Index article listing the streets of Kazan.
    page = wikipedia.page(u"Проект:Города_России/Списки_улиц/Казани")
    streets = []
    for link in page.links:
        # Encoded to UTF-8 bytes (py2 str); downstream keys hold bytes.
        nlink = unicode(link).encode("utf-8").strip()
        try:
            street_page = wikipedia.page(nlink)
            streets.append(
                {"name": nlink,
                 "info": street_page.summary,
                 "images": street_page.images,
                 "city": u"Казань"})
        # Broad catch keeps the crawl going past missing/ambiguous pages.
        except Exception, e:
            print nlink
def get(self):
    """Emit the numbered-streets section of the Kazan street list as JSON values."""
    self.response.headers["Content-Type"] = "text/html; charset=utf-8"
    wikipedia.set_lang(u"ru")
    page = wikipedia.page(u"Проект:Города_России/Списки_улиц/Казани")
    text = page.content
    # Split the article into per-letter sections on "== X ==" headings.
    # NOTE(review): not a raw string — "\s" only survives because Python 2
    # leaves unknown escapes intact; should be r"..." ideally.
    alphabet = re.split("\n\n\n==\s*...\s*==\n", text)
    byline = []
    for line in alphabet:
        byline.append(re.split("\n", line))
    # Drop the preamble before the first heading.
    byline.remove(byline[0])
    # byline[0] - names with number
    # byline[1..]- names with corresponding letter
    for line in byline[0]:
        self.response.write(json.dumps(line))
def update_on_priv_msg(self, data, connection):
    """Handle a '.w <term>' IRC query: reply with the page URL and a short,
    sentence-aligned slice of its summary.

    FIXES: dropped the dead `w = wikipedia.set_lang(...)` assignment
    (set_lang returns None); replaced `__len__() == 0` with a truthiness
    test; the old if/else sent the identical message in both branches —
    collapsed to one send after the index is settled.
    """
    if data['message'].find('.w ') == -1:
        return
    i18n_server = i18n()
    wikipedia.set_lang(i18n_server.get_text('wiki_lang', lang=self.config.lang))
    # Everything except the '.w' trigger word forms the search query.
    query = ''
    for word in data['message'].split(' '):
        if word.strip() != '.w':
            query += word + ' '
    hits = wikipedia.search(query)
    if not hits:
        # TODO BUG (inherited): error message reportedly not shown — verify.
        connection.send_back(data['nick'] + ', ' + i18n_server.get_text('wiki_fail', lang=self.config.lang), data)
        return
    try:
        page = wikipedia.WikipediaPage(hits.pop(0))
    except wikipedia.DisambiguationError as error:
        print('disambiguation page')
        # Fall back to the first disambiguation option.
        page = wikipedia.WikipediaPage(error.args[1][0])
    connection.send_back(data['nick'] + ' ' + page.url, data)
    # Prefer cutting at a sentence boundary in chars 50..350; otherwise cut
    # at the last word boundary in the first 350 chars.
    index = 51 + page.summary[50:350].rfind('. ')
    if index == 50 or index > 230:
        index = page.summary[0:350].rfind(' ')
    connection.send_back(page.summary[0:index], data)
def update_on_priv_msg(self, data):
    """Handle a '.w <term>' IRC query: reply with the page URL and a short
    summary slice cut at a sentence boundary when possible.

    BUG FIX: `index = 51 + summary[50:230].find('. ')` yields 50 (not -1)
    when no '. ' is found, so the old `index == -1` no-match test was dead
    code. The real sentinel is 50.
    """
    if data['message'].find('.w ') == -1:
        return
    i18n_server = i18n()
    # FIX: dropped the dead `w =` binding — set_lang returns None.
    wikipedia.set_lang(i18n_server.get_text('wiki_lang'))
    query = ''
    for word in data['message'].split(' '):
        if word.strip() != '.w':
            query += word + ' '
    hits = wikipedia.search(query)
    if not hits:
        Connection.singleton().send_back(data['nick'] + ', ' + i18n_server.get_text('wiki_fail'), data)
        return
    try:
        page = wikipedia.WikipediaPage(hits.pop(0))
    except wikipedia.DisambiguationError as error:
        print('disambiguation page')
        # Fall back to the first disambiguation option.
        page = wikipedia.WikipediaPage(error.args[1][0])
    Connection.singleton().send_back(data['nick'] + ' ' + page.url, data)
    index = 51 + page.summary[50:230].find('. ')
    if index == 50 or index > 230:
        Connection.singleton().send_back(page.summary[0:230], data)
    else:
        Connection.singleton().send_back(page.summary[0:index], data)
def extract_actor_from_wikipedia(lastname, firstname):
    """Search French Wikipedia for an actor and build a record from the first
    page whose title contains both names.

    Returns a dict with name, photo (first .jpg image), curated reference
    links, summary, title and url — or None when no page title matches.

    FIXES: removed the dead per-iteration `rc = {"links": list({...})}`
    assignment (it was always overwritten or discarded); replaced the
    parallel domain/label lists + index() with a dict lookup; dropped the
    unused `page.html()` fetch and its commented-out scraping loop;
    narrowed the bare excepts.
    """
    wikipedia.set_lang("fr")
    # Domains worth surfacing as curated reference links.
    link_labels = {
        "unifrance.org": "UniFrance",
        "www.lefilmfrancais": "Le Film Francais",
        "www.allocine.fr": "Allocine",
        "catalogue.bnf.fr": "La BNF",
        "www.allmovie.com": "All movie",
    }
    for hit in wikipedia.search(lastname + " " + firstname):
        page = wikipedia.page(hit)
        if lastname not in page.title or firstname not in page.title:
            continue
        rc = {"links": [], "name": firstname + " " + lastname}
        for img in page.images:
            if img.endswith(".jpg"):
                rc["photo"] = img  # last .jpg wins, as before
        try:
            for ref in page.references:
                domain = urlparse(ref).netloc
                if domain in link_labels:
                    rc["links"].append({"title": link_labels[domain], "url": ref})
        except Exception:
            pass  # references are best-effort; keep whatever was collected
        rc["summary"] = page.summary
        rc["title"] = page.title
        rc["url"] = page.url
        return rc
    return None
def searchIntent(self, session: DialogSession):
    """Answer the user's query with a three-sentence Wikipedia summary,
    re-prompting when the term is missing, ambiguous, or unmatched."""
    term = self._extractSearchWord(session)
    if not term:
        self._whatToSearch(session, 'whatToSearch')
        return

    wikipedia.set_lang(self.LanguageManager.activeLanguage)
    try:
        summary = wikipedia.summary(term, sentences=3)
    except wikipedia.DisambiguationError:
        self.logWarning(msg='Ambiguous result')
        self._whatToSearch(session, 'ambiguous')
    except wikipedia.WikipediaException:
        self.logWarning(msg='No match')
        self._whatToSearch(session, 'noMatch')
    except Exception as exc:
        self.logWarning(msg=str(exc), printStack=True)
    else:
        if summary:
            self.endDialog(sessionId=session.sessionId, text=summary)
        else:
            self._whatToSearch(session, 'noMatch')
def update_on_priv_msg(self, data):
    """Handle a '.w <term>' IRC query: post the page URL and the first 230
    summary characters to the channel.

    FIXES: dropped the dead `w = wikipedia.set_lang(...)` binding (set_lang
    returns None); replaced `__len__() == 0` with a truthiness test; added
    the DisambiguationError fallback the sibling handlers already have —
    previously a disambiguation page crashed the handler.
    """
    if data['message'].find('.w ') == -1:
        return
    i18n_server = i18n()
    wikipedia.set_lang(i18n_server.get_text('wiki_lang'))
    query = ''
    for word in data['message'].split(' '):
        if word.strip() != '.w':
            query += word + ' '
    hits = wikipedia.search(query)
    if not hits:
        Connection.singleton().send_channel(data['nick'] + ', ' + i18n_server.get_text('wiki_fail'))
        return
    try:
        page = wikipedia.WikipediaPage(hits.pop(0))
    except wikipedia.DisambiguationError as error:
        # Fall back to the first disambiguation option.
        page = wikipedia.WikipediaPage(error.args[1][0])
    Connection.singleton().send_channel(data['nick'] + ' ' + page.url)
    Connection.singleton().send_channel(page.summary[0:230])
def test_lang(self):
    """set_lang must rewrite the API URL stored in WIKIPEDIA_GLOBALS."""
    wikipedia.set_lang("fr")
    expected = 'http://fr.wikipedia.org/w/api.php'
    self.assertEqual(wikipedia.WIKIPEDIA_GLOBALS['API_URL'], expected)
def geo(lat: float, lon: float) -> str:
    """Return filtered JSON for the Wikipedia page nearest to (lat, lon).

    Raises IndexError when geosearch finds nothing nearby.
    """
    wikipedia.set_lang('en')
    nearby = wikipedia.geosearch(lat, lon)
    # BUG FIX: geosearch() returns a *list* of titles; the old code passed
    # the whole list to page(). Resolve the closest (first) hit.
    return json.dumps(filterResult(wikipedia.page(nearby[0])))
def searchIntent(self, intent: str, session: DialogSession) -> bool:
    """Resolve a search intent via Wikipedia and answer or re-prompt.

    FIXES: the `if engine == 'wikipedia'` conditional had byte-identical
    branches (dead code) — collapsed, with `engine` kept in customData for
    forward compatibility; the three duplicated continueDialog blocks are
    factored into a local helper.
    Always returns True (intent consumed).
    """
    sessionId = session.sessionId
    customData = session.customData
    search = customData.get('userInput', session.slots.get('what'))
    if not search:
        self.continueDialog(sessionId=sessionId,
                            text=self.randomTalk('whatToSearch'),
                            intentFilter=[self._INTENT_USER_ANSWER],
                            previousIntent=self._INTENT_SEARCH,
                            customData={
                                'module': self.name,
                            })
        return True

    wikipedia.set_lang(self.LanguageManager.activeLanguage)
    engine = customData.get('engine', 'wikipedia')

    def _askAgain(talk: str):
        # Re-prompt the user, preserving module/engine context.
        self.continueDialog(
            sessionId=sessionId,
            text=self.TalkManager.randomTalk(talk).format(search),
            intentFilter=[self._INTENT_USER_ANSWER],
            previousIntent=self._INTENT_SEARCH,
            customData={
                'module': self.name,
                'engine': engine
            })

    try:
        # Only the wikipedia engine is implemented today.
        result = wikipedia.summary(search, sentences=3)
        if result:
            self.endDialog(sessionId=sessionId, text=result)
        else:
            _askAgain('noMatch')
    except wikipedia.DisambiguationError:
        _askAgain('ambiguous')
    except wikipedia.WikipediaException:
        _askAgain('noMatch')
    except Exception as e:
        self._logger.error(f'Error: {e}')
        self.endDialog(sessionId=sessionId, text=self.TalkManager.randomTalk('error', module='system'))
    return True
def main():
    """VK long-poll dispatcher for a multi-mode chat bot.

    Exactly one mode (calculator / car-region lookup / text quest / weather
    / wikipedia search) is active at a time; mode trigger words toggle their
    mode off again, and 'stop'/'стоп' clears everything. While wiki_mode is
    on, the other mode trigger words are treated as search input
    (`and not wiki_mode` guards).
    """
    kalk_mode = False
    aut_reg_mode = False
    quest_mode = False
    weather_mode = False
    wiki_mode = False
    quest_stage = 0                  # persists across quest on/off toggles
    answer_quest_continue = False    # next quest message answers "continue?"
    wikipedia.set_lang('ru')
    for event in longpoll.listen():
        if event.type == VkEventType.MESSAGE_NEW:
            if event.to_me:
                request = event.text.lower()
                if request in ['stop', 'стоп']:
                    # Stop every mode.
                    kalk_mode = False
                    aut_reg_mode = False
                    quest_mode = False
                    weather_mode = False
                    wiki_mode = False
                    write_msg(event.user_id, 'Все режимы остановленны.', create_empty_keyboard())
                elif request in ["что ты можешь", 'помощь', 'help']:
                    write_msg(event.user_id, "Вот, что я могу:", create_help_keyboard())
                elif request in ['калькулятор', 'kalk'] and not wiki_mode:
                    # Calculator mode toggle.
                    kalk_mode = not kalk_mode
                    if kalk_mode:
                        aut_reg_mode = False
                        quest_mode = False
                        weather_mode = False
                        wiki_mode = False
                        write_msg(event.user_id, 'Режим калькулятор запущен.', create_empty_keyboard())
                        write_msg(event.user_id,
                                  "Введите выражение. Используйте знаки из допустимого списка: "
                                  "+, -, *, /, **, (, ), %, //. Для остановки режима введите 'stop', 'стоп' "
                                  "или слово, использованное для старта.", create_empty_keyboard())
                    else:
                        write_msg(event.user_id, 'Режим калькулятор остановлен.', create_empty_keyboard())
                elif request in ['регион', 'aut_reg', 'номер'] and not wiki_mode:
                    # Car registration region lookup toggle.
                    aut_reg_mode = not aut_reg_mode
                    if aut_reg_mode:
                        kalk_mode = False
                        quest_mode = False
                        weather_mode = False
                        wiki_mode = False
                        write_msg(event.user_id,
                                  "Режим поиска региона регистрации автомобиля запущен. "
                                  "Для остановки режима введите 'stop', 'стоп' или слово, использованное для старта.",
                                  create_empty_keyboard())
                        write_msg(event.user_id, "Введите номер региона. \nИсспользуйте только цифры.",
                                  create_empty_keyboard())
                    else:
                        write_msg(event.user_id, 'Режим поиска региона регистрации автомобиля остановлен.',
                                  create_empty_keyboard())
                elif request in ['квест', 'quest', 'текстовый квест'] and not wiki_mode:
                    # Text quest toggle.
                    quest_mode = not quest_mode
                    if quest_mode:
                        kalk_mode = False
                        aut_reg_mode = False
                        weather_mode = False
                        wiki_mode = False
                        write_msg(event.user_id,
                                  "Режим текстового квеста запущен. Для остановки режима введите "
                                  "'stop', 'стоп' или слово, использованное для старта.", create_empty_keyboard())
                        if quest_stage != 0:
                            # Offer to resume a previously interrupted quest.
                            write_msg(event.user_id, 'Хотите продолжить с момента, на котором остановились?',
                                      create_answer_keyboard('Да', 'Нет'))
                            answer_quest_continue = True
                            continue
                        quest(event.user_id, quest_stage)
                    else:
                        write_msg(event.user_id, 'Режим текстового квеста остановлен.', create_empty_keyboard())
                elif request in ['погода', 'weather'] and not wiki_mode:
                    # Weather mode toggle.
                    weather_mode = not weather_mode
                    if weather_mode:
                        kalk_mode = False
                        aut_reg_mode = False
                        quest_mode = False
                        wiki_mode = False
                        write_msg(event.user_id,
                                  "Режим погода запущен. "
                                  "Для остановки режима введите 'stop', 'стоп' или слово, использованное для старта.",
                                  create_empty_keyboard())
                        write_msg(event.user_id, "Введите название города, погоду в котором вы хотите узнать.",
                                  create_empty_keyboard())
                    else:
                        write_msg(event.user_id, 'Режим погода остановлен.', create_empty_keyboard())
                elif request == 'wiki' or request == 'поиск':
                    # Wikipedia search toggle.
                    wiki_mode = not wiki_mode
                    if wiki_mode:
                        kalk_mode = False
                        aut_reg_mode = False
                        quest_mode = False
                        weather_mode = False
                        write_msg(event.user_id,
                                  "Режим wikipedia запущен. \n"
                                  "Для остановки режима введите 'stop', 'стоп' или слово, использованное для старта.",
                                  create_empty_keyboard())
                        write_msg(event.user_id, "Введите ваш запрос:", create_empty_keyboard())
                    else:
                        write_msg(event.user_id, 'Режим wikipedia остановлен.', create_empty_keyboard())
                elif kalk_mode:
                    kalk(event.user_id, request)
                elif aut_reg_mode:
                    find_aut_reg(event.user_id, request)
                elif quest_mode:
                    if answer_quest_continue is True:
                        # The message answers the "continue?" question.
                        answer_quest_continue = False
                        if request == 'да':
                            quest(event.user_id, quest_stage)
                        elif request == 'нет':
                            quest_stage = 0
                            quest(event.user_id, quest_stage)
                    # Stage-transition commands of the quest state machine.
                    elif request == 'пойти в дверь №1' and quest_stage == 0:
                        quest_stage = 1
                        quest(event.user_id, quest_stage)
                    elif request == 'пойти в дверь №2' and quest_stage == 0:
                        quest_stage = 2
                        quest(event.user_id, quest_stage)
                    elif request == 'выпить из пузырька' and quest_stage == 1:
                        quest_stage = 5
                        quest(event.user_id, quest_stage)
                    elif request == 'попробовать протиснуться в дверцу' and quest_stage == 1:
                        quest_stage = 6
                        quest(event.user_id, quest_stage)
                    elif request == 'нырнуть за ключом' and quest_stage == 2:
                        quest_stage = 7
                        quest(event.user_id, quest_stage)
                    elif request == 'попробовать открыть дверь' and quest_stage == 2:
                        quest_stage = 8
                        quest(event.user_id, quest_stage)
                    elif request == 'начать с начала':
                        quest_stage = 0
                        quest(event.user_id, quest_stage)
                    elif request == 'закончить':
                        quest_stage = 0
                        quest_mode = False
                        write_msg(event.user_id, 'Режим текстового квеста остановлен.', create_empty_keyboard())
                elif weather_mode:
                    weather(event.user_id, request)
                elif wiki_mode:
                    wiki_search(event.user_id, request)
                # Small talk when no mode is active.
                elif request == "привет" or request == 'hello':
                    write_msg(event.user_id, "Здравствуйте.", create_empty_keyboard())
                elif request == "пока" or request == 'goodbye':
                    write_msg(event.user_id, "До свидания.", create_empty_keyboard())
                else:
                    write_msg(event.user_id, "Не понимаю вашего ответа... Если нужна помощь введите 'help' "
                              "или 'помощь'.", create_quest_keyboard('Help'))
class WikipediaScraping:
    """
    :Date: 2018-02-16
    :Version: 1.2
    :Author: Edwin Puertas - Pontificia Universidad Javeriana
    :Copyright: To be defined
    :Organization: Centro de Excelencia y Apropiación de Big Data y Data Analytics - CAOBA
    This class extracts wikipedia content.
    """

    # Class-body side effect: forces the shared `wikipedia` client into
    # Spanish the moment this class is imported/defined.
    wikipedia.set_lang('es')

    def __init__(self):
        """
        :Date: 2017-02-08
        :Author: Edwin Puertas - Pontificia Universidad Javeriana
        :Copyright: To be defined
        :Organization: Centro de Excelencia y Apropiación de Big Data y Data Analytics - CAOBA
        This function returns an initialized wikipedia scraper
        (a Support helper plus a Spanish `wikipediaapi` client).
        """
        self.support = Support()
        self.media_wiki = wikipediaapi.Wikipedia('es')
        print('WikipediaScraping')

    def search_articles(self, list_word):
        # Aggregate wikipedia search hits for every seed word, then
        # de-duplicate via Support.normalized_list. Returns None on error
        # (bare except logs and falls through).
        art_list = []
        try:
            for word in list_word:
                art_list += wikipedia.search(word)
            art_list = self.support.normalized_list(art_list)
            return art_list
        except:
            Logging.write_standard_error(sys.exc_info())

    def get_content(self, word):
        # Return (subtitles, cleaned_text) for the article *word*, skipping
        # the boilerplate sections listed in list_tmp. Returns None on error.
        try:
            wiki = wikipediaapi.Wikipedia(
                language='es', extract_format=wikipediaapi.ExtractFormat.WIKI)
            article = wiki.page(word)
            # Sections excluded from the extracted text.
            list_tmp = [
                'Historia', 'Véase también', 'Referencias', 'Enlaces externos'
            ]
            text = ''
            list_subtitle = []
            if article.exists():
                text = article.summary
                for s in article.sections:
                    if not (s.title in list_tmp):
                        text += s.title + '\n' + s.text
                        list_subtitle.append(s.title)
                text = self.support.clean_text(text)
            return list_subtitle, text
        except:
            Logging.write_standard_error(sys.exc_info())

    def get_article(self, word):
        """
        :Date: 2017-05-09
        :Version: 1.2
        :Author: Edwin Puertas - Pontificia Universidad Javeriana
        :Organization: Centro de Excelencia y Apropiación de Big Data y Data Analytics - CAOBA
        This function returns a wikipedia article as a dict of set values
        (or None when the article does not exist or an error occurs).
        :param word: Name of the wikipedia article to extract
        :type word: Text
        :rtype: Dictionary
        :return: Dictionary describing the article
        """
        art = {}
        result = None
        try:
            article = self.media_wiki.page(word)
            if article.exists():
                art['Id'] = article.pageid  # Id
                art['Title'] = article.title  # Title
                # NOTE(review): get_content() is called twice per article
                # (SubTitle and Content) — each call re-fetches the page.
                art['SubTitle'] = self.get_content(art['Title'])[0]  # SubTitle
                art['Summary'] = article.summary  # Summary
                art['Content'] = self.get_content(art['Title'])[1]  # Content
                art['Links'] = self.normalized_links(article.links)  # Links
                art['Categories'] = self.get_categories(
                    art['Title'])  # Categories
                art['URL'] = article.canonicalurl  # URL
                art['Weight'] = len(art['Links'])  # article weight
                art['Frequency'] = Counter(tb(article.summary + art['Content']))
                result = art
            return result
        except:
            Logging.write_standard_error(sys.exc_info())

    def get_nearby_articles(self, list_articles):
        """
        :Date: 2017-05-09
        :Version: 1.2
        :Author: Edwin Puertas - Pontificia Universidad Javeriana
        :Organization: Centro de Excelencia y Apropiación de Big Data y Data Analytics - CAOBA
        This function returns a dict of wikipedia articles near the seed words,
        mapping lowercase title -> categories.
        :param list_articles: List of the wikipedia article
        :type list_articles: List
        :rtype: Dictionary
        :return: Dictionary of nearby articles
        """
        try:
            dic_temp = {}
            for art in list_articles:
                key = self.media_wiki.page(art)
                if key.exists():
                    dic_temp[str(key.title).lower()] = self.get_categories(
                        key.title)
            return dic_temp
        except:
            Logging.write_standard_error(sys.exc_info())

    def get_categories(self, word):
        """
        :Date: 2017-05-09
        :Version: 1.2
        :Author: Edwin Puertas - Pontificia Universidad Javeriana
        :Organization: Centro de Excelencia y Apropiación de Big Data y Data Analytics - CAOBA
        This function returns the cleaned category names of an article.
        :param word: Name of the wikipedia article to extract
        :type word: Text
        :rtype: List
        :return: List of categories by article
        """
        try:
            list_categories = self.media_wiki.page(word).categories
            new_list = []
            for item in list_categories:
                item = str(item).lower()
                item = self.support.clean_text(item)
                if not ('wiki' in item) and item != '':
                    # Drops the first 10 chars — presumably the
                    # "categoría:" prefix; confirm against clean_text output.
                    item = item[10:len(item)]
                    item = item.strip()
                    new_list.append(item)
            new_list = self.support.normalized_list(new_list)
            return new_list
        except:
            Logging.write_standard_error(sys.exc_info())

    def normalized_links(self, list_links):
        """
        :Date: 2017-05-09
        :Version: 1.2
        :Author: Edwin Puertas - Pontificia Universidad Javeriana
        :Organization: Centro de Excelencia y Apropiación de Big Data y Data Analytics - CAOBA
        This function returns the link titles cleaned and de-duplicated.
        :param list_links: list of word
        :type list: Text
        :rtype: List
        :return: List without repeated words
        """
        list_tmp = []
        try:
            for item in list_links:
                text = self.support.clean_text(item).strip()
                if text != '':
                    list_tmp.append(text)
            list_tmp = self.support.normalized_list(list_tmp)
            return list_tmp
        except:
            Logging.write_standard_error(sys.exc_info())

    def compare_categories(self, main_categories, second_categories):
        """
        :Date: 2017-05-09
        :Version: 1.2
        :Author: Edwin Puertas - Pontificia Universidad Javeriana
        :Organization: Centro de Excelencia y Apropiación de Big Data y Data Analytics - CAOBA
        This function returns True when at least one word of main_categories
        exists in second_categories (set intersection test).
        :param main_categories: list of words by main categories
        :type list: Text
        :param second_categories: list of words by second categories
        :type list: Text
        :rtype: Boolean
        :return: True or False
        """
        try:
            val = False
            list_tmp = []
            for i in main_categories:
                if (i not in list_tmp) and (i in second_categories):
                    list_tmp.append(i)
            if len(list_tmp) > 0:
                val = True
            return val
        except:
            Logging.write_standard_error(sys.exc_info())

    def creted_corpus_wikipedia(self, seed_words):
        """
        :Date: 2017-05-09
        :Version: 1.2
        :Author: Edwin Puertas - Pontificia Universidad Javeriana
        :Organization: Centro de Excelencia y Apropiación de Big Data y Data Analytics - CAOBA
        This function creates a corpus of Wikipedia articles from seed words:
        seed articles, then first- and second-level neighbours whose
        categories overlap the seed categories.
        (NOTE(review): method name typo "creted" kept — it is public API.)
        :param seed_words: list of seed words
        :type list: Text
        :rtype: XML
        :return: Corpus of Wikipedia Articles
        """
        parent_categories = []
        list_article_invalid = []
        try:
            print('Searching Wikipedia with the seed {0}'.format(
                str(seed_words)))
            print('Searching Articles in Wikipedia, wait a moment please.')
            list_nearby_parent_articles = self.search_articles(seed_words)
            print('Found articles:{0}'.format(list_nearby_parent_articles))
            print('Building main domain region, wait a moment please.')
            # Saved articles seed words
            list_articles = []
            list_titles = []
            for word in seed_words:
                art = self.get_article(word)
                if art is not None:
                    list_articles.append({
                        'resource': 'Wikipedia',
                        'title': art['Title'],
                        'subtitle': art['SubTitle'],
                        'content': art['Content']
                    })
                    list_titles.append(art['Title'])
                    parent_categories += self.get_categories(str(word))
            parent_categories = self.support.normalized_list(parent_categories)
            print('Main domain region {0}'.format(parent_categories))
            for art in list_nearby_parent_articles:
                # NOTE(review): get_article can return None; child['Categories']
                # would then raise TypeError — confirm intended behavior.
                child = self.get_article(art)
                child_categories = child['Categories']
                if self.compare_categories(parent_categories, child_categories):
                    if (art is not None) and not (child['Title'] in list_titles):
                        print(
                            'Retrieving Information from [{0}], [{1}]'.format(
                                child['Title'], child['URL']))
                        list_articles.append({
                            'resource': 'Wikipedia',
                            'title': child['Title'],
                            'subtitle': child['SubTitle'],
                            'content': child['Content']
                        })
                        list_titles.append(child['Title'])
                        dict_grand_child = {}
                        print('Validating articles children of {0}'.format(
                            str(child['Title'])))
                        print(
                            'Validating domain region for articles {0}, wait a moment please.'
                            .format(len(child['Links'])))
                        dict_grand_child = self.get_nearby_articles(
                            child['Links'])
                        for k, v in dict_grand_child.items():
                            grand_child_categories = v
                            if self.compare_categories(parent_categories,
                                                       grand_child_categories):
                                grand_child = self.get_article(k)
                                if grand_child is not None:
                                    if not (grand_child['Title'] in list_titles):
                                        print(
                                            'Retrieving Information from [{0}], [{1}]'
                                            .format(grand_child['Title'],
                                                    grand_child['URL']))
                                        list_articles.append({
                                            'resource': 'Wikipedia',
                                            'title': grand_child['Title'],
                                            'subtitle': grand_child['SubTitle'],
                                            'content': grand_child['Content']
                                        })
                                        list_titles.append(
                                            grand_child['Title'])
                                    else:
                                        print('Article [{0}] excluded!'.format(
                                            grand_child['Title']))
                                        if grand_child['Title'] != '':
                                            list_article_invalid.append(
                                                grand_child['Title'])
            excluded_items = len(list_article_invalid)
            recovered_items = len(list_articles)
            total_items = excluded_items + recovered_items
            print(
                '\n# Article excluded: [{0}] \n# Articles recovered: [{1}] \nTotal Articles: [{2}]'
                .format(excluded_items, recovered_items, total_items))
            return list_articles
        except:
            Logging.write_standard_error(sys.exc_info())
def test_lang(self):
    """set_lang must point API_URL at the French endpoint (HTTPS)."""
    wikipedia.set_lang("fr")
    expected = 'https://fr.wikipedia.org/w/api.php'
    self.assertEqual(wikipedia.API_URL, expected)
import vk_api
from vk_api.bot_longpoll import VkBotLongPoll, VkBotEventType
import random
from wikipedia import wikipedia

# Module-level bot setup.
# NOTE(review): TOKEN and GROUP_ID are not defined in this chunk — they must
# be supplied elsewhere, otherwise this raises NameError at import time.
vk_session = vk_api.VkApi(token=TOKEN)
longpoll = VkBotLongPoll(vk_session, GROUP_ID)
vk = vk_session.get_api()
wikipedia.set_lang('ru')


def wiki_response(request_text):
    # First 1000 characters of the article body for the raw request text.
    return str(wikipedia.page(request_text).content[:1000])


def help():
    # NOTE(review): shadows the help() builtin; the f-string has no placeholders.
    return f"What do you want to ask Wikipedia?"


def main():
    # flag_help gates the greeting; both flags flip on the first message and
    # are never touched again, so the greeting is sent exactly once.
    flag_wiki, flag_help = False, True
    for event in longpoll.listen():
        if event.type == VkBotEventType.MESSAGE_NEW and flag_help:
            flag_wiki = not flag_wiki
            flag_help = not flag_help
            vk.messages.send(user_id=event.obj.message['from_id'],
                             message=help(),
                             random_id=random.randint(0, 2**64))
def test_lang(self):
    """set_lang must rewrite the module's api_url attribute."""
    wikipedia.set_lang("fr")
    expected = 'http://fr.wikipedia.org/w/api.php'
    self.assertEqual(wikipedia.api_url, expected)
def test_set_lang_it(self):
    # NOTE(review): the name says "it" (Italian) but the test exercises
    # Chinese ("zh") — rename candidate; kept as-is since the name is the
    # test's public identifier.
    wikipedia.set_lang("zh")
    self.assertEqual(wikipedia.API_URL, 'http://zh.wikipedia.org/w/api.php')