# Section headers recognised in the page text, paired with the key their
# definition is stored under.  Order is the preference order used when
# choosing the final definition: verb, then noun, then adjective.
_WIKTIONARY_SECTIONS = (
    ("=== Verb ===", "verb"),
    ("=== Noun ===", "noun"),
    ("=== Adjective ===", "adjective"),
)


def _definition_after(lines, index):
    """Return the definition line belonging to the header at ``lines[index]``."""
    definition = ""
    if index + 1 < len(lines):
        definition = lines[index + 1]
    # Try to skip the first two lines after the marker: a later non-blank
    # line (up to three lines below the header) replaces an earlier one.
    for offset in (2, 3):
        if index + offset < len(lines) and lines[index + offset].strip():
            definition = lines[index + offset]
    return definition


def _collect_definitions(lines):
    """Map each part of speech to the definition of its first section."""
    matches = {}
    for index, raw_line in enumerate(lines):
        header = raw_line.strip()
        for marker, key in _WIKTIONARY_SECTIONS:
            # Only the first section of each kind is used.
            if header.startswith(marker) and key not in matches:
                matches[key] = _definition_after(lines, index)
    return matches


def lookup_wiktionary(word):
    """Look up *word* on English Wiktionary and extract a short definition.

    The first opensearch hit is loaded and its plain-text content is scanned
    for ``=== Verb ===``, ``=== Noun ===`` and ``=== Adjective ===`` section
    headers.  For each kind, the definition is taken from the lines directly
    below the first such header.

    Args:
        word: The term to search for.

    Returns:
        A dict holding whichever of the keys ``'verb'``, ``'noun'`` and
        ``'adjective'`` were found, plus ``'definition'``: the preferred
        definition (verb, then noun, then adjective) with a leading
        ``(...)`` comment removed.  ``None`` when there is no search
        result, no definition could be extracted, or the lookup failed;
        failures are logged at debug level and never raised.
    """
    logger = logging.getLogger(__name__)
    try:
        wikipedia = MediaWiki()
        wikipedia.set_api_url('https://en.wiktionary.org/w/api.php')
        search_results = wikipedia.opensearch(word)
        if not search_results:
            return None

        page_title = search_results[0][0]
        page = wikipedia.page(page_title)
        matches = _collect_definitions(page.content.split("\n"))

        final = (matches.get('verb') or matches.get('noun')
                 or matches.get('adjective') or "")
        if not final:
            # Nothing usable on the page.  This used to surface as an
            # IndexError swallowed by a bare except; callers saw None.
            return None

        # Strip a leading bracket comment.  An unclosed bracket leaves the
        # text untouched instead of discarding the whole result.
        if final.startswith('('):
            _, closing, remainder = final.partition(')')
            if closing:
                final = remainder

        matches['definition'] = final
        return matches
    except Exception:
        # Best-effort lookup: report the failure and signal "no result".
        # SystemExit / KeyboardInterrupt are deliberately not caught.
        logger.debug("Wiktionary lookup failed for %r", word, exc_info=True)
        return None
async def send_to_wikipedia(word, site):
    """Show the Wiktionary page for *word* on the display of *site*.

    Runs an opensearch query against English Wiktionary and, when there is
    at least one hit, publishes the first hit's link on the topic
    ``hermod/<site>/display/show`` with the payload ``{'frame': <link>}``.
    Nothing is published when the search comes back empty.

    Args:
        word: The term to search for.
        site: Site identifier inserted into the topic name.

    Returns:
        None.  Errors are logged at debug level and not raised; task
        cancellation is allowed to propagate.
    """
    logger = logging.getLogger(__name__)
    try:
        # NOTE(review): the MediaWiki calls below look synchronous and would
        # hold up the event loop while they run - confirm, and consider
        # moving them to an executor.
        wikipedia = MediaWiki()
        wikipedia.set_api_url('https://en.wiktionary.org/w/api.php')
        search_results = wikipedia.opensearch(word)
        if search_results:
            # Index 2 of a hit is its link (index 0 is the title).
            page_link = search_results[0][2]
            await publish('hermod/' + site + '/display/show',
                          {'frame': page_link})
    except Exception:
        # A bare ``except:`` here would also swallow asyncio.CancelledError
        # (a BaseException since Python 3.8) and make the coroutine
        # impossible to cancel while it awaits ``publish``.
        logger.debug("Wiktionary display lookup failed for %r", word,
                     exc_info=True)
"""Named-entity recognition with SpaCy""" from collections import defaultdict from sys import argv import spacy from mediawiki import MediaWiki wikipedia = MediaWiki() wikipedia.set_api_url('https://fr.wikipedia.org/w/api.php') nlp = spacy.load('fr_core_news_sm') def mediawiki(ask): search = wikipedia.search(ask) if len(search) >= 1: try: print(f'[ {search[0]} ]') p = wikipedia.page(str(search[0])) except: print(f'[ {search[1]} ]') p = wikipedia.page(str(search[1])) print(p.summary) if len(search) > 1: print(f'{ask} may refer to : {search}') else: print(f'There were no results matching with {ask}.') def ren(input): text = open(f"{dataP}{input}").read()[:1000000] doc = nlp(text) people = defaultdict(int)