Beispiel #1
0
# Section headings recognised in the page text, mapped to the result key
# under which the first definition for that heading is stored.
_POS_HEADINGS = {
    "=== Adjective ===": "adjective",
    "=== Noun ===": "noun",
    "=== Verb ===": "verb",
}


def _first_definitions(lines):
    """Return the first definition line found under each recognised heading.

    For every line that starts (after stripping) with one of the headings in
    ``_POS_HEADINGS``, the definition is taken from the three lines that
    follow: the first following line is used as a baseline, and it is
    replaced by the second and then the third following line whenever those
    are non-blank. Only the first occurrence of each heading is recorded.
    """
    found = {}
    for index, raw in enumerate(lines):
        heading = raw.strip()
        for marker, key in _POS_HEADINGS.items():
            if key in found or not heading.startswith(marker):
                continue
            following = lines[index + 1:index + 4]
            definition = following[0] if following else ""
            # Later non-blank lines win: the lines directly after the
            # heading are usually not the definition itself.
            for candidate in following[1:]:
                if candidate.strip():
                    definition = candidate
            found[key] = definition
    return found


def _strip_leading_bracket(text):
    """Drop a leading ``(...)`` group from *text*, if one is present.

    Text that is empty, does not start with ``(``, or has no closing ``)``
    is returned unchanged.
    """
    if text.startswith("("):
        close = text.find(")")
        if close != -1:
            return text[close + 1:]
    return text


def lookup_wiktionary(word):
    """Look up *word* on English Wiktionary and extract a short definition.

    The first open-search hit is loaded and its text is scanned for
    ``=== Verb ===``, ``=== Noun ===`` and ``=== Adjective ===`` headings.

    Returns:
        A dict. It is empty when the search yields no results. Otherwise it
        holds the raw definition line under ``'verb'``, ``'noun'`` and/or
        ``'adjective'`` for each heading found, plus ``'definition'``: the
        preferred one (verb, then noun, then adjective) with a leading
        bracketed comment removed, or ``''`` when none was found.
        ``None`` is returned when the lookup itself fails; the error is
        logged at debug level.
    """
    logger = logging.getLogger(__name__)
    try:
        wikipedia = MediaWiki()
        wikipedia.set_api_url('https://en.wiktionary.org/w/api.php')
        search_results = wikipedia.opensearch(word)
        if not search_results:
            return {}

        page = wikipedia.page(search_results[0][0])
        matches = _first_definitions(page.content.split("\n"))

        # Prefer verb, then noun, then adjective; skip empty definitions.
        final = ""
        for key in ("verb", "noun", "adjective"):
            if matches.get(key):
                final = matches[key]
                break

        # Guarded stripping: an empty or unbalanced definition must not
        # raise and discard the matches collected above.
        matches['definition'] = _strip_leading_bracket(final)
        return matches
    except Exception:
        # Best-effort lookup: network/API failures are logged, not raised.
        logger.debug("Wiktionary lookup failed for %r", word, exc_info=True)
        return None
Beispiel #2
0
async def send_to_wikipedia(word, site):
    """Search English Wiktionary for *word* and publish the top hit's link.

    Despite the function name, the query goes to the Wiktionary API. When the
    open search returns at least one result, the third field of the first
    result (used here as the page link) is published as the ``'frame'`` of a
    ``hermod/<site>/display/show`` message. Nothing is published when there
    are no results.

    Failures are logged at debug level and otherwise ignored. Only
    ``Exception`` is caught, so that task cancellation and interpreter-exit
    signals still propagate instead of being silently swallowed.
    """
    logger = logging.getLogger(__name__)
    try:
        wikipedia = MediaWiki()
        wikipedia.set_api_url('https://en.wiktionary.org/w/api.php')
        search_results = wikipedia.opensearch(word)

        if search_results:
            page_link = search_results[0][2]
            await publish('hermod/' + site + '/display/show',
                          {'frame': page_link})
    except Exception:
        logger.debug("Wiktionary display lookup failed for %r", word,
                     exc_info=True)
Beispiel #3
0
"""Named-entity recognition with SpaCy"""

from collections import defaultdict
from sys import argv
import spacy
from mediawiki import MediaWiki
# Module-level MediaWiki client, created once at import time and pointed at
# the French Wikipedia API endpoint.
wikipedia = MediaWiki()
wikipedia.set_api_url('https://fr.wikipedia.org/w/api.php')
# spaCy pipeline loaded once at import time (model name suggests the small
# French news model — confirm it is installed in the target environment).
nlp = spacy.load('fr_core_news_sm')


def mediawiki(ask):
    """Print the summary of the best-matching page for *ask*.

    Searches via the module-level ``wikipedia`` client. The first result is
    tried first; if loading it fails and a second result exists, the second
    one is tried instead. The title being loaded is printed in brackets
    before each attempt. When the search returned more than one result, the
    full result list is printed afterwards as alternatives.

    Raises:
        Exception: whatever the page load raised, when the first result
            fails and there is no second result to fall back to, or when
            the fallback load fails as well.
    """
    search = wikipedia.search(ask)
    if not search:
        print(f'There were no results matching with {ask}.')
        return

    try:
        print(f'[ {search[0]} ]')
        p = wikipedia.page(str(search[0]))
    except Exception:
        # Without a second result there is nothing to fall back to; surface
        # the real page-load error instead of an unrelated IndexError.
        if len(search) < 2:
            raise
        print(f'[ {search[1]} ]')
        p = wikipedia.page(str(search[1]))

    print(p.summary)
    if len(search) > 1:
        print(f'{ask} may refer to : {search}')


def ren(input):
    text = open(f"{dataP}{input}").read()[:1000000]
    doc = nlp(text)
    people = defaultdict(int)