Example no. 1
def keywordExtraction():                                    # Extract the keyword from the user's input

    while True:
        keyword_sentence = input()                          # Python 3: input() replaces raw_input()

        if " is " in keyword_sentence:                       # for questions like "Who is - ?"
            keyword_sentences = keyword_sentence.split("is ")
        elif " are " in keyword_sentence:                    # for questions like "Who are the - ?"
            keyword_sentences = keyword_sentence.split("are ")
        elif " about " in keyword_sentence:                  # for questions like "What do you know about - ?"
            keyword_sentences = keyword_sentence.split("about ")
        elif " in " in keyword_sentence:                     # for sentences like "I'm interested in - "
            keyword_sentences = keyword_sentence.split("in ")
        elif " when " in keyword_sentence:                   # for sentences like "I want to know when - "
            keyword_sentences = keyword_sentence.split("when ")
        elif " where " in keyword_sentence:                  # for sentences like "I want to know where - "
            keyword_sentences = keyword_sentence.split("where ")
        else:                                               # if the input does not match one of the patterned questions
            try:                                            # check whether the input itself has a Wikipedia page (e.g. the user typed only the keyword)
                wikipedia_mediawiki = MediaWiki()
                wikiPage = wikipedia_mediawiki.page(keyword_sentence, auto_suggest=False)       # check without auto-suggest
                return [keyword_sentence, False]            # False = auto-suggest OFF
            except Exception:
                try:
                    wikiPage = wikipedia_mediawiki.page(keyword_sentence)                       # check with auto-suggest
                    return [keyword_sentence, True]         # True = auto-suggest ON
                except Exception:
                    print("I'm sorry, the information you want is not available on Wikipedia! Try something else!")
                    log("keywordEX,null")
                    continue

        keyword_sentences = keyword_sentences[1].split("?")
        [page, auto_suggest] = checkWiki(keyword_sentences[0])
        if page:           # if a Wikipedia page about the keyword exists
            return [keyword_sentences[0], auto_suggest]
Example no. 2
def checkWiki(keyword):                     # Check whether a Wikipedia page corresponding to the keyword exists
    try:
        wikipedia_mediawiki = MediaWiki()   # check whether a page corresponding to the keyword exists
        wikiPage = wikipedia_mediawiki.page(keyword, auto_suggest=False)
        return [True, False]                # True = the page exists, False = auto-suggest OFF
    except Exception:
        try:
            wikiPage = wikipedia_mediawiki.page(keyword)    # check with auto-suggest
            return [True, True]             # True = the page exists, True = auto-suggest ON
        except Exception:
            print("I'm sorry, the information you want is not available on Wikipedia! Try something else!")
            return [False, False]           # False = the page doesn't exist
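A minimal driver sketch for the helper above (the query string is purely illustrative); checkWiki returns a pair [page_exists, auto_suggest]:

# Hypothetical usage, not part of the original snippet
exists, auto_suggest = checkWiki("Alan Turing")
if exists:
    print("Page found; auto-suggest needed:", auto_suggest)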
Example no. 3
def get_wikipedia_article(s_word):
    try:
        wikipedia = MediaWiki(url=wikiurl)
        wp_words = wikipedia.search(s_word, results=1)
        wp_article = wikipedia.page(wp_words[0])
        return wp_article
    except DisambiguationError as e:
        wp_article = wikipedia.page(random.choice(e.options))
        return wp_article
    except Exception as e:
        app.logger.info('Exception')
        app.logger.info(e)
        return False
Example no. 4
class WikiMedia:
    """Wikipedia class."""

    def __init__(self):
        self.wikipedia = MediaWiki()
        self.wikipedia.language = "fr"

    def get_infos(self, query):
        """Method allowing to retrieve informations from wikipedia.fr."""
        try:
            titles = self.wikipedia.search(query)
            if len(titles) > 0:
                infos = self.wikipedia.page(titles[0])
                summary = self.wikipedia.summary(titles[0], sentences=3)

                # Use a regex to remove "== section ==" headings from the summary:
                summary = re.sub(r"={2}\s.+={2}", r"", summary)
                status = True
                url = infos.url

            # Return empty results if no titles are returned by the API
            else:
                summary = ""
                url = ""
                status = False

        # Use a single except block for disambiguation errors.
        # It allows searching for the next title if the first one leads
        # to a disambiguation error.

        except mediawiki.exceptions.DisambiguationError:
            if len(titles) > 1:
                try:
                    infos = self.wikipedia.page(titles[1])
                    summary = self.wikipedia.summary(titles[1], sentences=3)
                    summary = re.sub(r"={2}\s.+={2}", r"", summary)
                    url = infos.url
                    status = True

                except mediawiki.exceptions.DisambiguationError:
                    summary = ""
                    url = ""
                    status = False
                    logging.exception("Exception occurred")
            else:
                summary = ""
                url = ""
                status = False
                logging.exception("Exception occurred")

        return {"summary": summary, "url": url, "status": status}
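A short usage sketch for the class above (the query is illustrative; it assumes the mediawiki, re and logging imports used by the class are present):

# Hypothetical usage of WikiMedia, querying the French Wikipedia
wiki = WikiMedia()
infos = wiki.get_infos("Tour Eiffel")
if infos["status"]:
    print(infos["summary"])
    print(infos["url"])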
Example no. 5
def fetch_wikidata(title: str) -> Dict:

    api_data = {}
    wikipedia = MediaWiki()

    try:
        page_data = wikipedia.page(title)
    except Exception as e:
        return {'message': '[wikipedia] error getting wikidata'}

    pool = ThreadPoolExecutor(max_workers=6)

    poster = pool.submit(lambda: page_data.logos)
    content = pool.submit(lambda: page_data.sections)
    categories = pool.submit(lambda: page_data.categories)
    images = pool.submit(lambda: page_data.images)
    summary = pool.submit(lambda: page_data.summary)
    title_ = pool.submit(lambda: page_data.title)

    api_data['poster'] = poster.result()
    api_data['contents'] = content.result()
    api_data['categories'] = categories.result()
    api_data['images'] = images.result()
    api_data['summary'] = summary.result()
    api_data['title'] = title_.result()
    
    return api_data
Example no. 6
    def response(self, txObject):

        super(WikiLayer, self).response(txObject)

        if self.check_cmd(COMMANDS["WIKI"]["name"], txObject):

            key_value = parse_cmd_value(txObject[PROCESSED_INPUT])

            response_value = None
            try:

                wikipedia = MediaWiki()

                try:

                    response_value = wikipedia.page(key_value).summary

                except DisambiguationError as e:

                    response_value = str(e)

                txObject[PROCESSED_INPUT] = response_value
                STOPLAYER.send()

            except ConnectionError as e:

                txObject[PROCESSED_INPUT] = str(e)

        return txObject
Example no. 7
class WikiProvider(LookupProvider):
    '''Concrete provider which provides web results from Wikipedia.
    '''
    def __init__(self):
        '''Initialize WikiProvider with a MediaWiki instance.
        '''
        self._wiki = MediaWiki(
            user_agent="word_tools (https://github.com/ncdulo/word_tools)")
        LookupProvider.__init__(self)

    def lookup(self, word, limit=0):
        '''Yield str results for `word` up to `limit`. When `limit <= 0`,
        default to `limit = 3`.
        '''
        # Default to a limit of three results. Once the re-write of CLI
        # is complete, this should be updated, and likely removed
        if limit <= 0:
            limit = 3

        try:
            for result in self._wiki.opensearch(word, results=limit):
                title, _, url = result
                summary = self._wiki.page(title).summarize(chars=200)
                output = title + ' (' + url + ')\n' + summary
                yield output
        except exceptions.DisambiguationError as e:
            print('''Search term is ambiguous. There are some issues in the way
results are returned. Wikipedia suggests the following page
names. These may not be correct. This is a known issue.
                ''')
            print(e)
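A hedged usage sketch for WikiProvider (it assumes the LookupProvider base class and the exceptions import from the surrounding project are available):

# Hypothetical usage: print up to two short results for a word
provider = WikiProvider()
for entry in provider.lookup("python", limit=2):
    print(entry)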
Example no. 8
class StoryTeller:
    """docstring"""
    def __init__(self):
        self.wikipedia = MediaWiki(lang=u'fr')
        self._latitude = None
        self._longitude = None
        self._response = None
        self._url = None
        self._summary = None

    def set_position(self, latitude, longitude):
        """docstring"""
        self._latitude = latitude
        self._longitude = longitude
        if self._latitude is None and self._longitude is None:
            self._response = []
        else:
            self._response = self.wikipedia.geosearch(
                latitude=self._latitude, longitude=self._longitude)

    def choice_title(self):
        """docstring"""
        return random.choice(self._response)

    def get_informations(self):
        """docstring"""
        if self._response == []:
            return [self._summary, self._url]
        else:
            page = self.wikipedia.page(self.choice_title())
            self._summary = page.summary
            self._url = page.url
            return [self._summary, self._url]
Example no. 9
def wikipedia_summary(topic, lang='en'):
    wikipedia = MediaWiki(lang=lang)
    search = wikipedia.search(topic)
    page = wikipedia.page(search[0])
    text = '**{}**\n\n{}\n**Read more at:** [{}]({})'.format(
        page.title, page.summary, page.title, page.url)
    return text
Example no. 10
async def moegirl_search(q):
    moegirlwiki = MediaWiki(url='http://zh.moegirl.org/api.php')
    t = moegirlwiki.search(q)
    if len(t) == 0:
        return False
    p = moegirlwiki.page(t[0])
    return p.summary
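Because the function above is a coroutine, it must be awaited; a minimal sketch (the query string is only an illustration):

import asyncio

summary = asyncio.run(moegirl_search("初音未来"))
print(summary if summary else "No result found")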
Example no. 11
class WikiScratcher:
    def __init__(self, category):
        self.wikipedia = MediaWiki(url='https://en.wikipedia.org/w/api.php',
                                   user_agent='wiki-data-loader',
                                   lang='en')
        self.category = category

    # returns {pagename: {sectionname: section}, ....}
    def get_sections(self, num_pages):
        res = {}
        page_titles = self.wikipedia.categorymembers(self.category,
                                                     results=num_pages,
                                                     subcategories=False)
        if (len(page_titles) < num_pages):
            print('Only ' + str(len(page_titles)) + ' pages found !!!')
        for p_title in page_titles:
            res[p_title] = {}
            p = self.wikipedia.page(p_title)
            # add the summary
            res[p_title]['summary'] = p.summary
            # add all other sections
            section_titles = p.sections
            for s_title in section_titles:
                # ignore sections like 'references' or 'see also'
                if (self._ignore_section(s_title)):
                    continue
                section_text = p.section(s_title)
                # ignore empty sections which are in fact most likely subheaders
                if len(section_text) > 0:
                    res[p_title][s_title] = section_text
        return res
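A usage sketch for the scraper above; the category name is illustrative, and it assumes the _ignore_section helper referenced in the loop is defined elsewhere in the class:

# Hypothetical usage: fetch the sections of a few pages in one category
scraper = WikiScratcher('Machine learning')
pages = scraper.get_sections(num_pages=3)
for page_title, sections in pages.items():
    print(page_title, list(sections.keys()))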
Example no. 12
def webpage_content(page_name):
    """
    Return the summary of the desired page from Wikipedia.
    """
    wiki = MediaWiki()
    page = wiki.page(page_name)
    summary = page.summary

    return summary
Example no. 13
def open_webpage(page_name):
    """
    Return the full content of the desired page from Wikipedia.
    """
    wiki = MediaWiki()
    page = wiki.page(page_name)
    content = page.content

    return content
Example no. 14
def apiWikipedia(search, language):
    print(language, search)
    if language == 'pt':
        language = 'pt-br'
    wikipedia = MediaWiki(lang=language)
    if len(wikipedia.search(search)) < 1:
        raise Exception('apiWikipedia: Content not found')
    page = wikipedia.page(search)
    return page.summary, page.url
Example no. 15
def getTopMatchesUsingCorrelation(keyword, links, numMatches):
    # Calculate correlation
    # Download each link. For each link, find out how many times the current keyword occurs.

    # how many times does this keyword occur in each of its links
    keywordOccurenenceMap = {}
    remainingLinkSet = set(links)
    wikipedia = MediaWiki()

    #First get all links from db/cache
    articlesInCache = WikiArticles.objects.filter(title__in=links)
    for articleInCache in articlesInCache:
        #How many times is this keyword in link's associated wikipedia page
        title = articleInCache.title
        html = articleInCache.text
        text = covertHtml2Text(html)
        #Note that we are using link here and title as first argument
        addToKeywordOccurenceMap(title, text, keyword, keywordOccurenenceMap)
        # Remove from the set, so that at the end we know which links we still need to fetch from Wikipedia
        remainingLinkSet.remove(articleInCache.title)

    newWikiArticles = []
    for link in remainingLinkSet:

        try:
            l.warning("analyzing " + link)
        except Exception as e:
            l.warning("1 rags")

        linkPage = None
        try:
            linkPage = wikipedia.page(link)
        except Exception as e:
            #TODO: Log-error
            continue

        if linkPage is None or linkPage == "":
            raise Exception(
                "Wikipedia page not found/or is empty for keyword " + link)
        title = linkPage.title
        html = linkPage.html
        text = covertHtml2Text(html)
        #Note that we are using link here and title as first argument
        addToKeywordOccurenceMap(link, text, keyword, keywordOccurenenceMap)
        #bulk update
        #newWikiArticle = WikiArticles(title=title,text=text)
        #newWikiArticles.append(newWikiArticle)
        try:
            WikiArticles.objects.create(title=title, text=text)
        except Exception as e:
            l.warning("Failed to save " + title)
            l.warning(str(e))
            #continue silently

    #WikiArticles.objects.bulk_create(newWikiArticles,batch_size=DJANGO_BULK_CREATE_BATCH_SIZE)
    return keywordOccurenenceMap
Example no. 16
def lookup_wiktionary(word):
    logger = logging.getLogger(__name__)
    try:
        wikipedia = MediaWiki()
        wikipedia.set_api_url('https://en.wiktionary.org/w/api.php')
        matches = {}
        search_results = wikipedia.opensearch(word)
        if len(search_results) > 0:
            page_title = search_results[0][0]
            page = wikipedia.page(page_title)
            parts = page.content.split("\n")
            i = 0
            while i < len(parts):
                definition = ""
                part = parts[i].strip()

                if part.startswith("=== Verb ===") or part.startswith(
                        "=== Noun ===") or part.startswith(
                            "=== Adjective ==="):
                    #print(part)
                    # try to skip the first two lines after the marker
                    if (i + 1) < len(parts):
                        definition = parts[i + 1]
                    if (i + 2) < len(parts) and len(parts[i + 2].strip()) > 0:
                        definition = parts[i + 2]
                    if (i + 3) < len(parts) and len(parts[i + 3].strip()) > 0:
                        definition = parts[i + 3]

                if part.startswith(
                        "=== Adjective ===") and not 'adjective' in matches:
                    matches['adjective'] = definition
                if part.startswith("=== Noun ===") and not 'noun' in matches:
                    matches['noun'] = definition
                if part.startswith("=== Verb ===") and not 'verb' in matches:
                    matches['verb'] = definition

                i = i + 1
            final = ""

            # prefer verb, noun then adjective
            if matches.get('adjective', False):
                final = matches.get('adjective')
            if matches.get('noun', False):
                final = matches.get('noun')
            if matches.get('verb', False):
                final = matches.get('verb')
            # strip leading bracket comment
            if final and final[0] == '(':       # guard against an empty definition
                close = final.index(")") + 1
                final = final[close:]
            matches['definition'] = final
        return matches
    except Exception:
        e = sys.exc_info()
        logger.debug(e)
Example no. 17
    def lookup(self, word):
        wikipedia = MediaWiki()
        # wikipedia.set_api_url('https://en.wikipedia.org/w/api.php')
        summary = ''
        search_results = wikipedia.opensearch(word)
        if len(search_results) > 0:
            page_title = search_results[0][0]
            page = wikipedia.page(page_title)
            parts = page.summary.split('. ')
            summary = parts[0]
        return summary
Example no. 18
def find_short_meaning(search):
    try:
        wikipedia = MediaWiki()
        meaning = wikipedia.page(search.title())
    except DisambiguationError:
        return find_alter_meaning(search)
    else:
        if search.lower() != meaning.title.lower():
            return find_alter_meaning(search)
        def_meaning = meaning.summarize()
        return str(def_meaning + " Link for further reading: " +
                   wikipedia.opensearch(f'{meaning.title}', results=1)[0][2])
Example no. 19
def import_wiki(article_title):
    """
    This function fetch text from Wikipedia page based on the article title.
    This function returns the wikipedia article.

    article_title: The title of Wikipedia article (in string)
    """

    wikipedia = MediaWiki()
    article = wikipedia.page(article_title)
    # print(article.title)
    return article.content
Example no. 20
def main(search_term):
    wikipedia = MediaWiki(lang='pap', user_agent='code-for-nl-pap-parser')
    wikidata = MediaWiki(url='https://www.wikidata.org/w/api.php',
                         user_agent='code-for-nl-pap-parser')

    search_result = wikipedia.search(search_term, results=4)

    for result_item in search_result:
        page = wikipedia.page(result_item)
        print(
            'I found page \'%s\' for term \'%s\'' % (result_item, search_term),
            'with categories', '/'.join(page.categories),
            'https://pap.wikipedia.org/wiki/' +
            urllib.parse.quote(result_item))
        # print(page.images)

        # Now search this one on Wikidata; this will return a code like Q215887
        search_data = wikidata.search(result_item, results=1)

        for data_item in search_data:
            Q_CODE = data_item
            print(result_item, 'is known on wikidata with the code', Q_CODE,
                  'https://www.wikidata.org/wiki/' + Q_CODE)
            # Now try the qwikidata interface
            entity = get_entity_dict_from_api(Q_CODE)
            q = WikidataItem(entity)
            pap_data_label = q.get_label(lang='pap')
            nl_data_label = q.get_label(lang='nl')
            if pap_data_label and nl_data_label:
                # First get the page. Read the images found
                data_page = wikidata.page(result_item)
                # print(data_page.images)

                print(pap_data_label, 'is called', nl_data_label, 'in dutch')
            elif pap_data_label and not nl_data_label:
                print(pap_data_label, 'has no entry for dutch!')
            elif not pap_data_label and nl_data_label:
                print(Q_CODE, 'does not match papiamentu entry')
            elif not pap_data_label and not nl_data_label:
                print(pap_data_label, 'has no entry for dutch or papiamentu!')
Example no. 21
    def return_answer(self):
        """this function returns a dictionnary
        containing {'result' : 2, 'commentary' : "sentence from bot",
        'latitude' : number,'longitude' : number,
        "adress" : "info", "summary" : "text", "link_wiki" : "url"}
        2 = result found, wiki found,
        1 = result found no wiki, 0 = not found.
        If 0 appears, there won't be latt, lng,
        neither summary"""

        # if result from parse is null
        if self.sentence == "Error":
            self.result['result'] = 0
            self.result['commentary'] = random.choice(GENERIC_NO_ANSWER)

        # if there is a result
        else:
            # creating googlemaps client
            gmaps = googlemaps.Client(key=os.environ.get("BACKEND_KEY", ""))
            returned_list = gmaps.geocode(self.sentence)

            # if result is empty, we're returning a message
            # and a number that will let ajax know
            if not returned_list:
                self.result['result'] = 0
                self.result['commentary'] = random.choice(GENERIC_NO_ANSWER)
            # answers = 0
            else:
                #creating local var that will display first googlemaps answer
                best_result = returned_list[0]

                compile_dic(best_result, self.result)

                wikipedia = MediaWiki(lang='fr')
                t = wikipedia.geosearch(latitude=self.result["latitude"], \
                    longitude=self.result["longitude"])
                # if wiki does not have stories regarding that place
                if not t:
                    self.result['result'] = 1
                    self.result['commentary'] = random.choice(
                        GENERIC_LOC_FOUND)

                # if wiki has full info
                else:
                    self.result['result'] = 2
                    self.result['commentary'] = random.choice(
                        GENERIC_LOC_FOUND)

                    p = wikipedia.page(t[0])
                    self.result["summary"] = p.summary[:250] + "..."
                    self.result["link_wiki"] = p.url
        return self.result
Example no. 22
class WikimediaApi:
    """ Class that interact with wikimedia api """

    def __init__(self, coord, route):
        """ Function that instanciate a WikimediaApi object """

        self.lat = str(coord['lat']) if coord else ""
        self.lng = str(coord['lng']) if coord else ""
        self.route = route
        self.wikipedia = MediaWiki(lang=u'fr')

    def geosearch(self):
        """ Return a list of Wikipedia pages around the stored
         coordinates """
        try:
            geores = self.wikipedia.geosearch(self.lat, self.lng, results=5)
        except Exception:
            geores = []
        return geores

    def get_pagetitle(self):
        """ Function that return the title of a page that match the route """

        geores = self.geosearch()
        pagetitle = ""
        try:
            regex_route = r"" + self.route
            i = 0
            for i in range(len(geores)):
                if re.match(regex_route, geores[i]):
                    pagetitle = geores[i]
        except Exception:
            pass
        if not pagetitle:
            pagetitle = geores[0] if geores else ""
        return pagetitle

    def get_about(self):
        """ Function that return a summary and the url of a wikipedia page """

        pagetitle = self.get_pagetitle()
        page = self.wikipedia.page(pagetitle) if pagetitle else ""
        about_url = page.url if page else ""
        try:
            regex = r'== Situation et accès ==\n.*'
            section = re.search(regex, page.content).group(0)
            regex_sub = r'== Situation et accès =='
            about_text = (re.sub(regex_sub, "", section)).strip()
        except Exception:
            about_text = page.summary if page else ""
        return {"about_text": about_text, 'about_url': about_url}
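A minimal sketch of how the class above might be driven (the coordinates and route name are illustrative):

# Hypothetical usage: look up a summary for a place near the given coordinates
api = WikimediaApi({'lat': 48.8738, 'lng': 2.2950}, "Avenue des Champs")
about = api.get_about()
print(about['about_text'])
print(about['about_url'])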
Example no. 23
def getAnchorTags(list_of_names):
    wikipedia = MediaWiki()
    output = []
    for x in list_of_names:
        per = []
        try:
            curr = wikipedia.page(x)
            soup = BeautifulSoup(request.urlopen(curr.url).read(),"html.parser")
            soup = (soup.find('p'))
            temp = [tag['href'] for tag in soup.select('a[href]')]
            for g in temp:
                if 'wiki' in g and not 'ogg' in g:
                    k = g[6:]
                    per.append(k)
        except DisambiguationError as e:
            per = []
        output += per
    return output
Example no. 24
def print_analysis_of_wiki_article():
    """ 
    For each article, this code converts the articles to text files, performs a sentiment analysis, and checks to see if certain positive and negative words are present in the articles and displays their frequencies if they are present
    """
    for i in range(len(donald_list)):
        current = donald_list[i]
        wikipedia = MediaWiki()
        wikiarticle = wikipedia.page(current)
        print(wikiarticle.title)
        # print(wikiarticle.content)
        url = donald_url_list[i]
        res = requests.get(url)
        res.raise_for_status()
        wiki = bs4.BeautifulSoup(res.text, "html.parser")
        file_to_write = open(url.split('/')[-1] + ".txt", "a")  # append mode
        for i in wiki.select('p'):
            text_to_write = i.getText().encode('utf-8')
            file_to_write.write(str(text_to_write))

        file_to_write.close()
        from nltk.sentiment.vader import SentimentIntensityAnalyzer

        sentence = wikiarticle.content
        score = SentimentIntensityAnalyzer().polarity_scores(sentence)
        print(score)

        f = open(f'{file_to_write.name}')
        unwanted_chars = ".,-_ "
        wordfreq = {}                   # count words across the whole file, not per line
        for line in f:
            words = line.strip().split()
            for raw_word in words:
                word = raw_word.strip(unwanted_chars)
                if word not in wordfreq:
                    wordfreq[word] = 0
                wordfreq[word] += 1
        f.close()
        for word in donald_word_list:
            if word in wordfreq:
                print(word, wordfreq[word])
        for word in donald_word_list_2:
            if word in wordfreq:
                print(word, wordfreq[word])
Example no. 25
def getTopMatchesForStr(keyword, numMatches=90):
    if keyword is None or keyword == "":
        raise Exception("Empty string passed")

    unsortedResult = {}
    sortedResult = {}
    #This throws an exception that we cannot work with, so just send it to upstream/caller
    wikipedia = MediaWiki()

    page = None
    try:
        page = wikipedia.page(keyword)
    except Exception as e:
        #TODO: Log-error
        return unsortedResult

    # TODO: What if we raise an error here? For now it is fine to just return empty
    if page is None or page == "":
        raise Exception("Wikipedia page not found/or is empty")

    links = page.links
    sections = page.sections
    html = page.html
    for link in links:
        l.warning("analyzing " + link)

    if len(links) == 0:
        return unsortedResult

    how = "dumbMethod"
    how = "correlationMethod"

    if how == "dumbMethod":
        unsortedResult = getTopMatchesUsingDumbMethod(links, numMatches)

    if how == "correlationMethod":
        unsortedResult = getTopMatchesUsingCorrelation(keyword, links,
                                                       numMatches)

    sortedResult = reversed(
        sorted(unsortedResult.items(), key=operator.itemgetter(1)))
    return sortedResult
Example no. 26
class SentenceRanker:
    def __init__(self):
        self.wikipedia = MediaWiki()

    def retrieve_page_summary(self, page):
        p = self.wikipedia.page(page)
        return p.summary

    def summarize_text(self, text, word_count=600):
        return summarize(text, split=True, word_count=word_count)

    def get_top_sentences_from_page(self, pageName):
        print("Starting get_top_sentences_from_page...")

        # fetch summary from wiki page
        text = self.retrieve_page_summary(page=pageName)
        print("get_top_sentences_from_page finished.")

        # summarize summary
        return self.summarize_text(text)
Example no. 27
class WikiClient:
    """
    The module sentence_manipulator.py returns a string of key words.
    The module grandpy_map uses those key words to find an address with
    the Google Maps API.
    This class uses the previous address to find some background on wikipedia
    to be told by the good old gramps'.
    """
    def __init__(self, lang='fr'):
        self.geodata = None
        self.wikipedia = MediaWiki(lang=lang)

    def get_article_from_geodata(self, geodata):
        """
        This function gets the result of the module grandpy_map.py
        and sends it to the Wikipedia API.
        """
        formatted_address, latitude, longitude = geodata
        if latitude is None and longitude is None:
            return "", ""
        """
        geosearch(latitude='x.x', longitude='x.x') returns a list of
        different places/monuments within the radius (in meters).
        """
        titles = self.wikipedia.geosearch(latitude=latitude,
                                          longitude=longitude,
                                          radius=5000)
        # Select just one of the different results of the geosearch function :
        if titles != []:
            title = random.sample(titles, 1)
            title = title.pop()
            construction_to_describe = self.wikipedia.page(title)
            # In order to return the address of the wikipedia's article.
            url_address = construction_to_describe.url
            # The summarize function returns x first sentences of the summary.
            summary = construction_to_describe.summarize(sentences=4)
            return summary, url_address
Example no. 28
import nltk
import textblob
from mediawiki import MediaWiki
from textblob import TextBlob
wikipedia = MediaWiki()
# assigning Wikipedia pages
politics = wikipedia.page('2020 United States presidential election')
history = wikipedia.page('Albigensian Crusade')
science = wikipedia.page('Black hole')
celebrity = wikipedia.page('Dwayne Johnson')
movie = wikipedia.page('Thor: Ragnarok')

# testing positive and negative text
positive_text = "The product is awesome"
negative_text = "The product is useless"

political_content = politics.content
history_content = history.content
science_content = science.content
celebrity_content = celebrity.content
movie_content = movie.content

#textblob analysis
politics_tb = TextBlob(political_content)
history_tb = TextBlob(history_content)
science_tb = TextBlob(science_content)
celebrity_tb = TextBlob(celebrity_content)
movie_tb = TextBlob(movie_content)

# TextBlob on positive and negative text
pos_text_tb = TextBlob(positive_text)
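The excerpt stops before the analysis itself; with TextBlob, polarity and subjectivity are typically read from the sentiment property, e.g.:

# Illustrative continuation: sentiment is a (polarity, subjectivity) named tuple
print(pos_text_tb.sentiment)
print(politics_tb.sentiment)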
Example no. 29
class WikiApi:

    def __init__(self):
        self.wikipedia = MediaWiki(lang='ru')
        self.wikiquote = CustomWikiEngine(url="https://{lang}.wikiquote.org/w/api.php",
                                   lang='ru')

    def quotes(self, *words):
        results = []

        for word in words:
            titles = self.wikiquote.quotes(word, results=2)
            results += titles

        return results

    def quote_page(self, title):
        response = {}
        try:
            response = self.wikiquote.page(title=title)
        except Exception as e:
            logging.exception(e)
        return response

    def get_pages_by_categories(self, category, limit=10):
        # https://en.wikipedia.org/w/api.php?a
        # ction=query&
        # generator=categorymembers&
        # gcmlimit=100&
        # gcmtitle=Category:American%20male%20film%20actors&
        # prop=pageimages&
        # pilimit=100
        S = requests.Session()

        URL = "https://ru.wikipedia.org/w/api.php"

        PARAMS = {
            'action': "query",
            'generator': "categorymembers",
            'gcmtitle': category,
            'gcmlimit': limit,
            'format': "json"
        }

        R = S.get(url=URL, params=PARAMS)
        DATA = R.json()
        titles = []
        if 'query' in DATA and DATA['query'] and DATA['query']['pages']:
            titles = [value['title'] for key, value in DATA['query']['pages'].items()]
        return titles

    def movies(self):
        # https://ru.wikipedia.org/w/api.php?format=xml&action=query&list=embeddedin&einamespace=0&eilimit=500&eititle=Template:Infobox_film
        pass


    def search(self, *words):
        results = []

        for word in words:
            response = self.wikipedia.search(word, results=4)
            short_descriptions = response
            results += short_descriptions
        return results

    def opensearch(self, *words):
        results = []

        for word in words:
            response = self.wikipedia.opensearch(word)
            results += response
        return results

    def parse(self, *pages):
        results = []

        for page in pages:
            try:
                response = self.wikipedia.page(title=page)
                content = response.content
                sections = re.split(r'==.+?==', content)
                if sections:
                    summary = sections[0]
                    results.append(summary)
                    section_headers = re.findall(r'== \w+ ==', content)
                    if '== Сюжет ==' in section_headers:
                        index = section_headers.index('== Сюжет ==') + 1
                        if len(sections) > index:
                            plot = sections[index]
                            results.append(plot)
            except Exception as e:
                logging.error(e)
        return results
Example no. 30
class Research:

    def __init__(self, user_input):
        try:
            # Get input from the user: change it to come from the parser instead

            self.query = PlaceExtractor.extract(user_input)
            print("self.query : ", self.query)

            # googlemaps initialisation
            self.gmaps = googlemaps.Client(key)

            self.search_json = self.get_geocode()

            # Wikipedia initialisation
            self.wikipedia = MediaWiki(lang='fr')
            lat = self.get_latitude()
            lng = self.get_longitude()
            self.article = self.wikipedia.geosearch(latitude=lat, longitude=lng)[0]
            self.page = self.wikipedia.page(self.article)
            self.summary = self.page.summarize(chars=140)
            self.title = self.page.title
            self.url = self.page.url

        except Exception as e:
            print("désolé, je ne te comprends pas ", e)

    def get_wiki(self):
        # Wiki answer
        try:
            result = {"title": self.title, "summary": self.summary, "url": self.url, "error": None}
            return result

        except Exception as e:
            return {"error": True, "error message": str(e)}

    def get_latitude(self):
        # latitude
        try:
            lat = self.search_json[0]["geometry"]["location"]["lat"]
            return lat
        except:
            return 'Nothing Found for Latitude'

    def get_longitude(self):
        # longitude
        try:
            lng = self.search_json[0]["geometry"]["location"]["lng"]
            return lng
        except:
            return 'Nothing found for Longitude'

    def get_formatted_name(self):
        # Name only
        try:
            name = self.search_json[0]["formatted_address"]
            return name
        except:
            return "Name not found"

    def get_geocode(self):

        geocode_result = self.gmaps.geocode(self.query)
        print(geocode_result)
        return geocode_result