Example #1
import wikipedia

def info(topic):
	response = {}
	response["type"] = "wiki"
	try:
		page = wikipedia.page(topic)
		response['title'] = page.title
		response['url'] = page.url
		response['content'] = wikipedia.summary(page.title, sentences=5)
		if len(response['content']) < 200:
			response['content'] = wikipedia.summary(page.title, sentences=10)
	except Exception as error:
		ename = type(error).__name__
		if ename == 'DisambiguationError':
			page = wikipedia.page(error.options[0])
			response['title'] = page.title
			response['url'] = page.url
			response['content'] = wikipedia.summary(page.title, sentences=2)
			if len(response['content']) < 200:
				response['content'] = wikipedia.summary(page.title, sentences=10)
		elif ename == 'HTTPTimeoutError':
			response['type'] = "error"
			response['error'] = "I couldn't reach wikipedia"
		elif ename == 'PageError':
			response['type'] = "error"
			response['error'] = "I couldn't find anything on wikipedia"
		else:
			response['type'] = "error"
			response['error'] = "Unknown error occured while reaching wikipedia" 

	return response
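
Dispatching on `type(error).__name__` works, but catching the library's exception classes directly is sturdier and gives the nested `wikipedia.page(error.options[0])` call its own error handling. A minimal sketch of the same lookup (the fallback messages mirror `info` above; `info_v2` is just an illustrative name):

import wikipedia

def info_v2(topic):
	response = {"type": "wiki"}
	try:
		page = wikipedia.page(topic)
	except wikipedia.exceptions.DisambiguationError as error:
		try:
			page = wikipedia.page(error.options[0])
		except wikipedia.exceptions.WikipediaException:
			return {"type": "error", "error": "I couldn't find anything on wikipedia"}
	except wikipedia.exceptions.HTTPTimeoutError:
		return {"type": "error", "error": "I couldn't reach wikipedia"}
	except wikipedia.exceptions.PageError:
		return {"type": "error", "error": "I couldn't find anything on wikipedia"}
	except wikipedia.exceptions.WikipediaException:
		return {"type": "error", "error": "Unknown error occurred while reaching wikipedia"}
	response["title"] = page.title
	response["url"] = page.url
	response["content"] = wikipedia.summary(page.title, sentences=5)
	if len(response["content"]) < 200:
		response["content"] = wikipedia.summary(page.title, sentences=10)
	return response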
def searchWiki(page):
    wikipedia.set_lang("fr")
    link = ''
    try:
        propos = wikipedia.search(page, results=5, suggestion=False)
        for choice in propos:
            if choice == page:
                p = wikipedia.page(page)
                link = p.url
                break
            elif page in choice:
                # TODO
                print('There is a proposition containing the keyword:')
                print(choice)
            else:
                try:
                    wikipedia.page(page, redirect=False, auto_suggest=False)
                except wikipedia.exceptions.RedirectError:
                    p = wikipedia.page(page)
                    link = p.url
                    break
                except Exception:
                    link = ''
    except Exception:
        link = ""
    return link
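
`wikipedia.set_lang("fr")` in `searchWiki` mutates module-level state, so every later call in the process hits fr.wikipedia.org. If a program mixes languages, a small helper (a sketch, not part of the library) keeps that explicit:

from contextlib import contextmanager
import wikipedia

@contextmanager
def wiki_lang(lang, default="en"):
    # temporarily switch the module-wide language, then restore the default
    wikipedia.set_lang(lang)
    try:
        yield
    finally:
        wikipedia.set_lang(default)

# usage: searches inside the block go to fr.wikipedia.org
with wiki_lang("fr"):
    print(wikipedia.search("Paris", results=5))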
def findRelevantArticles(term, data_path='.'):
    # assumes module-level imports (codecs, itertools, os, time, wikipedia)
    # and a module-level set of keywords named relevant_categories
    articleList = []
    articles = wikipedia.search(term)  # suggestion=False (the default); no clear use for it now

    def save_article(article):
        category_keywords = set(itertools.chain.from_iterable(
            category.lower().split() for category in article.categories))
        if category_keywords & relevant_categories:
            articlefilename = "content_" + article.title.lower() + ".txt"
            if os.path.isfile(articlefilename):
                articlefilename = "content_%s%s.txt" % (
                    article.title.lower(), term + time.strftime("%Y%m%d-%H%M%S"))
            with codecs.open(os.path.join(data_path, articlefilename), 'wb', 'utf-8') as outfile:
                outfile.write(article.content)
            articleList.append(article.title)

    for article in articles:
        try:
            save_article(wikipedia.page(article))
        except wikipedia.exceptions.PageError:
            pass
        except wikipedia.exceptions.DisambiguationError as e:
            for option in e.options:
                try:
                    save_article(wikipedia.page(option))
                except (wikipedia.exceptions.DisambiguationError,
                        wikipedia.exceptions.PageError):
                    pass
    return articleList
Example #4
def wikify():
    """Returns the sentences with wikipedia links"""
    tag_dict = look_entity()
    link_dict = {}
    combined = combine()
    for item in tag_dict.keys():
        # look up the combined form when there is one, otherwise the item itself
        in_combined = item in combined
        query = combined[item] if in_combined else item
        try:
            page = wikipedia.page(query)
            if not in_combined or combined[item] in page.content:
                link_dict[item] = page.url
        except wikipedia.exceptions.DisambiguationError as disamb:
            try:
                link_dict[item] = wikipedia.page(disamb.options[0]).url
            except Exception:
                pass
        except wikipedia.exceptions.PageError:
            pass
    return link_dict
Example #5
    def article(self, pageid=None, title=None):
        """ 
            Returns a specific article from Wikipedia, 
            given its pageid or its title.
            Downloads it if necessary
        """
        if pageid is None and title is None:
            raise Exception('Pageid and title can\'t be None at the same time')

        if pageid is None:
            d = self.db.articles.find_one({'title': title})

            if d is not None:
                return d # found it
        else:
            d = self.db.articles.find_one({'_id': pageid})

            if d is not None:
                return d # found it
            
        try:
            if pageid is not None:
                page = wikipedia.page(pageid=pageid)
            else:
                page = wikipedia.page(title=title)

        except (
            wikipedia.exceptions.DisambiguationError,
            wikipedia.exceptions.PageError,
            wikipedia.exceptions.WikipediaException,
            requests.exceptions.RequestException,
            ValueError # error decoding JSON response
        ):
            return

        time.sleep(0.5)  # pause briefly between successive API requests

        # Even if we didn't find pageid or title, it still could be in the DB
        # since the title could have changed
        try:
            d = {
                '_id': int(page.pageid),
                'title': page.title,
                'content': page.content
            }
        except KeyboardInterrupt: # filter KeyboardInterrupt from here
            raise
        except Exception:
            return # can't add this entry

        self.db.articles.update_one(
            {'_id': d['_id']},
            {'$set': d},
            upsert=True
        )

        return d
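
Instead of the hand-rolled half-second pause above, the wikipedia library can throttle its own requests. A minimal sketch of that module-level switch (it affects all subsequent calls in the process):

from datetime import timedelta
import wikipedia

# ask the library to wait at least 500 ms between successive API requests
wikipedia.set_rate_limiting(True, min_wait=timedelta(milliseconds=500))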
Example #6
def wiki(event, bot):
	""" wiki \x02searchterm\x02. Will search Wikipedia for \x02searchterm\x02. """
	if not event.argument: return bot.say(functionHelp(wiki))
	result = search(event.argument, results=1, suggestion=True)
	if not result[0]: 
		if result[1]: return bot.say("No results found. Did you mean \x02%s\x02?" % result[1])
		else: return bot.say("No results found.")
	
	errors = []
	attempt = 0
	p = None
	try:
		p = page(result[0]) # use preload=True  when it's fixed: https://github.com/goldsmith/Wikipedia/issues/78
	except DisambiguationError as e:
		errors.append("Random disambig page: ")
		while attempt < 3:
			try:
				p = page(choice(e.options))
				break
			except DisambiguationError:
				attempt += 1
	if not p: return bot.say("Gave up looking for an unambiguous entry from the disambiguation page.")
	
	if result[1]:
		errors.append("(SP: %s?) " % result[1])
	content = p.content[:800].replace("\n", " ").replace("====", "").replace("===", "").replace("==", "")
	
	bot.say(RESULT_RPL % ("".join(errors), p.url), strins=[p.title, content], fcfs=True)
def getContentFromLink(link):
	try:
		linkText = wk.page(link, auto_suggest=False).content.lower()
	except wk.exceptions.DisambiguationError as e:
		options = [x for x in e.options if "(disambiguation)" not in x]
		linkText = wk.page(options[0], auto_suggest=False).content.lower()
	return linkText
def disambiguationWikipedia(noun):

    """
    Disambiguation for Wikipedia errors
    """

    # Try to get wikipedia content
    try:
        wiki = wikipedia.page(noun)

    except wikipedia.exceptions.DisambiguationError as e:
        new = e.options[0]

        try:
            wiki = wikipedia.page(new)

        except:
            return 'Null'

    except wikipedia.exceptions.PageError:
        new = wikipedia.search(noun)

        try:
            wiki = wikipedia.page(new[0])

        except:
            return 'Null'

    except:
        return 'Null'


    return wiki
def search_wikipedia(word):
    searchArr = wikipedia.search(word)

    wiki_results = []
    try: 
        try:
            for result in searchArr:
                #print("result: " + result)
                wiki_results.append(wikipedia.page(result, preload=False))
        except wikipedia.DisambiguationError as e:
            #print("disambiguation error on " + result)
            #print(e.with_traceback)
            try:
                for item in e.options:
                    #print("disambiguation error on " + item)
                    wiki_results.append(wikipedia.page(item, preload=False))
            except wikipedia.DisambiguationError as i:
                try:
                    for item in i.options:
                        #print("disambiguation error on " + item)
                        wiki_results.append(wikipedia.page(item, preload=False))
                except wikipedia.DisambiguationError:
                    pass
    except Exception:
        print("Something went wrong getting wikipedia results")

    return wiki_results
Example #10
 def page(title=None, pageid=None, auto_suggest=True, redirect=True):
     """
     The search term from user may not corresponds to a wikipedia page,
     due to vagueness. There are 2 alternatives, "redirect"/ "disambiguous".
     :param auto_suggest:let Wikipedia find a valid page title for the query
     :return:
     """
     if pageid is not None:
         pageid = int(pageid)
         page = WikipediaArticle.objects(pageid=pageid)
     else:
         page = WikipediaArticle.objects(title=title)
         if not page:
             results, suggestion = WikipediaWrapper.search(
                 title,
                 results=1,
                 suggestion=True)
             suggested_term = suggestion or results[0]
             page = WikipediaArticle.objects(title=suggested_term)
     if page:
         page = page[0]
     else:
         try:
             page = wikipedia.page(title=title,
                                   pageid=pageid,
                                   auto_suggest=auto_suggest,
                                   redirect=redirect)
         except UnicodeDecodeError:
             page = wikipedia.page(title=str_util.normal_str(title),
                                   pageid=pageid,
                                   auto_suggest=auto_suggest,
                                   redirect=redirect)
     if isinstance(page, wikipedia.WikipediaPage):
         page = WikipediaWrapper.save_page(page)
     return page
Example #11
    def climb_tree(self):
        """Climb the tree"""

        branch_found = True
        cur_branch = self.seed
        prev_node = None
        while cur_branch is not None:
            self.logger.debug('Current branch is %s'%cur_branch)
            #Get wikipedia page
            try:
                cur_page = wikipedia.page(cur_branch)
            except wikipedia.PageError:
                self.logger.exception('Cannot find page for %s. Ending search.'%cur_branch)
                self.tree.node(cur_branch)
                self.tree.edge(cur_branch,prev_node)
                cur_branch = None
                continue
            except wikipedia.DisambiguationError:
                self.logger.exception('Multiple pages found for query %s. Adding "(physicist)" and searching again.'%cur_branch)
                cur_page = wikipedia.page(cur_branch+' (physicist)')

            #parse the table
            html_source = BeautifulSoup(cur_page.html(),'html.parser')
            advisor = self._search_info_table(html_source,['Doctoral advisor','Doctoral advisors','Academic advisors','Academic advisor'])
            alma_mater = self._search_info_table(html_source,'Alma mater')
            students = self._search_info_table(html_source,'Doctoral students')
            #add to graph
            self.tree.node(cur_branch,cur_branch+'\n'+self._none_filter(alma_mater))
            if prev_node is not None:
                self.tree.edge(cur_branch,prev_node)
            #update
            prev_node = cur_branch
            cur_branch = self._res_filter(advisor)
Example #12
def link_checker(ngram):
	''' Checks if the word gives a valid wikipedia link '''
	try:
		page = wikipedia.page(ngram)
		link = page.url
		return link
	except wikipedia.exceptions.DisambiguationError: 
		#link = ngram.split(" ") 
		#newlink = "_".join(ngram)
		link = 'http://en.wikipedia.org/wiki/' + ngram + '_(disambiguation)'
		return link
	except wikipedia.exceptions.PageError:
		# drop common title words and retry with the remainder
		wordlist = [word for word in ngram.split() if word.lower() not in ("prime", "minister", "president")]
		ngram = " ".join(wordlist)
		try:
			page = wikipedia.page(ngram)
			link = page.url
			return link 
		except wikipedia.exceptions.PageError:
			return -1
		except wikipedia.exceptions.DisambiguationError:  
			return -1
def collectFrom(lang,start,hangang):
    wikipedia.set_lang(lang)
    lookpa = wikipedia.page(start).links
    lookna = [start]  # track visited titles (strings) rather than page objects
    corpus = str(wikipedia.page(start).content)
    while len(corpus) < hangang:
        random.shuffle(lookpa)
        item = lookpa[0]
        try:
            corpus += str(wikipedia.page(item).content)
        except wikipedia.exceptions.PageError:
            pass
        except wikipedia.exceptions.DisambiguationError:
            pass
        except KeyError:
            pass
        lookna.append(item)
        lookpa.remove(item)
        try: 
            for page in wikipedia.page(item).links:
                if page not in lookpa:
                    if page not in lookna:
                        lookpa.append(page)
        except wikipedia.exceptions.PageError:
            pass
        except wikipedia.exceptions.DisambiguationError:
            pass
        except KeyError:
            pass
        print('Corpus = ' + str(len(corpus)) + '   Searched = ' + str(len(lookna)) + '  Still = ' + str(len(lookpa)))
    
    with open(lang + 'FromWikiCorp.txt', 'w') as f:
        f.write(corpus)
Example #14
    def context_data(self):
        """
        Gather data from Wikipedia based on user-inputed SUBJECT. 
        """
        text_list, visited, visitedSeeAlso, queue = [], set(), set(), list() 
        queue.append((self.subject, self.depth))

        while len(queue) > 0:
            print("Hi")
            next = queue.pop(0)
            try:
                if next[0] not in visited and next[1] >= 0:
                    visited.add(next[0])
                    results = wikipedia.search(next[0], self.max_searches, False)
                    for pagename in results:
                        queue.append((pagename, next[1]-1))
                    text_list.extend(wikipedia.page(next[0]).content.split())
            except:
                pass

        queue.append((self.subject, self.depth))
        while len(queue) > 0: 
            next = queue.pop(0)
            try: 

                if next[0] not in visitedSeeAlso and next[1] >= 0: 
                    visitedSeeAlso.add(next[0])
                    page = wikipedia.page(next[0])
                    for reference in page.section("See also").splitlines():
                        queue.append((reference, next[1] -1))
                    text_list.extend(wikipedia.page(next[0]).content.split())
            except:
                pass          
        return text_list
def getWikiPage(title):
  try:
    page = wikipedia.page(title)
  except wikipedia.exceptions.DisambiguationError as e:
    print(e.options)
    title = random.choice(e.options)
    page = wikipedia.page(title)
  return page
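
`random.choice(e.options)` can land on an option that is itself ambiguous, in which case the second `wikipedia.page` call in `getWikiPage` raises DisambiguationError again. A more defensive sketch (assuming `import random` and `import wikipedia`; `getWikiPageSafe` is an illustrative name):

def getWikiPageSafe(title, max_tries=3):
  for _ in range(max_tries):
    try:
      return wikipedia.page(title)
    except wikipedia.exceptions.DisambiguationError as e:
      # skip options that are themselves disambiguation pages
      options = [o for o in e.options if "(disambiguation)" not in o]
      if not options:
        raise
      title = random.choice(options)
  return wikipedia.page(title)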
Example #16
 def test_tax_holiday(self):
     print("------ tax holiday test ------------")
     web_bot = wikipedia.page("Web Bot").title
     tax_holiday = wikipedia.page("Tax holiday").title
     
     article1 = Article(web_bot, repo=DataDict())
     article2 = Article(tax_holiday, op=op_backlinks, repo=DataDict())
     run_test(article1, article2)
     print('=========================================')
Example #17
 def test_impeachment(self):
     print("------ Impeachment test ------------")
     impeachment = wikipedia.page("Impeachment").title
     tower = wikipedia.page("Trump Tower").title
     
     article1 = Article(impeachment, repo=DataDict())
     article2 = Article(tower, op=op_backlinks, repo=DataDict())
     run_test(article1, article2)
     print('=========================================')
    def wikipediaSearch(self):
        LOGGER.info('Querying Wikipedia')
        neighborhood = False
        if self.location['neighborhood'] != '':
            neighborhood = True
            searchTerm = self.location['neighborhood'] + ' ' + self.location['city']
        elif self.location['neighborhood'] == '' and self.location['city'] != '' and self.location['region'] != '':
            searchTerm = self.location['city'] + ' ' + self.location['region']
        elif self.location['place_name'] != '':
            searchTerm = self.location['place_name']

        LOGGER.info('WIKI SEARCH TERM: ' + searchTerm)
        wikiPages = list()
        try:
            LOGGER.info('trying first wiki query')
            results = wikipedia.search(searchTerm)
            if len(results) != 0:
                if len(results) >= 3:
                    results = results[:3]
                for result in results:
                    try:
                        page = wikipedia.page(result)
                        wikiPages.append(page.content)
                    except wikipedia.exceptions.DisambiguationError:
                        pass
                    except wikipedia.exceptions.PageError:
                        pass
        except wikipedia.exceptions.DisambiguationError as e:
            if len(e.options) != 0:
                if len(e.options) >= 3:
                    e.options = e.options[:3]
                for opt in e.options:
                    try:
                        page = wikipedia.page(opt)
                        wikiPages.append(page.content)
                    except wikipedia.exceptions.DisambiguationError:
                        pass
                    except wikipedia.exceptions.PageError:
                        pass

        allText = ''
        if len(wikiPages) != 0:
            for page in wikiPages:
                allText += page

        self.results['wikipedia'] = allText
Example #19
def getText():
    commonHeaders = []
    popularity = []
    yourArticle = wikipedia.page("Obama")
    articles = []
    articles.append(wikipedia.search("American politicans"))
    articles.append(wikipedia.search("American presidents"))
    articles.append(wikipedia.search("Hillary Clinton"))
    articles.append(wikipedia.search("Bill Clinton"))
    articles.append(wikipedia.search("George Washington"))
    articles.append(wikipedia.search("John Kerry"))
    #articles.append(wikipedia.search("John F. Kennedy"))
##    yourArticle = wikipedia.page("New York")
##    articles = wikipedia.search("New York")
##    articles.append(wikipedia.search("American cities"))
##    articles.append(wikipedia.search("Boston"))
##    articles.append(wikipedia.search("Paris"))
##    articles.append(wikipedia.search("San Francisco"))
##    articles.append(wikipedia.search("Sacramento"))
##    articles.append(wikipedia.search("Seattle"))
##    articles.append(wikipedia.search("Chicago"))
##    articles.append(wikipedia.search("St. Louis"))
##    articles.append(wikipedia.search("Las Vegas"))
##    articles.append(wikipedia.search("Hartford"))
##    articles.append(wikipedia.search("Trenton, NJ"))
##    articles.append(wikipedia.search("Washington D.C."))
##    articles.append(wikipedia.search("Boise"))
##    articles.append(wikipedia.search("Detroit"))
##    articles.append(wikipedia.search("Now Orleans"))
##    articles.append(wikipedia.search("Salt Lake City"))
    for i in articles:
        article = wikipedia.page(i).content
        headers = getSectionHeaders(article)
        for x in headers:
            if x not in commonHeaders:
                commonHeaders.append(x)
                popularity.append(1)
            else:
                assert(len(popularity) > 1) 
                popularity[commonHeaders.index(x)] += 1
                print(x)
    print(commonHeaders)
    x = 0
    while (x < len(commonHeaders)):
        if (popularity[x]>1):
            print(commonHeaders[x])
            print(popularity[x])
        x = x + 1
        
    # Figure out what kind of article this is
    # We can use the categories tag, if you've created it
    yourCategories = yourArticle.categories
    for category in yourCategories:
        #print category
        break
    return 
Example #20
    def __init__(self,seed,sibling_depth=100):
        """Constructor for TreeClimber class"""
        self.logger = logging.getLogger(type(self).__name__)
        try:
            wikipedia.page(seed)
            self.seed = seed
        except wikipedia.PageError:
            self.logger.exception('Cannot find Wikipedia page for %s. Try another starting point.'%seed)

        self.tree = Digraph('G')
Example #21
def new_article(new_title, old_title):
    # Takes the current and the previous page title; the old title is used
    # in case of disambiguation, when the user needs to choose another link.
    try:
        new_page = wikipedia.page(new_title)
        show_links(new_page)

    # Deals with wiki disambiguation error
    except wikipedia.exceptions.DisambiguationError:
        print("An error occurred (due to disambiguation), please choose another link.")
        show_links(wikipedia.page(old_title))
Example #22
	def createTestData(self, articleList):
		for article in articleList:
			articleContent = wikipedia.page(article).content
			nouns = TextBlob(articleContent).noun_phrases
			with open(article + "_nouns", 'w+') as article_nouns:
				self.get_nouns(nouns, article_nouns)
			with open(article + "_test", 'w+') as articleFile:
				articleFile.write(articleContent)
Example #23
def getWikiText(query):
    results = wikipedia.search(query)
    try:
        page = wikipedia.page(title=results[0], auto_suggest=False)
    except wikipedia.DisambiguationError as e:
        page = wikipedia.page(e.options[0])
    text = page.content
    cleanedText = text.translate(str.maketrans('', '', string.punctuation + digits)).lower()
    allWords = cleanedText.split()
    return allWords
Example #24
 def __init__(self, page):
     """
     initializes self.page to the correct wikipedia resource
     """
     try:
         self.page = wikipedia.page(page)
     except wikipedia.exceptions.DisambiguationError as e:
         self.page = wikipedia.page(e.options[0])
     self.soup = BeautifulSoup(self.page.html(), 'html.parser')
     self._gen_table()
Example #25
def load_wiki(db):
    # load up desired terms from csv file
    med_terms = []
    with open("snomed_cleaned_term.txt", "rb") as f:
        text = f.readlines()
    for line in text:
        med_terms.extend(line.split(","))

    con = db.connection()
    cur = db.cursor()
    missed = 0
    i = 0

    for term in med_terms:
        # look in wikipedia for page associated with term
        try:
            page = wikipedia.page(term)
        except wikipedia.exceptions.DisambiguationError as e:
            # handle disambiguation error:
            # prioritize an option whose title mentions "medic", which grabs things like "(medicine)"
            possible = [x for x in e.options if re.search("medic", x.lower())]
            if possible:
                try:
                    page = wikipedia.page(possible[0])
                except:
                    missed += 1
                    continue
            # otherwise take the first choice of term
            else:
                try:
                    page = wikipedia.page(e.options[0])
                except:
                    missed += 1
                    continue
        # if no page can be found at all, skip the term
        except wikipedia.exceptions.PageError:
            missed += 1
            continue

        # join all the categories by "," to make a string for input into sql
        try:
            categories = ",".join(page.categories)
        except Exception:
            categories = ""

        # insert the page into the sql table
        cur.execute(
            "insert into med_pages VALUES (%s, %s, %s, %s, %s)",
            (int(page.pageid), page.title, page.summary, categories, page.content),
        )
        i += 1
    con.commit()
    print "# unidentifiable pages:", missed
    print "Inserted:", i
Example #26
def main():
    parser = argparse.ArgumentParser(
        description='Takes a Wikipedia topic and tweets it as a thread')
    parser.add_argument('-c',
                        action='store_true',
                        help='Log in as the last user used')
    parser.add_argument('-s',
                        action='store_true',
                        help='Post not only the Wikipedia summary but ' +
                        'also its sections')
    args = parser.parse_args()
    use_last_creds = args.c
    post_sections = args.s

    wikipedia.set_lang('es')

    name = input('What do you want to show off about?: ')
    search_results = wikipedia.search(name)

    if len(search_results) > 1:
        result_str = ''
        for i, e in enumerate(search_results):
            result_str += '[{}] {}\n'.format(i+1, e)

        print()
        option = input('Be more specific:\n' + result_str +
                       '\nOption number: ')
        page = wikipedia.page(search_results[int(option)-1])
    elif len(search_results) == 1:
        page = wikipedia.page(search_results[0])
    else:
        print('Sorry, there is nothing for that search :(')
        exit(0)

    # Store the page as a list of strings
    text = [u"I don't know if you know {}. Opening a thread \U0001F447".format(page.title)]

    text.extend([clean_string(i) for i in page.summary.splitlines()])

    if post_sections:
        for section in page.sections:
            if section in IGNORED_SECTIONS:
                continue

            text.append('##{}'.format(section))
            text.extend(
                [clean_string(i) for i in page.section(section).splitlines()])

    # Split text into tweets
    tweets = split_text(text)

    print()
    twclient.post_as_thread(tweets, use_last_creds)
    print('Congratulations! ' +
          'Now everyone thinks you are an expert on {}!'.format(page.title))
def getWikiURL(noun, tag):

    """
    Get the Wikipedia URL
    """

    if tag == "PERSON":

        try:
            wiki = wikipedia.page(noun)

        except wikipedia.exceptions.DisambiguationError as e:
            try:
                newNoun = e.options[0]
                newNoun2 = e.options[1]

                wiki = wikipedia.page(newNoun)
                wiki2 = wikipedia.page(newNoun2)

                firstSentence1 = wiki.content.split(".")[0]
                firstSentence2 = wiki2.content.split(".")[0]

                if "born" in firstSentence1:
                    return wiki.url

                elif "born" in firstSentence2:
                    return wiki2.url

                else:
                    return "Null"

            except:
                return "Null"

        except wikipedia.exceptions.PageError:
            new = wikipedia.search(noun)

            try:
                wiki = wikipedia.page(new[0])

            except:
                return 'Null'

        try:
            return wiki.url

        except:
            return "Null"

    else:
        # Check for disambiguation on Wikipedia
        wiki = disambiguationWikipedia(noun)

        try:
            url = wiki.url

        except:
            return "Null"

        return url
    def get_page_categories_unprotected(self, name, page_id=None):

        try:
            page = wiki.page(pageid=page_id)

        except (PageError, ValueError):
            page = wiki.page(name)
            if u'a' in page.categories:
                print(page.title)

        return self._afterprocess_categories(page.categories)
Example #29
    def get_text(self):

        try:
            # do a wikipedia search for the topic
            topic_results = wikipedia.search(self.topic)

            # pick one of the results and grab the content
            self.content += wikipedia.page(topic_results[rand(0, len(topic_results) - 1)]).content

            # DO IT MORE
            more_content = wikipedia.page(topic_results[rand(0, len(topic_results) - 1)]).content
            if more_content not in self.content:
                self.content += more_content
            more_content = wikipedia.page(topic_results[rand(0, len(topic_results) - 1)]).content
            if more_content not in self.content:
                self.content += more_content
            more_content = wikipedia.page(topic_results[rand(0, len(topic_results) - 1)]).content
        except wikipedia.exceptions.DisambiguationError as e:
            self.content += self.topic + self.uncertain
        except wikipedia.exceptions.PageError as e:
            self.content += self.topic + self.unknown

        # if there are more than one word in the topic try to get some more results with the first and last word
        if len(self.topic.split()) > 1:
            try:
                # get more results with less of the topic for some ambiguity
                topic_results = wikipedia.search(self.topic.split()[0])
                self.content += wikipedia.page(topic_results[rand(0, len(topic_results) - 1)]).content
                more_content = wikipedia.page(topic_results[rand(0, len(topic_results) - 1)]).content
                if more_content not in self.content:
                    self.content += more_content
            except wikipedia.exceptions.DisambiguationError as e:
                self.content += self.topic + self.uncertain
            except wikipedia.exceptions.PageError as e:
                self.content += self.topic + self.unknown
            try:
                # get even more with the last word of the topic for weird results, maybe
                topic_results = wikipedia.search(self.topic.split()[-1])
                self.content += wikipedia.page(topic_results[rand(0, len(topic_results) - 1)]).content
                more_content = wikipedia.page(topic_results[rand(0, len(topic_results) - 1)]).content
                if more_content not in self.content:
                    self.content += more_content
            except wikipedia.exceptions.DisambiguationError as e:
                self.content += self.topic + self.uncertain
            except wikipedia.exceptions.PageError as e:
                self.content += self.topic + self.unknown
        try:
            # do a wikipedia search for the topic
            topic_results = wikipedia.search(self.topic[:len(self.topic) // 2])

            # pick one of the results and grab the self.content
            self.content += wikipedia.page(topic_results[rand(0, len(topic_results) - 1)]).content
        except wikipedia.exceptions.DisambiguationError as e:
            self.content += self.topic + self.uncertain
        except wikipedia.exceptions.PageError as e:
            self.content += self.topic + self.unknown

        return self.content.capitalize()
Example #30
 def verifyInput(prompt):
     while True:
         title = input(prompt)
         try:
             wikipedia.page(title)  # fetching the page is enough to validate the title
         except wikipedia.PageError:
             print("Not a page")
             continue
         except wikipedia.DisambiguationError:
             print("Don't use disambiguation pages")
             continue
         return title
Example #31
import wikipedia
term = input("Enter a title or search phrase")
while term != "":
    try:
        print(wikipedia.page(term).title)
        print(wikipedia.page(term).summary)
        print(wikipedia.page(term).url)
        term = input("Enter a title or search phrase")
    except wikipedia.exceptions.DisambiguationError:
        print("There are multiple results. Please specify")
        print(wikipedia.search(term))
        term = input("Enter a title or search phrase")

Example #32
import wikipedia

menu_input = str(input('Enter "s" for search, enter "p" for page title \n>>>'))
while menu_input != '':
    if menu_input == 's':
        search_input = str(input('Search Request: '))
        results = wikipedia.search(search_input, 3)
        i = 0
        for result in results:
            print('{}.. {}'.format(i, result))
            i += 1
        choice = int(input('Choose Page: '))
        choice = wikipedia.page(results[choice])
        print(choice.title)
        print(choice.summary)
        print(choice.url)
    if menu_input == 'p':
        choice = wikipedia.page(str(input('Page Title: ')))
        print(choice.title)
        print(choice.summary)
        print(choice.url)
    menu_input = str(
        input('Enter "s" for search, enter "p" for page title \n>>>'))
Example #33
    wpedia.execute('''create table if not exists wp_page_info
                    (wp_title text primary key,
                    page_url text,
                    summary text,
                    parent_id integer,
                    revision_id integer)''')

    titles = set(
        x[0]
        for x in wpedia.execute('select distinct wp_title from wikipedia'))
    titles_done = set(
        x[0] for x in wpedia.execute('select wp_title from wp_page_info'))
    for t in (titles - titles_done):
        print(t)
        page = None
        try:
            page = wikipedia.page(t)
        except Exception as e:  # catch everything and ignore
            print(e, file=sys.stderr)
            try:
                page = wikipedia.page(t.replace(' ', '_'), auto_suggest=False)
            except Exception as e:
                print(e, file=sys.stderr)
        if not page:
            continue

        values = (t, page.summary, page.url, int(page.parent_id),
                  int(page.revision_id))
        wpedia.execute(
            '''insert or ignore into wp_page_info
                        (wp_title, summary, page_url, parent_id, revision_id) values
        (?,?,?,?,?)''', values)
Example #34
prefix = "|"


class Paragraph:
    title: str
    content: str

    def __init__(self, title, content):
        self.title = title
        self.content = content

    def toDict(self):
        return {"title": self.title, "content": self.content}


p = wikipedia.page("Jazz")
text = p.content

text = text.replace("=====", prefix)
text = text.replace("====", prefix)
text = text.replace("===", prefix)
text = text.replace("==", prefix)

tag = False
tags = []
tagname = ""
for i in range(len(text)):
    char = text[i]
    if char == prefix:
        if tag == False:
            tag = True
Example #35
        temp_key = int(keys[i])
        person_name = person[temp_key]
        # print(person_name)
        # print(connected[keys[i]])
        # print(connected[keys[i]][0])

        search = wiki.search(person_name)
        if (len(search) != 0):
            count += 1
            max_match = 0
            max_index = 0
            # flag = False
            for j in range(len(search)):
                # flag = False
                try:
                    summary = wiki.page(search[j]).content
                    temp_match = 0
                    for k in range(len(connected[keys[i]])):
                        if connected[keys[i]][k].lower() in summary.lower():
                            temp_match += 1
                    if (max_match < temp_match):
                        max_match = temp_match
                        max_index = j
                except:
                    print("")
            temp_row = str(temp_key) + '\t' + person_name + "\t" + search[
                max_index] + "\t" + str(max_match) + "\t" + str(
                    len(connected[keys[i]])) + "\n"
            print(temp_row)

            fin.write(temp_row)
 def __init__(self, title):
     self.page = wikipedia.page(title)
     self.summary = TextBlob(self.page.summary)
Example #37
    async def search_wikipedia(self, ctx: commands.Context, args):
        """Fait une recherche sur wikipd"""

        wait = await ctx.send("_Je cherche..._")
        results = wikipedia.search(args)
        nbmr = 0
        mmssgg = ""

        for value in results:
            nbmr = nbmr + 1
            mmssgg = mmssgg + "**{}**: {} \n".format(str(nbmr), value)

        em = discord.Embed(title='Results for: ' + args,
                           description=mmssgg,
                           colour=0x4ECDC4)
        em.set_thumbnail(url="https://upload.wikimedia.org/wikipedia/commons/"
                         "2/26/Paullusmagnus-logo_%28large%29.png")
        await wait.delete()

        sending = ["1⃣", "2⃣", "3⃣", "4⃣", "5⃣", "6⃣", "7⃣", "8⃣", "9⃣", "🔟"]

        def check(reaction, user):
            return user == ctx.author and reaction.emoji in sending and \
                reaction.message.id == msg.id

        async def waiter(future: asyncio.Future):
            reaction, _ = await self.bot.wait_for('reaction_add', check=check)
            future.set_result(reaction.emoji)

        emoji = asyncio.Future()
        self.bot.loop.create_task(waiter(emoji))

        msg = await ctx.send(embed=em)
        for e in sending:
            await msg.add_reaction(e)
            if emoji.done():
                break

        while not emoji.done():
            await asyncio.sleep(0.1)

        page = int(sending.index(emoji.result()))

        args_ = results[page]

        try:
            await msg.delete()
            await ctx.trigger_typing()
            wait = await ctx.send(
                ctx.message.author.mention +
                " ah ok, nice search, I'll run it right away!")
            wp = wikipedia.page(args_)
            wp_contenu = wp.summary[:200] + "..."
            em = discord.Embed(title='Wikipedia : ' + wp.title,
                               description="{} \n_Link_: {} ".format(
                                   wp_contenu, wp.url),
                               colour=0x9B59B6)
            em.set_author(name="Wikipedia",
                          url='http://wikipedia.org',
                          icon_url='https://upload.wikimedia.org/wikipedia/'
                          'commons/2/26/Paullusmagnus-logo_%28large'
                          '%29.png')
            em.set_thumbnail(url="https://upload.wikimedia.org/wikipedia/"
                             "commons/2/26/Paullusmagnus-logo_%28large"
                             "%29.png")
            em.set_footer(text="Merci à eux de nous fournir une encyclopédie "
                          "libre !")
            await wait.delete()
            await ctx.send(embed=em)

        except wikipedia.exceptions.PageError:
            # TODO: move this into the on_error event
            await ctx.send(":open_mouth: An **internal error** occurred;"
                           " if this happens again, contact your"
                           " administrator or open an issue on"
                           " ``gitea``!")
Example #38
def wiki_get_coordinates(places_list):
    '''
    Get location coordinates of places in a list
    Args:
        places_list: string list of places

    Return:
        coord_list: list of coordinates of the inputted places
        not_found: list of places without a wikipedia page available
    '''
    import wikipedia
    coord_list = []
    not_found = []
    for value in places_list:
        # try progressively reworded search phrases until one yields coordinates
        candidates = [
            value,
            value + " Nuclear Power Plant",
            value[:-2] + " NPP",
            value[:-2] + " Nuclear Power Plant",
            value[:-4] + " NPP",
            value[:-4] + " Nuclear Power Plant",
        ]
        for keywords in candidates:
            try:
                coord = wikipedia.page(keywords).coordinates
                break
            except (KeyError, wikipedia.exceptions.PageError,
                    wikipedia.exceptions.DisambiguationError):
                continue
        else:
            # no candidate had a page with coordinates
            not_found.append(value)
            keywords = value
            coord = (0, 0)
        coord_list.append([keywords, float(coord[0]), float(coord[1])])
    return coord_list, not_found
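
`WikipediaPage.coordinates` raises KeyError when a page has no coordinates recorded, which is why KeyError is caught alongside the page errors above. A quick usage sketch (the plant name is illustrative; results depend on live Wikipedia):

coords, missing = wiki_get_coordinates(["Fukushima Daiichi Nuclear Power Plant"])
print(coords)   # [[matched keywords, latitude, longitude], ...]
print(missing)  # places for which no candidate page had coordinates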
Example #39
"""
CP1404 - Practicals
Wikipedia
"""

import wikipedia

user_search = input("Search: ")
while user_search != "":
    wiki_page = wikipedia.page(user_search)
    try:
        print("Title: " + wiki_page.title)
        print(wikipedia.summary(user_search))
        print("URL: " + wiki_page.url)
        user_search = input("Search: ")
    except wikipedia.exceptions.DisambiguationError as e:
        print(e.options)
        user_search = input("Search: ")
Example #40
def index(request, lemma):

    #
    # Check if we found something in our own sparql repository.  If not
    # query other sources.
    #
    # TODO: We need a better check (persons with the same name).
    #
    #if not sparql_results or not sparql_results["results"]["bindings"]:
    if False:

        #
        # DBPEDIA
        #
        sparql = SPARQLWrapper(DBPEDIA_QUERY_URL)
        sparql.setQuery(SPARQL_DBPEDIA_QUERY.format(lemma))
        sparql.setReturnFormat(JSON)

        try:
            sparql_results = sparql.queryAndConvert()
        except:
            import traceback
            print(traceback.format_exc())
            sparql_results = {}

        #if sparql_results and sparql_results["results"]["bindings"]:
        #    for result in sparql_results["results"]["bindings"]:
        #        from .utils import sparql_local_insert_person
        #
        #        sparql_local_insert_person(lemma, result)
        #else:

        if True:
            #
            # CBDB
            #
            r = requests.get(CBDB_API_URL.format(lemma)).json()
            #if r.status_code == 200:
            try:
                persons = r['Package']['PersonAuthority']['PersonInfo'][
                    'Person']
            except:
                persons = []

            if type(persons) is list:
                for person in persons:
                    print(person['BasicInfo']['ChName'],
                          person['BasicInfo']['YearBirth'],
                          person['BasicInfo']['PersonId'])
            else:
                person = persons
                if person:
                    print(person['BasicInfo']['ChName'],
                          person['BasicInfo']['YearBirth'],
                          person['BasicInfo']['PersonId'])

        sparql = SPARQLWrapper(FUSEKI_QUERY_URL)
        sparql.setQuery(sparql_query.format(lemma))
        sparql.setReturnFormat(JSON)

        try:
            sparql_results = sparql.queryAndConvert()
        except:
            sparql_results = {}

    sparql = SPARQLWrapper(FUSEKI_QUERY_URL)
    sparql.setQuery(sparql_query.format(lemma))
    sparql.setReturnFormat(JSON)

    try:
        sparql_results = sparql.queryAndConvert()
    except:
        sparql_results = {}

    is_person = False
    template_result = {}
    if sparql_results.get("results", False):
        for result in sparql_results["results"]["bindings"]:
            p = result["p"]["value"].replace(prefix_default, '')
            p = p.replace(prefix_schema, '')
            p = p.replace(prefix_syntax, '')

            o = result["o"]["value"].replace(prefix_default, '')

            if p == "type" and o == "Person":
                is_person = True

            template_result[p] = o

    template_result['is_person'] = is_person
    template_result['lemma'] = lemma

    # Wikipedia
    try:
        wikipedia.set_lang("en")
        en = wikipedia.page(lemma, auto_suggest=True, redirect=True)
        wikipedia_en = en.summary
        wikipedia_en_url = en.url
    except:
        wikipedia_en = ''
        wikipedia_en_url = ''

    try:
        wikipedia.set_lang("zh")
        zh = wikipedia.page(lemma, auto_suggest=True, redirect=True)
        wikipedia_zh = zh.summary
        wikipedia_zh_url = zh.url
    except:
        wikipedia_zh = ''
        wikipedia_zh_url = ''

    # Sinology
    try:
        f = codecs.open("/docker/dublin-store/sinology/mainSpace/" + lemma,
                        "r", "utf-8")
        # Skip first line
        sinology = f.readlines()[1:]
        sinology = '\n'.join(sinology)
        sinology = creole.creole2html(sinology)
    except:
        sinology = ''

    return render(
        request, 'sparql/index.html', {
            'r': template_result,
            'wikipedia_en': wikipedia_en,
            'wikipedia_zh': wikipedia_zh,
            'wikipedia_en_url': wikipedia_en_url,
            'wikipedia_zh_url': wikipedia_zh_url,
            'sinology': sinology,
        })
Example #41
def get_wiki_url_and_content_by_keyphrase(phrase):
    with warnings.catch_warnings():  # TODO warning suppression
        warnings.simplefilter("ignore")
        wiki_page = wikipedia.page(phrase)
    return wiki_page.url, wiki_page.summary, wiki_page.categories
Example #42
import wikipedia
query=wikipedia.page("Hero")
print(query.summary)
Example #43
with mss.mss() as sct:
    while Number > 0:
        im3 = numpy.asarray(sct.grab(grabOption3))
        # im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

        text3 = "River Wye"

        Number = Number - 1

q1point = 0
q2point = 0
q3point = 0

#Searches the question

Questions = wikipedia.page(textq)

Question1 = Questions.links

#Count how many times each come up in the Question

links1 = [text1]
test = []
for link in links1:
    try:
        #try to load the wikipedia page
        page = wikipedia.page(link, auto_suggest=False)
        test.append(page)
    except wikipedia.exceptions.PageError:
        #if a "PageError" was raised, ignore it and continue to next link
        continue
Example #44
import wikipedia
import requests
import cairosvg

PAGES = ['2016 Summer Olympics', 'London']

for page in PAGES:
    wikipage = wikipedia.page(page)
    print("Page Title: ", wikipage.title)
    print("Page URL: ", wikipage.url)
    cairosvg.svg2png(
        url=
        "https://upload.wikimedia.org/wikipedia/en/d/df/2016_Summer_Olympics_logo.svg",
        write_to="/WikimediaDataLiquidGalaxy/static/images/image2222.png")
    print(" - Main Image: ", wikipage.images)
Example #45
def get_wikipedia_intro(symbol):
    import wikipedia
    company_name = get_fundamental_information(symbol)[0]
    description = wikipedia.page(company_name).content
    return(description.split('\n')[0])
 def parseWiki(self):
     self.text = wikipedia.page(title=self.webpage).content
     self.text = self.CleanText()
    def assistant(self,command):
        
        #open subreddit Reddit
        if 'open reddit' in command:
            reg_ex = re.search('open reddit (.*)', command)
            url = 'https://www.reddit.com/'
            if reg_ex:
                subreddit = reg_ex.group(1)
                url = url + 'r/' + subreddit
                webbrowser.open(url)
                self.sofiaResponse('The Reddit content has been opened for you Sir.')
                self.sofia('The Reddit content has been opened for you Sir.')
                self.sofia(url)
        elif 'shutdown' in command or 'bye' in command or 'tata' in command:
            self.sofiaResponse('Bye bye. Have a nice day')
            self.sofia('Bye bye. Have a nice day')

            sys.exit()

        elif 'open website' in command:
            reg_ex = re.search('open website (.+)', command)
            if reg_ex:
                domain = reg_ex.group(1)
                print(domain)
                
            
                url = 'https://www.' + domain  + '.com' 
                webbrowser.open(url)
                self.sofiaResponse('website ' + url + ' is opened')
                self.sofia('website' + url + 'is opened')
                #self.label6 = tk.Label(self.root, text='website' + url + 'is opened')
                #self.label6.grid()
            else:
                pass
       
    # wait until thread 1 is completely executed 
        elif 'play youtube video for' in command:
            reg_ex = re.search('youtube (.+)', command)
            if reg_ex:
                domain = reg_ex.group(1)
                print(domain)
                url = 'https://www.youtube.com/results?search_query=' + domain
                webbrowser.open(url)
                self.sofiaResponse('The youtube videos are available.')
                
                self.sofia('The youtube videos are available.' + url)
                #self.label6a = tk.Label(self.root, text='The youtube videos are available.' + url)
                #self.label6a.grid()
                
            else:
                pass
 #google search
  
    
        elif 'google' in command or 'please google' in command:                                                           #what happens when google keyword is recognized
            reg_ex = re.search('google (.+)', command)
            words = command.split()
            del words[0:1]
            st = ' '.join(words)
            print('Google Results for: '+ str(st))
            url='http://google.com/search?q='+ st
            webbrowser.open(url)
            self.sofiaResponse('Google Results for: '+str(st))
           
            self.sofia('Google Results for: '+str(st) + url)
 
#greetings
        elif 'hello' in command or 'hey' in command:
            day_time = int(strftime('%H'))
            if day_time < 12:
                self.sofiaResponse('Hello ASH. Good morning')
                self.sofia('Hello ASH. Good morning')
            elif 12 <= day_time < 18:
                self.sofiaResponse('Hello ASH. Good afternoon')
                self.sofia('Hello ASH. Good afternoon')
            else:
                self.sofiaResponse('Hello ASH. Good evening')
                self.sofia('Hello ASH. Good evening')

         
#joke
        elif 'joke' in command:
            res = requests.get(
                'https://icanhazdadjoke.com/',
            headers={"Accept":"application/json"})
            if res.status_code == requests.codes.ok:
                self.sofiaResponse(str(res.json()['joke']))
                self.sofia(str(res.json()['joke']))
            else:
                self.sofiaResponse('oops! I ran out of jokes')
                self.sofia('oops! I ran out of jokes')

#top stories from google news
        elif 'news for today' in command or 'news' in command:
            try:
                news_url="https://news.google.com/news/rss"
                Client=urlopen(news_url)
            
                xml_page=Client.read()
                Client.close()
                soup_page=soup(xml_page,"xml")
                news_list=soup_page.findAll("item")
                for news in news_list[:3]:
                    self.sofiaResponse(news.title.text.encode('utf-8')) 
                    self.sofia(news.title.text)
            except Exception as e:
                print(e)

#current weather
        elif 'current weather' in command or 'weather' in command:
            reg_ex = re.search('current weather in (.*)', command)
            if reg_ex:
                city = reg_ex.group(1)
                owm = OWM(API_key='ab0d5e80e8dafb2cb81fa9e82431c1fa')
                obs = owm.weather_at_place(city)
                w = obs.get_weather()
                k = w.get_status()
                x = w.get_temperature(unit='celsius')
                self.sofiaResponse('Current weather in %s is %s. The maximum temperature is %0.2f and the minimum temperature is %0.2f degree celcius' % (city, k, x['temp_max'], x['temp_min']))
                self.sofia('Current weather in %s is %s. The maximum temperature is %0.2f and the minimum temperature is %0.2f degree celcius' % (city, k, x['temp_max'], x['temp_min']))
#time
        elif 'time' in command:
            import datetime
            now = datetime.datetime.now()
            self.sofiaResponse('Current time is %d hours %d minutes' % (now.hour, now.minute))
            self.sofia('Current time is %d hours %d minutes' % (now.hour, now.minute))

#email
        elif 'please email' in command:
            self.sofiaResponse('Who is the recipient?')
            recipient = self.myCommand()
            if 'ash' in recipient:
                # fromaddr and toaddr are assumed to be defined elsewhere
                msg = MIMEMultipart()

                # sender's and receiver's email addresses
                msg['From'] = fromaddr
                msg['To'] = toaddr

                # subject and body of the mail
                msg['Subject'] = "hello"
                body = "Body_of_the_mail"
                msg.attach(MIMEText(body, 'plain'))

                # open the file to be sent
                filename = "final forward.pdf"
                attachment = open("C:/Users/AISHU/Desktop/final forward.pdf", "rb")

                # base64-encode the attachment and attach it to the message
                p = MIMEBase('application', 'octet-stream')
                p.set_payload(attachment.read())
                attachment.close()
                encoders.encode_base64(p)
                p.add_header('Content-Disposition', "attachment; filename= %s" % filename)
                msg.attach(p)

                # create an SMTP session, secure it with TLS and authenticate
                # (the password should come from a secure store, not source code)
                s = smtplib.SMTP('smtp.gmail.com', 587)
                s.starttls()
                s.login(fromaddr, "xyfsfvqsgawkfhjh")

                # convert the multipart message to a string and send it
                text = msg.as_string()
                s.sendmail(fromaddr, toaddr, text)
                s.quit()
                self.sofiaResponse('Email has been sent successfully. You can check your inbox.')
                self.sofia('Email has been sent successfully. You can check your inbox.')
            else:
                self.sofiaResponse('I don\'t know what you mean!')
                self.sofia('I don\'t know what you mean!')



#launch any folder
        elif 'from desktop view folder' in command or 'view folder' in command:
            reg_ex = re.search('from desktop view folder (.*)', command)
            if reg_ex:
                appname = reg_ex.group(1)
                os.startfile('C:/Users/AISHU/Desktop/' + appname)
                self.sofiaResponse('I have launched the desired folder')
                self.sofia('I have launched the desired folder')
#calculation
        elif 'calculate' in command:
            app_id = "AUXH6Q-LA7AA5J66V"  # Wolfram Alpha app id
            client = wolframalpha.Client(app_id)

            # everything after the word 'calculate' is the query
            indx = command.lower().split().index('calculate')
            query = command.split()[indx + 1:]
            res = client.query(' '.join(query))
            try:
                answer = next(res.results).text
                self.sofiaResponse("The answer is " + answer)
                self.sofia("The answer is " + answer)
            except StopIteration:
                self.sofiaResponse("I couldn't calculate that")
                self.sofia("I couldn't calculate that")
            
            
        elif 'thank you' in command or 'thanks' in command:
            self.sofiaResponse("you're welcome")
            self.sofia("you're welcome")
    
        elif 'help me' in command:
            self.sofiaResponse("""
        You can use these commands and I'll help you out:
        1.  Open reddit subreddit
        2.  Open website
        3.  Play youtube video for
        4.  Please google
        5.  From desktop view folder
        6.  News for today
        7.  Joke
        8.  Send email/email
        9.  Current weather in {cityname}
        10. Change wallpaper
        11. Time
        12. Tell me about xyz
        13. Calculate
        14. Where is {location}
        15. Launch app
        """)
            
            
            
#wallpaper
        elif 'change wallpaper' in command:
            reg_ex = re.search('change wallpaper (.*)', command)
            path_user = os.path.expanduser('~')
            if reg_ex:
                appname = reg_ex.group(1) + '.jpg'
                path_to_file = os.path.join(path_user, 'Desktop', 'wallpaper', appname)
                print(path_to_file)  # e.g. C:\Users\<user>\Desktop\wallpaper\<name>.jpg
                SPI_SETDESKWALLPAPER = 20
                ctypes.windll.user32.SystemParametersInfoW(SPI_SETDESKWALLPAPER, 0, path_to_file, 0)
                self.sofiaResponse('I have changed the desired wallpaper')
                self.sofia('I have changed the desired wallpaper')
#launch any app
        elif 'launch app' in command:
            reg_ex = re.search('launch app (.*)', command)
            if reg_ex:
                appname = reg_ex.group(1) + ".lnk"
                os.startfile('C:/ProgramData/Microsoft/Windows/Start Menu/Programs/' + appname)
                self.sofiaResponse('I have launched the desired application')
                self.sofia('I have launched the desired application')
            
            
#meanings
        elif 'tell me about' in command:
            reg_ex = re.search('tell me about (.*)', command)
            if reg_ex:
                topic = reg_ex.group(1)
                ny = wikipedia.page(topic)
                self.sofiaResponse(ny.content[:500])
                self.sofia(ny.content[:500])
                
                
              
            
#location
        elif "where is" in command:
            reg_ex = re.search('where is (.*)', command)
            if reg_ex:
                location = reg_ex.group(1)
                url = "https://www.google.nl/maps/place/" + location
                self.sofiaResponse("Hold on ASH, I will show you where " + location + " is.")
                webbrowser.open(url)
                self.sofia("Hold on ASH, I will show you where " + location + " is. " + url)
        
        else:
            self.sofiaResponse('Sorry, I don\'t understand. Please rephrase your sentence.')
            self.sofia('Sorry, I don\'t understand. Please rephrase your sentence.')
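
The email branch above keeps a Gmail app password in the source file. A safer pattern is to read credentials from the environment; a minimal sketch, assuming hypothetical SOFIA_EMAIL_USER and SOFIA_EMAIL_PASS variables are set:

import os
import smtplib
from email.mime.text import MIMEText

def send_plain_email(to_addr, subject, body):
    # credentials come from the environment (variable names are hypothetical)
    user = os.environ["SOFIA_EMAIL_USER"]
    password = os.environ["SOFIA_EMAIL_PASS"]
    msg = MIMEText(body, 'plain')
    msg['From'] = user
    msg['To'] = to_addr
    msg['Subject'] = subject
    with smtplib.SMTP('smtp.gmail.com', 587) as s:
        s.starttls()
        s.login(user, password)
        s.sendmail(user, to_addr, msg.as_string())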
Example #48
0
    while left > 0:
        get = min(per_cycle, left)
        left -= get
        yield wikipedia.random(pages=get)


def process(bunch):
    for word in bunch:
        print(word)

for titles in get_random_wikipedia_titles(amount):
    bunch = []
    for title in titles:
        word = {}
        try:
            page = wikipedia.page(title=title)
        except wikipedia.exceptions.DisambiguationError:
            print "Disambiguation in title %s, skipping"%title
            continue
        word['word'] = title
        word['description'] = page.summary
        word['translations'] = {}
        try:
            word['imageurl'] = page.images[0]
        except Exception:  # no images on the page, or the image query failed
            word['imageurl'] = "http://www.catster.com/files/original.jpg"
        bunch.append(word)
    for lang in langs:
        # `gs` is assumed to be a translation client (e.g. goslate) defined elsewhere
        translations = gs.translate([word['word'] for word in bunch], lang, 'fi')

        for translation, word in zip(translations, bunch):
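
Example #48 is truncated at both ends; the generator its opening lines belong to batches calls to wikipedia.random. A self-contained sketch of that batching pattern, reconstructed under the assumption that amount and per_cycle are the fragment's missing parameters:

import wikipedia

def get_random_wikipedia_titles(amount, per_cycle=10):
    # wikipedia.random(pages=n) returns up to n random titles per request,
    # so fetch the requested amount in chunks of per_cycle
    left = amount
    while left > 0:
        get = min(per_cycle, left)
        left -= get
        yield wikipedia.random(pages=get)

for titles in get_random_wikipedia_titles(25):
    print(titles)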
Example #49
0
# ---------------------------------------------------- #
# WikiWriter
# Section Scrapers
# By Kristina Wagner and Sharon Lin
# Copyright April 2016
# -----------------------------------------------------#

#Note that section headers in page.content are wrapped in "==" markers
def getSectionHeaders(article):
    headers = []
    startIndex = 0
    while startIndex != -1:
        # Find the opening "==" of the next header
        startIndex = article.find("==")
        # Find the matching closing "==" after it
        subarticle = article[startIndex+2:]
        endIndex = subarticle.find("==")
        if (startIndex > -1) and (endIndex > -1) and (endIndex < 100):
            header = subarticle[:endIndex].strip(' \t\n\r=')
            if header != "":
                print(header)
                headers.append(header)
        # Continue scanning after the closing marker, slicing from
        # subarticle so the same text is not re-read
        article = subarticle[endIndex+2:]
    return headers

article = wikipedia.page("Obama").content
getSectionHeaders(article)
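
The same headers can also be pulled out with one regular expression, which avoids the manual index bookkeeping; a minimal sketch (not part of the original WikiWriter code):

import re
import wikipedia

def get_section_headers(text):
    # headers look like "== History ==" or "=== Early life ===";
    # the closing marker mirrors the opening one (backreference \1)
    return [m.group(2).strip() for m in re.finditer(r'(={2,})([^=\n]+?)\1', text)]

print(get_section_headers(wikipedia.page("Obama").content))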
#test-docx.py
from docx import Document
import wikipedia

wikipedia.set_lang('th')

# summary: a condensed version of the article
data = wikipedia.summary('ประเทศไทย')

# page + content: the full article text
data2 = wikipedia.page('ประเทศไทย')
data2 = data2.content

doc = Document()  # create a Word file from Python
doc.add_heading('แมว', 0)  # 'แมว' means "cat"

doc.add_paragraph(data2)

doc.save('ประเทศไทย.docx')
print('สร้างไฟล์สำเร็จ')  # "file created successfully"
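
The page object also exposes its section titles, so the .docx can get real headings instead of one long paragraph; a minimal sketch along the same lines (page.section() returns None when a section's text cannot be located):

from docx import Document
import wikipedia

wikipedia.set_lang('th')
page = wikipedia.page('ประเทศไทย')

doc = Document()
doc.add_heading(page.title, 0)
doc.add_paragraph(page.summary)
for name in page.sections:
    body = page.section(name)
    if body:
        doc.add_heading(name, level=1)
        doc.add_paragraph(body)
doc.save('sections.docx')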
def summary():
    # assumes a Tk root window and a `question` entry widget defined elsewhere
    query = wikipedia.page(question.get())
    answer = Text(root, height=100, width=160, font=("Arial", 14), wrap=WORD, bg="#7CEBC6", fg="black")
    answer.insert(END, query.summary)
    answer.pack()
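
summary() refers to a root window and a question widget that the snippet never defines; a minimal wiring sketch for that missing Tkinter setup (widget names are guesses taken from the function body):

from tkinter import *
import wikipedia

root = Tk()
question = Entry(root, width=60)  # the entry widget summary() reads from
question.pack()
Button(root, text="Search", command=summary).pack()
root.mainloop()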
Example #52
0
    if not os.path.isfile(str(word + "_tot.json")):
        print(word)
        banlist.append(word)

for word in banlist:
    print(word)
    random.seed(word)
    current = ""
    if word in done:
        continue
    with open(word + ".json", 'r') as f:
        titles = json.load(f)
    while len(current) < target:
        rand_index = random.randrange(len(titles))
        try:
            content = wikipedia.page(titles[rand_index]).content
            current += "\n" + content
        except wikipedia.PageError:
            continue
        except wikipedia.DisambiguationError:
            continue
        except wikipedia.WikipediaException:
            print("wikipedia exception")
            time.sleep(5)
            continue
        except Exception:  # narrower than a bare except, which would also trap KeyboardInterrupt
            print("unknown error")
            time.sleep(5)
            continue
    done.append(word)
    with open(word + "_tot.json", 'w') as f:
Example #53
0
def getPage(search):
    return wikipedia.page(search) 
Example #54
0
import pandas as pd
import wikipedia as wp

sp_500_current = []

# Get the html source
html = wp.page("List of S&P 500 companies").html().encode("UTF-8")
df = pd.read_html(html)[0]
for key, row in df.iterrows():
    sp_500_current.append(row["Symbol"])

print(len(sp_500_current), sp_500_current)
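
If the companies table ever stops being the first table on the page, read_html can select by content instead of position; a small variant of the same idea:

import pandas as pd
import wikipedia as wp

html = wp.page("List of S&P 500 companies").html()
# match= keeps only the tables whose text contains "Symbol"
df = pd.read_html(html, match="Symbol")[0]
symbols = df["Symbol"].tolist()
print(len(symbols), symbols[:10])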
Example #55
0
    def get_wiki(self):
        # `w` is assumed to be the wikipedia module (import wikipedia as w)
        title = w.search(self.query)[0]  # get first result
        page = w.page(title)  # get page

        return page.content  # return page content
Example #56
0
            break
        elif compute_jaccard_index(
                str(title_no_year + " (" + year + " film" + ")"), result) >= 1:
            current_query = result
            break
        # elif "film" in result:
        #     current_query = result
        #     break
        else:
            current_query = "no_results"

    print "current_query:\t\t", current_query, "/ for movie: ", full_title

    if current_query != "no_results":
        try:
            movie_page = wikipedia.page(current_query)
            go_flag = True
        except:
            print "PLOT------DisambiguationError for:", full_title
            go_flag = False

        if go_flag:
            section_results = [
                unicodedata.normalize('NFKD', x).encode('ascii', 'ignore')
                for x in movie_page.sections
            ]
            # f.write(str(full_title + "\t" + current_query + "\t" + str(search_results) + "\n"))
            print "sections for\t\t", current_query, section_results, "\n"
            if "Plot" in section_results:
                # print movie_page.section("Plot").replace("\n"," ")
                # plot = movie_page.section("Plot")
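
The fragment calls a compute_jaccard_index helper that is not shown; a plausible minimal definition (an assumption, not the original code):

def compute_jaccard_index(a, b):
    # Jaccard similarity of the two strings' lower-cased token sets:
    # |A & B| / |A | B|; 1.0 means the titles share exactly the same tokens
    set_a = set(a.lower().split())
    set_b = set(b.lower().split())
    union = set_a | set_b
    return len(set_a & set_b) / float(len(union)) if union else 0.0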
Example #57
0
def get_wiki(query):
    title = wikipedia.search(query)[0]
    page = wikipedia.page(title)
    return page.content
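
A quick usage check for the helper above (requires network access; the exact text varies):

import wikipedia

print(get_wiki("Alan Turing")[:300])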
Example #58
0
def wiki_look(context):
    author = wikipedia.page(context.auname)
    author.sections  # fetch the section list (result unused in this mock)
    print("looks for author info using Wikipedia API")  #mock
Example #59
0
import wikipedia

data = wikipedia.page("Donald Trump")
with open('output_wiki_DonaldTrump.txt', 'w', encoding='utf8') as file:
    file.write(data.content)

Example #60
0
  args.gradient_accumulation_steps=1
  args.local_rank=-1
  args.fp16=False
  args.loss_scale=0
  args.server_ip=''
  args.server_port=''

  run_squad.main(args)

  with open('../files/results/nbest_predictions.json', 'r') as f:
    output = f.read()

  return json.loads(output)

if __name__ == '__main__':
  page = wikipedia.page('History of the United Kingdom')

  input = {
    "data" : [
      {
        "questions" : [
          "What sactions are in place?"
        ],
        "url" : "https://www.bbc.co.uk/news/world-us-canada-48748544",
        "context" : str(page.content)
      }
    ]
  }
  input = json.dumps(input)
  answers = answer_questions(input)
  print(answers)