Example #1
from mediawikiapi import MediaWikiAPI
from bs4 import BeautifulSoup


def headings(page):
    """Return the h3 headings of a Wikipedia page, trimmed at the first ' ('."""
    mediawikiapi = MediaWikiAPI()
    page = mediawikiapi.page(page)
    soup = BeautifulSoup(page.html(), 'html.parser')
    data = []
    for headline in soup.find_all("h3"):
        text = headline.text.strip()
        # keep the text before the first " (" (drops trailing parentheticals such as years)
        data.append(text[:text.find(' (')])
    return data
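
A usage sketch; the article title is the same one the module-level code in Example #10 works with:

# Sketch only: prints each h3 heading with everything from the first " (" onward removed.
for heading in headings('List of video games notable for negative reception'):
    print(heading)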
Example #2
async def main():
    print("Running main")
    if config.import_mode:
        finish_all_in_list()
    print("Looking for new DOIs from the Event stream")
    mediawikiapi = MediaWikiAPI()
    count = 0
    count_dois_found = 0
    count_missing_dois = 0
    async for event in aiosseclient(
            'https://stream.wikimedia.org/v2/stream/recentchange', ):
        # print(event)
        data = json.loads(str(event))
        # print(data)
        meta = data["meta"]
        # what is the difference?
        server_name = data['server_name']
        namespace = int(data['namespace'])
        language_code = server_name.replace(".wikipedia.org", "")
        # for exclude in excluded_wikis:
        # if language_code == exclude:
        if language_code != "en":
            continue
        if server_name.find("wikipedia") != -1 and namespace == 0:
            title = data['title']
            if data['bot'] is True:
                bot = "(bot)"
            else:
                bot = "(!bot)"
            if data['type'] == "new":
                type = "(new)"
            elif data['type'] == "edit":
                type = "(edit)"
            else:
                type = None
            if type is not None:
                print(f"{type}\t{server_name}\t{bot}\t\"{title}\"")
                print(f"http://{server_name}/wiki/{quote(title)}")
                dois_count_tuple = process_event(
                    mediawikiapi,
                    language_code=language_code,
                    title=title,
                )
                if dois_count_tuple[0] > 0:
                    count_dois_found += dois_count_tuple[0]
                if dois_count_tuple[1] > 0:
                    count_missing_dois += dois_count_tuple[1]
                count += 1
                print(
                    f"Processed {count} events and found {count_dois_found}" +
                    f" DOIs where {count_missing_dois} were missing in WD.")
    if config.max_events > 0 and count == config.max_events:
        exit(0)
Example #3
	def searchIntent(self, session: DialogSession):
		if 'UserRandomAnswer' in session.intentName:
			search = session.payload['input']
		else:
			search = self._extractSearchWord(session)

		if not search:
			self._whatToSearch(session, 'whatToSearch')
			return

		mediawikiapi = MediaWikiAPI()
		# set language of the results
		mediawikiapi.config.language = self.LanguageManager.activeLanguage

		# Debug code for dev
		if self._devDebug:
			self.logInfo(f'User request = {search}')

		# Store the top 5 titles of the requested search (5 reduces chances of index error)
		self._top5Results = mediawikiapi.search(search, results=5)
		# set an index value for iterating through a list in the dialogs
		index = 0

		if not self._top5Results:
			self.logWarning('No match')
			self._whatToSearch(session, 'noMatch')
			return

		# remove known ambiguous results
		self.removeKnowenAmbiguousResults()

		# Check for exceptions and return any good result
		self._resultSummary = self.sortThroughResults(wikiInstance=mediawikiapi, index=index)

		# If there are no good answers, log a warning and inform the user
		if not self._resultSummary:
			self.logWarning('No match')
			self._whatToSearch(session, 'noMatch')

		# If a search result was found, say it and end
		else:
			if self._devDebug:
				self.logInfo(f'result Summary is {self._resultSummary}')

			if not self._alternatveResultUsed:
				self.sayResult(session=session, index=index)
			else:
				self.sayAlternatives(alternatives=self._top5Results[index + 1])
				self.sayResult(session=session, index=index)
Example #4
    async def that_(self, ctx, *args):
        mediawikiapi = MediaWikiAPI()

        # get random articles and number
        rand_articles, rand_num = mediawikiapi.random(2), random.randint(0, 16777215)
        article_md = ['[{}]({})'.format(article, 'https://en.wikipedia.org/wiki/'+article.replace(' ', '_')) for article in rand_articles]

        # create embed
        if not args or args[0] not in ['verbose', 'verbosify']: # zero or wrong arguments
            embed = discord.Embed(color=discord.Color(rand_num), description='That is so {1}, can we hit {0} {2}'.format(rand_num, *article_md))
        else: # either verbose or verbosify
            embed = discord.Embed(color=discord.Color(rand_num), description='**That is so {1}, can we hit {0} {2}**'.format(rand_num, *article_md))
            article_descriptions = [mediawikiapi.summary(article, chars=150, auto_suggest=False) for article in rand_articles]

            if args[0] == 'verbose':
                for desc in article_descriptions:
                    embed.add_field(name="** **", value=desc, inline=True)
            elif args[0] == 'verbosify':
                for desc in article_descriptions:
                    embed.add_field(name="** **", value=verbosify.verbosify(desc), inline=True)

        await ctx.send(embed=embed)
Example #5
    async def this(self, ctx, *args):
        summoned_channel = ctx.message.author.voice_channel
        if summoned_channel is None:
            mediawikiapi = MediaWikiAPI()
            await self.bot.say(
                f'This is so {mediawikiapi.random()}, can we hit {random.randint(0,10000000)} {mediawikiapi.random()}'
            )
        else:
            r = requests.get('https://www.billboard.com/charts/hot-100')
            soup = BeautifulSoup(r.text, 'html.parser')
            div = soup.find('div',
                            {'class': 'chart-list chart-details__left-rail'})
            songs_list = json.loads(div.attrs['data-video-playlist'])
            songs = [x["title"] for x in songs_list]
            song = random.choice(songs)
            song_split = song.split(' - ')

            await self.bot.say(
                f'This is so sad, Alexa play {song_split[0]} by {song_split[-1]}'
            )
            await ctx.invoke(self.stop)
            await self.playurl(ctx, song=song)
Example #6
from mediawikiapi import MediaWikiAPI
import wikipediaapi


def wikipedia_search(self, question):
    """
    :param self: the object that also carries the chatbot instance
    :param question: the question or topic to look up on Wikipedia
    :return: a tuple of values:
    (response, confidence, stat)
    stat = whether a result was found
    """
    mw = MediaWikiAPI()
    wiki = wikipediaapi.Wikipedia("en")
    a = mw.search(str(question))
    question = str(question)
    if len(a) >= 1:
        cos = self.chatbot.lp.similarity(question.lower(), a[0].lower())
    else:
        return "Oops, the item you wanted to know is not on wikipedia.", 0.9, False
    if cos > 0.9:
        self.chatbot.globals["reversei"]["enabled"] = False
        self.chatbot.globals["reversei"]["uid"] = False
        response = wiki.page(a[0]).summary
        confidence = cos
        stat = True
    else:
        self.chatbot.globals["reversei"]["enabled"] = True
        self.chatbot.globals["reversei"]["uid"] = 30000000002
        self.chatbot.globals["reversei"]["type"] = int
        self.chatbot.globals["temp_data"] = a

        def bracketize(x):
            return "\n[{}] {}".format(x[0] + 1, str(x[1]))

        response = "Did you mean any of these {}".format(" ".join(
            [bracketize(x) for x in enumerate(a)]))
        confidence = 1.0
        stat = False

    return response, confidence, stat
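
For orientation, a sketch of how the returned tuple can be consumed; the caller object below is hypothetical and merely stands in for whatever carries the chatbot instance described in the docstring:

# Hypothetical caller: 'bot' is assumed to expose .chatbot as used above.
response, confidence, stat = wikipedia_search(bot, "Barack Obama")
if stat:
    print(response)  # summary of the best-matching page
else:
    print(response)  # "did you mean" list or not-found message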
Example #7
from mediawikiapi import MediaWikiAPI
from bs4 import BeautifulSoup


def wikitable(page):
    """
    Exports a Wikipedia table parsed by BeautifulSoup. Deals with spanning:
    multirow and multicolumn should format as expected.
    """
    mediawikiapi = MediaWikiAPI()
    page = mediawikiapi.page(page)
    soup = BeautifulSoup(page.html(), 'html.parser')
    # assumption: use the first "wikitable" on the page (the original snippet
    # referenced an undefined `table` variable)
    table = soup.find("table", {"class": "wikitable"})
    rows = table.findAll("tr")
    nrows = len(rows)
    ncols = max([len(r.findAll(['th', 'td'])) for r in rows])

    # preallocate table structure
    # (this is required because we need to move forward in the table
    # structure once we've found a row span)
    data = []
    for i in range(nrows):
        rowD = []
        for j in range(ncols):
            rowD.append('')
        data.append(rowD)

    # fill the table with data:
    # move across cells and use span to fill extra cells
    for i, row in enumerate(rows):
        cells = row.findAll(["td", "th"])
        for j, cell in enumerate(cells):
            cspan = int(cell.get('colspan', 1))
            rspan = int(cell.get('rowspan', 1))
            l = 0
            for k in range(rspan):
                # Shifts to the first empty cell of this row
                # Avoid replacing previously inserted content
                while data[i + k][j + l]:
                    l += 1
                for m in range(cspan):
                    data[i + k][j + l + m] += cell.text.strip("\n")
    return data
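
A usage sketch, reusing the article title from Example #10; the function returns a list of row lists in which row and column spans are expanded into every cell they cover:

# Sketch only: prints the first few rows of the first wikitable on the page.
table_data = wikitable('List of video games notable for negative reception')
for row in table_data[:3]:
    print(row)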
Example #8
import unittest
from collections import defaultdict

from mediawikiapi import MediaWikiAPI
from tests.request_mock_data import mock_data


# mock out _wiki_request
class _wiki_request(object):
    calls = defaultdict(int)

    @classmethod
    def __call__(cls, params, config):
        cls.calls[params.__str__()] += 1
        return mock_data["_wiki_request calls"][tuple(sorted(params.items()))]


api = MediaWikiAPI()
api.session.request = _wiki_request()


class TestSearch(unittest.TestCase):
    """Test the functionality of mediawikiapi.search."""

    def test_search(self):
        """Test parsing a mediawikiapi request result."""
        self.assertEqual(api.search("Barack Obama"), mock_data['data']["barack.search"])

    def test_limit(self):
        """Test limiting a request results."""
        self.assertEqual(api.search("Porsche", results=3), mock_data['data']["porsche.search"])

    def test_suggestion(self):
Example #9
                    to_replace2 = ('who\'s', "what\'s", "whats", "what", "who",
                                   " is ", " are ", "who's", "what's", " an ",
                                   " a ")

                    # Removing everything except for the word to search for
                    i = remove_all(i, to_replace2)
                    send(i, comment)


def read_comment(comment):
    # Checking if the comment is valid
    if hasattr(comment, 'body') and hasattr(
            comment.author,
            'name') and comment.author.name != "wikipedia_answer_bot":
        check_and_send(comment)


# Reconnect to the server if there's a 503 error (Service Unavailable)
@retry(wait=wait_chain(*[wait_fixed(3) for i in range(3)] +
                       [wait_fixed(7) for j in range(2)] + [wait_fixed(9)]))
def main():
    # Scanning all the new comments and checking them with the functions above
    for comment in subreddit.stream.comments(skip_existing=True):
        read_comment(comment)


if __name__ == '__main__':
    mediawikiapi = MediaWikiAPI()  # Creating a wikipedia API variable
    main()
Example #10
        for j, cell in enumerate(cells):
            cspan = int(cell.get('colspan', 1))
            rspan = int(cell.get('rowspan', 1))
            l = 0
            for k in range(rspan):
                # Shifts to the first empty cell of this row
                # Avoid replacing previously inserted content
                while data[i + k][j + l]:
                    l += 1
                for m in range(cspan):
                    data[i + k][j + l + m] += cell.text.strip("\n")

    return data


mediawikiapi = MediaWikiAPI()
test_page = mediawikiapi.page('List of video games notable for negative reception')
# to check page URL:
print(test_page.url)
soup = BeautifulSoup(test_page.html(), 'html.parser')
# tables = soup.findAll("table", { "class" : "wikitable" })
# headings = soup.findAll('h3')
# df_test = wikitable_to_dataframe(tables[0])
# print(df_test)

for headlines in soup.find_all("h3"):
    print(headlines.text.strip()[:headlines.text.strip().find(' (')])


def headings(page):
    mediawikiapi = MediaWikiAPI()
Example #11
    async def this_(self, ctx, *args):
        # vc = ctx.voice_client
        # if not vc or not vc.is_connected():
        mediawikiapi = MediaWikiAPI()
        await ctx.send(f'This is so {mediawikiapi.random()}, can we hit {random.randint(0,10000000)} {mediawikiapi.random()}')