def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.public_namespace.newRedditThread = plugin.Queue()
    self.public_namespace.newYTChannel = plugin.Queue()
    self.public_namespace.newTwitterChannel = plugin.Queue()
    self.public_namespace.newProBoardsForum = plugin.Queue()
    self.public_namespace.newGCal = plugin.Queue()
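# A hypothetical sketch (for illustration only) of how the url_adder plugin,
# which is not shown here, would feed one of these shared queues; the item
# fields are inferred from the consumer handlers below:
def _example_gcal_add(self, calendar_id, discord_channel_id):
    self.public_namespace.newGCal.put({
        "action": "add",                   # or "remove"/"delete"
        "id": calendar_id,                 # Google Calendar id
        self.CHANNEL: discord_channel_id,  # Discord channel to post updates to
    })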
def threaded_action(self, q, newGCal=plugin.Queue()):
    # handle new items from url_adder
    while not newGCal.empty():
        item = newGCal.get()
        # normalise the calendar id so it always carries the full domain suffix
        item["id"] = item["id"].split('%40group.calendar.google.com')[0]
        item["id"] = item["id"] + "@group.calendar.google.com"
        if item["action"] == "remove" or item["action"] == "delete":
            try:
                del(self.data.content[item["id"]][self.CHANNELS][
                    self.data.content[item["id"]][self.CHANNELS].index(item[self.CHANNEL])])
            except (TypeError, KeyError, ValueError, NameError):
                # traceback.print_exc()
                pass
        elif item["action"] == "add":
            if item["id"] not in self.data.content:
                self.data.content[item["id"]] = {self.CHANNELS: list(), self.SEEN: list()}
            self.data.content[item["id"]][self.CHANNELS].append(item[self.CHANNEL])
    # do scrape things
    for calendar_id in self.data.content:
        try:
            # build the events query for the window [now, now + threaded_period]
            url = (self.config[URL] + calendar_id + r'/events' +
                   '?key=' + self.config[TOKEN] +
                   '&timeMin=' + datetime.datetime.utcnow().isoformat() + 'Z' +
                   '&timeMax=' + (datetime.datetime.utcnow() +
                                  datetime.timedelta(seconds=self.threaded_period)).isoformat() + 'Z' +
                   '&maxResults=%s' % MAX_RESULTS +
                   '&singleEvents=True&orderBy=startTime')
            events = requests.get(url)
            events = json.loads(events.text)
            items = events['items']
            for item in items:
                if item['id'] not in self.data.content[calendar_id][self.SEEN]:
                    self.data.content[calendar_id][self.SEEN].append(item['id'])
                    description = '__**%s**__' % item['summary']
                    # all-day events carry a 'date' field instead of 'dateTime'
                    if 'dateTime' in item['start']:
                        datething = 'dateTime'
                    else:
                        datething = 'date'
                    description += ('\n' + self.process_date(item['start'][datething]) +
                                    ' to ' + self.process_date(item['end'][datething]) + ' UTC\n')
                    cal_embed = embed.create_embed(
                        description=description,
                        author={'name': 'Google Calendar (Upcoming)',
                                'url': item['htmlLink'],
                                'icon_url': GOOGLE_LOGO},
                        footer={'text': item['organizer']['displayName'],
                                'icon_url': None})
                    for discord_channel in self.data.content[calendar_id][self.CHANNELS]:
                        q.put({self.SEND_MESSAGE: {plugin.ARGS: [discord.Object(id=discord_channel)],
                                                   plugin.KWARGS: {'embed': cal_embed}}})
        except Exception:
            # TODO: log request failure
            # traceback.print_exc()
            pass
    self.data.save()
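# `process_date` is defined elsewhere in this plugin. A hypothetical sketch of
# the conversion it plausibly performs (not the repo's actual implementation):
# Google Calendar returns RFC 3339 datetimes ('2021-05-01T10:00:00Z') for timed
# events and bare dates ('2021-05-01') for all-day events.
def _process_date_sketch(self, datestr):
    stripped = datestr.split('+')[0].rstrip('Z')  # drop any timezone suffix
    for fmt in ('%Y-%m-%dT%H:%M:%S', '%Y-%m-%d'):
        try:
            return datetime.datetime.strptime(stripped, fmt).strftime('%d %b %Y %H:%M')
        except ValueError:
            continue
    return datestr  # fall back to the raw string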
def threaded_action(self, q, ocr_q=plugin.Queue()):
    while not ocr_q.empty():
        item = ocr_q.get()
        discord_channel = discord.Object(id=item['channel'])
        mention = item['mention']
        img_url = item['img']
        language = item['lang']  # note: collected but not yet passed to pytesseract's `lang` kwarg
        msg_content = COMPLETE_MSG.format(mention=mention)
        # download img
        resp = requests.get(img_url)
        with open(IMG_TMP_LOC, 'wb') as f:
            f.write(resp.content)
        # process img with ocr
        try:
            text = pytesseract.image_to_string(Image.open(IMG_TMP_LOC))
        except pytesseract.pytesseract.TesseractNotFoundError as e:
            text = 'Image to text conversion failed.\nError: `%s`' % e
            success = False
        else:
            success = True
        if success:
            txt_filename = TEXT_TMP_LOC_START + str(self.text_tmp_index)
            # create attachment text file
            with open(txt_filename, 'w') as f:
                f.write(text)
            q.put({
                self.SEND_FILE: {
                    plugin.ARGS: [discord_channel, txt_filename],
                    plugin.KWARGS: {
                        'filename': 'text_from_img.txt',
                        'content': msg_content
                    }
                }
            })
            self.text_tmp_index += 1
        else:
            # send the error message instead of a file
            q.put({
                self.SEND_MESSAGE: {
                    plugin.ARGS: [discord_channel],
                    plugin.KWARGS: {
                        'content': text
                    }
                }
            })
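# A hypothetical sketch (for illustration only) of the item shape this handler
# consumes; the producing command plugin is not shown, so `message` and
# `attachment_url` are assumptions:
def _example_ocr_request(self, message, attachment_url):
    self.public_namespace.ocr_q.put({
        'channel': message.channel.id,      # destination Discord channel id
        'mention': message.author.mention,  # who to ping in the reply
        'img': attachment_url,              # URL of the image to run OCR on
        'lang': 'eng',                      # requested tesseract language
    })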
def threaded_action(self, q, newForum=plugin.Queue(), **kwargs):
    # handle new items from url_adder
    while not newForum.empty():
        item = newForum.get()
        if item["action"] == "remove" or item["action"] == "delete":
            try:
                del(self.data.content[item["url"]][self.CHANNELS][
                    self.data.content[item["url"]][self.CHANNELS].index(item[self.CHANNEL])])
                if self.data.content[item["url"]][self.CHANNELS] == list():
                    del(self.data.content[item["url"]])
            except (TypeError, KeyError, ValueError, NameError):
                # traceback.print_exc()
                pass
        elif item["action"] == "add":
            forumLog.debug("Adding " + item['url'])
            if not item['url'].endswith(r"/rss/public"):
                # str.strip returns a new string, so the result must be assigned
                item['url'] = item['url'].strip(r"/")
                item['url'] += r"/rss/public"
            if item["url"] not in self.data.content:
                self.data.content[item["url"]] = {self.CHANNELS: list(),
                                                  self.MOST_RECENT: self.FIRST}
            self.data.content[item["url"]][self.CHANNELS].append(item[self.CHANNEL])
    # do scrape things
    for forum in self.data.content:
        forumLog.debug("Now scraping %s" % forum)
        mostrecentrunstart = time.time()
        try:
            rss = BeautifulSoup(pageRet.pageRet(forum).decode(), "html.parser")  # rss landing page
            items = rss.find_all("item")
            # list of [url, thread title]
            threads = [[x.find("guid").get_text(), x.find("title").get_text()] for x in items]
            if self.is_new_thread(threads[0][0], forum):
                newestint = self.get_trailing_int(self.get_most_recent(forum))
                if self.get_most_recent(forum) == self.FIRST:
                    threads = [threads[0]]
                for i in threads:
                    if self.get_trailing_int(i[0]) > newestint:
                        forumLog.debug("New thread found: " + i[0])
                        # scrape the thread page for author profiles
                        recentThread = BeautifulSoup(pageRet.pageRet(i[0]).decode(), "html.parser")
                        authors = []
                        for x in recentThread.find_all("div", class_="mini-profile"):
                            try:
                                authors.append({
                                    "name": x.find("a").get_text(),
                                    "url": x.find("a").get("href"),
                                    "img": x.find("div", class_="avatar").find("img").get("src")
                                })
                            except AttributeError:
                                # if the author is a guest, x.find("a") returns None,
                                # and None.get("href") raises an AttributeError
                                pass
                        thread = [i[0], authors]
                        for discord_channel in self.data.content[forum][self.CHANNELS]:
                            q.put({
                                self.SEND_MESSAGE: {
                                    plugin.ARGS: [discord.Object(id=discord_channel)],
                                    plugin.KWARGS: {
                                        'embed': embed.create_embed(
                                            description="In: " + thread[0],
                                            author={"name": thread[1][-1]["name"],
                                                    "url": forum + thread[1][-1]["url"],
                                                    "icon_url": None},
                                            footer={"text": "Forum", "icon_url": None})
                                    }
                                }
                            })
                    else:
                        break
                self.data.content[forum][self.MOST_RECENT] = threads[0][0]
                forumLog.debug("Most recent thread is now: " + threads[0][0])
            forumLog.debug("Finished scraping run in " + str(time.time() - mostrecentrunstart))
        except Exception:
            # prevent a failed run from crashing the whole thread
            # traceback.print_exc()
            forumLog.warning(
                "Scraping run failed for %s. Either the page has changed or the page is unavailable..." % forum)
    self.data.save()
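# `is_new_thread` and `get_trailing_int` are helpers defined elsewhere in this
# plugin. A hypothetical sketch of the ordering trick the loop above relies on:
# ProBoards thread GUIDs carry an increasing numeric id, so comparing trailing
# integers orders threads by creation time.
def _get_trailing_int_sketch(self, url):
    import re
    match = re.search(r'(\d+)(?!.*\d)', url)  # last run of digits in the url
    return int(match.group(1)) if match else -1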
def threaded_action(self, q, newChannel=plugin.Queue()):
    '''Keeps a channel updated by sending messages about changes in YT channels periodically

    This should
    - send a message for logarithmic (exponential?) milestones (when n goes up an integer in Subscribers=10^n)
    - send a message for new videos
    - ???
    '''
    # handle new items from url_adder
    while not newChannel.empty():
        item = newChannel.get()
        if item["action"] == "remove" or item["action"] == "delete":
            try:
                del(self.data.content[item["id"]][self.CHANNELS][
                    self.data.content[item["id"]][self.CHANNELS].index(item[self.CHANNEL])])
                if self.data.content[item["id"]][self.CHANNELS] == list():
                    del(self.data.content[item["id"]])
            except (TypeError, KeyError, ValueError, NameError):
                # traceback.print_exc()
                pass
        elif item["action"] == "add":
            if item["id"] not in self.data.content:
                self.data.content[item["id"]] = {self.CHANNELS: list()}
            self.data.content[item["id"]][self.CHANNELS].append(item[self.CHANNEL])
    # do scrape things
    for channel_id in self.data.content:
        try:
            # API GET to the channels endpoint, loaded as JSON
            channel_info = json.loads(requests.get(self.CHANNEL_URL + channel_id).text)
            uploads_id = channel_info['items'][0]['contentDetails']['relatedPlaylists']['uploads']
            # pull the uploads playlist url out of the embed iframe html
            uploads_info = json.loads(requests.get(self.UPLOADS_URL + uploads_id).text)
            upload_iframe = uploads_info['items'][0]['player']['embedHtml']
            upload_url = upload_iframe.split('src=')[1].split(' ')[0].strip("\'\"").replace('embed/videoseries', 'playlist')
            # sub milestone check
            subscriberCount = channel_info['items'][0]['statistics']['subscriberCount']
            if self.is_new_milestone(subscriberCount, channel_id):
                for discord_channel in self.data.content[channel_id][self.CHANNELS]:
                    q.put({self.SEND_MESSAGE: {
                        plugin.ARGS: [discord.Object(id=discord_channel)],
                        plugin.KWARGS: {'embed': self.get_new_milestone(channel_info, channel_id)}}})
            # new upload check
            videoCount = channel_info['items'][0]['statistics']['videoCount']
            if self.new_upload(videoCount, channel_id):
                for discord_channel in self.data.content[channel_id][self.CHANNELS]:
                    q.put({self.SEND_MESSAGE: {
                        plugin.ARGS: [discord.Object(id=discord_channel)],
                        plugin.KWARGS: {'embed': self.get_new_upload(uploads_id)}}})
        except Exception:
            print('Failed to scrape YT API for channel %s' % channel_id)
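# A hypothetical sketch of the logarithmic milestone test described in the
# docstring above (the repo's actual `is_new_milestone` also remembers which
# milestones were already announced in self.data): a milestone is crossed when
# floor(log10(subscribers)) goes up by an integer.
def _crossed_log10_milestone_sketch(self, old_count, new_count):
    import math
    old_count, new_count = int(old_count), int(new_count)  # API returns strings
    if old_count < 1 or new_count < 1:
        return False
    return math.floor(math.log10(new_count)) > math.floor(math.log10(old_count))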
def threaded_action(self, q, newTwit=plugin.Queue(), **kwargs):
    '''(ThreadedPlugin, Queue) -> None
    Checks continuously for new tweets from the tracked twitter accounts.
    This should be run in a different thread, since it is blocking.'''
    # handle new items from url_adder
    while not newTwit.empty():
        item = newTwit.get()
        item['url'] = item['url'].strip('/').lower()
        if item["action"] == "remove" or item["action"] == "delete":
            try:
                del(self.data.content[item["url"]][self.CHANNELS][
                    self.data.content[item["url"]][self.CHANNELS].index(item[self.CHANNEL])])
                if self.data.content[item["url"]][self.CHANNELS] == list():
                    del(self.data.content[item["url"]])
            except (TypeError, KeyError, ValueError, NameError):
                # traceback.print_exc()
                pass
        elif item["action"] == "add":
            twitLog.debug("Adding " + item['url'])
            if item["url"] not in self.data.content:
                self.data.content[item["url"]] = {self.CHANNELS: list(),
                                                  self.MOST_RECENT: self.FIRST,
                                                  self.MOST_RECENT2: self.FIRST}
            self.data.content[item["url"]][self.CHANNELS].append(item[self.CHANNEL])
    # do scrape things
    for twitAccount in self.data.content:  # twitAccount is the user's account URL
        twitLog.debug("Now scraping " + twitAccount)
        mostrecentrunstart = time.time()
        try:
            author = self.get_twitter_user_from_url(twitAccount)
            twitLog.debug("URL: " + RSS_URL_START + author)
            rss = BeautifulSoup(pageRet.pageRet(RSS_URL_START + author).decode(), "html.parser")  # rss page
            items = rss.find_all("item")
            # create list of [url to tweet, tweet content, raw item]
            tweets = [[self.get_url(x), self.get_tweet(x), x] for x in items]
            pinned_tweet = tweets[0]
            tweets = tweets[1:]  # remove first tweet since it's pinned
            if len(tweets) > 1 and self.is_new_tweet(tweets[0][0], twitAccount) and self.is_new_tweet(tweets[1][0], twitAccount, second=True):
                if self.data.content[twitAccount][self.MOST_RECENT] == self.FIRST:
                    tweets = tweets[0:2]
                good_author = author  # fallback in case every new tweet is a retweet
                for i in tweets:
                    if self.is_new_tweet(i[0], twitAccount):
                        twitLog.debug("New tweet found: " + i[0])
                        tweet_author = self.get_author(i[2])
                        tweet = {"url": i[0], "content": i[1], "author": tweet_author, "retweet": False}
                        # search for picture in content
                        img_link = self.get_image(i[2])
                        img = None
                        if img_link is not None:
                            # set pic and strip the link out of the text
                            img = {'url': img_link}
                            tweet['content'] = tweet['content'].replace(img_link, '')
                            # note: the fourth positional argument of re.sub is count, not flags
                            tweet['content'] = re.sub(r'pic\.twitter\.com/([\w\d]+)', '', tweet['content'], flags=re.I)
                        if author.lower() != tweet_author.lower():
                            tweet["retweet"] = True
                            em = embed.create_embed(
                                image=img,
                                author={"name": author + " retweeted " + tweet["author"],
                                        "url": tweet["url"], 'icon_url': None},
                                description=tweet["content"],
                                footer={"text": "Twitter", "icon_url": TWITTER_LOGO})
                        else:
                            good_author = tweet["author"]
                            em = embed.create_embed(
                                image=img,
                                author={"name": good_author, "url": tweet["url"], 'icon_url': None},
                                description=tweet["content"],
                                footer={"text": "Twitter", "icon_url": TWITTER_LOGO})
                        for discord_channel in self.data.content[twitAccount][self.CHANNELS]:
                            params = {self.SEND_MESSAGE: {plugin.ARGS: [discord.Object(id=discord_channel)],
                                                          plugin.KWARGS: {'embed': em}}}
                            q.put(params)
                    else:
                        break
                if good_author != author:  # fix author capitalisation if necessary
                    good_twitAccount = twitAccount.replace(author, good_author)
                    self.data.content[good_twitAccount] = self.data.content[twitAccount]
                    del(self.data.content[twitAccount])
                    twitAccount = good_twitAccount
                self.data.content[twitAccount][self.MOST_RECENT] = tweets[0][0]
                self.data.content[twitAccount][self.MOST_RECENT2] = tweets[1][0]
                twitLog.debug("Most recent tweet is now: " + tweets[0][0])
                twitLog.debug("Second most recent tweet is now: " + tweets[1][0])
            twitLog.debug("Finished scraping run in " + str(time.time() - mostrecentrunstart))
        except Exception:
            # prevent a failed run from crashing the whole thread
            twitLog.warning("Scraping run failed. Either the page has changed or the page is unavailable...")
            # traceback.print_exc()
    self.data.save()
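# `get_url`, `get_tweet`, `get_author`, and `get_image` are helpers defined
# elsewhere in this plugin. A hypothetical sketch of the kind of extraction
# they perform on one RSS <item>, assuming the usual RSS tag layout:
def _get_url_sketch(self, item):
    return item.find("guid").get_text()  # permalink to the tweet

def _get_author_sketch(self, item):
    creator = item.find("dc:creator")
    return creator.get_text().lstrip('@') if creator else ''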
def __init__(self, *args, **kwargs):
    super().__init__(*args, should_spawn_thread=False, **kwargs)
    self.public_namespace.ocr_q = plugin.Queue()
    self.text_tmp_index = 0
    self.threaded_kwargs = {"ocr_q": self.public_namespace.ocr_q}
    self.spawn_process()