def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.public_namespace.newRedditThread = plugin.Queue()
    self.public_namespace.newYTChannel = plugin.Queue()
    self.public_namespace.newTwitterChannel = plugin.Queue()
    self.public_namespace.newProBoardsForum = plugin.Queue()
    self.public_namespace.newGCal = plugin.Queue()
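# A hypothetical sketch (for illustration only) of how the url_adder plugin,
# which is not shown here, would feed one of these shared queues; the item
# fields are inferred from the consumer handlers below:
def _example_gcal_add(self, calendar_id, discord_channel_id):
    self.public_namespace.newGCal.put({
        "action": "add",                   # or "remove"/"delete"
        "id": calendar_id,                 # Google Calendar id
        self.CHANNEL: discord_channel_id,  # Discord channel to post updates to
    })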
def threaded_action(self, q, newGCal=plugin.Queue()):
    # handle new items from url_adder
    while not newGCal.empty():
        item = newGCal.get()
        # normalise the calendar id so it always carries the full domain suffix
        item["id"] = item["id"].split('%40group.calendar.google.com')[0]
        item["id"] = item["id"] + "@group.calendar.google.com"
        if item["action"] == "remove" or item["action"] == "delete":
            try:
                del(self.data.content[item["id"]][self.CHANNELS][
                    self.data.content[item["id"]][self.CHANNELS].index(item[self.CHANNEL])])
            except (TypeError, KeyError, ValueError, NameError):
                # traceback.print_exc()
                pass
        elif item["action"] == "add":
            if item["id"] not in self.data.content:
                self.data.content[item["id"]] = {self.CHANNELS: list(), self.SEEN: list()}
            self.data.content[item["id"]][self.CHANNELS].append(item[self.CHANNEL])
    # do scrape things
    for calendar_id in self.data.content:
        try:
            # build the events query for the window [now, now + threaded_period]
            url = (self.config[URL] + calendar_id + r'/events' +
                   '?key=' + self.config[TOKEN] +
                   '&timeMin=' + datetime.datetime.utcnow().isoformat() + 'Z' +
                   '&timeMax=' + (datetime.datetime.utcnow() +
                                  datetime.timedelta(seconds=self.threaded_period)).isoformat() + 'Z' +
                   '&maxResults=%s' % MAX_RESULTS +
                   '&singleEvents=True&orderBy=startTime')
            events = requests.get(url)
            events = json.loads(events.text)
            items = events['items']
            for item in items:
                if item['id'] not in self.data.content[calendar_id][self.SEEN]:
                    self.data.content[calendar_id][self.SEEN].append(item['id'])
                    description = '__**%s**__' % item['summary']
                    # all-day events carry a 'date' field instead of 'dateTime'
                    if 'dateTime' in item['start']:
                        datething = 'dateTime'
                    else:
                        datething = 'date'
                    description += ('\n' + self.process_date(item['start'][datething]) +
                                    ' to ' + self.process_date(item['end'][datething]) + ' UTC\n')
                    cal_embed = embed.create_embed(
                        description=description,
                        author={'name': 'Google Calendar (Upcoming)',
                                'url': item['htmlLink'],
                                'icon_url': GOOGLE_LOGO},
                        footer={'text': item['organizer']['displayName'],
                                'icon_url': None})
                    for discord_channel in self.data.content[calendar_id][self.CHANNELS]:
                        q.put({self.SEND_MESSAGE: {plugin.ARGS: [discord.Object(id=discord_channel)],
                                                   plugin.KWARGS: {'embed': cal_embed}}})
        except Exception:
            # TODO: log request failure
            # traceback.print_exc()
            pass
    self.data.save()
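# `process_date` is defined elsewhere in this plugin. A hypothetical sketch of
# the conversion it plausibly performs (not the repo's actual implementation):
# Google Calendar returns RFC 3339 datetimes ('2021-05-01T10:00:00Z') for timed
# events and bare dates ('2021-05-01') for all-day events.
def _process_date_sketch(self, datestr):
    stripped = datestr.split('+')[0].rstrip('Z')  # drop any timezone suffix
    for fmt in ('%Y-%m-%dT%H:%M:%S', '%Y-%m-%d'):
        try:
            return datetime.datetime.strptime(stripped, fmt).strftime('%d %b %Y %H:%M')
        except ValueError:
            continue
    return datestr  # fall back to the raw string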
def threaded_action(self, q, ocr_q=plugin.Queue()):
    while not ocr_q.empty():
        item = ocr_q.get()
        discord_channel = discord.Object(id=item['channel'])
        mention = item['mention']
        img_url = item['img']
        language = item['lang']  # note: collected but not yet passed to pytesseract's `lang` kwarg
        msg_content = COMPLETE_MSG.format(mention=mention)
        # download img
        resp = requests.get(img_url)
        with open(IMG_TMP_LOC, 'wb') as f:
            f.write(resp.content)
        # process img with ocr
        try:
            text = pytesseract.image_to_string(Image.open(IMG_TMP_LOC))
        except pytesseract.pytesseract.TesseractNotFoundError as e:
            text = 'Image to text conversion failed.\nError: `%s`' % e
            success = False
        else:
            success = True
        if success:
            txt_filename = TEXT_TMP_LOC_START + str(self.text_tmp_index)
            # create attachment text file
            with open(txt_filename, 'w') as f:
                f.write(text)
            q.put({
                self.SEND_FILE: {
                    plugin.ARGS: [discord_channel, txt_filename],
                    plugin.KWARGS: {
                        'filename': 'text_from_img.txt',
                        'content': msg_content
                    }
                }
            })
            self.text_tmp_index += 1
        else:
            # send the error message instead of a file
            q.put({
                self.SEND_MESSAGE: {
                    plugin.ARGS: [discord_channel],
                    plugin.KWARGS: {
                        'content': text
                    }
                }
            })
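# A hypothetical sketch (for illustration only) of the item shape this handler
# consumes; the producing command plugin is not shown, so `message` and
# `attachment_url` are assumptions:
def _example_ocr_request(self, message, attachment_url):
    self.public_namespace.ocr_q.put({
        'channel': message.channel.id,      # destination Discord channel id
        'mention': message.author.mention,  # who to ping in the reply
        'img': attachment_url,              # URL of the image to run OCR on
        'lang': 'eng',                      # requested tesseract language
    })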
def threaded_action(self, q, newForum=plugin.Queue(), **kwargs):
    # handle new items from url_adder
    while not newForum.empty():
        item = newForum.get()
        if item["action"] == "remove" or item["action"] == "delete":
            try:
                del(self.data.content[item["url"]][self.CHANNELS][
                    self.data.content[item["url"]][self.CHANNELS].index(item[self.CHANNEL])])
                if self.data.content[item["url"]][self.CHANNELS] == list():
                    del(self.data.content[item["url"]])
            except (TypeError, KeyError, ValueError, NameError):
                # traceback.print_exc()
                pass
        elif item["action"] == "add":
            forumLog.debug("Adding " + item['url'])
            if not item['url'].endswith(r"/rss/public"):
                # str.strip returns a new string, so the result must be assigned
                item['url'] = item['url'].strip(r"/")
                item['url'] += r"/rss/public"
            if item["url"] not in self.data.content:
                self.data.content[item["url"]] = {self.CHANNELS: list(),
                                                  self.MOST_RECENT: self.FIRST}
            self.data.content[item["url"]][self.CHANNELS].append(item[self.CHANNEL])
    # do scrape things
    for forum in self.data.content:
        forumLog.debug("Now scraping %s" % forum)
        mostrecentrunstart = time.time()
        try:
            rss = BeautifulSoup(pageRet.pageRet(forum).decode(), "html.parser")  # rss landing page
            items = rss.find_all("item")
            # list of [url, thread title]
            threads = [[x.find("guid").get_text(), x.find("title").get_text()] for x in items]
            if self.is_new_thread(threads[0][0], forum):
                newestint = self.get_trailing_int(self.get_most_recent(forum))
                if self.get_most_recent(forum) == self.FIRST:
                    threads = [threads[0]]
                for i in threads:
                    if self.get_trailing_int(i[0]) > newestint:
                        forumLog.debug("New thread found: " + i[0])
                        # scrape the thread page for author profiles
                        recentThread = BeautifulSoup(pageRet.pageRet(i[0]).decode(), "html.parser")
                        authors = []
                        for x in recentThread.find_all("div", class_="mini-profile"):
                            try:
                                authors.append({
                                    "name": x.find("a").get_text(),
                                    "url": x.find("a").get("href"),
                                    "img": x.find("div", class_="avatar").find("img").get("src")
                                })
                            except AttributeError:
                                # if the author is a guest, x.find("a") returns None,
                                # and None.get("href") raises an AttributeError
                                pass
                        thread = [i[0], authors]
                        for discord_channel in self.data.content[forum][self.CHANNELS]:
                            q.put({
                                self.SEND_MESSAGE: {
                                    plugin.ARGS: [discord.Object(id=discord_channel)],
                                    plugin.KWARGS: {
                                        'embed': embed.create_embed(
                                            description="In: " + thread[0],
                                            author={"name": thread[1][-1]["name"],
                                                    "url": forum + thread[1][-1]["url"],
                                                    "icon_url": None},
                                            footer={"text": "Forum", "icon_url": None})
                                    }
                                }
                            })
                    else:
                        break
                self.data.content[forum][self.MOST_RECENT] = threads[0][0]
                forumLog.debug("Most recent thread is now: " + threads[0][0])
            forumLog.debug("Finished scraping run in " + str(time.time() - mostrecentrunstart))
        except Exception:
            # prevent a failed run from crashing the whole thread
            # traceback.print_exc()
            forumLog.warning(
                "Scraping run failed for %s. Either the page has changed or the page is unavailable..." % forum)
    self.data.save()
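# `is_new_thread` and `get_trailing_int` are helpers defined elsewhere in this
# plugin. A hypothetical sketch of the ordering trick the loop above relies on:
# ProBoards thread GUIDs carry an increasing numeric id, so comparing trailing
# integers orders threads by creation time.
def _get_trailing_int_sketch(self, url):
    import re
    match = re.search(r'(\d+)(?!.*\d)', url)  # last run of digits in the url
    return int(match.group(1)) if match else -1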
def threaded_action(self, q, newChannel=plugin.Queue()):
    '''Keeps a channel updated by sending messages about changes in YT channels periodically

    This should
    - send a message for logarithmic (exponential?) milestones (when n goes up an integer in Subscribers=10^n)
    - send a message for new videos
    - ???
    '''
    # handle new items from url_adder
    while not newChannel.empty():
        item = newChannel.get()
        if item["action"] == "remove" or item["action"] == "delete":
            try:
                del(self.data.content[item["id"]][self.CHANNELS][
                    self.data.content[item["id"]][self.CHANNELS].index(item[self.CHANNEL])])
                if self.data.content[item["id"]][self.CHANNELS] == list():
                    del(self.data.content[item["id"]])
            except (TypeError, KeyError, ValueError, NameError):
                # traceback.print_exc()
                pass
        elif item["action"] == "add":
            if item["id"] not in self.data.content:
                self.data.content[item["id"]] = {self.CHANNELS: list()}
            self.data.content[item["id"]][self.CHANNELS].append(item[self.CHANNEL])
    # do scrape things
    for channel_id in self.data.content:
        try:
            # API GET to the channels endpoint, loaded as JSON
            channel_info = json.loads(requests.get(self.CHANNEL_URL + channel_id).text)
            uploads_id = channel_info['items'][0]['contentDetails']['relatedPlaylists']['uploads']
            # pull the uploads playlist url out of the embed iframe html
            uploads_info = json.loads(requests.get(self.UPLOADS_URL + uploads_id).text)
            upload_iframe = uploads_info['items'][0]['player']['embedHtml']
            upload_url = upload_iframe.split('src=')[1].split(' ')[0].strip("\'\"").replace('embed/videoseries', 'playlist')
            # sub milestone check
            subscriberCount = channel_info['items'][0]['statistics']['subscriberCount']
            if self.is_new_milestone(subscriberCount, channel_id):
                for discord_channel in self.data.content[channel_id][self.CHANNELS]:
                    q.put({self.SEND_MESSAGE: {
                        plugin.ARGS: [discord.Object(id=discord_channel)],
                        plugin.KWARGS: {'embed': self.get_new_milestone(channel_info, channel_id)}}})
            # new upload check
            videoCount = channel_info['items'][0]['statistics']['videoCount']
            if self.new_upload(videoCount, channel_id):
                for discord_channel in self.data.content[channel_id][self.CHANNELS]:
                    q.put({self.SEND_MESSAGE: {
                        plugin.ARGS: [discord.Object(id=discord_channel)],
                        plugin.KWARGS: {'embed': self.get_new_upload(uploads_id)}}})
        except Exception:
            print('Failed to scrape YT API for channel %s' % channel_id)
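# A hypothetical sketch of the logarithmic milestone test described in the
# docstring above (the repo's actual `is_new_milestone` also remembers which
# milestones were already announced in self.data): a milestone is crossed when
# floor(log10(subscribers)) goes up by an integer.
def _crossed_log10_milestone_sketch(self, old_count, new_count):
    import math
    old_count, new_count = int(old_count), int(new_count)  # API returns strings
    if old_count < 1 or new_count < 1:
        return False
    return math.floor(math.log10(new_count)) > math.floor(math.log10(old_count))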
def threaded_action(self, q, newTwit=plugin.Queue(), **kwargs):
    '''(ThreadedPlugin, Queue) -> None
    Checks continuously for new tweets from the tracked twitter accounts.
    This should be run in a different thread, since it is blocking.'''
    # handle new items from url_adder
    while not newTwit.empty():
        item = newTwit.get()
        item['url'] = item['url'].strip('/').lower()
        if item["action"] == "remove" or item["action"] == "delete":
            try:
                del(self.data.content[item["url"]][self.CHANNELS][
                    self.data.content[item["url"]][self.CHANNELS].index(item[self.CHANNEL])])
                if self.data.content[item["url"]][self.CHANNELS] == list():
                    del(self.data.content[item["url"]])
            except (TypeError, KeyError, ValueError, NameError):
                # traceback.print_exc()
                pass
        elif item["action"] == "add":
            twitLog.debug("Adding " + item['url'])
            if item["url"] not in self.data.content:
                self.data.content[item["url"]] = {self.CHANNELS: list(),
                                                  self.MOST_RECENT: self.FIRST,
                                                  self.MOST_RECENT2: self.FIRST}
            self.data.content[item["url"]][self.CHANNELS].append(item[self.CHANNEL])
    # do scrape things
    for twitAccount in self.data.content:  # twitAccount is the user's account URL
        twitLog.debug("Now scraping " + twitAccount)
        mostrecentrunstart = time.time()
        try:
            author = self.get_twitter_user_from_url(twitAccount)
            twitLog.debug("URL: " + RSS_URL_START + author)
            rss = BeautifulSoup(pageRet.pageRet(RSS_URL_START + author).decode(), "html.parser")  # rss page
            items = rss.find_all("item")
            # create list of [url to tweet, tweet content, raw item]
            tweets = [[self.get_url(x), self.get_tweet(x), x] for x in items]
            pinned_tweet = tweets[0]
            tweets = tweets[1:]  # remove first tweet since it's pinned
            if len(tweets) > 1 and self.is_new_tweet(tweets[0][0], twitAccount) and self.is_new_tweet(tweets[1][0], twitAccount, second=True):
                if self.data.content[twitAccount][self.MOST_RECENT] == self.FIRST:
                    tweets = tweets[0:2]
                good_author = author  # fallback in case every new tweet is a retweet
                for i in tweets:
                    if self.is_new_tweet(i[0], twitAccount):
                        twitLog.debug("New tweet found: " + i[0])
                        tweet_author = self.get_author(i[2])
                        tweet = {"url": i[0], "content": i[1], "author": tweet_author, "retweet": False}
                        # search for picture in content
                        img_link = self.get_image(i[2])
                        img = None
                        if img_link is not None:
                            # set pic and strip the link out of the text
                            img = {'url': img_link}
                            tweet['content'] = tweet['content'].replace(img_link, '')
                            # note: the fourth positional argument of re.sub is count, not flags
                            tweet['content'] = re.sub(r'pic\.twitter\.com/([\w\d]+)', '', tweet['content'], flags=re.I)
                        if author.lower() != tweet_author.lower():
                            tweet["retweet"] = True
                            em = embed.create_embed(
                                image=img,
                                author={"name": author + " retweeted " + tweet["author"],
                                        "url": tweet["url"], 'icon_url': None},
                                description=tweet["content"],
                                footer={"text": "Twitter", "icon_url": TWITTER_LOGO})
                        else:
                            good_author = tweet["author"]
                            em = embed.create_embed(
                                image=img,
                                author={"name": good_author, "url": tweet["url"], 'icon_url': None},
                                description=tweet["content"],
                                footer={"text": "Twitter", "icon_url": TWITTER_LOGO})
                        for discord_channel in self.data.content[twitAccount][self.CHANNELS]:
                            params = {self.SEND_MESSAGE: {plugin.ARGS: [discord.Object(id=discord_channel)],
                                                          plugin.KWARGS: {'embed': em}}}
                            q.put(params)
                    else:
                        break
                if good_author != author:  # fix author capitalisation if necessary
                    good_twitAccount = twitAccount.replace(author, good_author)
                    self.data.content[good_twitAccount] = self.data.content[twitAccount]
                    del(self.data.content[twitAccount])
                    twitAccount = good_twitAccount
                self.data.content[twitAccount][self.MOST_RECENT] = tweets[0][0]
                self.data.content[twitAccount][self.MOST_RECENT2] = tweets[1][0]
                twitLog.debug("Most recent tweet is now: " + tweets[0][0])
                twitLog.debug("Second most recent tweet is now: " + tweets[1][0])
            twitLog.debug("Finished scraping run in " + str(time.time() - mostrecentrunstart))
        except Exception:
            # prevent a failed run from crashing the whole thread
            twitLog.warning("Scraping run failed. Either the page has changed or the page is unavailable...")
            # traceback.print_exc()
    self.data.save()
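# `get_url`, `get_tweet`, `get_author`, and `get_image` are helpers defined
# elsewhere in this plugin. A hypothetical sketch of the kind of extraction
# they perform on one RSS <item>, assuming the usual RSS tag layout:
def _get_url_sketch(self, item):
    return item.find("guid").get_text()  # permalink to the tweet

def _get_author_sketch(self, item):
    creator = item.find("dc:creator")
    return creator.get_text().lstrip('@') if creator else ''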
def __init__(self, *args, **kwargs):
    super().__init__(*args, should_spawn_thread=False, **kwargs)
    self.public_namespace.ocr_q = plugin.Queue()
    self.text_tmp_index = 0
    self.threaded_kwargs = {"ocr_q": self.public_namespace.ocr_q}
    self.spawn_process()