async def rss_yt(self, guild, identifiant, date=None):
    if identifiant == 'help':
        return await self.translate(guild, "rss", "yt-help")
    url = 'https://www.youtube.com/feeds/videos.xml?channel_id=' + identifiant
    feeds = feedparser.parse(url)
    if feeds.entries == []:
        # Fall back to the legacy ?user= feed (this span was redacted in the
        # source; reconstructed from the parallel branch below and the sibling
        # rss_tw/rss_twitch handlers).
        url = 'https://www.youtube.com/feeds/videos.xml?user=' + identifiant
        feeds = feedparser.parse(url)
        if feeds.entries == []:
            return await self.translate(guild, "rss", "nothing")
    if not date:
        feed = feeds.entries[0]
        img_url = None
        if 'media_thumbnail' in feed.keys() and len(feed['media_thumbnail']) > 0:
            img_url = feed['media_thumbnail'][0]['url']
        obj = self.rssMessage(bot=self.bot, Type='yt', url=feed['link'], title=feed['title'], emojis=self.bot.cogs['EmojiCog'].customEmojis, date=feed['published_parsed'], author=feed['author'], image=img_url)
        return [obj]
    else:
        liste = list()
        for feed in feeds.entries:
            if datetime.datetime(*feed['published_parsed'][:6]) <= date:
                break
            img_url = None
            if 'media_thumbnail' in feed.keys() and len(feed['media_thumbnail']) > 0:
                img_url = feed['media_thumbnail'][0]['url']
            obj = self.rssMessage(bot=self.bot, Type='yt', url=feed['link'], title=feed['title'], emojis=self.bot.cogs['EmojiCog'].customEmojis, date=feed['published_parsed'], author=feed['author'], image=img_url)
            liste.append(obj)
        liste.reverse()
        return liste
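# A minimal standalone sketch of the channel-id / legacy-user fallback used by
# rss_yt above, assuming only the public feedparser API. The helper name
# fetch_yt_feed is hypothetical, not part of the original cog.
import feedparser

def fetch_yt_feed(identifiant):
    # Try the channel_id form first, then the older user form; return the
    # first parse that yields entries, else None.
    for param in ('channel_id', 'user'):
        feeds = feedparser.parse('https://www.youtube.com/feeds/videos.xml?' + param + '=' + identifiant)
        if feeds.entries:
            return feeds
    return None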
async def rss_tw(self, guild, nom, date=None):
    if nom == 'help':
        return await self.translate(guild, "rss", "tw-help")
    url = self.twitter_api_url + nom
    feeds = feedparser.parse(url)
    if feeds.entries == []:
        url = self.twitter_api_url + nom.capitalize()
        feeds = feedparser.parse(url)
        if feeds.entries == []:
            url = self.twitter_api_url + nom.lower()
            feeds = feedparser.parse(url)
            if feeds.entries == []:
                return await self.translate(guild, "rss", "nothing")
    if len(feeds.entries) > 1:
        # Make sure the first entry is the most recent one
        while feeds.entries[0]['published_parsed'] < feeds.entries[1]['published_parsed']:
            del feeds.entries[0]
            if len(feeds.entries) == 1:
                break
    if not date:
        feed = feeds.entries[0]
        r = re.search(r"(pic.twitter.com/[^\s]+)", feed['title'])
        if r is not None:
            t = feed['title'].replace(r.group(1), '')
        else:
            t = feed['title']
        author = feed['author'].replace('(', '').replace(')', '')
        rt = None
        if author.replace('@', '') not in url:
            rt = url.split("=")[1]
        obj = self.rssMessage(bot=self.bot, Type='tw', url=feed['link'], title=t, emojis=self.bot.cogs['EmojiCog'].customEmojis, date=feed['published_parsed'], author=author, retweeted_by=rt, channel=feeds.feed['title'])
        return [obj]
    else:
        liste = list()
        for feed in feeds.entries:
            if datetime.datetime(*feed['published_parsed'][:6]) <= date:
                break
            author = feed['author'].replace('(', '').replace(')', '')
            rt = None
            if author.replace('@', '') not in url:
                rt = url.split("=")[1]
            if rt is not None:
                t = feed['title'].replace(rt, '')
            else:
                t = feed['title']
            obj = self.rssMessage(bot=self.bot, Type='tw', url=feed['link'], title=t, emojis=self.bot.cogs['EmojiCog'].customEmojis, date=feed['published_parsed'], author=author, retweeted_by=rt, channel=feeds.feed['title'])
            liste.append(obj)
        liste.reverse()
        return liste
def check_site(self):
    rss = None
    must_save = False
    try:
        self.lock = True
        rss = feedparser.parse("http://www.mspaintadventures.com/rss/rss.xml")
    except:
        return
    finally:
        self.lock = False
    if len(rss.entries) == 0:
        return
    entries = sorted(rss.entries, key=(lambda x: mktime(x.updated_parsed)))
    if self.status is None:
        self.status = {}
        self.status['last_visited'] = {'pubdate': mktime(entries[-1].updated_parsed), 'link': entries[-1].link}
        self.status['last_seen'] = {'pubdate': mktime(entries[-1].updated_parsed), 'link': entries[-1].link}
        must_save = True
    elif mktime(entries[-1].updated_parsed) > self.status['last_seen']['pubdate']:
        # This is the first time the app itself has noticed this update.
        self.status['last_seen'] = {'pubdate': mktime(entries[-1].updated_parsed), 'link': entries[-1].link}
        must_save = True
    if self.status['last_seen']['pubdate'] > self.status['last_visited']['pubdate']:
        if not hasattr(self, "mspa"):
            self.mspa = None
        if not self.mspa:
            self.mspa = MSPAUpdateWindow(self.parent())
            self.mspa.accepted.connect(self.visit_site)
            self.mspa.rejected.connect(self.nothing)
            self.mspa.show()
    else:
        # print "No new updates :("
        pass
    if must_save:
        self.save_state()
def add_new_posts(last_updated=None):
    """Run this on a cron"""
    for blog in Blog.objects.all():
        try:
            document = feedparser.parse(blog.feed_url)
        except:
            print("error parsing")
            continue
        if last_updated is None:
            print("- Adding %i articles from %s" % (len(document['entries']), blog.title))
            for entry in document['entries']:
                # now we create a new post
                post = Post()
                post.blog = blog
                post.title = entry['title']
                if 'summary' in entry:
                    post.content = entry['summary']
                if 'content' in entry:
                    post.content = entry['content']
                post.link = entry['link']
                post.save()
        else:
            # TODO: only parse from a date
            pass
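# A possible shape for the unimplemented "only parse from a date" branch in
# add_new_posts above, assuming entries expose published_parsed. The helper
# name entries_since is hypothetical.
import datetime

def entries_since(document, last_updated):
    # Keep only entries strictly newer than last_updated (a datetime).
    recent = []
    for entry in document['entries']:
        parsed = entry.get('published_parsed')
        if parsed and datetime.datetime(*parsed[:6]) > last_updated:
            recent.append(entry)
    return recent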
async def rss_twitch(self, guild, nom, date=None):
    url = 'https://twitchrss.appspot.com/vod/' + nom
    feeds = feedparser.parse(url)
    if feeds.entries == []:
        return await self.translate(guild, "rss", "nothing")
    if not date:
        feed = feeds.entries[0]
        r = re.search(r'<img src="([^"]+)" />', feed['summary'])
        img_url = None
        if r is not None:
            img_url = r.group(1)
        obj = self.rssMessage(bot=self.bot, Type='twitch', url=feed['link'], title=feed['title'], emojis=self.bot.cogs['EmojiCog'].customEmojis, date=feed['published_parsed'], author=feeds.feed['title'].replace("'s Twitch video RSS", ""), image=img_url)
        return [obj]
    else:
        liste = list()
        for feed in feeds.entries:
            if datetime.datetime(*feed['published_parsed'][:6]) <= date:
                break
            r = re.search(r'<img src="([^"]+)" />', feed['summary'])
            img_url = None
            if r is not None:
                img_url = r.group(1)
            obj = self.rssMessage(bot=self.bot, Type='twitch', url=feed['link'], title=feed['title'], emojis=self.bot.cogs['EmojiCog'].customEmojis, date=feed['published_parsed'], author=feeds.feed['title'].replace("'s Twitch video RSS", ""), image=img_url)
            liste.append(obj)
        liste.reverse()
        return liste
def post(self):
    from libs.feedparser import parse
    user = users.get_current_user()
    url = self.request.get('url')
    p = parse(str(url))
    try:
        d = p['items'][0]
    except IndexError:
        pass
    if user:
        q = Feeds.query(Feeds.user == user, Feeds.url == url)
        if q.get() is None:
            feed = Feeds()

            def txn():
                feed.blog = p.feed.title
                feed.root = p.feed.link
                feed.user = user
                feed.feed = url
                feed.url = d.link
                feed.put()
            ndb.transaction(txn)
            deferred.defer(utils.new_bm, d, feed.key, _queue="admin")
        self.redirect(self.request.referer)
    else:
        self.redirect('/')
def pop_feed(feedk):
    feed = feedk.get()
    result = urlfetch.fetch(str(feed.feed), deadline=60)
    d = parse(result.content)
    e = 0
    try:
        entry = d['items'][e]
        # Walk newest-first entries until we hit the last one already seen
        while feed.last_id.encode('utf-8') != entry.id.encode('utf-8'):
            u = feed.user
            t = entry['title']
            o = entry['link']
            try:
                c = entry['content'][0].value
            except KeyError:
                try:
                    c = entry['description']
                except KeyError:
                    c = 'no comment'
            deferred.defer(submit_bm, feedk, u, t, o, c, _queue='bookmark')
            e += 1
            entry = d['items'][e]
        feed.last_id = d['items'][0].id.encode('utf-8')
        feed.put()
    except IndexError:
        pass
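# The core of pop_feed above is a "walk newest-first until the last seen id"
# scan; a self-contained sketch of that pattern, assuming items arrive newest
# first. The helper name walk_new_entries is hypothetical.
def walk_new_entries(items, last_id):
    # Collect entries published since last_id, stopping at the first match.
    new = []
    for entry in items:
        if str(entry.id) == str(last_id):
            break
        new.append(entry)
    return new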
def get(self):
    keywords = ['python', 'php', 'javascript', 'wordpress', 'nodejs']
    output = ""
    feeds = [
        'http://www.pittsource.com/all_jobs.atom',
        'http://rss.indeed.com/rss?q=(' + '%20OR%20'.join(keywords) + ')&l=Pittsburgh%2C+PA',
        'http://pghcareerconnector.com/jobs/?display=rss&keywords=' + '%20OR%20'.join(keywords) + '&filter=%2BSTATE_PROVINCE%3Apennsylvania%20%2BSHOW_AT%3A766827&resultsPerPage=1000',
        'https://pittsburgh.craigslist.org/search/web?format=rss&query=' + '%20|%20'.join(keywords),
        'https://pittsburgh.craigslist.org/search/eng?format=rss&query=' + '%20|%20'.join(keywords),
        'https://pittsburgh.craigslist.org/search/sof?format=rss&query=' + '%20|%20'.join(keywords),
        'https://pittsburgh.craigslist.org/search/cpg?format=rss&query=' + '%20|%20'.join(keywords),
    ]
    entries = []
    for feed in feeds:
        d = feedparser.parse(feed)
        entries.extend(d["items"])
    sorted_entries = sorted(entries, key=lambda entry: entry["date_parsed"])
    sorted_entries.reverse()  # for most recent entries first
    items = [
        PyRSS2Gen.RSSItem(
            title=x.title,
            link=x.link,
            description=x.description,
            guid=x.link,
            pubDate=datetime.datetime(
                x.modified_parsed[0], x.modified_parsed[1], x.modified_parsed[2],
                x.modified_parsed[3], x.modified_parsed[4], x.modified_parsed[5])
        ) for x in sorted_entries
    ]
    # make the RSS2 object
    # Try to grab the title, link, language etc from the orig feed
    rss = PyRSS2Gen.RSS2(
        title="JKirchartz's Pittsburgh Jobs Feed",
        link="http://tools.jkirchartz.com/jobfeed",
        description="JKirchartz's job searches everywhere in town",
        lastBuildDate=datetime.datetime.now(),
        items=items
    )
    self.response.headers['Content-Type'] = 'application/rss+xml'
    self.response.out.write(rss.to_xml(encoding='utf-8'))
    return
def get(self): output = "" feeds = [ "http://jkirchartz.com/rss.xml", "http://glitches.jkirchartz.com/rss", "http://tools.jkirchartz.com/researchfeed", "http://stash.jkirchartz.com/rss", "https://github.com/JKirchartz.atom", "http://stackoverflow.com/feeds/user/276250", ] entries = [] for feed in feeds: d = feedparser.parse(feed) entries.extend(d["items"]) sorted_entries = sorted(entries, key=lambda entry: entry["date_parsed"]) sorted_entries.reverse() # for most recent entries first items = [ PyRSS2Gen.RSSItem( title=x.title, link=x.link, description=x.description, guid=x.link, pubDate=datetime.datetime( x.modified_parsed[0], x.modified_parsed[1], x.modified_parsed[2], x.modified_parsed[3], x.modified_parsed[4], x.modified_parsed[5], ), ) for x in sorted_entries ] # make the RSS2 object # Try to grab the title, link, language etc from the orig feed rss = PyRSS2Gen.RSS2( title="JKirchartz's MegaFeed", link="http://tools.jkirchartz.com/megafeed", description="JKirchartz's feeds from everywhere", lastBuildDate=datetime.datetime.now(), items=items, ) self.response.headers["Content-Type"] = "application/rss+xml" self.response.out.write(rss.to_xml(encoding="utf-8")) return
def post(self):
    user = users.get_current_user()
    feed = self.request.get('url')
    q = Feeds.query(Feeds.user == user, Feeds.feed == feed)
    if user and q.get() is None:
        d = parse(str(feed))
        feed_k = Feeds(feed=feed, title=d['channel']['title'], link=d['channel']['link'], user=user, last_id=d['items'][0].id).put()
        deferred.defer(pop_feed, feed_k, _queue="user")
        self.redirect('/feeds')
    else:
        self.redirect('/')
async def check_rss_url(self, url):
    r = await self.parse_yt_url(url)
    if r is not None:
        return True
    r = await self.parse_tw_url(url)
    if r is not None:
        return True
    r = await self.parse_twitch_url(url)
    if r is not None:
        return True
    try:
        f = feedparser.parse(url)
        _ = f.entries[0]
        return True
    except:
        return False
def pop_feed(feedk):
    from libs.feedparser import parse
    feed = feedk.get()
    f = urlfetch.fetch(url="%s" % feed.feed, deadline=60)
    p = parse(f.content)
    e = 0
    try:
        d = p['items'][e]
        while feed.url != d['link'] and e < 15:
            deferred.defer(new_bm, d, feedk, _target="worker", _queue="importer")
            e += 1
            d = p['items'][e]
    except IndexError:
        pass
    s = p['items'][0]
    feed.url = s['link']
    feed.put()
def get(self): """ cleanup research RSS to be suitable for sharing elsewhere """ output = "" feed = feedparser.parse("http://research.jkirchartz.com/rss") entries = feed['items'] sorted_entries = sorted(entries, key=lambda entry: entry["date_parsed"]) sorted_entries.reverse() # for most recent entries first items = [] for x in sorted_entries: link_rex = re.search(r'href=[\'"]?([^\'" >]+)', x.summary) if link_rex: items.append(PyRSS2Gen.RSSItem( title=x.title, link=link_rex.group(1), description=x.title, guid=x.link, pubDate=datetime.datetime( x.modified_parsed[0], x.modified_parsed[1], x.modified_parsed[2], x.modified_parsed[3], x.modified_parsed[4], x.modified_parsed[5]) )) # make the RSS2 object # Try to grab the title, link, language etc from the orig feed rss = PyRSS2Gen.RSS2( title="JKirchartz's Research Feed", link="http://tools.jkirchartz.com/researchfeed", description="JKirchartz's Research Feed", lastBuildDate=datetime.datetime.now(), items=items ) self.response.headers['Content-Type'] = 'application/rss+xml' self.response.out.write(rss.to_xml()) return
def pop_feed(feedk):
    feed = feedk.get()
    result = urlfetch.fetch(str(feed.feed), deadline=60)
    d = parse(result.content)
    e = 0
    try:
        entry = d['items'][e]
        while str(feed.last_id) != str(entry.id):
            u = feed.user
            t = entry['title']
            o = entry['link']
            try:
                c = entry['description']
            except KeyError:
                c = 'no comment'
            deferred.defer(submit_bm, feedk, u, t, o, c)
            e += 1
            entry = d['items'][e]
    except IndexError:
        pass
    feed.last_id = str(d.entries[0].id)
    feed.put()
async def rss_web(self, guild, url, date=None):
    if url == 'help':
        return await self.translate(guild, "rss", "web-help")
    try:
        feeds = feedparser.parse(url, timeout=5)
    except socket.timeout:
        return await self.translate(guild, "rss", "research-timeout")
    if 'bozo_exception' in feeds.keys() or len(feeds.entries) == 0:
        return await self.translate(guild, "rss", "web-invalid")
    published = None
    for i in ['published_parsed', 'published', 'updated_parsed']:
        if i in feeds.entries[0].keys() and feeds.entries[0][i] is not None:
            published = i
            break
    if published is not None and len(feeds.entries) > 1:
        while len(feeds.entries) > 1 and feeds.entries[0][published] < feeds.entries[1][published]:
            del feeds.entries[0]
    if not date or published != 'published_parsed':
        feed = feeds.entries[0]
        if published is None:
            datz = 'Unknown'
        else:
            datz = feed[published]
        if 'link' in feed.keys():
            l = feed['link']
        elif 'link' in feeds.keys():
            l = feeds['link']
        else:
            l = url
        if 'author' in feed.keys():
            author = feed['author']
        elif 'author' in feeds.keys():
            author = feeds['author']
        elif 'title' in feeds['feed'].keys():
            author = feeds['feed']['title']
        else:
            author = '?'
        if 'title' in feed.keys():
            title = feed['title']
        elif 'title' in feeds.keys():
            title = feeds['title']
        else:
            title = '?'
        obj = self.rssMessage(bot=self.bot, Type='web', url=l, title=title, emojis=self.bot.cogs['EmojiCog'].customEmojis, date=datz, author=author, channel=feeds.feed['title'] if 'title' in feeds.feed.keys() else '?')
        return [obj]
    else:
        liste = list()
        for feed in feeds.entries:
            if published is None:
                datz = 'Unknown'
            else:
                datz = feed[published]
            if feed['published_parsed'] is None or datetime.datetime(*feed['published_parsed'][:6]) <= date:
                break
            if 'link' in feed.keys():
                l = feed['link']
            elif 'link' in feeds.keys():
                l = feeds['link']
            else:
                l = url
            if 'author' in feed.keys():
                author = feed['author']
            elif 'author' in feeds.keys():
                author = feeds['author']
            elif 'title' in feeds['feed'].keys():
                author = feeds['feed']['title']
            else:
                author = '?'
            if 'title' in feed.keys():
                title = feed['title']
            elif 'title' in feeds.keys():
                title = feeds['title']
            else:
                title = '?'
            obj = self.rssMessage(bot=self.bot, Type='web', url=l, title=title, emojis=self.bot.cogs['EmojiCog'].customEmojis, date=datz, author=author, channel=feeds.feed['title'])
            liste.append(obj)
        liste.reverse()
        return liste
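# The published-field detection in rss_web above is repeated in both branches;
# a hedged refactor sketch with the same precedence order. The helper name
# pick_published_key is hypothetical, not part of the original cog.
def pick_published_key(entry):
    # Return the first date-ish key the entry actually carries, else None.
    for key in ('published_parsed', 'published', 'updated_parsed'):
        if entry.get(key) is not None:
            return key
    return None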
async def test_rss(self, ctx, url, *, args=None):
    """Test if an rss feed is usable"""
    url = url.replace('<', '').replace('>', '')
    try:
        feeds = feedparser.parse(url, timeout=8)
        txt = "feeds.keys()\n```py\n{}\n```".format(feeds.keys())
        if 'bozo_exception' in feeds.keys():
            txt += "\nException ({}): {}".format(feeds['bozo'], str(feeds['bozo_exception']))
            return await ctx.send(txt)
        if len(str(feeds.feed)) < 1400 - len(txt):
            txt += "feeds.feed\n```py\n{}\n```".format(feeds.feed)
        else:
            txt += "feeds.feed.keys()\n```py\n{}\n```".format(feeds.feed.keys())
        if len(feeds.entries) > 0:
            if len(str(feeds.entries[0])) < 1950 - len(txt):
                txt += "feeds.entries[0]\n```py\n{}\n```".format(feeds.entries[0])
            else:
                txt += "feeds.entries[0].keys()\n```py\n{}\n```".format(feeds.entries[0].keys())
        if args is not None and 'feeds' in args and 'ctx' not in args:
            txt += "\n{}\n```py\n{}\n```".format(args, eval(args))
        try:
            await ctx.send(txt)
        except Exception as e:
            print("[rss_test] Error:", e)
            await ctx.send("`Error`: " + str(e))
            print(txt)
        if args is None:
            ok = '<:greencheck:513105826555363348>'
            notok = '<:redcheck:513105827817717762>'
            nothing = '<:_nothing:446782476375949323>'
            txt = ['**__Analysis:__**', '']
            yt = await self.parse_yt_url(url)
            if yt is None:
                tw = await self.parse_tw_url(url)
                if tw is not None:
                    txt.append("<:twitter:437220693726330881> " + tw)
                elif 'link' in feeds.feed.keys():
                    txt.append(":newspaper: <" + feeds.feed['link'] + '>')
                else:
                    txt.append(":newspaper: No 'link' var")
            else:
                txt.append("<:youtube:447459436982960143> " + yt)
            txt.append("Entries: {}".format(len(feeds.entries)))
            if len(feeds.entries) > 0:
                entry = feeds.entries[0]
                if 'title' in entry.keys():
                    txt.append(nothing + ok + " title: ")
                    if len(entry['title'].split('\n')) > 1:
                        txt[-1] += entry['title'].split('\n')[0] + "..."
                    else:
                        txt[-1] += entry['title']
                else:
                    txt.append(nothing + notok + ' title')
                if 'published_parsed' in entry.keys():
                    txt.append(nothing + ok + " published_parsed")
                elif 'published' in entry.keys():
                    txt.append(nothing + ok + " published")
                elif 'updated_parsed' in entry.keys():
                    txt.append(nothing + ok + " updated_parsed")
                else:
                    txt.append(nothing + notok + ' date')
                if 'author' in entry.keys():
                    txt.append(nothing + ok + " author: " + entry['author'])
                else:
                    txt.append(nothing + notok + ' author')
            await ctx.send("\n".join(txt))
    except Exception as e:
        await ctx.bot.cogs['ErrorsCog'].on_cmd_error(ctx, e)