def setUp(self):
    fg = FeedGenerator()
    self.feedId = 'http://example.com'
    self.title = 'Some Testfeed'

    fg.id(self.feedId)
    fg.title(self.title)
    fg.link(href='http://lkiesow.de', rel='alternate')
    fg.description('...')

    fe = fg.add_entry()
    fe.id('http://lernfunk.de/media/654321/1')
    fe.title('The First Episode')
    fe.content(u'…')

    # Use also the different name add_item
    fe = fg.add_item()
    fe.id('http://lernfunk.de/media/654321/1')
    fe.title('The Second Episode')
    fe.content(u'…')

    fe = fg.add_entry()
    fe.id('http://lernfunk.de/media/654321/1')
    fe.title('The Third Episode')
    fe.content(u'…')

    self.fg = fg
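# A minimal companion test sketch (hypothetical, not part of the original
# suite): it assumes the setUp above has run and checks that the configured
# id and title survive Atom serialization.
def test_basic_feed_values(self):
    atom = self.fg.atom_str(pretty=True).decode('utf-8')
    self.assertIn(self.feedId, atom)
    self.assertIn(self.title, atom)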
def gen_feed(user, base_url, path, debug=False):
    # Create feed
    feed = FeedGenerator()
    feed.id(urlparse.urljoin(base_url, user + '.xml'))
    feed.title('Snapchat story for ' + user)
    feed.link(href=urlparse.urljoin(base_url, user + '.xml'), rel='self')
    feed.language('en')
    feed.description('Snapchat media')

    # Iterate through files in path, sort by unix timestamp (newest first),
    # then add to feed
    files = sorted(os.listdir(path), reverse=True)
    for filename in files:
        split = filename.split('~')
        if split[0] != user:
            continue
        if os.path.splitext(filename)[1] in ['.mp4', '.jpg']:
            entry = feed.add_entry()
            entry.id(urlparse.urljoin(base_url, filename))
            entry.link(href=urlparse.urljoin(base_url, filename))
            entry.title(filename)

    # Write feed to disk
    feed.rss_file(os.path.join(path, user + '.xml'))

    date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")
    if debug:
        print('{0} Regenerated {1}'.format(
            date, urlparse.urljoin(base_url, user + '.xml')))
def generateFeeds(buffered, meta):
    utc = pytz.utc
    fg = FeedGenerator()
    fg.id(meta['id'])
    fg.title(meta['title'])
    fg.author(meta['author'])
    fg.subtitle(meta['subtitle'])
    fg.link(href=meta['link'], rel='self')
    fg.language(meta['language'])

    for tweet in buffered:
        fe = fg.add_entry()
        fe.id(tweet['url'].decode('utf-8'))
        fe.published(utc.localize(tweet['created_at']).astimezone(
            pytz.timezone(locale)))
        # fe.guid(tweet['url'].decode('utf-8'))
        fe.link(href=tweet['url'].decode('utf-8'), rel='alternate')
        fe.title(tweet['readable_title'])
        fe.description(tweet['readable_article'])
        try:
            fe.author({'name': '',
                       'email': tweet['user_name'].decode('utf-8') + ": " +
                       tweet['text'].decode('utf-8')})
        except Exception as e:  # 'except Exception, e' is Python 2-only syntax
            logger.error(e)
            fe.author({'name': 'a', 'email': '*****@*****.**'})
def feed(self, feed_title, title, content, url, published=None, summary=None,
         enclosure=None, media_thumbnail=None):
    feed = FeedGenerator()
    feed.title(feed_title)
    feed.description(faker.sentence())
    feed.link({'href': WP_FEED_URL})

    entry = feed.add_entry()
    entry.title(title)
    entry.link({'href': url})
    entry.author(name=faker.name())
    entry.content(content, type="cdata")
    if summary:
        entry.description(summary)
    if enclosure:
        entry.enclosure(url=enclosure['url'],
                        type=enclosure['type'],
                        length=str(faker.pyint()))
    if media_thumbnail:
        feed.load_extension('media')
        entry.media.thumbnail({'url': media_thumbnail})

    tz = pytz.timezone(faker.timezone())
    published = published or faker.date_time(tzinfo=tz)
    entry.published(published)
    entry.updated(faker.date_time_between(start_date=published, tzinfo=tz))

    return feed.rss_str().decode('utf8')
def generate_feed(self):
    fg = FeedGenerator()
    fg.load_extension('podcast')

    for field in self.MAPPINGS:
        value_names = field[0]
        methods = field[1]
        values = []
        # collect the values from self
        for value_name in value_names:
            values.append(getattr(self, value_name))
        # descend the attribute tree
        method = get_method(methods, fg)
        # apply the values to the found method
        method(*values)

    for episode in self.episodes.all():
        # This is the same pattern as above, I wonder if I can DRY this out.
        entry = fg.add_entry()
        value_names, method_names = zip(*episode.MAPPINGS)
        values = []
        for ind, value_name in enumerate(value_names):
            print(value_name)
            values = [getattr(episode, v) for v in value_name]
            if None not in values:
                print(values)
                method = get_method(method_names[ind], entry)
                method(*values)
    print("DONE")
    return fg
def generate(app, category, torrents):
    """
    generate an rss feed from category with torrents as results
    if category is None this feed is for all categories
    """
    feed = FeedGenerator()
    if category:
        url = util.fullSiteURL(app, 'feed', '{}.rss'.format(category))
    else:
        url = util.fullSiteURL(app, 'feed', 'all.rss')
    feed.link(href=url, rel="self")
    feed.id(url)
    if category:
        title = "new {} torrents on index ex invisibilis".format(category)
    else:
        title = "new torrents on index ex invisibilis"
    feed.title(title)
    feed.description(title)
    feed.author({"name": "anonymous"})
    feed.language("en")
    for torrent in torrents:
        item = feed.add_entry()
        url = util.fullSiteURL(app, torrent.downloadURL())
        item.id(torrent.infohash)
        item.link(href=url)
        item.title(torrent.title)
        item.description(torrent.summary(100))
    return feed
def makeRss(self):
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.id('http://hypecast.blackmad.com/' + self.mode)
    fg.title('Hype Machine Robot Radio: ' + self.mode)
    fg.author({'name': 'David Blackmad', 'email': '*****@*****.**'})
    fg.logo('http://dump.blackmad.com/the-hype-machine.jpg')
    fg.language('en')
    fg.link(href='http://hypecast.blackmad.com/' + self.mode)
    fg.description('Hype Machine Robot Radio: ' + self.mode)

    description = ' <br/>'.join(
        ['%s. %s' % (index + 1, self.mk_song_id(s))
         for index, s in enumerate(self.songs)])

    fe = fg.add_entry()
    fe.title(self.track_name)
    fe.description(description)
    fe.id(self.filename)
    # add length
    print(self.relative_dir)
    print(self.filename)
    fe.enclosure(url='http://hypecast.blackmad.com/%s' % (self.filename),
                 type="audio/mpeg")

    rss_str = fg.rss_str()
    newItem = ET.fromstring(rss_str)[0].find('item')
    # rss_str() and ET.tostring() return bytes, so write in binary mode
    out = open(self.get_filename('xml'), 'wb')
    out.write(ET.tostring(newItem))
    out.close()
    self.updateRss()
def feed():
    """ Generate atom feed """
    entries = parse_posts(0, C.feed_count)
    fg = FeedGenerator()
    fg.id(str(len(entries)))
    fg.title(C.title)
    fg.subtitle(C.subtitle)
    fg.language(C.language)
    fg.author(dict(name=C.author, email=C.email))
    fg.link(href=C.root_url, rel='alternate')
    fg.link(href=make_abs_url(C.root_url, 'feed'), rel='self')
    for entry in entries:
        fe = fg.add_entry()
        fe.id(entry.get('url'))
        fe.title(entry.get('title'))
        fe.published(entry.get('date'))
        fe.updated(entry.get('updated') or entry.get('date'))
        fe.link(href=make_abs_url(C.root_url, entry.get('url')), rel='alternate')
        fe.author(dict(name=entry.get('author'), email=entry.get('email')))
        fe.content(entry.get('body'))
    atom_feed = fg.atom_str(pretty=True)
    return atom_feed
def render_atom(self):
    fg = FeedGenerator()
    fg.id(self.site_url)
    fg.title(self.site_title)
    fg.link(href=self.site_url, rel='alternate')
    fg.link(href=self.site_url + 'atom.xml', rel='self')
    fg.language('zh-cn')

    link_list = ArticleManager.sharedManager().link_list()
    for link in link_list:
        article = ArticleManager.sharedManager().article_for_link(link)
        if not article:
            continue
        fe = fg.add_entry()
        fe.id(article.article_link)
        fe.link(link={'href': self.site_url + article.article_link})
        fe.title(article.article_title)
        fe.description(article.article_subtitle or '')
        fe.author(name=article.author or '', email=article.author_email or '')
        d = datetime.strptime(article.article_publish_date, '%Y-%m-%d')
        pubdate = datetime(year=d.year, month=d.month, day=d.day,
                           tzinfo=UTC(8))
        fe.pubdate(pubdate)
        article.render_content_html()
        fe.content(content=article._content_html, type='html')

    atom_feed = fg.atom_str(pretty=True)
    return atom_feed
def generate_feed(page=1):
    feed = FeedGenerator()
    feed.id("https://pub.dartlang.org/feed.atom")
    feed.title("Pub Packages for Dart")
    feed.link(href="https://pub.dartlang.org/", rel="alternate")
    feed.link(href="https://pub.dartlang.org/feed.atom", rel="self")
    feed.description("Last Updated Packages")
    feed.author({"name": "Dart Team"})
    i = 1
    pager = QueryPager(int(page), "/feed.atom?page=%d",
                       Package.all().order('-updated'),
                       per_page=10)
    for item in pager.get_items():
        i += 1
        entry = feed.add_entry()
        for author in item.latest_version.pubspec.authors:
            entry.author({"name": author[0]})
        entry.title("v" + item.latest_version.pubspec.get("version") +
                    " of " + item.name)
        entry.link(link={"href": item.url, "rel": "alternate",
                         "title": item.name})
        entry.id("https://pub.dartlang.org/packages/" + item.name + "#" +
                 item.latest_version.pubspec.get("version"))
        entry.description(
            item.latest_version.pubspec.get("description", "Not Available"))
        readme = item.latest_version.readme
        if readme is not None:
            entry.content(item.latest_version.readme.render(), type='html')
        else:
            entry.content("<p>No README Found</p>", type='html')
    return feed
def podcast_feed():
    logo_url = url_for("static", filename="wpclogo_big.png", _external=True)
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.podcast.itunes_category('Technology', 'Podcasting')
    fg.podcast.itunes_image(logo_url)
    fg.author({'name': 'Nathan Kellert', 'email': '*****@*****.**'})
    fg.link(href='http://watchpeoplecode.com/podcast_feed.xml', rel='self')
    fg.title('WPC Coders Podcast')
    fg.description('WPC Coders Podcast is a weekly peek into the lives of developers and the WatchPeopleCode community. Our goal is to keep our listeners entertained by giving them new and interesting insights into our industry as well as awesome things happening within our own community. Here, you can expect to hear about some of the latest news, tools, and opportunities for developers in nearly every area of our industry. Most importantly, we hope to have some fun and a few laughs in ways only other nerds know how.')  # NOQA

    # NOTE: leading-zero integer literals such as 02 are a SyntaxError in
    # Python 3, so the month/day arguments are written without padding.
    episodes = [
        ('ep1.mp3', 'Episode 1', datetime(2015, 2, 21, 23),
         'Learn all about the WPC hosts, and where we came from in Episode 1!'),
        ('ep2.mp3', 'Episode 2', datetime(2015, 2, 28, 23),
         'This week we cover your news, topics and questions in episode 2!'),
        ('ep3.mp3', 'Episode 3', datetime(2015, 3, 7, 23),
         "On todays podcast we talk to WatchPeopleCode's founder Alex Putilin. Hear about how the reddit search engine thousands watched him write. Also, hear the inside scoop of how WatchPeopleCode got started!"),  # NOQA
        ('ep4.mp3', 'Episode 4', datetime(2015, 3, 14, 23),
         "This week we talk to FreeCodeCamps Quincy Larson(http://www.freecodecamp.com) about their project that combines teaching new developers how to code and completing projects for non-profits! Lets find out how this group of streamers code with a cause!"),  # NOQA
    ]

    for epfile, eptitle, epdate, epdescription in episodes[::-1]:
        epurl = "https://s3.amazonaws.com/wpcpodcast/{}".format(epfile)
        fe = fg.add_entry()
        fe.id(epurl)
        fe.title(eptitle)
        fe.description(epdescription)
        fe.podcast.itunes_image(logo_url)
        fe.pubdate(epdate.replace(tzinfo=pytz.UTC))
        fe.enclosure(epurl, 0, 'audio/mpeg')

    return Response(response=fg.rss_str(pretty=True),
                    status=200,
                    mimetype='application/rss+xml')
def latestRss(userID):
    userID = userID.lower()

    shows = {}
    episodes = []
    today = date.today().strftime('%Y-%m-%d')
    for showID in series.getUserShowList(userID):
        shows[showID] = series.getShowInfo(userID, showID,
                                           withEpisodes=True, onlyUnseen=True)
        episodes.extend((showID, episode)
                        for episode in shows[showID]['episodes']
                        if episode['airdate']
                        and airdateKey(episode['airdate']) < today)
    episodes.sort(key=episodeAirdateKey, reverse=True)

    feed = FeedGenerator()
    feed.id(userID)
    feed.title('%s\'s shows' % userID)
    feed.description('Unseen episodes')
    feed.link(href=request.url_root)
    feed.language('en')

    for showID, episode in episodes:
        entry = feed.add_entry()
        entry.id('%s/%s' % (showID, episode['episode_id']))
        entry.title('%s S%02dE%02d: %s' % (shows[showID]['name'],
                                           episode['season'],
                                           episode['episode'],
                                           episode['title']))

    return feed.rss_str(pretty=True)
def write_podcast(show, podcast_dir, base_public_url, showlocal_tz):
    """Create the podcast file."""
    fg = FeedGenerator()
    fg.load_extension('podcast')

    url = "{}{}.xml".format(base_public_url, show.id)
    fg.id(url.split('.')[0])
    fg.title(show.name)
    fg.image(show.image_url)
    fg.description(show.description)
    fg.link(href=url, rel='self')

    # collect all mp3s for the given show
    all_mp3s = glob.glob(os.path.join(podcast_dir, "{}_*.mp3".format(show.id)))

    for filepath in all_mp3s:
        filename = os.path.basename(filepath)
        mp3_date = _get_date_from_mp3_path(filepath, showlocal_tz)
        mp3_size = os.stat(filepath).st_size
        mp3_url = base_public_url + filename
        mp3_id = filename.split('.')[0]
        title = "Programa del {0:%d}/{0:%m}/{0:%Y}".format(mp3_date)

        # build the rss entry
        fe = fg.add_entry()
        fe.id(mp3_id)
        fe.pubdate(mp3_date)
        fe.title(title)
        fe.enclosure(mp3_url, str(mp3_size), 'audio/mpeg')

    fg.rss_str(pretty=True)
    fg.rss_file(os.path.join(podcast_dir, '{}.xml'.format(show.id)))
class Feeder():
    def __init__(self, url, title='', feedURL=''):
        scraper = None

        if url.startswith("https://twitter.com/"):
            scraper = TwitterScraper(url)
            if title == '':
                title = "Twitter: @" + url.split('/')[3]
        elif url.startswith("http://www.lindwurm-linden.de/termine"):
            scraper = LindwurmScraper(url)
            if title == '':
                title = "Lindwurm: Termine"
        else:
            raise UnsupportedService("No scraper found for this URL.")

        self.feed = FeedGenerator()
        self.feed.id(url)
        self.feed.title(title)
        self.feed.author({"name": url})
        if feedURL != '':
            self.feed.link(href=feedURL, rel='self')

        for entry in scraper.entries:
            fe = self.feed.add_entry()
            fe.id(entry['url'])
            fe.title(entry['title'])
            fe.link(href=entry['url'], rel='alternate')
            fe.content(entry['text'])

    def GetAtom(self):
        return self.feed.atom_str(pretty=True).decode()
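# Minimal usage sketch (hypothetical account and feed URL): build a feed for
# one of the supported services and print the resulting Atom document.
# Assumes TwitterScraper and UnsupportedService come from the surrounding module.
feeder = Feeder("https://twitter.com/some_account",
                feedURL="https://example.org/some_account.atom")
print(feeder.GetAtom())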
def get_feed(query, title, description, link, image):
    """Get an RSS feed from the results of a query to the YouTube API."""
    service = _get_youtube_client()
    videos = service.search().list(part='snippet', **query, order='date',
                                   type='video', safeSearch='none').execute()
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.title(title)
    fg.description(description)
    fg.link(href=link, rel='alternate')
    fg.image(image)
    youtube_plugin = get_plugin_from_settings()

    for video in videos['items']:
        try:
            video_url = youtube_plugin.extract_link(
                "https://www.youtube.com/watch?v=" + video['id']['videoId'])
        except PluginException:
            continue
        fe = fg.add_entry()
        fe.id(video['id']['videoId'])
        fe.title(video['snippet']['title'])
        fe.description(video['snippet']['description'])
        fe.pubdate(dateutil.parser.parse(video['snippet']['publishedAt']))
        fe.podcast.itunes_image(video['snippet']['thumbnails']['high']['url'])
        video_info = requests.head(video_url)
        fe.enclosure(video_url, video_info.headers['Content-Length'],
                     video_info.headers['Content-Type'])
    return fg.rss_str(pretty=True)
def generate_feed(output_file, exclude_highlights=True):
    # Parse RSS feed
    d = feedparser.parse(ESPN_RSS_FEED)
    IMAGE_URL = d.feed.image["href"]

    # RSS feed generation
    fg = FeedGenerator()
    fg.load_extension("podcast", rss=True)

    ## RSS tags
    # Required
    fg.title(d.feed.title)
    fg.link(href="https://github.com/aaearon/lebatard-show-rss")
    fg.description(d.feed.description)
    # Optional
    fg.language(d.feed.language)
    fg.image(IMAGE_URL)
    fg.subtitle(d.feed.subtitle)
    # iTunes
    fg.podcast.itunes_author(d.feed.author)
    fg.podcast.itunes_category(itunes_category=d.feed.category)
    fg.podcast.itunes_image(itunes_image=IMAGE_URL)
    fg.podcast.itunes_explicit(itunes_explicit="clean")
    fg.podcast.itunes_owner(name=CONTACT["name"], email=CONTACT["email"])

    tz = pytz.timezone("America/Los_Angeles")

    for e in d.entries:
        if exclude_highlights and episode_duration_string_to_int(e["itunes_duration"]) > 3600:
            pass
        else:
            fe = fg.add_entry()
            fe.id(e.id)
            fe.title(e.title)
            fe.description(e.description)
            fe.enclosure(url=e.enclosures[0]["href"],
                         length=e.enclosures[0]["length"],
                         type=e.enclosures[0]["type"])
            fe.podcast.itunes_summary(e.description)
            fe.podcast.itunes_subtitle(e.description)
            fe.podcast.itunes_duration(e["itunes_duration"])

            dt = datetime.fromtimestamp(time.mktime(e.published_parsed))
            date = tz.localize(dt)

            # Local hour
            if "Show: " in e.title:
                fe.published(date)
            elif "Hour 1" in e.title:
                fe.published(date + timedelta(hours=1))
            elif "Hour 2" in e.title:
                fe.published(date + timedelta(hours=2))
            elif "Hour 3" in e.title:
                fe.published(date + timedelta(hours=3))
            else:
                fe.published(date + timedelta(hours=-1))

    fg.rss_str(pretty=True)
    fg.rss_file(output_file)
class YoutubeFeed:
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': '%(id)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }]
    }

    def __init__(self, name):
        self.name = name
        self.ydl = youtube_dl.YoutubeDL(self.ydl_opts)

        self.fg = FeedGenerator()
        self.fg.title(name)
        self.fg.author({"name": "Youtube Audio Feed", "email": ""})
        self.fg.link(href="http://www.foo.bar.baz.com", rel="alternate")
        self.fg.description("Personalized Youtube audio feed")
        self.fg.generator("")
        self.fg.docs("")

    def add_video(self, url):
        info = self.ydl.extract_info(url, download=True)
        entry = self.fg.add_entry()
        entry.id(info['id'])
        entry.title(info['title'])
        entry.description(info['description'])
        entry.enclosure(info['id'] + ".mp3", str(info['duration']),
                        'audio/mpeg')

    def save(self):
        # bare `name` was an undefined reference; the attribute is self.name
        self.fg.rss_file(self.name + '.xml')
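# Minimal usage sketch (hypothetical video URL): download one video as mp3 and
# write "my-feed.xml" alongside it. Assumes youtube_dl and ffmpeg are installed.
feed = YoutubeFeed("my-feed")
feed.add_video("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
feed.save()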
def main():
    client = moduleSocial.connectTumblr()
    posts = client.posts('fernand0')

    fg = FeedGenerator()
    fg.id(posts['blog']['url'])
    fg.title(posts['blog']['title'])
    fg.author({'name': posts['blog']['name'], 'email': '*****@*****.**'})
    fg.link(href=posts['blog']['url'], rel='alternate')
    fg.subtitle('Alternate feed due to Tumblr GDPR restrictions')
    fg.language('en')

    print(len(posts['posts']))
    for i in range(len(posts['posts'])):
        fe = fg.add_entry()
        print(posts['posts'][i]['post_url'])
        if 'title' in posts['posts'][i]:
            title = posts['posts'][i]['title']
            print('T', posts['posts'][i]['title'])
        else:
            title = posts['posts'][i]['summary'].split('\n')[0]
            print('S', posts['posts'][i]['summary'].split('\n')[0])
        fe.title(title)
        fe.link(href=posts['posts'][i]['post_url'])
        fe.id(posts['posts'][i]['post_url'])

    print(fg.atom_file('/var/www/html/elmundoesimperfecto/tumblr.xml'))

    sys.exit()
def get_feed(atom=False):
    fg = FeedGenerator()
    domain = get_domain()
    items = get_posts({"limit": "10"}, full=True)["results"]
    fg.id("http://%s/" % domain)
    fg.title("Blog do MatrUFSC2")
    fg.description("Feed do blog do MatrUFSC2, onde noticias e novos recursos sao anunciados primeiro!")
    fg.language('pt-BR')
    fg.link({"href": "/blog/feed", "rel": "self"})
    fg.updated(items[0]["posted_at"].replace(tzinfo=pytz.UTC))
    for item in items:
        entry = fg.add_entry()
        entry.title(item["title"])

        tree = html.fromstring(item["summary"])
        cleaner = Cleaner(allow_tags=[])
        tree = cleaner.clean_html(tree)
        text = tree.text_content()

        entry.description(text, True)
        entry.link({"href": item["link"], "rel": "self"})
        entry.content(item["body"])
        entry.published(item["posted_at"].replace(tzinfo=pytz.UTC))
        entry.updated(item["posted_at"].replace(tzinfo=pytz.UTC))
        entry.category({"label": item["category"]["title"],
                        "term": item["category"]["slug"]})
        entry.id(item["id"])
    if atom:
        return fg.atom_str(pretty=True)
    else:
        return fg.rss_str(pretty=True)
def GET(self):
    cherrypy.response.headers["Access-Control-Allow-Origin"] = "*"
    fg = FeedGenerator()
    # TODO create icon
    # fg.icon('http://www.det.ua.pt')
    fg.id(config.get('rss', 'id'))
    fg.title(config.get('rss', 'title'))
    fg.subtitle(config.get('rss', 'subtitle'))
    fg.description(config.get('rss', 'description'))
    fg.author({'name': config.get('rss', 'author_name'),
               'email': config.get('rss', 'author_email')})
    fg.language(config.get('rss', 'language'))
    fg.link(href=config.get('rss', 'href'), rel='related')

    client = EmailClient()

    for msgn in reversed(client.listBox(config.get('imap', 'mailbox'))[:config.getint('rss', 'maxitems')]):
        cherrypy.log("RSS Entry: " + msgn)
        em = client.getEMail(msgn)
        entry = fg.add_entry()
        entry.title(em['subject'])
        entry.author({'name': em['From']['name'],
                      'email': em['From']['email']})
        entry.guid(config.get("main", "baseurl") + 'news/' + msgn)
        entry.link({'href': config.get("main", "baseurl") + 'news/' + msgn,
                    'rel': 'alternate'})
        entry.pubdate(em['date'])
        entry.content(em['body'])
    return fg.rss_str(pretty=True)
def build_feed(self):
    "Build the feed given our existing URL"
    # Get all the episodes
    page_content = str(requests.get(self.url).content)
    parser = BassdriveParser()
    parser.feed(page_content)
    links = parser.get_links()

    # And turn them into something usable
    fg = FeedGenerator()
    fg.id(self.url)
    fg.title(self.title)
    fg.description(self.title)
    fg.author({'name': self.dj})
    fg.language('en')
    fg.link({'href': self.url, 'rel': 'alternate'})
    fg.logo(self.logo)

    for link in links:
        fe = fg.add_entry()
        fe.author({'name': self.dj})
        fe.title(link[0])
        fe.description(link[0])
        fe.enclosure(self.url + link[1], 0, 'audio/mpeg')

        # Bassdrive always uses date strings of
        # [yyyy.mm.dd] with 0 padding on days and months,
        # so that makes our lives easy
        date_start = link[0].find('[')
        date_str = link[0][date_start:date_start + 12]
        published = datetime.strptime(date_str, '[%Y.%m.%d]')
        fe.pubdate(UTC.localize(published))
        fe.guid((link[0]))

    return fg
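# build_feed() returns the FeedGenerator rather than a serialized document, so
# a caller still has to render it. A hedged sketch (`show` stands in for an
# instance of the surrounding class):
rss_bytes = show.build_feed().rss_str(pretty=True)
with open('bassdrive.xml', 'wb') as fh:
    fh.write(rss_bytes)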
def generate_rss(language, since):
    url = "{0}?since={1}".format(language["url"], since)
    file_name = "github_trends_{0}_{1}.rss".format(language["key"], since)
    title = "GitHub Trends - {0} - {1}".format(language["name"],
                                               since.capitalize())
    print(url)

    page = requests.get(url)
    tree = html.fromstring(page.content)
    lis = tree.cssselect("ol.repo-list li")

    fg = FeedGenerator()
    fg.title(title)
    fg.link(href="http://github-trends.ryotarai.info/rss/{0}".format(file_name))
    fg.description(title)
    index = 1
    for li in lis:
        a = li.cssselect("h3 a")[0]
        description = ""
        ps = li.cssselect("p")
        if len(ps) > 0:
            description = ps[0].text_content().strip()

        fe = fg.add_entry()
        fe.link(href="https://github.com{0}".format(a.get("href")))
        fe.title("{0} (#{1} - {2} - {3})".format(
            a.text_content().strip().replace(" / ", "/"),
            index,
            language["name"],
            since.capitalize(),
        ))
        fe.description(description)
        index += 1

    rssfeed = fg.rss_str(pretty=True)
    s3.Object(bucket, 'rss/{0}'.format(file_name)).put(
        Body=rssfeed, ContentType="application/xml")
def run(folder, url):
    from feedgen.feed import FeedGenerator

    fg = FeedGenerator()

    head, tail = os.path.split(folder)
    title = tail.decode("utf-8")
    fg.id(str(uuid.uuid4()))
    fg.title(title)
    fg.link(href="{0}/rss.xml".format(url), rel="self")
    fg.description(u"Audiobook `{0}` generated with rssbook".format(title))

    fg.load_extension("podcast")
    for item in sorted(os.listdir(folder)):
        if os.path.splitext(item)[1] == ".mp3":
            get_node(os.path.join(folder, item))

            fullpath = os.path.join(folder, item)
            fe = fg.add_entry()
            fe.id(str(uuid.uuid4()))
            fe.title(title)
            fe.description(item)

            fe.link(href="{0}/{1}".format(url, item), rel="enclosure",
                    type="audio/mpeg",
                    length=str(os.stat(fullpath).st_size))

    fg.rss_file(os.path.join(folder, "rss.xml"))
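# Hedged usage sketch for run(): point it at a directory of mp3 files and the
# public base URL they will be served from (both values are examples). Note
# the tail.decode() call above suggests this snippet targets Python 2, where
# os.path.split returns byte strings.
run("/srv/audiobooks/my-book", "https://example.org/audiobooks/my-book")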
def _filter_fb_rss_feeed(url):
    parsed_feed = feedparser.parse(url)
    filtered_entries = filter(
        lambda x: ' shared a link: "' in x.title, parsed_feed.entries)

    fg = FeedGenerator()
    fg.id('https://fb-notifications-to-pocket.herokuapp.com/')
    fg.title('Facebook Notifications to Pocket')
    fg.author({'name': 'Pankaj Singh', 'email': '*****@*****.**'})
    fg.description(
        '''Filter FB notifications which contain a link and generate a new rss feed which will be used by IFTTT''')
    fg.link(href='https://fb-notifications-to-pocket.herokuapp.com/')

    for entry in filtered_entries:
        root = etree.HTML(entry.summary_detail.value)
        title = entry.title.split(" shared a link: ")[1].strip()[1:-2]
        author_name = entry.title.split(" shared a link: ")[0].strip()
        url = urlparse.parse_qs(
            urlparse.urlparse(root.findall(".//a")[-1].attrib["href"]).query)["u"][0]

        title = get_title_for_url(url) or title

        fe = fg.add_entry()
        fe.id(entry.id)
        fe.link(href=url)
        fe.published(entry.published)
        fe.author({'name': author_name})
        fe.title(title)

    return fg.atom_str(pretty=True)
def rss():
    config = public_app.config['feed']
    fg = FeedGenerator()
    fg.id('%s/blog' % Config.BASE_URL)
    fg.title(config['title'])
    fg.author({'name': config['author'], 'email': config['email']})
    fg.description(config['desc'])
    fg.link(href=Config.BASE_URL, rel='alternate')

    query = {
        'id': {'$regex': 'blog'},
        'current': True,
        'meta.hide': {'$ne': True}
    }
    posts = db.pages.find(query).sort('meta.created', -1)[:20]

    for post in posts:
        fe = fg.add_entry()
        fe.title(post['meta']['title'])
        if 'author' in post['meta']:
            fe.author({'name': post['meta']['author'],
                       'email': config['email']})
        else:
            fe.author({'name': config['author'], 'email': config['email']})
        fe.description(do_truncate(post['content'], 300))
        fe.link(href="%s/%s" % (Config.BASE_URL, post['id']), rel='alternate')
        fe.pubdate(utc.localize(post['meta']['created']))
        fe.content(post['content'])

    response.headers['Content-Type'] = 'application/rss+xml'
    return fg.rss_str(pretty=True)
def build():
    global fg
    fg = FeedGenerator()
    fg.title(parser.get('technowatch', 'name'))
    fg.language('en')
    fg.description(parser.get('technowatch', 'name'))
    fg.link(href=parser.get('technowatch', 'link'), rel='alternate')
    # Cleaning stories if too much
    if len(known_stories) > int(parser.get('technowatch', 'cache_max')):
        clean()
    # Sorting stories by crawled date
    for item in sorted(known_stories.values(),
                       key=operator.itemgetter('crawledDate'),
                       reverse=True):
        fe = fg.add_entry()
        fe.link(href=item['url'], rel='alternate')
        fe.title("[" + item['type'] + "] " + item['title'])
        fe.category({'label': item['type'], 'term': item['type']})
        fe.author({'name': item['by']})
        fe.description(item['desc'])
        fe.pubdate(item['crawledDate'])
    # Caching RSS building
    pickle.dump(known_stories, open(cust_path + "/technowatch.data", "wb"))
    if parser.get('wsgi', 'activated') == "True":
        fg.rss_file(cust_path + '/static/rss.xml')
    if parser.get('ftp', 'activated') == "True":
        upload()
def feed(column_id):
    api = Api(column_id)

    with request.urlopen(api.info) as stream:
        result = stream.read().decode('utf-8')
    if not result:
        return '', 404
    info = json.loads(result)

    with request.urlopen(api.posts) as stream:
        result = stream.read().decode('utf-8')
    entries = json.loads(result)

    fg = FeedGenerator()
    fg.id(str(entries[0]['slug']))
    fg.title(info['name'])
    fg.language('zh_CN')
    fg.icon(info['avatar']['template'].replace('{id}', info['avatar']['id']).replace('{size}', 's'))
    fg.logo(info['avatar']['template'].replace('{id}', info['avatar']['id']).replace('{size}', 'l'))
    fg.description(info['intro'])
    fg.author(dict(name=info['creator']['name']))
    fg.link(href=api.base_url + info['url'], rel='alternate')
    for entry in entries:
        fe = fg.add_entry()
        fe.id(entry['url'])
        fe.title(entry['title'])
        fe.published(entry['publishedTime'])
        fe.updated(entry['publishedTime'])
        fe.author(dict(name=entry['author']['name']))
        fe.link(href=api.base_url + entry['url'], rel='alternate')
        fe.content(entry['content'])

    return fg.atom_str(pretty=True)
def export_feed(self, output):
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.podcast.itunes_category('Religion & Spirituality', 'Christianity')
    fg.podcast.itunes_image("%s/icon.png" % URL_BASE)

    fg.title('JW.ORG Magazines')
    fg.description('Combined Feed of Watchtower (public), Watchtower (study), and Awake! in English from jw.org.')
    fg.link(href="%s/%s" % (URL_BASE, output), rel='self')

    manifest = self._load()
    entries = []
    for lang, mnemonics in manifest.items():
        for mnemonic, issues in mnemonics.items():
            for issue, data in issues.items():
                entries.append((issue, data))

    for issue, entry in sorted(entries, key=lambda i: i[0], reverse=True):
        fe = fg.add_entry()
        fe.id(entry['hash'])
        fe.title(entry['title'])
        fe.description(entry['title'])
        fe.published(pytz.utc.localize(entry['created_on']))
        url = "%s/%s" % (URL_BASE, os.path.basename(entry['file']))
        mime = 'audio/mpeg'
        fe.enclosure(url, str(entry['duration']), mime)
        fe.link(href=url, type=mime)

    fg.rss_str(pretty=True)
    fg.rss_file(os.path.join(CACHE_DIR, output))
def main():
    session = vk.Session()
    api = vk.API(session)

    group_id = '96469126'
    group_info = api.groups.getById(
        group_ids=group_id,
        fields=['description', 'site', 'name', 'photo', 'gid'])
    assert len(group_info) == 1
    group_info = group_info[0]

    url = 'http://vk.com/club{}'.format(group_info['gid'])
    # a = api.wall.get(owner_id=-1 * group_info['gid'])
    #
    # with open('out', 'wb') as fio:
    #     pickle.dump(a, fio)
    with open('out', 'rb') as fio:
        data = pickle.loads(fio.read())
    assert len(data) > 1

    fg = FeedGenerator()
    fg.id(url)
    fg.title(_(group_info['name']))
    fg.description(_(group_info['description']))
    fg.logo(group_info['photo'])
    site_url = group_info.get('site', url) if group_info.get('site', url) else url
    fg.link(href=_(site_url))
    fg.link(href=_(site_url), rel='self')
    fg.link(href=_(site_url), rel='alternate')
    fg.author({'name': 'Alexander Sapronov', 'email': '*****@*****.**'})
    fg.webMaster('[email protected] (Alexander Sapronov)')

    pat = re.compile(r"#(\w+)")
    for x in data[1:]:
        post_link = "{}?w=wall-{}_{}".format(url, group_info['gid'], x['id'])
        e = fg.add_entry()
        # text = x.get('text', '').replace('<br>', '\n')
        text = x.get('text', '')
        e.description(_(text))
        e.author({'name': _(get_author_name(api, x.get('from_id')))})
        e.id(post_link)
        e.link(href=_(post_link))
        e.link(href=_(post_link), rel='alternate')

        tags = pat.findall(text)
        title = x.get('text', '')
        for tag in tags:
            e.category(term=_(tag))
            title = title.replace('#{}'.format(tag), '')

        title = re.sub('<[^<]+?>', ' ', title)
        title = textwrap.wrap(title, width=80)[0]
        e.title(_(title.strip()))

    fg.rss_file('rss.xml')
def rss(conversation, url, author_name, author_email, title, subtitle,
        language, output_path):
    """Export all the links of the conversation in a simple RSS feed"""
    from feedgen.feed import FeedGenerator

    fg = FeedGenerator()
    fg.id(url)
    fg.title(title)
    fg.author({
        'name': author_name,
        'email': author_email,
    })
    fg.link(href=url, rel='alternate')
    if subtitle:
        fg.subtitle(subtitle)
    fg.language(language)

    for message in conversation.history():
        match = re.search(
            # raw string avoids the invalid '\|' escape in a plain string
            r"^.*<(?P<url>[^>|]+)\|?(?P<title>[^>]+)?>.*$",
            message.data["text"],
            flags=re.MULTILINE
        )
        if match is not None:
            fe = fg.add_entry()
            link = match.group("url")
            title = match.group("title") or link
            date = naive_to_local(
                datetime.datetime.fromtimestamp(float(message.data["ts"])))
            description = message.data["text"]
            if "attachments" in message.data:
                attachment = [a for a in message.data["attachments"]
                              if a["title_link"] == link][0]
                title += " | " + attachment["title"]
                description += """
""" + attachment["text"]
            fe.id(link)
            fe.title(title)
            fe.link(href=link)
            fe.published(date)
            user = config.slack.get_user(message.data["user"])
            author = {
                "name": message.data["username"],
                "email": user.email or "noemail",
            }
            fe.author(author)
            fe.description(description)

    fg.rss_file(output_path, pretty=True)
def autogen(*args):
    # Setup Feed
    feed = FeedGenerator()
    feed.id('http://www.whiterock.org/sermons/')
    feed.title('White Rock Fellowship Sermon Podcast')
    feed.description(WRF_DESCRIPTION)
    feed.link(href='https://s3-us-west-1.amazonaws.com/wrf-autogen/wrf-podcast.rss')
    feed.language('en-US')

    user_agent = {'User-agent': 'Mozilla/5.0'}
    html = requests.get(WRF_WEBSITE, headers=user_agent).text
    soup = bs4.BeautifulSoup(html, convertEntities=bs4.BeautifulSoup.HTML_ENTITIES)

    feed.load_extension('podcast')
    feed.podcast.itunes_subtitle('Sermon Recordings from White Rock Fellowship Dallas, TX')
    feed.podcast.itunes_category('Religion & Spirituality', 'Christianity')
    feed.podcast.itunes_author('White Rock Fellowship')
    feed.podcast.itunes_explicit('no')
    feed.podcast.itunes_owner('Ryan Hoium', '*****@*****.**')
    feed.podcast.itunes_summary(WRF_DESCRIPTION)
    feed.podcast.itunes_subtitle('White Rock Fellowship Sermon Podcast')
    feed.podcast.itunes_image(WRF_LOGO)

    series_pagelinks = []
    series_sidebar = soup.find('div', {'id': 'custom_category-2'})
    for series in series_sidebar.findAll('a'):
        series_link = series['href']
        series_name = series.text
        series_html = requests.get(series_link, headers=user_agent).text
        series_soup = bs4.BeautifulSoup(series_html, convertEntities=bs4.BeautifulSoup.HTML_ENTITIES)
        pagination = series_soup.find('ul', {'class': 'pagination'})
        series_pagelinks.append({'series_name': series_name,
                                 'series_link': series_link})
        if pagination is not None:
            for series_page in pagination.findAll('a', {'class': ''}):
                series_pagelinks.append({'series_name': series_name,
                                         'series_link': series_page['href']})

    postings = []
    for series_page in series_pagelinks:
        series_link = series_page['series_link']
        series_name = series_page['series_name']
        series_html = requests.get(series_link, headers=user_agent).text
        series_soup = bs4.BeautifulSoup(series_html, convertEntities=bs4.BeautifulSoup.HTML_ENTITIES)
        for posting in series_soup.findAll('article', {'class': 'post sermon'}):
            for link in posting.findAll('a', {'data-original-title': 'Audio'}):
                try:
                    inner_html = requests.get(link['href'], headers=user_agent).text
                    inner_soup = bs4.BeautifulSoup(inner_html, convertEntities=bs4.BeautifulSoup.HTML_ENTITIES)
                    title = series_name + ' : ' + inner_soup.find('h2', {'class': 'post-title'}).text
                    download_link = inner_soup.find('a', {'data-original-title': 'Download Audio'})['href']
                    download_link = download_link.split('=')[1]
                    length = requests.head(download_link).headers.get('content-length', None)
                    staff_data = inner_soup.find('div', {'class': 'staff-data'}).text
                    author = staff_data.split(' on ')[0]
                    author = author.split('by ')[1]
                    date = staff_data.split(' on ')[1]
                    date = datetime.datetime.strptime(date, '%B %d, %Y')  # .strftime('%m/%d/%Y')
                    date = central_tz.localize(datetime.datetime.combine(date, datetime.time.min))
                except:
                    print("Error processing: ", link)
                    continue
                try:
                    entry = feed.add_entry()
                    entry.id(download_link)
                    entry.title(title + " by " + author)
                    entry.enclosure(download_link, length, 'audio/mpeg')
                    entry.podcast.itunes_author(itunes_author=author)
                    entry.pubdate(pubDate=date)
                except:
                    print("Error Generating Feed Entry: ", title)
                    continue
                postings.append({'title': title,
                                 'download_link': download_link,
                                 'date': date,
                                 'author': staff_data})

    rssString = feed.rss_str(pretty=True)

    s3 = boto.connect_s3()
    wrf = s3.get_bucket('wrf-autogen')
    s3key = wrf.get_key('wrf-podcast.rss')
    s3key.set_contents_from_string(rssString)
    s3key.make_public()
def get_rss():
    zen_url = request.args.get('url')
    # set telegram instant view rhash if available
    tg_rhash = request.args.get('tg_rhash')
    limit_description = request.args.get('limit_description', type=int)

    if not zen_url:
        return 'url (?url=https://zen.yandex.ru/media/.../) must be set'
    parsed_url = urlparse(zen_url)
    if parsed_url.netloc != 'zen.yandex.ru':
        return 'Domain must be zen.yandex.ru'

    # validate tg_rhash
    if tg_rhash and not re.match(r'^[a-fA-F\d]+$', tg_rhash):
        return 'Invalid tg_rhash. Please, check rhash value from instant view template'

    if not re.match(r'^/(media/)?(id/[\da-f]+|[a-z\d_]+)/?$', parsed_url.path):
        return 'Url is unsupported. Supported formats:<br>' \
               '• https://zen.yandex.ru/media/id/01234567890abcdef0123456 <br>' \
               '• https://zen.yandex.ru/media/nickname'

    resp = requests.get(
        zen_url, headers={'User-Agent': 'TelegramBot (like TwitterBot)'})
    doc = fromstring(resp.text)

    try:
        text = re.search(
            r'{.+}',
            doc.xpath('.//script[contains(text(), "window.__SERVER_STATE__")]')[0].text)[0]
        json_data = json.loads(text)
    except:
        return abort(404)

    items = json_data['feed'].get('items')
    items_order = json_data['feed'].get('itemsOrder')
    publisher = next(iter(json_data.get('sources').values()))

    feed = FeedGenerator()
    feed.id('http://zen.yandex.ru/')
    feed.title(publisher.get('title'))
    feed.subtitle(publisher.get('description').strip())
    feed.language('ru')
    feed.author({'name': '-', 'email': '-'})
    feed.link(href=zen_url, rel='alternate')

    try:
        image_logo_url = publisher.get('logo')
        feed.logo(image_logo_url)
    except:
        pass

    for oItem in items_order:
        item = items.get(oItem)
        if item.get('type') != 'card':
            continue

        entry = feed.add_entry()
        entry.title(item.get('title').strip())
        entry.description(item.get('text').strip()[:limit_description])

        if item.get('image'):
            item_image_url = item.get('image')
            entry.enclosure(url=item_image_url, type='image/webp', length='2048')

        entry_url = item.get('link').split('?')[0]

        # convert to instant view link if tg hash is provided
        if tg_rhash:
            # write original url into author field
            entry.author({'name': '', 'email': entry_url})
            entry.link({
                'href': TG_URL.format(url=quote_plus(entry_url), rhash=tg_rhash)
            })
        else:
            entry.link({'href': entry_url})

        try:
            entry.pubdate(dateparser.parse(
                item.get('creationTime'),
                settings={'RETURN_AS_TIMEZONE_AWARE': True}))
        except:
            pass

    rss_response = Response(feed.rss_str(pretty=True))
    rss_response.headers.set('Content-Type',
                             'application/rss+xml; charset=utf-8')

    return rss_response
def lambda_handler(event, context):
    # obtain all entries in database
    response = table.scan(FilterExpression=Attr('episode_int').gte(1))

    # save object with the items themselves
    items = response['Items']
    # print(items)
    items_sorted = sorted(items, key=lambda i: i['episode_int'])

    # set up overall feed metadata
    fg = FeedGenerator()

    # general feed params
    fg.id('https://r-podcast.org')
    fg.title('Residual Snippets')
    fg.author({'name': 'Eric Nantz', 'email': '*****@*****.**'})
    fg.link(href='https://r-podcast.org', rel='alternate')
    fg.logo(LOGO_URL)
    fg.subtitle('Musings on R, data science, linux, and life')
    fg.link(href=RSS_URL, rel='self')
    fg.language('en')

    fg.load_extension('podcast')

    # podcast-specific params
    fg.podcast.itunes_category('Technology')
    fg.podcast.itunes_author('Eric Nantz')
    fg.podcast.itunes_explicit('no')
    fg.podcast.itunes_owner('Eric Nantz', '*****@*****.**')
    fg.podcast.itunes_summary('Residual Snippets is an informal, unedited, and free-flowing audio podcast from Eric Nantz. If you enjoy hearing quick takes from a data scientist on their journey to blend innovative uses of open-source technology, contributing back to their brilliant communities, and juggling the curveballs life throws at them, this podcast is for you!')

    for x in range(len(items_sorted)):
        # print(items[x])
        fe = fg.add_entry()
        fe.title(items_sorted[x]['episode_title'])
        fe.author({'name': 'Eric Nantz', 'email': '*****@*****.**'})
        fe.enclosure(url=items_sorted[x]['episode_url'], type='audio/mpeg')

        # process description before adding to feed
        ep_desc = create_summary(items_sorted[x]['episode_summary'])
        # fe.description(items_sorted[x]['episode_summary'])
        fe.description(ep_desc)

    # populate xml file for RSS feed
    feed_string = fg.rss_str(pretty=True)
    fg.rss_file('/tmp/residual_snippets.xml', pretty=True)

    # upload xml feed to pcloud and s3
    pc = PyCloud(PCLOUD_USERNAME, PCLOUD_PASS)
    pc.uploadfile(data=feed_string,
                  filename='residual_snippets.xml',
                  folderid=PCLOUD_FOLDER_ID)

    # upload_file("/tmp/residual_snippets.xml", BUCKET_NAME, object_name='residual_snippets.xml')
    s3_client.upload_file("/tmp/residual_snippets.xml", BUCKET_NAME,
                          'residual_snippets.xml')

    # create export of dynamodb and upload to s3
    # obtain all entries in database
    response2 = table.scan(FilterExpression=Attr('episode_int').gte(1))

    # save object with the items themselves
    items2 = response2['Items']
    items2_sorted = sorted(items2, key=lambda i: i['episode_int'])

    db_export = "/tmp/dbexport.json"
    f = open(db_export, "w")
    f.write(json.dumps(items2_sorted, indent=2, default=decimal_default))
    f.close()

    # upload to s3 bucket
    success = s3_client.upload_file(db_export, BUCKET_NAME, 'dbexport.json')

    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }
def get(self, channel):
    channel = channel.split('/')
    if len(channel) < 2:
        channel.append('video')
    channel_name = ['/'.join(channel)]
    self.set_header('Content-type', 'application/rss+xml')
    if channel_name[0] in channel_feed and channel_feed[channel_name[0]]['expire'] > datetime.datetime.now():
        self.write(channel_feed[channel_name[0]]['feed'])
        self.finish()
        return
    fg = None
    video = None
    calls = 0
    response = {'nextPageToken': ''}
    while 'nextPageToken' in response.keys():
        next_page = response['nextPageToken']
        payload = {
            'part': 'snippet,contentDetails',
            'maxResults': 50,
            'channelId': channel[0],
            'key': key,
            'pageToken': next_page
        }
        request = requests.get(
            'https://www.googleapis.com/youtube/v3/activities',
            params=payload)
        calls += 1
        if request.status_code != 200:
            payload = {
                'part': 'snippet',
                'maxResults': 1,
                'forUsername': channel[0],
                'key': key
            }
            request = requests.get(
                'https://www.googleapis.com/youtube/v3/channels',
                params=payload)
            response = request.json()
            channel[0] = response['items'][0]['id']
            channel_name.append('/'.join(channel))
            payload = {
                'part': 'snippet,contentDetails',
                'maxResults': 50,
                'channelId': channel[0],
                'key': key,
                'pageToken': next_page
            }
            request = requests.get(
                'https://www.googleapis.com/youtube/v3/activities',
                params=payload)
            calls += 2
        response = request.json()
        if request.status_code == 200:
            logging.debug('Downloaded Channel Information')
        else:
            logging.error('Error Downloading Channel: %s', request.reason)
            self.send_error(reason='Error Downloading Channel')
            return
        if not fg:
            fg = FeedGenerator()
            fg.load_extension('podcast')
            fg.generator('PodTube (python-feedgen)',
                         __version__,
                         'https://github.com/aquacash5/PodTube')
            for item in response['items']:
                if item['snippet']['type'] != 'upload':
                    continue
                elif 'Private' in item['snippet']['title']:
                    continue
                else:
                    snippet = item['snippet']
                    break
            logging.info('Channel: %s (%s)',
                         channel[0], snippet['channelTitle'])
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fg.title(snippet['channelTitle'])
            fg.id('http://' + self.request.host + self.request.uri)
            fg.description(snippet['description'] or ' ')
            fg.author(name='Podtube',
                      email='*****@*****.**',
                      uri='https://github.com/aquacash5/PodTube')
            fg.podcast.itunes_author(snippet['channelTitle'])
            fg.image(snippet['thumbnails'][icon]['url'])
            # `channel` is a list of [channel_id, return_type] here, so use
            # the channel id alone when building the link (interpolating the
            # whole list would render its repr into the URL)
            fg.link(href=f'http://youtube.com/channel/{channel[0]}', rel='self')
            fg.language('en-US')
            fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fg.podcast.itunes_explicit('no')
            fg.podcast.itunes_owner(name='Podtube', email='*****@*****.**')
            fg.podcast.itunes_summary(snippet['description'])
            fg.podcast.itunes_category(cat='Technology')
            fg.updated(str(datetime.datetime.utcnow()) + 'Z')
        for item in response['items']:
            snippet = item['snippet']
            if snippet['type'] != 'upload':
                continue
            if 'private' in snippet['title'].lower():
                continue
            current_video = item['contentDetails']['upload']['videoId']
            logging.debug('ChannelVideo: %s (%s)',
                          current_video, snippet['title'])
            fe = fg.add_entry()
            fe.title(snippet['title'])
            fe.id(current_video)
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fe.updated(snippet['publishedAt'])
            if channel[1] == 'video':
                fe.enclosure(
                    url=f'http://{self.request.host}/video/{current_video}',
                    type="video/mp4")
            elif channel[1] == 'audio':
                fe.enclosure(
                    url=f'http://{self.request.host}/audio/{current_video}',
                    type="audio/mpeg")
            fe.author(name=snippet['channelTitle'])
            fe.podcast.itunes_author(snippet['channelTitle'])
            fe.pubDate(snippet['publishedAt'])
            fe.link(href=f'http://www.youtube.com/watch?v={current_video}',
                    title=snippet['title'])
            fe.podcast.itunes_summary(snippet['description'])
            fe.description(snippet['description'])
            if not video or video['expire'] < fe.pubDate():
                video = {'video': fe.id(), 'expire': fe.pubDate()}
    feed = {
        'feed': fg.rss_str(),
        'expire': datetime.datetime.now() + datetime.timedelta(hours=calls)
    }
    for chan in channel_name:
        channel_feed[chan] = feed
    self.write(feed['feed'])
    self.finish()
    video = video['video']
    mp3_file = 'audio/{}.mp3'.format(video)
    if channel[1] == 'audio' and not os.path.exists(mp3_file) \
            and video not in conversion_queue.keys():
        conversion_queue[video] = {
            'status': False,
            'added': datetime.datetime.now()
        }
import os  # needed for os.environ below; missing from the original imports
from datetime import datetime

import pytz
from algoliasearch.search_client import SearchClient
from feedgen.feed import FeedGenerator

if __name__ == '__main__':
    client = SearchClient.create(os.environ['APP_ID'], os.environ['API_KEY'])
    index = client.init_index('interviews_publishedAt_desc')
    articles = index.search('')['hits']

    fg = FeedGenerator()
    fg.title('IH Interviews')
    fg.id('ih-interviews-20201123-205642')

    pubs = []
    for article in articles:
        pub = datetime.fromtimestamp(article['publishedAt'] / 1000).replace(
            tzinfo=pytz.timezone('UTC'))
        pubs.append(pub)

        fe = fg.add_entry()
        fe.id(article['interviewId'])
        fe.published(pub)
        fe.pubDate(pub)
        fe.updated(pub)
        fe.title(article['title'])
        fe.link(href=f"https://www.indiehackers.com/interview/{article['interviewId']}")

    fg.updated(max(pubs))

    print(fg.atom_str(pretty=True).decode())
def main():
    if len(sys.argv) != 2 or not (
            sys.argv[1].endswith('rss') or
            sys.argv[1].endswith('atom') or
            sys.argv[1] == 'torrent' or
            sys.argv[1] == 'podcast'):
        print(USAGE)
        exit()

    arg = sys.argv[1]

    fg = FeedGenerator()
    fg.id('http://lernfunk.de/_MEDIAID_123')
    fg.title('Testfeed')
    fg.author({'name': 'Lars Kiesow', 'email': '*****@*****.**'})
    fg.link(href='http://example.com', rel='alternate')
    fg.category(term='test')
    fg.contributor(name='Lars Kiesow', email='*****@*****.**')
    fg.contributor(name='John Doe', email='*****@*****.**')
    fg.icon('http://ex.com/icon.jpg')
    fg.logo('http://ex.com/logo.jpg')
    fg.rights('cc-by')
    fg.subtitle('This is a cool feed!')
    fg.link(href='http://larskiesow.de/test.atom', rel='self')
    fg.language('de')

    fe = fg.add_entry()
    fe.id('http://lernfunk.de/_MEDIAID_123#1')
    fe.title('First Element')
    fe.content('''Lorem ipsum dolor sit amet, consectetur adipiscing elit.
        Tamen aberramus a proposito, et, ne longius, prorsus, inquam, Piso,
        si ista mala sunt, placet. Aut etiam, ut vestitum, sic sententiam
        habeas aliam domesticam, aliam forensem, ut in fronte ostentatio
        sit, intus veritas occultetur? Cum id fugiunt, re eadem defendunt,
        quae Peripatetici, verba.''')
    fe.summary(u'Lorem ipsum dolor sit amet, consectetur adipiscing elit…')
    fe.link(href='http://example.com', rel='alternate')
    fe.author(name='Lars Kiesow', email='*****@*****.**')

    if arg == 'atom':
        print_enc(fg.atom_str(pretty=True))
    elif arg == 'rss':
        print_enc(fg.rss_str(pretty=True))
    elif arg == 'podcast':
        # Load the podcast extension. It will automatically be loaded for all
        # entries in the feed, too. Thus also for our “fe”.
        fg.load_extension('podcast')
        fg.podcast.itunes_author('Lars Kiesow')
        fg.podcast.itunes_category('Technology', 'Podcasting')
        fg.podcast.itunes_explicit('no')
        fg.podcast.itunes_complete('no')
        fg.podcast.itunes_new_feed_url('http://example.com/new-feed.rss')
        fg.podcast.itunes_owner('John Doe', '*****@*****.**')
        fg.podcast.itunes_summary('Lorem ipsum dolor sit amet, consectetur ' +
                                  'adipiscing elit. Verba tu fingas et ea ' +
                                  'dicas, quae non sentias?')
        fe.podcast.itunes_author('Lars Kiesow')
        print_enc(fg.rss_str(pretty=True))
    elif arg == 'torrent':
        fg.load_extension('torrent')
        fe.link(href='http://example.com/torrent/debian-8-netint.iso.torrent',
                rel='alternate',
                type='application/x-bittorrent, length=1000')
        fe.torrent.filename('debian-8.4.0-i386-netint.iso.torrent')
        fe.torrent.infohash('7661229811ef32014879ceedcdf4a48f256c88ba')
        fe.torrent.contentlength('331350016')
        fe.torrent.seeds('789')
        fe.torrent.peers('456')
        fe.torrent.verified('123')
        print_enc(fg.rss_str(pretty=True))
    elif arg.startswith('dc.'):
        fg.load_extension('dc')
        fg.dc.dc_contributor('Lars Kiesow')
        if arg.endswith('.atom'):
            print_enc(fg.atom_str(pretty=True))
        else:
            print_enc(fg.rss_str(pretty=True))
    elif arg.startswith('syndication'):
        fg.load_extension('syndication')
        fg.syndication.update_period('daily')
        fg.syndication.update_frequency(2)
        fg.syndication.update_base('2000-01-01T12:00+00:00')
        if arg.endswith('.rss'):
            print_enc(fg.rss_str(pretty=True))
        else:
            print_enc(fg.atom_str(pretty=True))
    elif arg.endswith('atom'):
        fg.atom_file(arg)
    elif arg.endswith('rss'):
        fg.rss_file(arg)
def index():
    limit_tag = request.args.get('tag')
    pause_uuid = request.args.get('pause')

    if pause_uuid:
        try:
            datastore.data['watching'][pause_uuid]['paused'] ^= True
            datastore.needs_write = True
            return redirect(url_for('index', tag=limit_tag))
        except KeyError:
            pass

    # Sort by last_changed and add the uuid which is usually the key..
    sorted_watches = []
    for uuid, watch in datastore.data['watching'].items():
        if limit_tag is not None:
            # Support for comma separated list of tags.
            for tag_in_watch in watch['tag'].split(','):
                tag_in_watch = tag_in_watch.strip()
                if tag_in_watch == limit_tag:
                    watch['uuid'] = uuid
                    sorted_watches.append(watch)
        else:
            watch['uuid'] = uuid
            sorted_watches.append(watch)

    sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True)

    existing_tags = datastore.get_all_tags()
    rss = request.args.get('rss')

    if rss:
        fg = FeedGenerator()
        fg.title('changedetection.io')
        fg.description('Feed description')
        fg.link(href='https://changedetection.io')

        for watch in sorted_watches:
            if not watch['viewed']:
                fe = fg.add_entry()
                fe.title(watch['url'])
                fe.link(href=watch['url'])
                fe.description(watch['url'])
                fe.guid(watch['uuid'], permalink=False)
                dt = datetime.datetime.fromtimestamp(
                    int(watch['newest_history_key']))
                dt = dt.replace(tzinfo=pytz.UTC)
                fe.pubDate(dt)

        response = make_response(fg.rss_str())
        response.headers.set('Content-Type', 'application/rss+xml')
        return response

    else:
        from backend import forms
        form = forms.quickWatchForm(request.form)

        output = render_template(
            "watch-overview.html",
            form=form,
            watches=sorted_watches,
            tags=existing_tags,
            active_tag=limit_tag,
            has_unviewed=datastore.data['has_unviewed'])

    return output
async def channel(request, channel_id, return_type='video'):
    log.info(f'Channel: {channel_id}')
    channel_name = [f'{channel_id}/{return_type}']
    if channel_name[0] in channel_feed and channel_feed[channel_name[0]]['expire'] > datetime.now():
        return raw(channel_feed[channel_name[0]]['feed'],
                   content_type='application/rss+xml')
    fg = None
    calls = 0
    response = {'nextPageToken': ''}
    while 'nextPageToken' in response:
        next_page = response['nextPageToken']
        payload = {
            'part': 'snippet,contentDetails',
            'maxResults': 50,
            'channelId': channel_id,
            'key': KEY,
            'pageToken': next_page
        }
        response = json.loads(
            await get('https://www.googleapis.com/youtube/v3/activities',
                      params=payload))
        calls += 1
        if 'error' in response:
            payload = {
                'part': 'snippet',
                'maxResults': 1,
                'forUsername': channel_id,
                'key': KEY
            }
            response = json.loads(
                await get('https://www.googleapis.com/youtube/v3/channels',
                          params=payload))
            channel_id = response['items'][0]['id']
            channel_name.append(f'{channel_id}/{return_type}')
            payload = {
                'part': 'snippet,contentDetails',
                'maxResults': 50,
                'channelId': channel_id,
                'key': KEY,
                'pageToken': next_page
            }
            response = json.loads(
                await get('https://www.googleapis.com/youtube/v3/activities',
                          params=payload))
            calls += 2
        if not fg:
            fg = FeedGenerator()
            fg.load_extension('podcast')
            fg.generator('PodTube', __version__,
                         'https://github.com/aquacash5/PodTube')
            snippet = response['items'][0]['snippet']
            if 'Private' in snippet['title']:
                continue
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fg.title(snippet['title'])
            fg.id(f'http://{request.headers["host"]}{request.url}')
            fg.description(snippet['description'] or ' ')
            fg.author(name=snippet['channelTitle'])
            fg.image(snippet['thumbnails'][icon]['url'])
            fg.link(href=f'https://www.youtube.com/playlist?list={channel_id}')
            fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fg.podcast.itunes_summary(snippet['description'])
            fg.podcast.itunes_category('Technology', 'Podcasting')
            fg.updated(f'{str(datetime.utcnow())}Z')
        for item in response['items']:
            snippet = item['snippet']
            if snippet['type'] != 'upload':
                continue
            current_video = item['contentDetails']['upload']['videoId']
            log.debug(f'ChannelVideo: {current_video} {snippet["title"]}')
            fe = fg.add_entry()
            fe.title(snippet['title'])
            fe.id(current_video)
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fe.updated(snippet['publishedAt'])
            if return_type == 'audio':
                fe.enclosure(
                    url=f'http://{request.headers["host"]}/audio/{current_video}',
                    type="audio/mpeg")
            else:
                fe.enclosure(
                    url=f'http://{request.headers["host"]}/video/{current_video}',
                    type="video/mp4")
            fe.author(name=snippet['channelTitle'])
            fe.podcast.itunes_author(snippet['channelTitle'])
            fe.pubdate(snippet['publishedAt'])
            fe.link(href=f'http://www.youtube.com/watch?v={current_video}',
                    title=snippet['title'])
            fe.podcast.itunes_summary(snippet['description'])
            fe.description(snippet['description'])
            await sleep(0)
    feed = {
        'feed': fg.rss_str(),
        'expire': datetime.now() + timedelta(hours=calls)
    }
    for _name in channel_name:
        channel_feed[_name] = feed
    return raw(feed['feed'], content_type='application/rss+xml')
class Bridge(object):
    NO_VALUE = object()

    def __init__(self, filelike):
        if isstr(filelike):
            feed = filelike
        else:
            feed = filelike.read()
        self.raw = feed
        self.parsed = feedparser.parse(self.raw)
        self.feed = FeedGenerator()
        # Set feed-level values.
        self.build_feed()
        self.build_entries()

    def build_feed(self):
        f = self.parsed.feed
        for field in ['id', 'title', 'subtitle', 'updated', 'rights',
                      'generator', 'docs', 'language',
                      ('xml_lang', 'language'),
                      ('authors', 'author'),
                      ('links', 'link')]:
            self._copy(f, self.feed, field)

        if f.get('image'):
            image_kwargs = {}
            for image_field in 'url', 'title', 'link', 'width', 'height', 'description':
                ignore, value = self._setter(f.image, self.feed, image_field)
                if value is not self.NO_VALUE:
                    image_kwargs[image_field] = value
            if image_kwargs:
                self.feed.image(**image_kwargs)

    def build_entries(self):
        for entry in self.parsed.entries:
            self.build_entry(entry)

    def build_entry(self, parsed):
        built = self.feed.add_entry(order='append')
        # TODO: 'tag' is not supported in feedgen
        for field in ['id', 'title', 'updated', 'summary', 'published',
                      ('links', 'link')]:
            self._copy(parsed, built, field)

        permalink = parsed.get('link')
        guid_is_link = parsed['guidislink']
        if permalink:
            built.guid(permalink, guid_is_link)

    def _setter(self, feedparser_obj, feedgen_obj, field):
        if isinstance(field, tuple):
            field, method_name = field
        else:
            method_name = field
        setter = getattr(feedgen_obj, method_name, None)
        value = feedparser_obj.get(field, self.NO_VALUE)
        return setter, value

    def _copy(self, feedparser_obj, feedgen_obj, field):
        setter, value = self._setter(feedparser_obj, feedgen_obj, field)
        if value is self.NO_VALUE:
            return
        if not isinstance(value, list):
            value = [value]
        for v in value:
            setter(v)
        if field in feedparser_obj:
            # Temporary cleanup
            del feedparser_obj[field]
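# Hedged usage sketch: round-trip a small feed document through the Bridge
# class above (feedparser in, feedgen out). Assumes the isstr() helper is
# defined in the surrounding module; the XML string is a made-up example.
bridge = Bridge("<rss version='2.0'><channel><title>t</title></channel></rss>")
print(bridge.feed.rss_str(pretty=True).decode('utf-8'))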
def render(self, data, media_type=None, renderer_context=None):
    renderer_context = renderer_context or {}
    response = renderer_context['response']
    if response.exception:
        return f'Error: {response.status_code}'

    # If we get this far, the response is not an error and we can render the
    # RSS feed.

    # Prepare the feed generator object.
    fg = FeedGenerator()
    fg.load_extension('podcast')

    # Playlist wrapper.
    fg.id(data['url'])
    fg.title(data['title'])

    # Description. Feedgen will raise an exception if the description is empty.
    fg.description(_ensure_non_empty(data['description']))
    fg.podcast.itunes_summary(_ensure_non_empty(data['description']))

    # Self link.
    fg.link(href=data['url'])

    # TODO: Missing fields from playlists: author, contributors, logo,
    # subtitle, and language.

    # Add entries
    for entry in data['entries']:
        fe = fg.add_entry()

        # The item id. We don't set permaLink for the moment because URLs
        # may change during the alpha.
        fe.id(entry['url'])

        # Set basic metadata. Feedgen will raise an exception if the
        # description is empty.
        fe.title(entry['title'])
        fe.description(_ensure_non_empty(entry['description']))
        fe.summary(_ensure_non_empty(entry['description']))

        # RSS only supports one link with nothing but a URL. So for the RSS
        # link element the last link with rel=alternate is used. We link to
        # the UI view even though we use the API endpoint as the id.
        fe.link(href=entry['url'])

        # Publication date.
        fe.pubDate(entry['published_at'])

        # Free-text copyright field.
        fe.rights(entry['rights'])

        # When the item was last updated.
        fe.updated(entry['updated_at'])

        # Image. Note: iTunes *requires* this to end in ".jpg" or ".png",
        # which is annoying.
        fe.podcast.itunes_image(entry['imageUrl'])

        # Duration in seconds.
        fe.podcast.itunes_duration(entry['duration'])

        # The actual downloads themselves.
        for enclosure in entry['enclosures']:
            fe.enclosure(url=enclosure['url'], type=enclosure['mime_type'])

    # Render the feed.
    return fg.rss_str(pretty=True)
def get_xml(self: Serializer, response: Response) -> Tuple[str, int]:
    """
    Serialize the provided response data into Atom, version 1.0.

    Parameters
    ----------
    response : Response
        The search response data to be serialized.

    Returns
    -------
    data : str
        The serialized XML results.
    status
        The HTTP status code for the operation.

    """
    fg = FeedGenerator()
    fg.register_extension("arxiv", ArxivExtension, ArxivEntryExtension,
                          rss=False)
    fg.id("http://arxiv.org/rss/version=atom_1.0")
    archive = response.hits[0]["primary_classification"]["archive"]
    fg.title(archive["id"] + " updates on arXiv.org")
    fg.link(href='http://arxiv.org/rss/version=atom_1.0',
            rel='self', type='application/atom+xml')
    fg.updated(datetime.utcnow().replace(tzinfo=utc))

    # TODO - Try to remove generator element? This doesn't work - code
    # ignores "None"
    # fg.generator(None)
    # TODO - We don't currently set "subtitle", but could do it like this
    # fg.subtitle(
    #     f"{archive['name']} ({archive['id']}) updates on the arXiv.org e-print archive")

    # Add each search result "hit" to the feed
    for hit in response:
        entry = fg.add_entry()
        entry.id("http://arxiv.org/abs/" + hit['id'])
        entry.title(hit['title'])
        entry.summary(hit['abstract'])
        entry.published(hit['submitted_date'])
        entry.updated(hit['updated_date'])

        entry.link({
            "href": url_for("abs_by_id", paper_id=hit['id']),
            "type": "text/html"
        })
        pdf_link = {
            "title": "pdf",
            "rel": "related",
            "type": "application/pdf"
        }
        pdf_link["href"] = url_for("pdf_by_id", paper_id=hit['id'])
        entry.link(pdf_link)

        # Add categories
        categories = [hit['primary_classification'].to_dict()['category']]
        # loop variable renamed from `dict`, which shadowed the builtin
        for classification in hit['secondary_classification']:
            categories += [classification['category'].to_dict()]
        for cat in categories:
            label = cat['name'] + " (" + cat['id'] + ")"
            category = {
                "term": cat['id'],
                "scheme": "http://arxiv.org/schemas/atom",
                "label": label
            }
            entry.category(category)

        # Add arXiv-specific element "comment" when one is present (the
        # original check was inverted and only emitted empty comments)
        if hit['comments'].strip():
            entry.arxiv.comment(hit['comments'])

        # Add arXiv-specific element "journal_ref" (same inverted check fixed)
        if hit['journal_ref'].strip():
            entry.arxiv.journal_ref(hit['journal_ref'])

        # Add arXiv-specific element "primary_category"
        prim_cat = hit['primary_classification'].to_dict()['category']
        label = prim_cat['name'] + " (" + prim_cat['id'] + ")"
        category = {
            "term": prim_cat['id'],
            "scheme": "http://arxiv.org/schemas/atom",
            "label": label
        }
        entry.arxiv.primary_category(category)

        # Add arXiv-specific element "doi"
        if hit['doi']:
            entry.arxiv.doi(hit['doi'])

        # Add each author
        for author in hit['authors']:
            author_list = {"name": author['full_name']}
            entry.author(author_list)

    # TODO - How can arxiv-specific affiliation elements be added to authors?

    data = fg.atom_str(pretty=True)
    status_code = status.HTTP_200_OK
    return data, status_code
def write_rss(self, audio=False):
    """Write podcast feeds to files."""
    print("playlist self.info", flush=True)
    pp.pprint(self.info)
    prefix = "audio-" if audio else ""
    feed_url = self.controller.base_url + self.folder + '/' + prefix + 'podcast.xml'
    feedgen = FeedGenerator()
    feedgen.load_extension('podcast')
    feedgen.generator('Adafruit-Podcast')
    feedgen.id(feed_url)
    feedgen.title(self.info['title'])
    feedgen.subtitle(self.info['itunesSubtitle'])
    feedgen.author({'name': self.info['author']})
    for category in self.info['categories']:
        feedgen.category(term=category)
    feedgen.webMaster(self.info['webMaster'])
    feedgen.managingEditor(self.info['managingEditor'])
    feedgen.link(href=feed_url, rel='self')

    # Link to a chosen URL as an alternate, if set.
    if 'htmlUrl' in self.info:
        feedgen.link(href=self.info['htmlUrl'], rel='alternate')
    else:
        # Otherwise link to the original YouTube playlist as an alternate:
        if isinstance(self.url, list):
            for url in self.url:
                feedgen.link(href=url, rel='alternate')
        else:
            feedgen.link(href=self.url, rel='alternate')

    feedgen.language('en')
    # feedgen.logo('http://ex.com/logo.jpg')
    # pylint: disable=no-member
    feedgen.podcast.itunes_category(self.info['itunesCategory']['text'])
    feedgen.podcast.itunes_subtitle(self.info['itunesSubtitle'])
    feedgen.podcast.itunes_summary(self.info['description'])
    feedgen.podcast.itunes_owner(email=self.info['itunesOwner']['email'],
                                 name=self.info['itunesOwner']['name'])
    feedgen.podcast.itunes_author(self.info['itunesOwner']['name'])
    feedgen.podcast.itunes_image(self.controller.base_url + self.folder + '/image.jpg')
    feedgen.podcast.itunes_explicit('clean')

    for vid in self.videos:
        print("vid:\n", flush=True)
        pp.pprint(vid)
        print("\n", flush=True)
        vid_filename = vid['_filename'].split('.')[0] + (".mp3" if audio else ".mp4")
        vid_url = self.video_url(vid_filename)
        # Size of enclosed file in bytes:
        vid_size = os.path.getsize(vid_filename)
        # Date of upload (from the youtube-dl JSON data)
        eastern = pytz.timezone('US/Eastern')
        vid_date = eastern.localize(
            datetime.datetime.strptime(vid['upload_date'], '%Y%m%d'))
        entry = feedgen.add_entry()
        entry.id(vid_url)
        entry.title(vid['fulltitle'])
        entry.published(vid_date)
        for category in vid['categories']:
            entry.category(term=category)
        entry.description(vid['description'])
        # 'audio/mpeg' is the registered MIME type ('audio/mp3' is not)
        entry.enclosure(vid_url, str(vid_size),
                        ('audio/mpeg' if audio else 'video/mp4'))
        entry.podcast.itunes_image(self.controller.base_url + self.folder + '/image.jpg')
        entry.podcast.itunes_author(self.info['author'])
        entry.podcast.itunes_summary(vid['description'])
        entry.podcast.itunes_duration(vid['duration'])

    # Ensure output folder for this podcast exists:
    os.makedirs(os.path.join(self.controller.output_dir, self.folder),
                exist_ok=True)
    # Generate RSS file in output folder:
    feedgen.rss_file(
        os.path.join(self.controller.output_dir, self.folder,
                     prefix + 'podcast.xml'))
class Spider: def __init__(self): self.config = toml.load("config.toml") self.aws_session = boto3.session.Session() self.cloudwatch = self.aws_session.client( service_name="cloudwatch", region_name=self.config["secretsmanager"]["region"], ) self._report_execution() self.metric_retry_count = 0 self.environment = getenv("APP_ENVIRONMENT", "development") with open("version.txt", "r") as f: self.version = f.readline().strip() loglevel = logging.DEBUG if self.environment == "production": loglevel = logging.ERROR logging.basicConfig( stream=stdout, level=loglevel, format="%(asctime)s - %(levelname)s - %(message)s", ) sentry_sdk.init( "https://[email protected]/1509686", environment=self.environment, release=self.version, ) self.feed = FeedGenerator() self.session = HTMLSession() self.s3 = self.aws_session.client(service_name="s3") def _anti_hammer_sleep(self): logging.debug("zzzZZzzzZZZZZzzzzz") sleep(randrange(1, self.config["anti_hammer_sleep"])) def _secrets(self): logging.debug("fetching secrets from AWS") try: client = self.aws_session.client( service_name="secretsmanager", region_name=self.config["secretsmanager"]["region"], ) get_secret_value_response = client.get_secret_value( SecretId=self.config["secretsmanager"]["secret_name"]) except ClientError as e: capture_exception(e) if e.response["Error"]["Code"] == "DecryptionFailureException": # Secrets Manager can't decrypt the protected secret text using the provided KMS key. # Deal with the exception here, and/or rethrow at your discretion. raise e elif e.response["Error"][ "Code"] == "InternalServiceErrorException": # An error occurred on the server side. # Deal with the exception here, and/or rethrow at your discretion. raise e elif e.response["Error"]["Code"] == "InvalidParameterException": # You provided an invalid value for a parameter. # Deal with the exception here, and/or rethrow at your discretion. raise e elif e.response["Error"]["Code"] == "InvalidRequestException": # You provided a parameter value that is not valid for the current state of the resource. # Deal with the exception here, and/or rethrow at your discretion. raise e elif e.response["Error"]["Code"] == "ResourceNotFoundException": # We can't find the resource that you asked for. # Deal with the exception here, and/or rethrow at your discretion. raise e else: # Decrypts secret using the associated KMS CMK. # Depending on whether the secret is a string or binary, one of these fields will be populated. 
if "SecretString" in get_secret_value_response: return json.loads(get_secret_value_response["SecretString"]) def crawl(self): self._login() self.feed.id(f"{self.version}.vadviktor.xyz") self.feed.updated(datetime.utcnow().isoformat("T") + "Z") self.feed.author({ "name": "Viktor (Ikon) VAD", "email": "*****@*****.**", "uri": "https://www.github.com/vadviktor", }) self.feed.title("Animetorrents.me feed") self.feed.link( href=self.config["s3"]["object_url"].format( bucket=self.config["s3"]["bucket"], region=self.config["s3"]["region"], filekey=self.config["s3"][f"feed_filename_{self.environment}"], ), rel="self", ) for profile_url in self._torrent_profile_links(self._max_pages()): profile_data = self._parse_profile(profile_url) if profile_data is None: continue fe = self.feed.add_entry(order="append") fe.id(profile_url) fe.title(profile_data["title"]) fe.link(href=profile_url, rel="self") cover_image_url = None if profile_data["cover_image_src"] is not None: cover_image_url = self._cover_image_upload_and_get_url( profile_data["cover_image_src"]) thumbnail_small_image_urls = self._thumbnail_small_image_upload_and_get_urls( profile_data["thumbnail_small_image_srcs"]) thumbnail_large_image_urls = self._thumbnail_large_image_upload_and_get_urls( profile_data["thumbnail_large_image_srcs"]) torrent_public_url = self._torrent_upload_and_get_url( profile_data["torrent_download_url"], profile_data["torid"], slugify(profile_data["title"]), profile_data["publish_date"], ) content_lines = [] if cover_image_url is not None: content_lines.append(f'<p><img src="{cover_image_url}" /></p>') content_lines.append(f'<p>[{profile_data["category"]}]</p>') content_lines.append(f'<p>Tags: {profile_data["tags"]}</p>') content_lines.append( f'<p>Published: {profile_data["publish_date"]}</p>') content_lines.append( f'<p><a href="{profile_url}" target="blank">{profile_url}</a></p>' ) content_lines.append( f'<p style="white-space: pre-wrap;">{profile_data["description"]}</p>' ) content_lines.append(f"<p>") for k, v in enumerate(thumbnail_small_image_urls): content_lines.append(f""" <a href="{thumbnail_large_image_urls[k]}" target="blank"> <img src="{v}" width="200" height="100" /> </a>""") content_lines.append(f"</p>") content_lines.append( f'<p><a href="{torrent_public_url}" target="blank">Download</a></p>' ) content_lines.append(f'<p>{profile_data["torrent_details"]}</p>') content_lines.append(f'<p>{profile_data["file_list"]}</p>') if profile_data["media_info"] is not None: content_lines.append(f'<p>{profile_data["media_info"]}</p>') fe.content(self._valid_xhtml_content(content_lines), type="xhtml") self._upload_feed() self._report_retry_count() @staticmethod def _valid_xhtml_content(content_lines: List) -> str: broken_html = "".join(content_lines) # parse as HTML parser = etree.HTMLParser() tree = etree.parse(StringIO(broken_html), parser) # output as valid XML result = etree.tostring(tree.getroot(), pretty_print=True, method="xml") return result.decode("utf-8") def _upload_feed(self): logging.debug("construct and upload feed") atomfeed = self.feed.atom_str() bucket = self.config["s3"]["bucket"] key = self.config["s3"][f"feed_filename_{self.environment}"].format( version=getenv("FEED_VERSION", "v0")) self.s3.upload_fileobj(BytesIO(atomfeed), bucket, key) resp = self.s3.put_object_acl(ACL="public-read", Bucket=bucket, Key=key) if resp is None: capture_message(f"Failed to set object ACL for {bucket}/{key}") def _parse_profile(self, profile_url): logging.debug(f"processing profile {profile_url}") resp = 
self._get(profile_url) if ("Error 404: Torrent not found" in resp.text or "Torrent not found" in resp.text): msg = f"No torrent found for {profile_url}" logging.info(msg) capture_message(msg) return None profile_data = {} profile_data["category"] = resp.html.find("h1.headline img", first=True).attrs["alt"] if any(category in profile_data["category"] for category in self.config["exclude_categories"]): return None profile_data["torid"] = re.match(r".*=(\d+)$", profile_url)[1] try: profile_data["torrent_download_url"] = next( l for l in resp.html.links if "download.php?torid=" in l) except StopIteration: msg = f"did not find download link for {profile_url}" capture_message(msg) raise RuntimeError(msg) profile_data["hashid"] = re.match( r".*torid=([a-z0-9]+)$", profile_data["torrent_download_url"]).group(1) profile_data["title"] = resp.html.find("h1.headline", first=True).text profile_data["description"] = resp.html.find("#torDescription", first=True).text profile_data["tags"] = resp.html.find("#tagLinks", first=True).text profile_data["publish_date"] = self._parse_publish_date( resp.html.find("div.ribbon span.blogDate", first=True).text) profile_data["torrent_details"] = resp.html.find( "#tabs-1 table.dataTable", first=True).html profile_data["media_info"] = self._download_media_info( profile_data["torid"]) profile_data["file_list"] = self._download_file_list( profile_data["hashid"]) try: profile_data["cover_image_src"] = next( link.attrs["src"] for link in resp.html.find("div.contentArea img") if "imghost/covers/" in link.attrs["src"]) except StopIteration: logging.debug(f"did not find cover image for {profile_url}") profile_data["cover_image_src"] = None profile_data["thumbnail_small_image_srcs"] = [ i.attrs["src"] for i in resp.html.find("#torScreens img") ] profile_data["thumbnail_large_image_srcs"] = [ i.attrs["href"] for i in resp.html.find("#torScreens a") ] return profile_data @retry((TimeOutException, ConnectionError), tries=5, delay=3, backoff=2) def _get(self, url, **kwargs) -> Response: self._anti_hammer_sleep() resp = self.session.get(url, **kwargs) if resp.status_code in TIMEOUT_STATUS_CODES: self.metric_retry_count += 1 raise TimeOutException return resp @staticmethod def _parse_publish_date(text) -> datetime: return datetime.fromtimestamp( mktime(strptime(text, "%d %b, %Y [%I:%M %p]"))) def _torrent_profile_links(self, max_pages) -> List: links = [] for page in range(1, self.config["torrent_pages_to_scan"] + 1): resp = self._torrent_list_response(page, max_pages) [ links.append(l) for l in resp.html.links if "torrent-details.php?torid=" in l ] return links @retry(TimeOutException, tries=5, delay=3, backoff=2) def _torrent_list_response(self, current_page: int, max_pages: int) -> Response: logging.debug(f"getting torrent list page no. {current_page}") headers = {"X-Requested-With": "XMLHttpRequest"} url = self.config["site"]["torrent_list_url"].format( max=max_pages, current=current_page) resp = self._get(url=url, headers=headers) if resp.status_code in TIMEOUT_STATUS_CODES: self.metric_retry_count += 1 raise TimeOutException logging.debug(f"response status code {resp.status_code}") logging.debug(f"response length {len(resp.text)}") if "Access Denied!" 
in resp.text: raise RuntimeError("AJAX request was denied") return resp @retry(TimeOutException, tries=5, delay=3, backoff=2) def _login(self): login_url = self.config["site"]["login_url"] username = self._secrets()["username"] password = self._secrets()["password"] self._get(login_url) resp = self.session.post( login_url, data={ "form": "login", "username": username, "password": password }, ) if resp.status_code in TIMEOUT_STATUS_CODES: self.metric_retry_count += 1 raise TimeOutException if "Error: Invalid username or password." in resp.text: raise RuntimeError("login failed because of invalid credentials") else: logging.debug("logged in") @retry(TimeOutException, tries=5, delay=3, backoff=2) def _max_pages(self): logging.debug("finding out torrents max page number") try: resp = self._get(self.config["site"]["torrents_url"]) if resp.status_code in TIMEOUT_STATUS_CODES: self.metric_retry_count += 1 raise TimeOutException if resp.status_code != 200: raise RuntimeError( "the torrents page is not responding correctly") pattern = r"ajax/torrents_data\.php\?total=(?P<max>\d+)&page=1" match = re.search(pattern, resp.text) if match is None: raise RuntimeError("could not find max page number") max_page = match.group("max") logging.debug(f"max pages figured out: {max_page}") return int(max_page) except ConnectionError as e: capture_exception(e) raise RuntimeError("failed to get the torrents page") def _download_media_info(self, torid) -> Optional[str]: logging.debug(f"getting torrent media info for {torid}") headers = {"X-Requested-With": "XMLHttpRequest"} url = self.config["site"]["torrent_techspec_url"].format(torid) resp = self._get(url=url, headers=headers) logging.debug(f"response status code {resp.status_code}") logging.debug(f"response length {len(resp.text)}") if len(resp.text) == 0: return None if "Access Denied!" in resp.text: raise RuntimeError("AJAX request was denied") return resp.html.html def _download_file_list(self, hashid) -> str: logging.debug(f"getting torrent file list for {hashid}") headers = {"X-Requested-With": "XMLHttpRequest"} url = self.config["site"]["torrent_filelist_url"].format(hashid) resp = self._get(url=url, headers=headers) logging.debug(f"response status code {resp.status_code}") logging.debug(f"response length {len(resp.text)}") if "Access Denied!" in resp.text: raise RuntimeError("AJAX request was denied") return resp.html.html def _cover_image_upload_and_get_url(self, url) -> str: matches = re.match(r".*/covers/(\d{4})/(\d{2})/(.*)", url) year = matches[1] month = matches[2] filename = matches[3] key = f"covers/{year}/{month}/{filename}" return self._upload(key, url) def _upload(self, key, url) -> str: """ Check if key exists in the bucket. If not, then download it from url and upload it to S3 as key. Set the object ACL to public readable. Return the public URL for the object. 
Args: key (str): S3 object key url (str): source URL to download the data from Returns: (str): the public URL in S3 """ bucket = self.config["s3"]["bucket"] try: self.s3.head_object(Bucket=bucket, Key=key) except ClientError: resp = self._get(url) self.s3.upload_fileobj( BytesIO(resp.content), bucket, key, ExtraArgs={"StorageClass": "STANDARD_IA"}, ) resp = self.s3.put_object_acl(ACL="public-read", Bucket=bucket, Key=key) if resp is None: capture_message(f"Failed to set object ACL for {bucket}/{key}") return self.config["s3"]["object_url"].format( bucket=self.config["s3"]["bucket"], region=self.config["s3"]["region"], filekey=key, ) def _thumbnail_small_image_upload_and_get_urls(self, urls) -> List: pub_urls = [] for url in urls: matches = re.match(r".*/screenthumb/(\d{4})/(\d{2})/(.*)", url) year = matches[1] month = matches[2] filename = matches[3] key = f"screenthumbs/small/{year}/{month}/{filename}" pub_urls.append(self._upload(key, url)) return pub_urls def _thumbnail_large_image_upload_and_get_urls(self, urls) -> List: pub_urls = [] for url in urls: matches = re.match(r".*/screens/(\d{4})/(\d{2})/(.*)", url) year = matches[1] month = matches[2] filename = matches[3] key = f"screenthumbs/large/{year}/{month}/{filename}" pub_urls.append(self._upload(key, url)) return pub_urls def _torrent_upload_and_get_url(self, url, torid, filename, publish_date) -> str: """ Args: url (str): Source URL to torrent torid (str): Torrent ID filename (str): The filename to use in the S3 key publish_date (datetime): Torrent publish date Returns: (str) S3 public URL for the file """ key = f"torrents/{publish_date.year}/{publish_date.month}/{filename}_{torid}.torrent" return self._upload(key, url) def _report_execution(self): self.cloudwatch.put_metric_data( Namespace="Animetorrents", MetricData=[{ "MetricName": "execution", "Value": 0.0 }], ) def _report_retry_count(self): self.cloudwatch.put_metric_data( Namespace="Animetorrents", MetricData=[{ "MetricName": "retries", "Value": self.metric_retry_count }], )
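# The Spider class above relies on a few module-level names that are not part
# of the snippet: TIMEOUT_STATUS_CODES, TimeOutException, and a @retry
# decorator. A plausible minimal sketch of those assumptions follows; only the
# call signature retry(exceptions, tries=..., delay=..., backoff=...) is taken
# from the usage above, the status-code list and backoff shape are guesses.
import functools
from time import sleep

TIMEOUT_STATUS_CODES = (408, 429, 502, 503, 504)  # assumed set

class TimeOutException(Exception):
    """Raised when the site answers with a timeout-like status code."""

def retry(exceptions, tries=4, delay=3, backoff=2):
    """Retry the wrapped callable on the given exceptions, with exponential backoff."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            remaining, pause = tries, delay
            while remaining > 1:
                try:
                    return func(*args, **kwargs)
                except exceptions:
                    sleep(pause)
                    remaining -= 1
                    pause *= backoff
            return func(*args, **kwargs)  # final attempt; exceptions propagate
        return wrapper
    return decorator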
class Outputter(BaseOutputter):
    @property
    def REQUIRED_FIELDS(self):
        return ['description', 'pubDate', 'title']

    def __init__(self, config):
        BaseOutputter.__init__(self, config)
        self.file = config.get('filename', 'output') + '.xml'
        self.title = config.get('title', 'Sample RSS')
        self.description = config.get('description', 'Sample')
        self.cache = config.get('cache', False)
        self.base_link = config.get('host')
        self.temp_dir = file_util.get_temp_dir()
        self.fg = FeedGenerator()
        self._create_feed()
        self.item_count = 0
        self.filtered_count = 0

    def _create_feed(self):
        fg = self.fg
        fg.id(self.base_link)
        fg.title(self.title)
        fg.language('zh-CN')
        fg.link(href=self.base_link, rel='self')
        fg.description(self.description)
        fg.author(name='Tohsaka')

    def _valid(self, item):
        self.item_count += 1
        valid = BaseOutputter._valid(self, item)
        if not self.cache:
            return valid
        if valid:
            filename = hashlib.md5(item.get('link').encode('utf-8')).hexdigest()
            valid = not file_util.touch(os.path.join(self.temp_dir, filename))
        if not valid:
            logger.debug('%s is filtered', item.get('title'))
            self.filtered_count += 1
        return valid

    def _clear_obsolete_cache(self, days):
        files = os.listdir(self.temp_dir)
        now = time.time()
        removed_count = 0
        for f in files:
            filename = os.path.join(self.temp_dir, f)
            diff = now - os.path.getmtime(filename)
            if diff > SECONDS_OF_DAY * days:
                os.remove(filename)
                removed_count += 1
        if removed_count > 0:
            logger.info(f'Removed {removed_count} obsolete cache files')

    def _output(self):
        filename = os.path.join(self.output_folder, self.file)
        logger.info(
            f'Output to file {filename}. Total items {self.item_count}, filtered {self.filtered_count}'
        )
        self.fg.atom_file(filename)
        if self.cache:
            self._clear_obsolete_cache(14)

    def _add_item(self, item):
        title = item.get('title')
        description = item.get('description')
        link = item.get('link')
        pub_date = item.get('pubDate')
        guid = item.get('id', link)
        entry = self.fg.add_entry()
        entry.title(title)
        entry.link(href=link)
        entry.content(content=description, type='html')
        entry.guid(guid)
        try:
            # Use localize() rather than replace(tzinfo=...): attaching a pytz
            # zone via replace() picks up the zone's LMT offset (+08:06 for
            # Asia/Shanghai) instead of the correct +08:00.
            pub_date = parser.parse(pub_date)
            if pub_date.tzinfo is None:
                pub_date = pytz.timezone('Asia/Shanghai').localize(pub_date)
        except Exception:
            pub_date = datetime.now(pytz.utc).isoformat()
        entry.pubDate(pub_date)
        entry.updated(pub_date)
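# The duplicate filter in _valid() above only works if file_util.touch both
# creates the marker file and reports whether it already existed. That
# contract is implied rather than shown; a minimal sketch of it:
import os

def touch(path):
    """Create *path* if missing; return True if it already existed."""
    existed = os.path.exists(path)
    if existed:
        os.utime(path, None)       # refresh mtime so the cache entry stays fresh
    else:
        open(path, 'a').close()    # first sighting: create the marker file
    return existed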
def rss_feed_for_group(api, group, reposts=True):
    """
    Create an RSS feed based on the group posts

    :param api: VkApiMethod instance; to initialise it,
        api = vk_api.VkApi(USERNAME, PASSWORD).get_api();
    :param group: string, short name of a group, for instance, 'club1'
        in https://vk.com/club1/;
    :param reposts: boolean, False if we do not want to add reposts to the feed
    :returns: FeedGenerator instance, ready for writing XML
    """
    # VK API allows 10000 calls per day with wall.get,
    # so if we're going to refresh a feed every 20 minutes (that's 72 times a
    # day), we should be OK with about 138 groups (if I get it right)

    # Get the first 60 (should be enough) posts from a group
    vargs = {'domain': group, 'count': 60}
    # If a group doesn't have a short name, its URL looks like, for example,
    # vk.com/club526452694, but, in general, a group can have a short name
    # beginning with 'club'. The problem is that VK API doesn't allow getting
    # the posts from the group 'club526452694' if we use that as the short
    # name (it returns an empty list), therefore we have to check for it
    if group[:4] == 'club':
        # So if it's a short name beginning with 'club', we get an exception
        try:
            owner_id = -1 * int(group[4:])
            vargs['owner_id'] = owner_id
            del vargs['domain']
        except ValueError:
            pass

    try:
        posts = api.wall.get(**vargs)['items']
        # Get the name of a group
        group_name = api.groups.getById(group_id=group)[0]['name']
    except VkApiError as error_msg:
        print(error_msg)
        # Without the posts and the group name there is nothing to build,
        # so re-raise instead of falling through with undefined names
        raise

    # Generate the feed
    fg = FeedGenerator()
    fg.title(group_name)
    fg.link(href='https://vk.com/{}/'.format(group))
    fg.description("Vk feed - {}".format(group_name))
    # Get the local timezone object
    local_tz = get_localzone()
    # The feedgen lib desperately wants timezone info in every date
    fg.lastBuildDate(datetime.now(local_tz))

    # Go through the posts...
    for post in posts:
        # We do not need ads, right?
        if post['marked_as_ads']:
            continue
        # If the post is not a repost
        if post.get('copy_history') is None:
            post_data = post_parsing(post, group_name)
        # If it is, pass to post_parsing the dictionary
        # post['copy_history'][0] representing the post
        # which the repost was made from (if we want reposts)
        elif reposts:
            post_data = post_parsing(post['copy_history'][0], group_name)
        else:
            continue
        # ...and create RSS items
        fe = fg.add_entry()
        fe.title(post_data['title'])
        fe.link(href=post_data['link'])
        fe.description(post_data['description'])
        fe.guid(post_data['guid'])
        fe.pubdate(post_data['pubDate'])
    return fg
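# post_parsing() is referenced but not included above; the feed code only
# depends on it returning five keys. A minimal sketch under that assumption
# (the wall-URL format is the usual VK one, but this body is illustrative):
def post_parsing(post, group_name):
    link = 'https://vk.com/wall{}_{}'.format(post['owner_id'], post['id'])
    return {
        'title': group_name,
        'link': link,
        'description': post.get('text', ''),
        'guid': link,
        # VK returns a unix timestamp; feedgen wants an aware datetime
        'pubDate': datetime.fromtimestamp(post['date'], get_localzone()),
    }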
def main(): all_tags = {} post_data = [] for post in get_posts(): out_file = post[len('posts/'):] output, title = get_post_data(post) header, date, tags_raw = title[1], title[2], title.get(6, "") tags = tags_raw.split(",") tags_html = get_html_tags(tags) post_data.append((out_file, title[1], title[2], post, output)) for tag in tags: if tag not in all_tags: all_tags[tag] = [] all_tags[tag].append((out_file, title[1], title[2])) title = title[1] with open('dist/' + out_file, 'w') as f: f.write( TEMPLATE.format(post=output, title=title, subtitle=date, tag=title, tags=tags_html)) post_data.sort(key=lambda post: datetime.strptime(post[2], '%B %d, %Y')) post_data.reverse() home_page = HOME_PAGE home_page += "\n".join([POST_SUMMARY.format(*args) for args in post_data]) with open('dist/index.html', 'w') as f: f.write( TEMPLATE.format(post=home_page, title="", tag=TAG, subtitle="", tags="")) with open('dist/style.css', 'w') as fw: with open('style.css') as fr: fw.write(fr.read()) fg = FeedGenerator() for url, title, date, post, content in reversed(post_data): fe = fg.add_entry() fe.id('http://notes.eatonphil.com/' + url) fe.title(title) fe.link(href='http://notes.eatonphil.com/' + url) fe.pubDate( datetime.strptime(date, '%B %d, %Y').replace(tzinfo=timezone.utc)) fe.content(content) fg.id('http://notes.eatonphil.com/') fg.link(href='http://notes.eatonphil.com/') fg.title(TAG) fg.description(TAG) fg.author(name='Phil Eaton', email='*****@*****.**') fg.language('en') fg.rss_file('dist/rss.xml') if not os.path.exists('dist/tags'): os.makedirs('dist/tags') for tag in all_tags: posts = all_tags[tag] with open('dist/tags/%s.html' % tag, 'w') as f: posts.sort( key=lambda post: datetime.strptime(post[2], '%B %d, %Y')) posts.reverse() tag_page = TAG_PAGE.format(tag) tag_page += "\n".join( [POST_SUMMARY.format(*args) for args in posts]) f.write( TEMPLATE.format(post=tag_page, title="", tag=TAG, subtitle="", tags=""))
"href": "https://github.com/OWASP/CheatSheetSeries", "rel": "alternate" }) feed_generator.language("en") feed_generator.pubDate(current_date) feed_generator.lastBuildDate(current_date) for pull_request in pull_requests: # Take only merged PR if pull_request["merged_at"] is None: continue # Convert merge date from 2019-08-25T06:36:35Z To Sun, 19 May 2002 15:21:36 GMT merge_date_src = pull_request["merged_at"] merge_date_dst = datetime.strptime( merge_date_src, "%Y-%m-%dT%H:%M:%SZ").strftime("%a, %d %B %Y %H:%M:%S GMT") feed_entry = feed_generator.add_entry() feed_entry.id(pull_request["html_url"]) feed_entry.title(pull_request["title"]) feed_entry.link({"href": pull_request["html_url"], "rel": "self"}) feed_entry.link({"href": pull_request["url"], "rel": "alternate"}) feed_entry.pubDate(merge_date_dst) feed_entry.updated(merge_date_dst) contributors = [] for assignee in pull_request["assignees"]: contributors.append({ "name": assignee["login"], "uri": f"https://github.com/{assignee['login']}" }) feed_entry.contributor(contributors) # Save the feed to a XML file
def from_activities(activities, actor=None, title=None, feed_url=None, home_page_url=None, hfeed=None): """Converts ActivityStreams activities to an RSS 2.0 feed. Args: activities: sequence of ActivityStreams activity dicts actor: ActivityStreams actor dict, the author of the feed title: string, the feed title feed_url: string, the URL for this RSS feed home_page_url: string, the home page URL hfeed: dict, parsed mf2 h-feed, if available Returns: unicode string with RSS 2.0 XML """ try: iter(activities) except TypeError: raise TypeError('activities must be iterable') if isinstance(activities, (dict, str)): raise TypeError('activities may not be a dict or string') fg = FeedGenerator() fg.id(feed_url) assert feed_url fg.link(href=feed_url, rel='self') if home_page_url: fg.link(href=home_page_url, rel='alternate') # TODO: parse language from lang attribute: # https://github.com/microformats/mf2py/issues/150 fg.language('en') fg.generator('granary', uri='https://granary.io/') hfeed = hfeed or {} actor = actor or {} image = (util.get_url(hfeed.get('properties', {}), 'photo') or util.get_url(actor, 'image')) if image: fg.image(image) props = hfeed.get('properties') or {} content = microformats2.get_text(util.get_first(props, 'content', '')) summary = util.get_first(props, 'summary', '') desc = content or summary or '-' fg.description(desc) # required fg.title(title or util.ellipsize(desc)) # required latest = None feed_has_enclosure = False for activity in activities: obj = activity.get('object') or activity if obj.get('objectType') == 'person': continue item = fg.add_entry() url = obj.get('url') id = obj.get('id') or url item.id(id) item.link(href=url) item.guid(url, permalink=True) # title (required) title = (obj.get('title') or obj.get('displayName') or util.ellipsize(obj.get('content', '-'))) # strip HTML tags title = util.parse_html(title).get_text('').strip() item.title(title) content = microformats2.render_content(obj, include_location=True, render_attachments=True, render_image=True) if not content: content = obj.get('summary') if content: item.content(content, type='CDATA') categories = [ { 'term': t['displayName'] } for t in obj.get('tags', []) if t.get('displayName') and t.get('verb') not in ('like', 'react', 'share') and t.get('objectType') not in ('article', 'person', 'mention') ] item.category(categories) author = obj.get('author', {}) author = { 'name': author.get('displayName') or author.get('username'), 'uri': author.get('url'), 'email': author.get('email') or '-', } item.author(author) published = obj.get('published') or obj.get('updated') if published and isinstance(published, str): try: dt = mf2util.parse_datetime(published) if not isinstance(dt, datetime): dt = datetime.combine(dt, time.min) if not dt.tzinfo: dt = dt.replace(tzinfo=util.UTC) item.published(dt) if not latest or dt > latest: latest = dt except ValueError: # bad datetime string pass item_has_enclosure = False for att in obj.get('attachments', []): stream = util.get_first(att, 'stream') or att if not stream: continue url = stream.get('url') or '' mime = mimetypes.guess_type(url)[0] or '' if (att.get('objectType') in ENCLOSURE_TYPES or mime and mime.split('/')[0] in ENCLOSURE_TYPES): if item_has_enclosure: logging.info( 'Warning: item %s already has an RSS enclosure, skipping additional enclosure %s', id, url) continue item_has_enclosure = feed_has_enclosure = True item.enclosure(url=url, type=mime, length=str(stream.get('size', ''))) item.load_extension('podcast') duration = stream.get('duration') if duration: 
item.podcast.itunes_duration(duration) if feed_has_enclosure: fg.load_extension('podcast') fg.podcast.itunes_author( actor.get('displayName') or actor.get('username')) if summary: fg.podcast.itunes_summary(summary) fg.podcast.itunes_explicit('no') fg.podcast.itunes_block(False) name = author.get('name') if name: fg.podcast.itunes_author(name) if image: fg.podcast.itunes_image(image) fg.podcast.itunes_category(categories) if latest: fg.lastBuildDate(latest) return fg.rss_str(pretty=True).decode('utf-8')
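# A short usage sketch for from_activities() above, with a made-up activity.
# The field names follow the ActivityStreams shape the function reads
# (object.url, object.content, object.published, object.author):
activities = [{
    'object': {
        'id': 'tag:example.com,2020:123',
        'url': 'https://example.com/posts/123',
        'content': 'Hello, feeds!',
        'published': '2020-01-02T03:04:05Z',
        'author': {'displayName': 'Alice', 'url': 'https://example.com/alice'},
    },
}]
rss_xml = from_activities(activities,
                          title='Example feed',
                          feed_url='https://example.com/feed.rss',
                          home_page_url='https://example.com/')
print(rss_xml)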
def _construct_feeds(self) -> Dict[str, Dict[str, str]]: """ Takes the current content and returns a constructed dictionary of atom-formatted feeds. This method should only be called by the background thread. :return: A dictionary with string keys, one for each board command and one for ``master``. The values are XML-formated feeds. """ def id_generator(name, ts): return ('tag:{feed_domain},{date}:{name}'.format( feed_domain=self.feed_domain, date=datetime.fromtimestamp(ts).strftime('%Y-%m-%d'), name=name)) def translate_content_to_xhtml(content): """Try to render a board post as faithfully as possible in xhtml.""" # Unfortunately most readers I find strip the style attribute so we'll probably have to work on this. return '<p style="white-space:pre-wrap;">{}</p>'.format( escape(content).replace('\n', '<br />')) # TODO(hyena): It would be more useful if these links were absolute. # Consider adding that if we ever make the web-app aware of its own # url. new_feeds = {} master_feedgen = FeedGenerator() master_feedgen.title("SpinDizzy Boards Master") master_feedgen.link({'href': '/sdb/atom', 'rel': 'self'}) master_feedgen.description("All posts as scraped from SpinDizzy") master_feedgen.id(id_generator('master', 0)) master_entry_list = [] for board_command in self.current_content: board_feedgen = FeedGenerator() board_feedgen.title("SpinDizzy {}".format( self.board_names[board_command])) board_feedgen.link({ 'href': '/sdb/{}/atom'.format(board_command), 'rel': 'self' }) board_feedgen.description("Posts scraped from {}".format( self.board_names[board_command])) board_feedgen.id(id_generator(board_command, 0)) for post in sorted(self.current_content[board_command].values(), key=lambda p: -p['time']): entry = board_feedgen.add_entry() entry.title(post['title']) # RSS insists on an email which is annoying. entry.author({'name': post['owner_name']}) entry.updated(datetime.fromtimestamp(post['time'], tz=self.tz)) entry.link({ 'href': '/sdb/{}/{}'.format(board_command, post['time']), 'rel': 'alternate' }) entry.content(translate_content_to_xhtml(post['content']), type='xhtml') entry.id( id_generator(name='/sdb/{}/{}'.format( board_command, post['time']), ts=post['time'])) master_entry_list.append(entry) new_feeds[board_command] = board_feedgen.atom_str(pretty=True) # Add the entries to the master feed in the right order. for entry in sorted(master_entry_list, key=lambda e: -e.updated().timestamp()): master_feedgen.add_entry(feedEntry=entry) new_feeds['master'] = master_feedgen.atom_str(pretty=True) return new_feeds
def feed(uri, verif): uri = base64.urlsafe_b64decode(uri.encode('utf8')) verif = base64.urlsafe_b64decode(verif.encode('utf8')) mac = hmac.new(HMAC_KEY, uri, digestmod=pyblake2.blake2s).digest() if not hmac.compare_digest(verif, mac): abort(403) uri = uri.decode('utf8') verify_uri(uri) cachefile = pathfor(uri, '.picklejson', FEED_DIR) modified = etag = None cached = None if os.path.isfile(cachefile): try: with open(cachefile, 'rb') as f: cached = jsonpickle.decode(f.read()) app.logger.debug("Loaded cache from cachefile:%r", cachefile) etag = cached.etag if 'etag' in cached else None modified = cached.modified if 'modified' in cached else None except Exception as e: app.logger.warn("Could not load cache:%r", e) app.logger.debug("Parse feed: %r; etag:%r; modified:%r", uri, etag, modified) parsed = feedparser.parse(uri, etag=etag, modified=modified) app.logger.debug("Parsed feed: %r; %r", uri, 'status' in parsed and parsed.status) if parsed.status < 200 or parsed.status >= 400: app.logger.warn("Non okay status code, 404?") abort(404) if cached and not parsed.entries: parsed = cached def save_to_cache(): with tempfile.NamedTemporaryFile(delete=False, dir=FEED_DIR, mode='w') as f: encoded = jsonpickle.encode(parsed) f.write(encoded) f.flush() os.rename(f.name, cachefile) os.chmod(cachefile, 0o644) app.logger.debug("Saved cache to cachefile:%r", cachefile) def done(fut): try: fut.result() except Exception: app.logger.exception("Error saving feed cache") pool.submit(save_to_cache).add_done_callback(done) feed = FeedGenerator() feed.id(uri) feed.title(parsed.feed.get('title', None) or '???') feed.link(href=parsed.feed.get('link', None) or 'about:blank') feed.description(parsed.feed.get('description', None) or '???') if 'image' in parsed.feed and 'href' in parsed.feed.image: feed.image(parsed.feed.image.href) for e in parsed.entries: try: entry = feed.add_entry(order='append') id = e.id if 'id' in e else None for l in (e.links if 'links' in e else []): if l.rel == 'enclosure' and 'href' in l: if not id: id = l.href storename = transcoded_href(l.href) entry.enclosure(urljoin(request.url, storename), l.get('size', None), l.get('type', OPUS_TYPE)) elif l.rel == 'alternate' and 'href' in l: entry.link(**l) for c in (e.content if 'content' in e else []): if 'type' in c and c.type.startswith('text/html'): entry.content(content=c.value, type='html') else: entry.content(content=c.value, type='text') entry.id(id) entry.title(e.get('title', None) or '???') entry.description(e.get('description', None) or '???') if 'updated_parsed' in e and e.updated_parsed: entry.updated( datetime.fromtimestamp(mktime(e.updated_parsed), pytz.UTC)) if 'published_parsed' in e and e.published_parsed: entry.published( datetime.fromtimestamp(mktime(e.published_parsed), pytz.UTC)) finally: pass try: resp = make_response(feed.rss_str(pretty=True)) resp.headers['content-type'] = 'application/xml' return resp except BaseException as e: raise e
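# pathfor(), verify_uri() and transcoded_href() above come from elsewhere in
# the app. The cache-path helper in particular only has to map a URI to a
# stable filename; a sketch of one way to do that (the hash choice is an
# assumption):
import hashlib
import os

def pathfor(uri, suffix, directory):
    """Map a feed URI to a deterministic cache file path."""
    digest = hashlib.sha256(uri.encode('utf8')).hexdigest()
    return os.path.join(directory, digest + suffix)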
def get(self, playlist):
    playlist = playlist.split('/')
    if len(playlist) < 2:
        playlist.append('video')
    playlist_name = '/'.join(playlist)
    self.set_header('Content-type', 'application/rss+xml')
    if playlist_name in playlist_feed and \
            playlist_feed[playlist_name]['expire'] > datetime.datetime.now():
        self.write(playlist_feed[playlist_name]['feed'])
        self.finish()
        return
    calls = 0
    payload = {'part': 'snippet', 'id': playlist[0], 'key': key}
    request = requests.get(
        'https://www.googleapis.com/youtube/v3/playlists', params=payload)
    calls += 1
    response = request.json()
    if request.status_code == 200:
        logging.debug('Downloaded Playlist Information')
    else:
        logging.error('Error Downloading Playlist: %s', request.reason)
        self.send_error(reason='Error Downloading Playlist')
        return
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.generator('PodTube (python-feedgen)', __version__,
                 'https://github.com/aquacash5/PodTube')
    snippet = response['items'][0]['snippet']
    icon = max(snippet['thumbnails'],
               key=lambda x: snippet['thumbnails'][x]['width'])
    logging.info('Playlist: %s (%s)', playlist[0], snippet['title'])
    fg.title(snippet['title'])
    fg.id('http://' + self.request.host + self.request.uri)
    fg.description(snippet['description'] or ' ')
    fg.author(name='Podtube',
              email='*****@*****.**',
              uri='https://github.com/aquacash5/PodTube')
    fg.podcast.itunes_author(snippet['channelTitle'])
    fg.image(snippet['thumbnails'][icon]['url'])
    # playlist is a list at this point, so link to its first element,
    # the actual playlist id
    fg.link(href=f'http://youtube.com/playlist/?list={playlist[0]}', rel='self')
    fg.language('en-US')
    fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
    fg.podcast.itunes_explicit('no')
    fg.podcast.itunes_owner(name='Podtube', email='*****@*****.**')
    fg.podcast.itunes_summary(snippet['description'])
    fg.podcast.itunes_category(cat='Technology')
    fg.updated(str(datetime.datetime.utcnow()) + 'Z')
    video = None
    response = {'nextPageToken': ''}
    while 'nextPageToken' in response.keys():
        payload = {
            'part': 'snippet',
            'maxResults': 50,
            'playlistId': playlist[0],
            'key': key,
            'pageToken': response['nextPageToken']
        }
        request = requests.get(
            'https://www.googleapis.com/youtube/v3/playlistItems',
            params=payload)
        calls += 1
        response = request.json()
        if request.status_code == 200:
            logging.debug('Downloaded Playlist Information')
        else:
            logging.error('Error Downloading Playlist: %s', request.reason)
            self.send_error(reason='Error Downloading Playlist Items')
            return
        for item in response['items']:
            snippet = item['snippet']
            current_video = snippet['resourceId']['videoId']
            if 'Private' in snippet['title']:
                continue
            logging.debug('PlaylistVideo: %s (%s)', current_video,
                          snippet['title'])
            fe = fg.add_entry()
            fe.title(snippet['title'])
            fe.id(current_video)
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fe.updated(snippet['publishedAt'])
            if playlist[1] == 'video':
                fe.enclosure(
                    url=f'http://{self.request.host}/video/{current_video}',
                    type="video/mp4")
            elif playlist[1] == 'audio':
                fe.enclosure(
                    url=f'http://{self.request.host}/audio/{current_video}',
                    type="audio/mpeg")
            fe.author(name=snippet['channelTitle'])
            fe.podcast.itunes_author(snippet['channelTitle'])
            fe.pubDate(snippet['publishedAt'])
            fe.link(href=f'http://www.youtube.com/watch?v={current_video}',
                    title=snippet['title'])
            fe.podcast.itunes_summary(snippet['description'])
            fe.description(snippet['description'])
            if not video or video['expire'] < fe.pubDate():
                video = {'video': fe.id(), 'expire': fe.pubDate()}
    feed = {
        'feed': fg.rss_str(),
        'expire': datetime.datetime.now() + datetime.timedelta(hours=calls)
    }
    playlist_feed[playlist_name] = feed
    self.write(feed['feed'])
    self.finish()
    video = video['video']
    mp3_file = 'audio/{}.mp3'.format(video)
    if playlist[1] == 'audio' and not os.path.exists(mp3_file) \
            and video not in conversion_queue.keys():
        conversion_queue[video] = {
            'status': False,
            'added': datetime.datetime.now()
        }
def render_atom(datasette, request, sql, columns, rows, database, table, query_name, view_name, data): from datasette.views.base import DatasetteError if not REQUIRED_COLUMNS.issubset(columns): raise DatasetteError( "SQL query must return columns {}".format( ", ".join(REQUIRED_COLUMNS)), status=400, ) fg = FeedGenerator() fg.generator( generator="Datasette", version=__version__, uri="https://github.com/simonw/datasette", ) fg.id(request.url) fg.link(href=request.url, rel="self") fg.updated(max(row["atom_updated"] for row in rows)) title = request.args.get("_feed_title", sql) if table: title += "/" + table if data.get("human_description_en"): title += ": " + data["human_description_en"] # If this is a canned query the configured title for that over-rides all others if query_name: try: title = datasette.metadata( database=database)["queries"][query_name]["title"] except (KeyError, TypeError): pass fg.title(title) clean_function = clean if query_name: # Check allow_unsafe_html_in_canned_queries plugin_config = datasette.plugin_config("datasette-atom") if plugin_config: allow_unsafe_html_in_canned_queries = plugin_config.get( "allow_unsafe_html_in_canned_queries") if allow_unsafe_html_in_canned_queries is True: clean_function = lambda s: s elif isinstance(allow_unsafe_html_in_canned_queries, dict): allowlist = allow_unsafe_html_in_canned_queries.get( database) or [] if query_name in allowlist: clean_function = lambda s: s # And the rows for row in reversed(rows): entry = fg.add_entry() entry.id(str(row["atom_id"])) if "atom_content_html" in columns: entry.content(clean_function(row["atom_content_html"]), type="html") elif "atom_content" in columns: entry.content(row["atom_content"], type="text") entry.updated(row["atom_updated"]) entry.title(str(row["atom_title"])) # atom_link is optional if "atom_link" in columns: entry.link(href=row["atom_link"]) if "atom_author_name" in columns and row["atom_author_name"]: author = { "name": row["atom_author_name"], } for key in ("uri", "email"): colname = "atom_author_{}".format(key) if colname in columns and row[colname]: author[key] = row[colname] entry.author(author) return Response( fg.atom_str(pretty=True), content_type="application/xml; charset=utf-8", status=200, )
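# REQUIRED_COLUMNS and the default clean() function are defined outside this
# snippet. Plausible minimal definitions consistent with how they are used;
# the bleach-based sanitizer is an assumption about the plugin's approach,
# not a copy of it:
import bleach

REQUIRED_COLUMNS = {"atom_id", "atom_updated", "atom_title"}

def clean(html):
    """Strip everything but a conservative set of tags from untrusted HTML."""
    return bleach.clean(
        html,
        tags=["a", "abbr", "b", "blockquote", "code", "em", "i", "li",
              "ol", "p", "pre", "strong", "ul"],
        attributes={"a": ["href", "title"]},
        strip=True,
    )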
def gensite(rootdir): """ reads the site config, loads the template, and processes each file it finds """ site_config = siteconfig.SiteConfig(rootdir) template = GenSiteTemplate(os.path.join(rootdir, site_config.template)) destdir = os.path.join(rootdir, site_config.destination_dir) sourcedir = os.path.join(rootdir, site_config.source_dir) files = gather_source_files(sourcedir, [".md"], site_config) articles, unpublished_articles = get_articles(files) files_to_be_regenerated = [ x for x in articles if needs_to_be_regenerated(destdir, x) ] print("Will generate ", str(len(files_to_be_regenerated)), "files") article_menu = generate_navigation_header(site_config) for f in files_to_be_regenerated: extra_article_mustache_tags = {"article_menu": article_menu} template.process_source_file( f, destdir, site_config, additional_mustache_tags=extra_article_mustache_tags) static_pages = [ e for e in files if (e.template_type() == "static_page" and e.publish() == True) ] static_pages_to_be_regenerated = [ x for x in static_pages if needs_to_be_regenerated(destdir, x) ] if (len(static_pages_to_be_regenerated) != 0): print("Will generate ", str(len(static_pages_to_be_regenerated)), " static pages") for f in static_pages_to_be_regenerated: extra_article_mustache_tags = {"article_menu": article_menu} template.process_source_file( f, destdir, site_config, additional_mustache_tags=extra_article_mustache_tags) template.copy_template_files(destdir) """ generate feed """ fg = FeedGenerator() fg.id(site_config.blog_name) fg.language("en") fg.title(site_config.blog_name) fg.link(href=site_config.root_url, rel='alternate') fg.description(site_config.blog_description) for entry in articles: dest_file_name = entry.dest_file_name() fe = fg.add_entry() link = site_config.root_url + dest_file_name fe.id(link) fe.title(entry.title()) fe.link(link={"href": link}) if (entry.summary == ""): fe.summary(entry.title()) else: fe.summary(entry.summary) date = datetime.datetime.fromtimestamp( time.mktime(entry.original_date), UTC()) fe.published(date) fe.updated(date) fg.rss_file(os.path.join(destdir, 'rss.xml'), pretty=True) fg.atom_file(os.path.join(destdir, 'atom.xml'), pretty=True) index_element = template.generate_index(articles) index = [e for e in files if e.template_type() == "index"][0] i = str(lxml.etree.tostring(index_element, pretty_print=True), "utf-8") template.process_source_file(index, destdir, site_config, additional_mustache_tags={"index_content": i}, force_write=True) """ tag cloud stuff """ tag_cloud_template = [ e for e in files if e.template_type() == "tag_cloud" ][0] print(tag_cloud_template) tag_cloud_json = json.dumps(build_tagging_data(site_config, articles), indent=2, sort_keys=True) template.process_source_file(tag_cloud_template, destdir, site_config, additional_mustache_tags={ "tag_json": tag_cloud_json, "article_menu": article_menu }, force_write=True) """ copy static files """ static_files = get_files_in_dir(sourcedir) num_static_files = 0 for s in static_files: t = os.path.join(sourcedir, s) if (os.path.splitext(s)[1] == ".md"): continue if (s == "config.js"): continue f = FileDef(os.path.join(sourcedir, s), cache=False, relative_path=os.path.split(s)[0]) if f.copy_if_required(destdir): num_static_files += 1 print("Copied " + str(num_static_files) + " static files") if (len(unpublished_articles) != 0): print( "The following files are marked as unpublished and were not processed: " ) for u in unpublished_articles: print(" ", u.file_name)
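# The UTC() tzinfo passed to datetime.fromtimestamp() above is assumed to be
# the classic fixed-offset recipe; on Python 3 it could simply alias
# datetime.timezone.utc. A sketch of the assumed class:
import datetime

class UTC(datetime.tzinfo):
    """Fixed UTC tzinfo (datetime.timezone.utc is the modern equivalent)."""
    def utcoffset(self, dt):
        return datetime.timedelta(0)
    def tzname(self, dt):
        return "UTC"
    def dst(self, dt):
        return datetime.timedelta(0)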
async def playlist(request, playlist_id, return_type='video'):
    log.info(f'Playlist: {playlist_id}')
    playlist_name = f'{playlist_id}/{return_type}'
    if playlist_name in playlist_feed and \
            playlist_feed[playlist_name]['expire'] > datetime.now():
        return raw(playlist_feed[playlist_name]['feed'],
                   content_type='application/rss+xml')
    calls = 0
    payload = {'part': 'snippet', 'id': playlist_id, 'key': KEY}
    log.debug('Downloaded Playlist Information')
    response = json.loads(await get(
        'https://www.googleapis.com/youtube/v3/playlists', params=payload))
    calls += 1
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.generator('PodTube', __version__,
                 'https://github.com/aquacash5/PodTube')
    snippet = response['items'][0]['snippet']
    icon = max(snippet['thumbnails'],
               key=lambda x: snippet['thumbnails'][x]['width'])
    fg.title(snippet['title'])
    fg.id(f'http://{request.headers["host"]}{request.url}')
    fg.description(snippet['description'] or ' ')
    fg.author(name=snippet['channelTitle'])
    fg.image(snippet['thumbnails'][icon]['url'])
    fg.link(href=f'https://www.youtube.com/playlist?list={playlist_id}')
    fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
    fg.podcast.itunes_summary(snippet['description'])
    fg.podcast.itunes_category('Technology', 'Podcasting')
    fg.updated(f'{str(datetime.utcnow())}Z')
    response = {'nextPageToken': ''}
    while 'nextPageToken' in response.keys():
        payload = {
            'part': 'snippet',
            'maxResults': 50,
            'playlistId': playlist_id,
            'key': KEY,
            'pageToken': response['nextPageToken']
        }
        response = json.loads(await get(
            'https://www.googleapis.com/youtube/v3/playlistItems',
            params=payload))
        calls += 1
        for item in response['items']:
            snippet = item['snippet']
            current_video = snippet['resourceId']['videoId']
            if 'Private' in snippet['title']:
                continue
            log.debug(f'PlaylistVideo: {current_video} {snippet["title"]}')
            fe = fg.add_entry()
            fe.title(snippet['title'])
            fe.id(current_video)
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fe.updated(snippet['publishedAt'])
            if return_type == 'audio':
                fe.enclosure(
                    url=f'http://{request.headers["host"]}/audio/{current_video}',
                    type="audio/mpeg")
            else:
                fe.enclosure(
                    url=f'http://{request.headers["host"]}/video/{current_video}',
                    type="video/mp4")
            fe.author(name=snippet['channelTitle'])
            fe.podcast.itunes_author(snippet['channelTitle'])
            fe.pubdate(snippet['publishedAt'])
            fe.link(href='http://www.youtube.com/watch?v=' + current_video,
                    title=snippet['title'])
            fe.podcast.itunes_summary(snippet['description'])
            fe.description(snippet['description'])
            await sleep(0)
    feed = {
        'feed': fg.rss_str(),
        'expire': datetime.now() + timedelta(hours=calls)
    }
    playlist_feed[playlist_name] = feed
    return raw(feed['feed'], content_type='application/rss+xml')
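# The module-level get() awaited throughout this handler is not shown; from
# the call sites it takes a URL plus query params and resolves to the response
# body as text. A minimal aiohttp-based sketch of that assumption:
import aiohttp

async def get(url, params=None):
    """Fetch *url* and return the response body as text."""
    async with aiohttp.ClientSession() as session:
        async with session.get(url, params=params) as resp:
            return await resp.text()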
class Feed:
    def __init__(self,
                 url: str,
                 name: str,
                 email: str,
                 title: str = None,
                 generator: str = None,
                 generator_version: str = None,
                 logo: str = None,
                 icon: str = None,
                 description: str = None,
                 language: str = None) -> None:
        self.name = name
        self.email = email
        self.fg = FeedGenerator()
        self.fg.id(url + "feed.atom")
        self.fg.link(href=url + "feed.xml", rel="self")
        self.fg.link(href=url, rel="alternate")
        self.fg.author(name=name, email=email)
        self.fg.contributor(name=name, email=email)
        self.fg.managingEditor(email)
        self.fg.webMaster(email)
        self.fg.title(title)
        self.fg.generator(generator=generator, version=generator_version)
        self.fg.logo(logo)
        self.fg.icon(icon)
        self.fg.description(description)
        self.fg.language(language)

    def add(self, article: Article) -> None:
        feed_entry = self.fg.add_entry()
        feed_entry.id(article.url)
        feed_entry.title(article.title)
        feed_entry.link(href=article.url)
        feed_entry.guid(guid=article.url, permalink=True)
        feed_entry.author(name=self.name, email=self.email)
        feed_entry.summary(article.description or article.snippet)
        feed_entry.content(content=article.content, type="CDATA")
        # Set the dates once, inside the branch: the date may be missing
        if article.date:
            feed_entry.published(article.date)
            feed_entry.updated(article.date)
        else:
            epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
            feed_entry.published(epoch)
            feed_entry.updated(epoch)

    def add_from_blog(self, url: str) -> None:
        blog = Blog(url)
        if not self.fg.title():
            self.fg.title(blog.title)
        for article in blog.articles:
            self.add(article)

    def atom(self) -> bytes:
        return self.fg.atom_str(pretty=True)

    def rss(self) -> bytes:
        return self.fg.rss_str(pretty=True)

    def atom_file(self, filename: str = "feed.atom") -> None:
        self.fg.atom_file(filename, pretty=True)

    def rss_file(self, filename: str = "feed.xml") -> None:
        self.fg.rss_file(filename, pretty=True)
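# Example usage of the Feed wrapper above (URL, names and title are
# illustrative):
feed = Feed(url="https://example.com/",
            name="Jane Doe",
            email="jane@example.com",
            title="Example blog",
            description="Posts from example.com",
            language="en")
feed.add_from_blog("https://example.com/")
feed.rss_file("feed.xml")    # writes RSS
feed.atom_file("feed.atom")  # writes Atom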
def pbs_show(slug): print('processing %s' % slug) cache_glob = list(p.glob(slug + "*")) if cache_glob: recent_cache_path = sorted(cache_glob)[-1] cache_time_str = re.search(slug + '.([^\.]+).xml', recent_cache_path.name).group(1) cache_time = datetime.datetime.strptime(cache_time_str, '%Y-%m-%dT%H:%M:%S') print(cache_time_str) print(datetime.datetime.now().isoformat()) if cache_time + datetime.timedelta(days=7) > datetime.datetime.now(): # cached file is still valid; return that return send_file(recent_cache_path.open(), mimetype='application/rss+xml') show_url = show_format.format(slug=slug) show_info = requests.get(show_url).json() show_title = show_info['name'] feed = FeedGenerator() feed.load_extension('podcast') feed.podcast.itunes_category('Music') feed.id(show_url) feed.link(href=show_website_format.format(slug=slug), rel='alternate') feed.title(show_title) desc = show_info['description'] presenters = show_info['broadcasters'] if presenters: feed.author(name=presenters) feed.description(desc + "Presented by " + presenters + ".") else: feed.description(desc) feed.logo(show_info['profileImageUrl']) feed.language('en') episodes = requests.get(show_info['episodesRestUrl']).json() episode_times = [] for episode in reversed(episodes): start_time = datetime.datetime.strptime(episode['start'], '%Y-%m-%d %H:%M:%S') episode_times.append(start_time) title = "{} {}".format(show_title, start_time.date()) media_url = media_format.format( slug=slug, timestamp=start_time.strftime("%Y%m%d%H%M")) feed_entry = feed.add_entry() feed_entry.id(media_url) feed_entry.title(title) feed_entry.author(name=presenters) feed_entry.enclosure(media_url, 0, 'audio/mp4') try: ep_data = requests.get(episode['episodeRestUrl']).json() tracklist_data = requests.get(ep_data['playlistRestUrl']).json() tracklist = "<h3>Tracklist</h3>" + "<br>".join( [track['title'] for track in tracklist_data]) feed_entry.description(tracklist) except: feed_entry.description(title) if episode_times: # remove all old cache files for this program for cachefile in p.glob(slug + "*"): cachefile.unlink() recent_ep_time = sorted(episode_times)[-1].isoformat() feed.rss_file(CACHE_DIR + "/" + slug + " " + recent_ep_time + ".xml", pretty=True) return Response(feed.rss_str(pretty=True), mimetype='application/rss+xml')
def generate_json_and_rss(event):
    bsnh = Airtable(at_base_key, "Brazilian shirtname holders",
                    api_key=airtable_api_key).get_all(max_records=1000)
    simple_bsnh_unmapped = {b['id']: b['fields'] for b in bsnh}
    simple_bsnh = map_fields(simple_bsnh_unmapped, {
        'Name': "name",
        'Awarded': "awarded",
        'Awardee': "awardee"
    })

    episodes = Airtable(at_base_key, "Episodes",
                        api_key=airtable_api_key).get_all(max_records=1000)
    simple_episodes_unsorted = {e['id']: e['fields'] for e in episodes}
    simple_episodes_unmapped = {
        k: v
        for (k, v) in sorted(simple_episodes_unsorted.items(),
                             key=lambda x: x[1]['Date'],
                             reverse=True)
    }
    simple_episodes = map_fields(
        simple_episodes_unmapped, {
            'Date': 'd',
            'Podcast in archive': 'a',
            'Show took place': 't',
            'Presenter': 'p',
            'Experts': 'e',
            'Reason for show not airing': 'na',
            'Show particularities': 'sp'
        })
    for f in simple_episodes.values():
        if 'p' in f:
            f['p'] = f.pop('p')[0]

    experts = Airtable(at_base_key, "Experts",
                       api_key=airtable_api_key).get_all()
    simple_experts_unmapped = {
        e['id']: prune_field(e['fields'], "Episodes")
        for e in experts
    }
    simple_experts = map_fields(
        simple_experts_unmapped, {
            'Active': 'active',
            'Bio': 'bio',
            'Brazilian shirtname': 'bsn',
            'Instagram': 'instagram',
            'Name': 'name',
            'Region': 'region',
            'Twitter': 'twitter',
            'Website': 'website'
        })

    presenters = Airtable(at_base_key, "Presenters",
                          api_key=airtable_api_key).get_all()
    simple_presenters_unmapped = {
        p['id']: prune_field(p['fields'], "Episodes")
        for p in presenters
    }
    simple_presenters = map_fields(
        simple_presenters_unmapped, {
            'Name': 'name',
            'Brazilian shirtname': 'bsn',
            'Bio': 'bio',
            'Twitter': 'twitter'
        })

    full_data = {
        'bsnh': simple_bsnh,
        'episodes': simple_episodes,
        'experts': simple_experts,
        'presenters': simple_presenters
    }

    # Generate the RSS feed
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.title('World Football Phone In Podcast Archive')
    fg.link(href='http://worldfootballphonein.com', rel='alternate')
    fg.logo('http://worldfootballphonein.com/img/profile.jpg')
    fg.subtitle(
        'Unofficial, fan-curated archive of World Football Phone In show.')
    fg.link(href='http://worldfootballphonein.com/podcasts/rss.xml',
            rel='self')
    fg.language('en')
    for e in simple_episodes.values():
        if 'a' not in e:
            continue
        # No stray space after the scheme, or the enclosure URL is invalid
        e_url = f"https://worldfootballphonein.com/podcasts/{e['d'].replace('-', '')}.mp3"
        fe = fg.add_entry()
        fe.id(e_url)
        fe.title('WFPI episode for ' + e['d'])
        fe.description('Footy talk.')
        fe.enclosure(e_url, 0, 'audio/mpeg')

    db_js = "var wfpiDB=" + json.dumps(full_data, separators=(',', ':')) + ";"
    if "GCP_PROJECT" in os.environ:
        storage_client = storage.Client()
        bucket = storage_client.bucket("wfpi-podcasts-archive")
        db = bucket.blob("db.js")
        db.upload_from_string(db_js)
        rss = bucket.blob("rss.xml")
        rss.upload_from_string(fg.rss_str())
    else:
        with open('db.js', 'w') as outfile:
            outfile.write(db_js)
        fg.rss_file('rss.xml')
    return "OK"
def index(): global messages limit_tag = request.args.get('tag') # Sort by last_changed and add the uuid which is usually the key.. sorted_watches = [] for uuid, watch in datastore.data['watching'].items(): if limit_tag != None: # Support for comma separated list of tags. for tag_in_watch in watch['tag'].split(','): tag_in_watch = tag_in_watch.strip() if tag_in_watch == limit_tag: watch['uuid'] = uuid sorted_watches.append(watch) else: watch['uuid'] = uuid sorted_watches.append(watch) sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True) existing_tags = datastore.get_all_tags() rss = request.args.get('rss') if rss: fg = FeedGenerator() fg.title('changedetection.io') fg.description('Feed description') fg.link(href='https://changedetection.io') for watch in sorted_watches: if not watch['viewed']: fe = fg.add_entry() fe.title(watch['url']) fe.link(href=watch['url']) fe.description(watch['url']) fe.guid(watch['uuid'], permalink=False) dt = datetime.datetime.fromtimestamp( int(watch['newest_history_key'])) dt = dt.replace(tzinfo=pytz.UTC) fe.pubDate(dt) response = make_response(fg.rss_str()) response.headers.set('Content-Type', 'application/rss+xml') return response else: output = render_template( "watch-overview.html", watches=sorted_watches, messages=messages, tags=existing_tags, active_tag=limit_tag, has_unviewed=datastore.data['has_unviewed']) # Show messages but once. messages = [] return output
def weather(request, latitudelongitude): # API apiKey = 'db88a36f0e9a8b4b62252702452bab42' url = 'https://api.darksky.net/forecast/' googleMapsApiKey = 'AIzaSyDg9DQ1jVpiyjJtAhy01KhDOgWgYBy6tOw' # parse input positionParsed = parseQueryToPosition(latitudelongitude) positionObject = Position(positionParsed) # build URL call callURL = url + apiKey + '/' + positionObject.langitude + ',' + positionObject.longitude # Call API response = requests.get(callURL) # DEBUG: convert to Str type (which is json string) for debug purposes #json_string = json.dumps(response.json()) #print (type(json_string)) # Convert response string into JSON type weatherData = response.json() #print (weatherData.keys()) # Get necessary info currentWeather = weatherData['currently'] temperature = str(int(currentWeather['temperature'])) summary = currentWeather['summary'] # Get alert if any if 'alerts' in weatherData: alerts = weatherData['alerts'][0] alertsTitle = alerts['title'] alertsURI = alerts['uri'] else: alertsTitle = '' #print (alertsTitle + ' -- ' + alertsURI) # Weather hyperlink weatherHyperLink = 'http://forecast.weather.gov/MapClick.php?' query = { 'lat' : positionObject.langitude, 'lon' : positionObject.longitude} weatherHyperLink = weatherHyperLink + urllib.parse.urlencode(query) # Set RSS title title = ' | Temperature: ' + temperature + '°F | '+ 'Overall, it is ' + summary if alertsTitle != '': title += ' | Watch out! ' + alertsTitle # Get place in GPS place = getplace(positionObject.langitude, positionObject.longitude, googleMapsApiKey) # Build RSS channel feedChannel = FeedGenerator() feedChannel.id('weather_' + str(time.time())) feedChannel.title('WeatherPy - ' + place) feedChannel.author( {'name':'Ben','email':'*****@*****.**'} ) feedChannel.subtitle('Powered by Dark Sky API') feedChannel.link( href=weatherHyperLink, rel='self' ) feedChannel.language('en') # Build RSS weather item/entry feedEntry = feedChannel.add_entry() feedEntry.id('weather_' + str(time.time())) feedEntry.title(place + title) feedEntry.link( href=weatherHyperLink, rel='self' ) # Get the RSS feed as string rssFeed = feedChannel.rss_str(pretty=True) #print (rssfeed) # return as HTTPresponse return HttpResponse(rssFeed, content_type='application/xhtml+xml,application/xml')
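# parseQueryToPosition and Position are used but not defined above; a minimal
# sketch consistent with the call sites. The original attribute spelling
# 'langitude' is kept so the view code above continues to work:
def parseQueryToPosition(latitudelongitude):
    """Split a 'lat,lon' query string into its two components."""
    lat, lon = latitudelongitude.split(',')
    return (lat.strip(), lon.strip())

class Position:
    def __init__(self, parsed):
        # 'langitude' (sic) matches the attribute name used by the view above
        self.langitude, self.longitude = parsed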
) fg.subtitle( 'This site documents the Ministry of Justice (MoJ) security policies and guidance.' ) fg.link(href='https://security-guidance.service.justice.gov.uk/', rel='self') fg.language('en') fg.contributor(name='Ministry of Justice', email='*****@*****.**') with open('../changeLog.csv') as csvDataFile: csvReader = csv.reader(csvDataFile) loopCounter = 0 entryList = [] ditaEntries = [] for row in csvReader: entryList.append(fg.add_entry()) entryList[loopCounter].id("" + siteURL + row[0] + "") entryList[loopCounter].title("" + row[1] + "") entryList[loopCounter].description("" + row[2] + "") entryList[loopCounter].link(href="" + siteURL + row[3] + "") entryList[loopCounter].pubDate("" + row[4] + "") ditaEntries.insert( 0, "<dlentry><dt>" + row[4] + " <xref href='" + siteURL + row[3] + "' format='html' scope='external'>" + row[1] + "</xref></dt><dd>" + row[2] + "</dd></dlentry>") loopCounter = loopCounter + 1 atomfeed = fg.atom_str(pretty=True) rssfeed = fg.rss_str(pretty=True) fg.atom_file('atom.xml') fg.rss_file('rss.xml')