def serialize_category_atom(category, url, user, event_filter):
    """Export the events in a category to Atom.

    :param category: The category to export
    :param url: The URL of the feed
    :param user: The user who needs to be able to access the events
    :param event_filter: A SQLalchemy criterion to restrict which events
                         will be returned.  Usually something involving the
                         start/end date of the event.
    """
    query = (Event.query
             .filter(Event.category_chain_overlaps(category.id),
                     ~Event.is_deleted,
                     event_filter)
             .options(load_only('id', 'category_id', 'start_dt', 'title', 'description',
                                'protection_mode', 'access_key'),
                      subqueryload('acl_entries'))
             .order_by(Event.start_dt))
    events = [e for e in query if e.can_access(user)]

    feed = FeedGenerator()
    feed.id(url)
    feed.title(f'Indico Feed [{category.title}]')
    feed.link(href=url, rel='self')

    for event in events:
        entry = feed.add_entry(order='append')
        entry.id(event.external_url)
        entry.title(event.title)
        entry.summary(sanitize_html(str(event.description)) or None, type='html')
        entry.link(href=event.external_url)
        entry.updated(event.start_dt)
    return BytesIO(feed.atom_str(pretty=True))
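# A hedged aside on the pattern above (not Indico code; the names below are
# illustrative): feedgen's atom_str() returns bytes, so wrapping it in BytesIO
# yields a file-like object that helpers such as Flask's send_file can stream.
from io import BytesIO
from feedgen.feed import FeedGenerator

def minimal_atom_stream(url, title):
    fg = FeedGenerator()
    fg.id(url)                     # Atom requires a feed-level id
    fg.title(title)                # ... and a title
    fg.link(href=url, rel='self')
    return BytesIO(fg.atom_str(pretty=True))  # atom_str() returns bytes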
class TestExtensionDc(unittest.TestCase):
    def setUp(self):
        self.fg = FeedGenerator()
        self.fg.load_extension('dc')
        self.fg.title('title')
        self.fg.link(href='http://example.com', rel='self')
        self.fg.description('description')

    def test_entryLoadExtension(self):
        fe = self.fg.add_item()
        try:
            fe.load_extension('dc')
        except ImportError:
            pass  # Extension already loaded

    def test_elements(self):
        for method in dir(self.fg.dc):
            if method.startswith('dc_'):
                m = getattr(self.fg.dc, method)
                m(method)
                assert m() == [method]
        self.fg.id('123')
        assert self.fg.atom_str()
        assert self.fg.rss_str()
def advisory_atom():
    data = get_advisory_data()['published'][:TRACKER_FEED_ADVISORY_ENTRIES]

    feed = FeedGenerator()
    feed.id(TRACKER_ISSUE_URL.format('advisory'))
    feed.title('Arch Linux Security Advisories')
    feed.subtitle('Feed containing the last published Arch Linux Security Advisories')
    feed.link(href=TRACKER_ISSUE_URL.format('advisory'), rel='alternate')
    feed.link(href=TRACKER_ISSUE_URL.format('advisory/feed.atom'), rel='self')
    feed.language('en')

    for entry in data:
        package = entry['package']
        advisory = entry['advisory']
        content = render_template('feed.html', content=advisory.content)
        impact = render_template('feed.html', content=advisory.impact)
        published = updated = advisory.created.replace(tzinfo=UTC)

        entry = feed.add_entry()
        entry.id(TRACKER_ISSUE_URL.format(advisory.id))
        entry.title(f'[{advisory.id}] {package.pkgname}: {advisory.advisory_type}')
        entry.author(name='Arch Linux Security Team')
        entry.content(content.replace('\n', '<br/>'), type='html')
        entry.summary(impact.replace('\n', '<br/>'), type='html')
        entry.published(published)
        entry.updated(updated)
        entry.link(href=TRACKER_ISSUE_URL.format(advisory.id), rel='alternate')

    return Response(feed.atom_str(pretty=True), 200,
                    content_type='application/atom+xml; charset=utf-8')
def generate_feed_for_query(query):
    global db
    logger.debug("generate_feed_for_query()")
    results = db.get_top_results_for_query(query.id)

    fg = FeedGenerator()
    fg.id(f"{settings.BASE_URL}/results/{query.id}")
    fg.title(f"Results for {query.search}")
    fg.author({"name": "Reef", "email": "*****@*****.**"})
    fg.description("A list of latest results for a search")
    fg.link(href=settings.BASE_URL)
    fg.language("en")

    for result_raw in results:
        result = Result(json=result_raw)
        logger.debug(f"adding entry for {result.id}: {result.title}: {result.content}")
        fe = fg.add_entry()
        fe.id(f"{settings.BASE_URL}/results/{query.id}/{result.id}")
        fe.title(result.title)
        fe.link(href=result.content)

    if settings.FEED_FORMAT == "rss":
        return fg.rss_str(pretty=True)
    # else...
    return fg.atom_str(pretty=True)
class Feeder():
    def __init__(self, url, title='', feedURL=''):
        scraper = None
        if url.startswith("https://twitter.com/"):
            scraper = TwitterScraper(url)
            if title == '':
                title = "Twitter: @" + url.split('/')[3]
        elif url.startswith("http://www.lindwurm-linden.de/termine"):
            scraper = LindwurmScraper(url)
            if title == '':
                title = "Lindwurm: Termine"
        else:
            raise UnsupportedService("No scraper found for this URL.")

        self.feed = FeedGenerator()
        self.feed.id(url)
        self.feed.title(title)
        self.feed.author({"name": url})
        if feedURL != '':
            self.feed.link(href=feedURL, rel='self')

        for entry in scraper.entries:
            fe = self.feed.add_entry()
            fe.id(entry['url'])
            fe.title(entry['title'])
            fe.link(href=entry['url'], rel='alternate')
            fe.content(entry['text'])

    def GetAtom(self):
        return self.feed.atom_str(pretty=True).decode()
def recent_releases(organization_name=None):
    """Generates a feed for the releases of an organization."""
    organization = Organization.query.filter(
        Organization.name == organization_name).first()
    if organization is None:
        abort(404)

    fg = FeedGenerator()
    fg.id(
        url_for(
            "organization_bp.recent_releases",
            organization_name=organization.name,
            _external=True,
        ))
    fg.title(f"Recent releases for {organization.name}")
    fg.link(href=request.url, rel="self")

    for project in organization.projects:
        for release in project.releases:
            fe = fg.add_entry()
            fe.id(f"{release.project.name} {release.version}")
            fe.title(f"{release.project.name} {release.version}")
            fe.description(release.changes)
            fe.link(href=release.release_url)
            fe.updated(release.published_at.replace(tzinfo=timezone.utc))
            fe.published(release.published_at.replace(tzinfo=timezone.utc))

    atomfeed = fg.atom_str(pretty=True)
    return atomfeed
def _build_feed(changelog_entries, format):
    from feedgen.feed import FeedGenerator
    from datetime import datetime
    import pytz

    tz = pytz.timezone('Europe/Berlin')
    BASE_URL = 'https://manual.uberspace.de/en/changelog.'
    HTML_URL = BASE_URL + 'html'

    fg = FeedGenerator()
    fg.id(HTML_URL)
    fg.title('Uberspace 7 Updates')
    fg.link(href=HTML_URL, rel='alternate')
    fg.link(href=BASE_URL + format, rel='self')
    fg.language('en')

    for entry in changelog_entries:
        deeplink = '{}#v{}'.format(HTML_URL, entry['version'].replace('.', '-'))
        date = tz.localize(datetime.strptime(entry['date'], '%Y-%m-%d'))
        fe = fg.add_entry()
        fe.id(deeplink)
        fe.title('[{}] - {}'.format(entry['version'], entry['date']))
        fe.link(href=deeplink)
        fe.updated(date)
        fe.content(entry['text'].replace('\n', '<br>'))

    if format == 'atom':
        return fg.atom_str(pretty=True)
    else:
        raise Exception('unknown feed format: ' + str(format))
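# A minimal usage sketch for _build_feed() above, assuming each changelog
# entry is a dict with 'version', 'date' and 'text' keys, as the loop implies;
# the sample data is made up for illustration.
sample_entries = [{
    'version': '7.1.2',
    'date': '2019-01-01',
    'text': 'fixed a thing\nadded a thing',
}]
print(_build_feed(sample_entries, 'atom').decode('utf-8'))  # atom_str() gives bytes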
def get_feed(atom=False):
    fg = FeedGenerator()
    domain = get_domain()
    items = get_posts({"limit": "10"}, full=True)["results"]
    fg.id("http://%s/" % domain)
    fg.title("Blog do MatrUFSC2")
    fg.description("Feed do blog do MatrUFSC2, onde noticias e novos recursos sao anunciados primeiro!")
    fg.language('pt-BR')
    fg.link({"href": "/blog/feed", "rel": "self"})
    fg.updated(items[0]["posted_at"].replace(tzinfo=pytz.UTC))

    for item in items:
        entry = fg.add_entry()
        entry.title(item["title"])

        tree = html.fromstring(item["summary"])
        cleaner = Cleaner(allow_tags=[])
        tree = cleaner.clean_html(tree)
        text = tree.text_content()

        entry.description(text, True)
        entry.link({"href": item["link"], "rel": "self"})
        entry.content(item["body"])
        entry.published(item["posted_at"].replace(tzinfo=pytz.UTC))
        entry.updated(item["posted_at"].replace(tzinfo=pytz.UTC))
        entry.category({"label": item["category"]["title"],
                        "term": item["category"]["slug"]})
        entry.id(item["id"])

    if atom:
        return fg.atom_str(pretty=True)
    else:
        return fg.rss_str(pretty=True)
def writeRSS(papers, output):
    fg = FeedGenerator()
    fg.id(RSS_URL)
    fg.title(RSS_TITLE)
    fg.subtitle(RSS_SUBTITLE)
    fg.author(RSS_AUTHOR)
    fg.link(href='http://www.vldb.org/pvldb/', rel='alternate')
    fg.language('en')

    for p in papers:
        summary = "%(title)s\nAuthors: %(authors)s\nPVLDB Volume %(volume)d, Number %(number)d" % p
        fe = fg.add_entry()
        fe.author(name=p["authors"])
        fe.title(p["title"])
        fe.link(href=p["link"])
        fe.id(p["link"])
        fe.published(published=p["published"])
        fe.description(description=summary, isSummary=True)
    # FOR

    atomfeed = fg.atom_str(pretty=True)  # Get the ATOM feed as string
    atom_file = os.path.join(output, 'pvldb-atom.xml')
    fg.atom_file(atom_file)  # Write the ATOM feed to a file
    LOG.info("Created ATOM '%s'" % atom_file)

    rssfeed = fg.rss_str(pretty=True)  # Get the RSS feed as string
    rss_file = os.path.join(output, RSS_FILE)
    fg.rss_file(rss_file)  # Write the RSS feed to a file
    LOG.info("Created RSS '%s'" % rss_file)
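# Note on the API used above: atom_str()/rss_str() return the serialized feed
# as bytes, while atom_file()/rss_file() write straight to disk, so computing
# the string is only needed when you also want it in memory. A minimal sketch:
from feedgen.feed import FeedGenerator

fg = FeedGenerator()
fg.id('http://example.org/feed')
fg.title('Example')
fg.link(href='http://example.org/', rel='alternate')
fg.description('Example feed')           # required for the RSS serialization
atom_bytes = fg.atom_str(pretty=True)    # bytes, kept in memory
fg.atom_file('/tmp/example-atom.xml')    # same feed, written to a file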
def feed():
    # Entries are added backwards
    articles = BlogPost.query.order_by(asc(BlogPost.added)).all()

    fg = FeedGenerator()
    fg.title("Mempool | Satoshi Nakamoto Institute")
    fg.id("https://nakamotoinstitute.org/mempool/feed/")
    fg.updated(date_to_localized_datetime(articles[0].added))
    fg.link(href="https://nakamotoinstitute.org")
    fg.link(href="https://nakamotoinstitute.org/mempool/feed/", rel="self")
    fg.language("en")

    for article in articles:
        url = url_for("mempool.detail", slug=article.slug, _external=True)
        page = pages.get(f"mempool/{article.slug}")
        fe = fg.add_entry()
        fe.id(url)
        fe.title(article.title)
        fe.link(href=url)
        fe.updated(date_to_localized_datetime(article.added))
        fe.published(date_to_localized_datetime(article.date))
        fe.author(name=str(article.author[0]))
        fe.content(page.html)

    response = make_response(fg.atom_str(encoding="utf-8", pretty=True))
    response.headers.set("Content-Type", "application/atom+xml")
    return response
def render_atom(self):
    fg = FeedGenerator()
    fg.id(self.site_url)
    fg.title(self.site_title)
    fg.link(href=self.site_url, rel='alternate')
    fg.link(href=self.site_url + 'atom.xml', rel='self')
    fg.language('zh-cn')

    link_list = ArticleManager.sharedManager().link_list()
    for link in link_list:
        article = ArticleManager.sharedManager().article_for_link(link)
        if not article:
            continue
        fe = fg.add_entry()
        fe.id(article.article_link)
        fe.link(link={'href': self.site_url + article.article_link})
        fe.title(article.article_title)
        fe.description(article.article_subtitle or '')
        fe.author(name=article.author or '', email=article.author_email or '')
        d = datetime.strptime(article.article_publish_date, '%Y-%m-%d')
        pubdate = datetime(year=d.year, month=d.month, day=d.day, tzinfo=UTC(8))
        fe.pubdate(pubdate)
        article.render_content_html()
        fe.content(content=article._content_html, type='html')

    atom_feed = fg.atom_str(pretty=True)
    return atom_feed
def feed(request):
    blog = resolve_address(request)
    if not blog:
        raise Http404("Blog does not exist")

    all_posts = blog.post_set.filter(publish=True, is_page=False).order_by('-published_date')

    fg = FeedGenerator()
    fg.id(blog.useful_domain())
    fg.author({'name': blog.subdomain, 'email': 'hidden'})
    fg.title(blog.title)
    fg.subtitle(blog.meta_description or clean_text(unmark(blog.content)[:160]) or blog.title)
    fg.link(href=f"{blog.useful_domain()}/", rel='alternate')

    for post in all_posts:
        fe = fg.add_entry()
        fe.id(f"{blog.useful_domain()}/{post.slug}/")
        fe.title(post.title)
        fe.author({'name': blog.subdomain, 'email': 'hidden'})
        fe.link(href=f"{blog.useful_domain()}/{post.slug}/")
        fe.content(clean_text(mistune.html(post.content)), type="html")
        fe.published(post.published_date)
        fe.updated(post.published_date)

    if request.GET.get('type') == 'rss':
        fg.link(href=f"{blog.useful_domain()}/feed/?type=rss", rel='self')
        rssfeed = fg.rss_str(pretty=True)
        return HttpResponse(rssfeed, content_type='application/rss+xml')
    else:
        fg.link(href=f"{blog.useful_domain()}/feed/", rel='self')
        atomfeed = fg.atom_str(pretty=True)
        return HttpResponse(atomfeed, content_type='application/atom+xml')
def build_artifact(self, artifact):
    ctx = get_ctx()
    feed_source = self.source
    page = feed_source.parent

    fg = FeedGenerator()
    fg.id(get_id(ctx.env.project.id))
    fg.title(page.record_label + u" — Pallets Project")
    fg.link(href=url_to("/blog", external=True))
    fg.link(href=url_to(feed_source, external=True), rel="self")

    for item in page.children.order_by('-pub_date', '-pub_order', 'title').limit(10):
        fe = fg.add_entry()
        fe.title(item["title"])
        fe.content(text_type(item["body"]), type="html")
        fe.link(href=url_to(item, external=True))
        fe.id(get_id(u"{}/{}".format(ctx.env.project.id, item["_path"].encode("utf-8"))))
        fe.author(name=item["author"])
        updated = datetime(*item["pub_date"].timetuple()[:3])
        updated = updated.isoformat() + "Z" if not updated.tzinfo else ""
        fe.updated(updated)

    with artifact.open('wb') as f:
        f.write(fg.atom_str(pretty=True))
def api_rss(username: str, token: str):
    """ Get the rss feed for a user/token combo """
    auth = HTTPBasicAuth(username, token)
    endpoint = "https://api.github.com/notifications?all=true"
    req = requests.get(endpoint, auth=auth)
    if req.status_code != 200:
        # Report error back to client
        return req.text, req.status_code

    feed = FeedGenerator()
    feed.id("https://github.com/notifications")
    feed.title("Github Notifications")
    feed.language("en")

    for entry in req.json():
        url = entry["subject"]["url"]
        content = requests.get(url)
        if content.status_code == 200:
            url = content.json()["html_url"]
        fentry = feed.add_entry(order="append")
        fentry.id(entry["url"])
        fentry.title(f"[{entry['subject']['type']}] {entry['subject']['title']}")
        fentry.link(href=url)
        fentry.updated(entry["updated_at"])

    return Response(feed.atom_str(), mimetype="application/xml")
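# feedgen inserts new entries at the top by default (order='prepend'), so the
# order='append' used above keeps GitHub's own ordering. A small sketch of the
# difference, with made-up ids:
from feedgen.feed import FeedGenerator

fg = FeedGenerator()
fg.id('http://example.org/')
fg.title('Order demo')
fg.link(href='http://example.org/', rel='self')
first = fg.add_entry(order='append')   # stays first in the output
first.id('urn:demo:1')
first.title('first')
second = fg.add_entry(order='append')  # serialized after 'first'
second.id('urn:demo:2')
second.title('second')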
def feed(request):
    http_host = request.META['HTTP_HOST']

    if http_host == 'bearblog.dev' or http_host == 'www.bearblog.dev' or http_host == 'localhost:8000':
        return redirect('/')
    elif 'bearblog.dev' in http_host or 'localhost:8000' in http_host:
        extracted = tldextract.extract(http_host)
        blog = get_object_or_404(Blog, subdomain=extracted.subdomain)
        root = get_root(blog.subdomain)
    else:
        blog = get_object_or_404(Blog, domain=http_host)
        root = http_host

    all_posts = blog.post_set.filter(publish=True, is_page=False).order_by('-published_date')

    fg = FeedGenerator()
    fg.id(f'{root}/')
    fg.author({'name': blog.subdomain, 'email': 'hidden'})
    fg.title(blog.title)
    fg.subtitle(unmark(blog.content)[:160])
    fg.link(href=f"{root}/feed/", rel='self')
    fg.link(href=root, rel='alternate')

    for post in all_posts:
        fe = fg.add_entry()
        fe.id(f"{root}/{post.slug}")
        fe.title(post.title)
        fe.author({'name': blog.subdomain, 'email': 'hidden'})
        fe.link(href=f"{root}/feed")
        fe.content(unmark(post.content))

    atomfeed = fg.atom_str(pretty=True)
    return HttpResponse(atomfeed, content_type='application/atom+xml')
async def generate_hybrid_feed(feed_urls: List[str], max_age: int = 3600) -> bytes:
    digest = hashlib.sha256("\n".join(sorted(feed_urls)).encode()).hexdigest()

    async with aiohttp.ClientSession() as sess:
        feeds = await asyncio.gather(
            *[parse_feed(sess, url, max_age=max_age) for url in feed_urls])

    sorted_entries = sorted(
        chain(*(f["entries"] for f in feeds)),
        key=lambda item: item.get("updated_parsed") or item.get("published_parsed"),
        reverse=True,
    )

    fg = FeedGenerator()
    fg.title("Hybrid Feed")
    fg.id(f"feed-{digest}")

    for entry in sorted_entries:
        item = fg.add_item()
        item.title(entry["title"])
        item.guid(entry["guid"])  # TODO: ensure uniqueness across feeds?
        item.link(href=entry["link"])
        item.published(entry.get("published"))
        item.updated(entry.get("updated"))
        # TODO: add e.g. category, tags, image, ...?

    return fg.atom_str(pretty=True)
def feed(column_id):
    api = Api(column_id)

    with request.urlopen(api.info) as stream:
        result = stream.read().decode('utf-8')
    if not result:
        return '', 404
    info = json.loads(result)

    with request.urlopen(api.posts) as stream:
        result = stream.read().decode('utf-8')
    entries = json.loads(result)

    fg = FeedGenerator()
    fg.id(str(entries[0]['slug']))
    fg.title(info['name'])
    fg.language('zh_CN')
    fg.icon(info['avatar']['template'].replace('{id}', info['avatar']['id']).replace('{size}', 's'))
    fg.logo(info['avatar']['template'].replace('{id}', info['avatar']['id']).replace('{size}', 'l'))
    fg.description(info['intro'])
    fg.author(dict(name=info['creator']['name']))
    fg.link(href=api.base_url + info['url'], rel='alternate')

    for entry in entries:
        fe = fg.add_entry()
        fe.id(entry['url'])
        fe.title(entry['title'])
        fe.published(entry['publishedTime'])
        fe.updated(entry['publishedTime'])
        fe.author(dict(name=entry['author']['name']))
        fe.link(href=api.base_url + entry['url'], rel='alternate')
        fe.content(entry['content'])

    return fg.atom_str(pretty=True)
def __call__(self, value, system):
    feed = FeedGenerator()
    feed.id(system["request"].url)
    feed.title(value["title"])
    feed.description("Log Cabin")
    feed.link(rel="self", href=system["request"].url)
    feed.language("en")

    if system["renderer_name"] == "atom":
        feed.link([
            {"rel": "alternate", "type": content_type, "href": url}
            for content_type, url in system["request"].extensions
        ])

    for entry in value["entries"]:
        feed.add_entry(entry)

    system["request"].response.headers["Content-type"] = \
        extension_content_types[system["renderer_name"]] + "; charset=UTF-8"

    if system["renderer_name"] == "rss":
        return feed.rss_str(pretty=True)
    else:
        return feed.atom_str(pretty=True)
def _format_to_xml(json_feed, format='atom'):
    fg = FeedGenerator()
    fg.id('fltrbbl')
    fg.title('fltrbbl')

    for item in json_feed:
        item: Article
        fe = fg.add_entry()
        fe.id(item.source_id)
        fe.title(item.title)
        for author in item.authors:
            fe.author(name=author)
        # fg.link(href=item.url, rel='alternate')  # link to main page
        fe.link(href=item.url, rel='self')
        fe.guid(guid=item.source_id, permalink=item.url)
        if item.publish_date is not None:
            fe.published(item.publish_date.replace(tzinfo=datetime.timezone.utc))
        fe.description(item.summary)
        fe.content(item.html, type='CDATA')

    if format == 'atom':
        return fg.atom_str(pretty=True)  # Get the ATOM feed as string
    elif format == 'rss':
        return fg.rss_str(pretty=True)  # Get the RSS feed as string
    else:
        raise Exception('unknown format')
class RssHistory(View):
    """RSS History Page Controller"""

    @redirect_if_not_installed
    def get(self, request):
        self.__correlation_id = request.META["X-Correlation-ID"] if "X-Correlation-ID" in request.META else ""
        self.__fg = FeedGenerator()
        self.__context = Context()
        self.__option_entity = OptionEntity()

        self.__context.autoload_options()
        self.__context.push({
            "page_title": self.__context.get("app_name", os.getenv("APP_NAME", "Silverback")),
            "is_authenticated": request.user and request.user.is_authenticated
        })

        self.__fg.id('http://silverbackhq.org')
        self.__fg.title('Some Testfeed')
        self.__fg.author({'name': 'John Doe', 'email': '*****@*****.**'})
        self.__fg.link(href='http://example.com', rel='alternate')
        self.__fg.logo('http://ex.com/logo.jpg')
        self.__fg.subtitle('This is a cool feed!')
        self.__fg.link(href='http://silverbackhq.org/test.atom', rel='self')
        self.__fg.language('en')

        return HttpResponse(self.__fg.atom_str(), content_type='text/xml')
def feed(request):
    address_info = resolve_address(request)
    if not address_info:
        return redirect('/')

    blog = address_info['blog']
    root = address_info['root']

    all_posts = blog.post_set.filter(publish=True, is_page=False).order_by('-published_date')

    fg = FeedGenerator()
    fg.id(f'{root}/')
    fg.author({'name': blog.subdomain, 'email': 'hidden'})
    fg.title(blog.title)
    fg.subtitle(unmark(blog.content)[:160])
    fg.link(href=f"{root}/feed/", rel='self')
    fg.link(href=root, rel='alternate')

    for post in all_posts:
        fe = fg.add_entry()
        fe.id(f"{root}/{post.slug}")
        fe.title(post.title)
        fe.author({'name': blog.subdomain, 'email': 'hidden'})
        fe.link(href=f"{root}/feed")
        fe.content(unmark(post.content))

    atomfeed = fg.atom_str(pretty=True)
    return HttpResponse(atomfeed, content_type='application/atom+xml')
def feed(request):
    address_info = resolve_address(request)
    if not address_info:
        return redirect('/')

    blog = address_info['blog']
    root = address_info['root']

    all_posts = blog.post_set.filter(publish=True, is_page=False).order_by('-published_date')

    fg = FeedGenerator()
    fg.id(f'http://{root}/')
    fg.author({'name': blog.subdomain, 'email': blog.user.email})
    fg.title(blog.title)
    fg.subtitle(unmark(blog.content)[:160])
    fg.link(href=f"http://{root}/", rel='alternate')

    for post in all_posts:
        fe = fg.add_entry()
        fe.id(f"http://{root}/{post.slug}/")
        fe.title(post.title)
        fe.author({'name': blog.subdomain, 'email': blog.user.email})
        fe.link(href=f"http://{root}/feed/")
        fe.content(unmark(post.content))
        fe.updated(post.published_date)

    if request.GET.get('type') == 'rss':
        fg.link(href=f"http://{root}/feed/?type=rss", rel='self')
        rssfeed = fg.rss_str(pretty=True)
        return HttpResponse(rssfeed, content_type='application/rss+xml')
    else:
        fg.link(href=f"http://{root}/feed/", rel='self')
        atomfeed = fg.atom_str(pretty=True)
        return HttpResponse(atomfeed, content_type='application/atom+xml')
def feed(): """ Generate atom feed """ entries = parse_posts(0, C.feed_count) fg = FeedGenerator() fg.id(str(len(entries))) fg.title(C.title) fg.subtitle(C.subtitle) fg.language(C.language) fg.author(dict(name=C.author, email=C.email)) fg.link(href=C.root_url, rel='alternate') fg.link(href=make_abs_url(C.root_url, 'feed'), rel='self') for entry in entries: fe = fg.add_entry() fe.id(entry.get('url')) fe.title(entry.get('title')) fe.published(entry.get('date')) fe.updated(entry.get('updated') or entry.get('date')) fe.link(href=make_abs_url(C.root_url, entry.get('url')), rel='alternate') fe.author(dict(name=entry.get('author'), email=entry.get('email'))) fe.content(entry.get('body')) atom_feed = fg.atom_str(pretty=True) return atom_feed
def generate_collections_atom_feed():
    """Generates an ATOM feed with the recent updated collections."""
    recent_collections = Collection.query.order_by(
        Collection.last_updated.desc()).limit(50)

    fg = FeedGenerator()
    fg.id(url_for("collections_atom", _external=True))
    fg.title("Recent collections published on MOSP")
    # fg.subtitle("")
    fg.link(href=application.config["INSTANCE_URL"], rel="self")
    fg.author({
        "name": application.config["ADMIN_URL"],
        "email": application.config["ADMIN_EMAIL"],
    })
    fg.language("en")

    for recent_collection in recent_collections:
        fe = fg.add_entry()
        fe.id(
            url_for(
                "collection_bp.get",
                collection_uuid=recent_collection.uuid,
                _external=True,
            ))
        fe.title(recent_collection.name)
        fe.description(recent_collection.description)
        fe.published(recent_collection.last_updated.replace(tzinfo=timezone.utc))
        fe.link(href=url_for(
            "collection_bp.get",
            collection_uuid=recent_collection.uuid,
            _external=True,
        ))

    atomfeed = fg.atom_str(pretty=True)
    return atomfeed
def feed():
    root_url = request.url_root.rstrip("/")
    home_full_url = root_url + url_for(".index")
    feed_full_url = root_url + url_for(".feed")
    site = app.config["SITE_INFO"]
    site_tz = s2tz(site["timezone"]) or timezone(timedelta())

    # set feed info
    feed_gen = FeedGenerator()
    feed_gen.id(home_full_url)
    feed_gen.title(site.get("title", ""))
    feed_gen.subtitle(site.get("subtitle", ""))
    if "author" in site:
        feed_gen.author(name=site["author"])
    feed_gen.link(href=home_full_url, rel="alternate")
    feed_gen.link(href=feed_full_url, rel="self")

    # add feed entries
    posts = load_posts(meta_only=True)[:10]
    for i in range(len(posts)):
        p = load_post(posts[i]["filename"])
        if not p:
            continue
        feed_entry = feed_gen.add_entry()
        feed_entry.id(root_url + p["url"])
        feed_entry.link(href=root_url + p["url"])
        feed_entry.title(p["title"])
        feed_entry.content(p["content"])
        feed_entry.published(p["created"].replace(tzinfo=site_tz))
        feed_entry.updated(p.get("updated", p["created"]).replace(tzinfo=site_tz))
        if "author" in p:
            feed_entry.author(name=p["author"])

    # make http response
    resp = make_response(feed_gen.atom_str(pretty=True))
    resp.content_type = "application/atom+xml; charset=utf-8"
    return resp
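# feedgen rejects naive datetimes for published()/updated() (it raises a
# ValueError), which is presumably why the view above localizes every
# timestamp with the site timezone first. Minimal sketch of the requirement:
from datetime import datetime, timezone
from feedgen.feed import FeedGenerator

fg = FeedGenerator()
fg.id('http://example.org/')
fg.title('tz demo')
fg.link(href='http://example.org/', rel='self')
fe = fg.add_entry()
fe.id('http://example.org/post')
fe.title('post')
fe.published(datetime(2020, 1, 1, tzinfo=timezone.utc))  # tz-aware: accepted
# fe.published(datetime(2020, 1, 1))  # naive: would raise ValueError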
def _filter_fb_rss_feeed(url):
    parsed_feed = feedparser.parse(url)
    filtered_entries = filter(
        lambda x: ' shared a link: "' in x.title, parsed_feed.entries)

    fg = FeedGenerator()
    fg.id('https://fb-notifications-to-pocket.herokuapp.com/')
    fg.title('Facebook Notifications to Pocket')
    fg.author({'name': 'Pankaj Singh', 'email': '*****@*****.**'})
    fg.description(
        '''Filter FB notifications which contain a link and generate
        a new rss feed which will be used by IFTTT''')
    fg.link(href='https://fb-notifications-to-pocket.herokuapp.com/')

    for entry in filtered_entries:
        root = etree.HTML(entry.summary_detail.value)
        title = entry.title.split(" shared a link: ")[1].strip()[1:-2]
        author_name = entry.title.split(" shared a link: ")[0].strip()
        url = urlparse.parse_qs(
            urlparse.urlparse(root.findall(".//a")[-1].attrib["href"]).query)["u"][0]
        title = get_title_for_url(url) or title

        fe = fg.add_entry()
        fe.id(entry.id)
        fe.link(href=url)
        fe.published(entry.published)
        fe.author({'name': author_name})
        fe.title(title)

    return fg.atom_str(pretty=True)
def feed(request):
    fg = FeedGenerator()
    fg.id("bearblog")
    fg.author({"name": "Bear Blog", "email": "*****@*****.**"})

    newest = request.GET.get("newest")
    if newest:
        fg.title("Bear Blog Most Recent Posts")
        fg.subtitle("Most recent posts on Bear Blog")
        fg.link(href="https://bearblog.dev/discover/?newest=True", rel="alternate")
        all_posts = (Post.objects.annotate(
            upvote_count=Count("upvote"),
        ).filter(
            publish=True,
            blog__reviewed=True,
            blog__blocked=False,
            show_in_feed=True,
            published_date__lte=timezone.now(),
        ).order_by("-published_date").select_related("blog")[0:posts_per_page])
    else:
        fg.title("Bear Blog Trending Posts")
        fg.subtitle("Trending posts on Bear Blog")
        fg.link(href="https://bearblog.dev/discover/", rel="alternate")
        all_posts = (Post.objects.annotate(
            upvote_count=Count("upvote"),
            score=ExpressionWrapper(
                ((Count("upvote") - 1) /
                 ((Seconds(Now() - F("published_date"))) + 4)**gravity) * 100000,
                output_field=FloatField(),
            ),
        ).filter(
            publish=True,
            blog__reviewed=True,
            blog__blocked=False,
            show_in_feed=True,
            published_date__lte=timezone.now(),
        ).order_by("-score", "-published_date").select_related(
            "blog").prefetch_related("upvote_set")[0:posts_per_page])

    for post in all_posts:
        fe = fg.add_entry()
        fe.id(f"{post.blog.useful_domain()}/{post.slug}/")
        fe.title(post.title)
        fe.author({"name": post.blog.subdomain, "email": "hidden"})
        fe.link(href=f"{post.blog.useful_domain()}/{post.slug}/")
        fe.content(clean_text(mistune.html(post.content)), type="html")
        fe.published(post.published_date)
        fe.updated(post.published_date)

    if request.GET.get("type") == "rss":
        fg.link(href=f"{post.blog.useful_domain()}/feed/?type=rss", rel="self")
        rssfeed = fg.rss_str(pretty=True)
        return HttpResponse(rssfeed, content_type="application/rss+xml")
    else:
        fg.link(href=f"{post.blog.useful_domain()}/feed/", rel="self")
        atomfeed = fg.atom_str(pretty=True)
        return HttpResponse(atomfeed, content_type="application/atom+xml")
def saveFeed(listings, title, path):
    url = githubRepoURL + title + ".xml"

    # Create a feed generator
    fg = FeedGenerator()

    # Create the feed's title
    fg.id(url)
    fg.title(title)
    fg.author({'name': 'Ben Snell'})
    fg.description("NYC 2BR Apartment Listings in " + title)
    fg.link(href=url, rel='alternate')
    fg.language('en')
    time = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "-05:00"
    fg.pubDate(time)
    fg.updated(time)

    for apt in listings:
        e = fg.add_entry()
        e.id(apt[0])
        e.title("$" + apt[1] + " // " + apt[4])
        e.link(href=apt[0])

        text = ""
        if apt[5] != "":
            imgs = apt[5].split(" ")
            for i in range(len(imgs)):
                text += "<img src=\"" + imgs[i] + "\" /> "
                if i == 0:
                    text += "<p>" + apt[8] + "</p>"
        else:
            text += "<p>" + apt[8] + "</p>"
        e.content(type="html", content=text)

        # This doesn't seem to work:
        e.pubDate(datetime2RSSString(clDate(apt[2])))
        e.updated(datetime2RSSString(clDate(apt[2])))

    fg.atom_str(pretty=True)
    fg.atom_file(path)
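# Two hedged notes on the tail of saveFeed() above: the bare
# fg.atom_str(pretty=True) discards its return value (atom_file() is what
# writes to disk), and feedgen only accepts timestamps that carry timezone
# information, which the hard-coded "-05:00" suffix provides. An equivalent
# tz-aware datetime, for illustration:
from datetime import datetime, timezone, timedelta

est = timezone(timedelta(hours=-5))
stamp = datetime.now(est)  # equivalent to the "-05:00"-suffixed string above
# fg.updated(stamp)        # accepted just like the string form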
def test_content_cdata_type(self):
    fg = FeedGenerator()
    fg.title('some title')
    fg.id('http://lernfunk.de/media/654322/1')
    fe = fg.add_entry()
    fe.id('http://lernfunk.de/media/654322/1')
    fe.title('some title')
    fe.content('content', type='CDATA')
    result = fg.atom_str()
    assert b'<content type="CDATA"><![CDATA[content]]></content>' in result
def feed_http(request):
    """HTTP Cloud Function.

    Args:
        request (flask.Request): The request object.
        <http://flask.pocoo.org/docs/1.0/api/#flask.Request>
    Returns:
        The response text, or any set of values that can be turned into a
        Response object using `make_response`
        <http://flask.pocoo.org/docs/1.0/api/#flask.Flask.make_response>.
    """
    request_args = request.args
    url = request_args['url']

    g = Grab()
    fg = FeedGenerator()
    g.go(url)
    fg.id(url)
    fg.title('Rabota.UA | rss feed')
    url_parsed = urlparse(g.response.url)
    fg.link(href=url_parsed.scheme + '://' + url_parsed.hostname, rel='alternate')
    fg.description(g.doc('/html/head/title').text())

    count = int(
        g.doc('//span[@id="ctl00_content_vacancyList_ltCount"]/span').one().text())
    if count == 0:
        itm_list = []
    else:
        articles = g.doc.select(
            '//table[contains(@class, "f-vacancylist-tablewrap")]').one()
        itm_list = articles.select(
            'tr[@id]/td/article/div[contains(@class, "card-body")]')

    for item in itm_list:
        vac_title = item.select(
            'div[1]//h2[contains(@class, "card-title")]/a/@title').text().strip()
        vac_url = g.make_url_absolute(
            item.select('div[1]//h2[contains(@class, "card-title")]/a/@href').text())
        try:
            vac_description = item.select(
                'div[contains(@class, "card-description")]').text().strip()
        except weblib.error.DataNotFound:
            vac_description = 'N/A'
        fe = fg.add_entry()
        print(vac_title)
        fe.id(vac_url)
        fe.link({'href': vac_url})
        fe.source(vac_url)
        fe.title(vac_title)
        fe.description(vac_description)

    response = make_response(fg.atom_str(pretty=True, extensions=False))
    response.headers['Content-Type'] = 'application/rss+xml; charset=UTF-8'
    return response
def main(argv):
    ap = argparse.ArgumentParser(
        description='''
        Render RSS and Atom feeds from a CSV of food inspection data.
        ''')
    ap.add_argument(
        '-v', '--verbose', action='count', dest='verbosity', default=0,
        help='increase global logging verbosity; can be used multiple times')
    ap.add_argument(
        '-f', '--format', choices=['rss', 'atom'], default='atom',
        help='''
        specify the format to use when rendering the feed (default:
        %(default)s)
        ''')
    ap.add_argument(
        '-n', '--num_incidents', metavar='<num>', type=int, default=10,
        help='render <num> recent incidents in the feed (default: %(default)s)')
    ap.add_argument(
        'flavor', nargs='?', default='all', choices=['all', 'failures'],
        help='select the flavor of feed to render (default: %(default)s)')
    args = ap.parse_args()

    logging.basicConfig(
        level=logging.ERROR - args.verbosity * 10,
        style='{',
        format='{}: {{message}}'.format(ap.prog))

    fg = FeedGenerator()
    fg.id('http://pgriess.github.io/dallas-foodscores/')
    fg.link(href=fg.id(), rel='self')
    fg.title('Dallas Food Inspection Scores')
    fg.subtitle('''
        Food inspection scores from the official City of Dallas dataset;
        updated daily
        ''')
    fg.description(fg.subtitle())
    fg.language('en')
    fg.author(
        name='Peter Griess',
        email='*****@*****.**',
        uri='https://twitter.com/pgriess')

    for i in get_inspections_to_feed(sys.stdin, args.num_incidents, args.flavor):
        fe = fg.add_entry()
        fe.title('{name} at {address} scored {score}'.format(
            name=i.name, address=i.address, score=i.score))
        fe.id(fg.id() + '#!/' + str(abs(hash(i))))
        fe.link(href=fe.id(), rel='alternate')
        fe.content(fe.title())
        fe.published(TZ.localize(i.date))

    if args.format == 'atom':
        print(fg.atom_str(pretty=True))
    else:
        print(fg.rss_str(pretty=True))
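# The snippet above leans on a feedgen convention worth calling out: every
# setter doubles as a getter when called without arguments (fg.id(),
# fg.subtitle(), fe.id(), fe.title()). Minimal sketch:
from feedgen.feed import FeedGenerator

fg = FeedGenerator()
fg.id('http://example.org/')
assert fg.id() == 'http://example.org/'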
def saveFeed(listings, title, path):
    url = githubRepoURL + title + ".xml"

    # Create a feed generator
    fg = FeedGenerator()

    # Create the feed's title
    fg.id(url)
    fg.title(title)
    fg.author({'name': 'Ben Snell'})
    fg.description("Art Show Open Call Opportunities")
    fg.link(href=url, rel='alternate')
    fg.language('en')
    time = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "-05:00"
    fg.pubDate(time)
    fg.updated(time)

    for item in listings:
        e = fg.add_entry()
        e.id(item["ID"])

        # Get a clearer title
        thisTitle = getShortDate(item["Application Deadline"]) + item["Title"]
        e.title(thisTitle)

        # for key, value in item.items():
        #     print(key, value)
        # print(item["url"])
        # if "url" in item:
        e.link(href=item["url"])

        text = getHtmlFormattedListing(item)
        e.content(type="html", content=text)

        # This doesn't seem to work:
        # e.pubDate( datetime2RSSString(clDate(apt[2])) )
        # e.updated( datetime2RSSString(clDate(apt[2])) )

    fg.atom_str(pretty=True)
    fg.atom_file(path)
def test_summary_html_type(self):
    fg = FeedGenerator()
    fg.title('some title')
    fg.id('http://lernfunk.de/media/654322/1')
    fe = fg.add_entry()
    fe.id('http://lernfunk.de/media/654322/1')
    fe.title('some title')
    fe.link(href='http://lernfunk.de/media/654322/1')
    fe.summary('<p>summary</p>', type='html')
    result = fg.atom_str()
    expected = b'<summary type="html"><p>summary</p></summary>'
    assert expected in result
def feed(request, slug):
    """
    Return an RSS feed

    :param request: The request object.
    :param slug: The slug for the requested feed.
    :return: The rendered feed.
    """
    out_feed = get_object_or_404(OutFeed, slug=slug)
    url = "{}://{}{}".format(request.scheme, request.get_host(),
                             reverse('posts', args=[slug]))

    fg = FeedGenerator()
    fg.id(url)
    fg.title(out_feed.title)
    fg.link(href=url, rel='alternate')
    fg.description(out_feed.description)
    fg.pubDate(out_feed.updated)

    in_feeds = InFeed.objects.filter(out_feed=out_feed, enabled=True)
    posts = Post.objects.filter(in_feed__in=[f.id for f in in_feeds], enabled=True)

    for post in posts:
        if post.override_desc is not None and post.override_desc != '':
            description = post.override_desc
        else:
            description = post.description
        description = description + ' [<a href="{}">Continue reading...</a>]'.format(post.link)

        fe = fg.add_entry()
        fe.id(post.id)
        fe.title(post.title)
        fe.description(description)
        fe.author({'name': post.author})
        fe.link(href=post.link)
        fe.guid(post.guid)
        if post.override_pub is not None:
            fe.pubDate(post.override_pub)
            fe.updated(post.override_pub)
        else:
            fe.pubDate(post.published)
            fe.updated(post.published)

    data = fg.atom_str(pretty=True)
    response = HttpResponse(data, content_type='application/rss+xml')
    response['Content-Length'] = len(data)
    return response
def render_feed(text_paths, outpath):
    # http://rhodesmill.org/brandon/feed
    # http://rhodesmill.org/brandon/category/python/feed
    # http://rhodesmill.org/brandon/feed/atom/

    t0 = datetime.min.time()

    def fix(d):
        dt = datetime.combine(d, t0)
        return timezone('US/Eastern').localize(dt)

    posts = [post_info(path) for path in text_paths if date_of(path)]
    posts = sorted(posts, key=lambda post: post['date'])
    posts = posts[-1:]
    most_recent_date = max(post['date'] for post in posts)

    def full(url):
        return 'http://rhodesmill.org/' + url.lstrip('/')

    fg = FeedGenerator()
    fg.id(full('/'))
    fg.author({'name': 'Brandon Rhodes'})
    fg.language('en')
    fg.link(href=full('/brandon/'), rel='alternate')
    if 'python' in outpath:
        fg.link(href=full('/brandon/category/python/feed/'), rel='self')
    else:
        fg.link(href=full('/brandon/feed/'), rel='self')
    fg.subtitle('Thoughts and ideas from Brandon Rhodes')
    fg.title("Let's Discuss the Matter Further")
    fg.updated(fix(most_recent_date))

    for post in posts:
        url = full(post['url_path'])
        excerpt = truncate_at_more(post['body_html'], url)
        fe = fg.add_entry()
        fe.content(excerpt, type='html')
        fe.guid(url, permalink=True)
        fe.id(url)
        fe.link({'href': url})
        fe.published(fix(post['date']))
        fe.title(post['title'])
        fe.updated(fix(post['date']))

    rss = fg.rss_str(pretty=True)
    fg.link(href=full('/brandon/feed/atom/'), rel='self', replace=True)
    atom = fg.atom_str(pretty=True)
    return rss, atom
def feedAtom():
    todos = session.query(Todo).join(Todo.images).all()

    fg = FeedGenerator()
    fg.title('CityTodo Atom-Feed')
    fg.language('en')
    fg.id('http://localhost:5000/feed/rss')
    fg.link(href='http://localhost:5000/feed/rss', rel='self')

    for todo in todos:
        fe = fg.add_entry()
        fe.id(str(todo.id))
        fe.link(href='http://localhost:5000/todo/' + str(todo.id), rel='self')
        fe.title(todo.name)
        fe.content(todo.description)

    atomfeed = fg.atom_str(pretty=True)
    return atomfeed
def serve_filter(type, filtername):
    try:
        fil = trn.select_unique(Filter, name=filtername, insert=False)
    except:
        logging.exception("serve_filter failed for filter %s" % (filtername,))
        raise

    out_feed = FeedGenerator()
    out_feed.title(fil.title)
    out_feed.subtitle(fil.subtitle)
    out_feed.id(filtername)

    for entry in fil.entries():
        d = entry.definition
        out_entry = out_feed.add_entry()
        out_entry.title(d.title)
        out_entry.published(getattr(d, "published", None))
        out_entry.updated(getattr(d, "updated", None))
        out_entry.id(d.id)
        out_entry.summary(d.summary)
        for c in getattr(d, "content", []):
            out_entry.content(content=c.value, type=c.type)  # , src=c.base
        for l in getattr(d, "links", []):
            out_entry.link(link=l)

    try:
        if type == "atom":
            mimetype = "application/atom+xml"
            result = out_feed.atom_str()
        else:
            mimetype = "application/rss+xml"
            result = out_feed.rss_str()
    except:
        logging.exception("%s error", type)
        mimetype = "text/plain"
        result = """
        An error occurred while trying to produce this feed.
        You could try using %s instead.
        """ % ("rss" if type == "atom" else "atom",)

    response.content_type = mimetype
    return result
def _transform_twitrss_feed_to_link_feed(url):
    # Add timestamp to url
    # This will disable caching
    url = "{}&replies={}&replies=off".format(url, datetime.now().toordinal())
    parsed_feed = feedparser.parse(url)

    fg = FeedGenerator()
    fg.id('https://fb-notifications-to-pocket.herokuapp.com/')
    fg.title('Twitter Feed to Pocket')
    fg.author({'name': 'Pankaj Singh', 'email': '*****@*****.**'})
    fg.description(
        '''Transform TwitRSS.me feed to a new rss feed which will be used by IFTTT''')
    fg.link(href='https://fb-notifications-to-pocket.herokuapp.com/')

    all_links = []
    for entry in parsed_feed.entries:
        links = extract_links_from_a_tweet(entry.link)
        if not links:
            continue
        for link in links:
            if link in all_links:
                continue
            else:
                all_links.append(link)
            title = get_title_for_url(link) or entry.title
            # content = get_content_for_url(link) or entry.content
            author_name = get_twitter_handle_from_twitrss_cdata(entry.title)

            fe = fg.add_entry()
            id = generate_key_for_text(link)
            fe.id(id)
            fe.link(href=link)
            fe.published(entry.published)
            fe.author({'name': author_name})
            fe.title(title)
            # fe.content(content)

    return fg.atom_str(pretty=True)
def create_feed(episodes, output_filepath=None):
    woe_feed = FeedGenerator()
    woe_feed.load_extension('podcast', atom=True)
    woe_feed.title(u"Willkommen Österreich")
    woe_feed.id(EPISODES_SCRAPING_URL)
    woe_feed.link(href=BASE_URL, rel='self')
    woe_feed.description(u"Inoffizieller RSS-Feed für 'Willkommen Österreich'-Episoden")
    woe_feed.language('de')

    for episode in episodes:
        episode_entry = woe_feed.add_entry()
        episode_entry.id(episode.page)
        episode_entry.link(href=episode.page, rel='alternate')
        episode_entry.title(u"Folge {0} - {1}: {2}".format(episode.num, episode.date, episode.description))
        for video in episode.videos:
            episode_entry.enclosure(url=video, length=0, type='mp4')

    if output_filepath:
        woe_feed.atom_file(output_filepath)
    else:
        print(woe_feed.atom_str(pretty=True))
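# The podcast extension loaded above also exposes iTunes metadata; a hedged
# sketch with made-up values (and note that an enclosure type is meant to be a
# MIME type such as 'video/mp4', so the bare 'mp4' above may not validate):
from feedgen.feed import FeedGenerator

fg = FeedGenerator()
fg.load_extension('podcast')
fg.id('http://example.org/feed')
fg.title('Demo podcast')
fg.link(href='http://example.org/', rel='self')
fg.description('Demo')
fg.podcast.itunes_category('Technology', 'Podcasting')
fe = fg.add_entry()
fe.id('http://example.org/ep1')
fe.title('Episode 1')
fe.enclosure(url='http://example.org/ep1.mp3', length=0, type='audio/mpeg')
print(fg.rss_str(pretty=True).decode('utf-8'))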
def get(self, login, format_):
    feed = yield self.thread_pool.submit(
        self.db.fetch,
        "SELECT * FROM feeds WHERE login = ? AND is_enabled = TRUE", login)
    if not feed:
        raise HTTPError(404)
    if feed["password"] and not self.http_basic_auth(feed["login"], feed["password"]):
        return  # request HTTP basic authentication to protect the feed

    files = yield self.thread_pool.submit(
        self.db.fetch_all,
        "SELECT `key`, login, name, size, folder, caption, expires_at, created_at FROM files "
        "WHERE login = ? AND is_public = TRUE AND (expires_at IS NULL OR expires_at > NOW()) "
        "ORDER BY name", login)

    if format_ == "json":
        self.write({"feed": files})
    elif format_.endswith(".xml"):
        fg = FeedGenerator()
        fg.title(feed["title"] or login)
        fg.description(login + ": Sharedown")
        fg.id(self.reverse_url("feed_index", login, absolute=True))
        fg.link(href=self.reverse_url("feed_export", login, format_, absolute=True), rel="self")

        for file_ in files:
            fe = fg.add_entry()
            fe.id(self.reverse_url("download_index", file_["key"], absolute=True))
            fe.link(href=self.reverse_url("download_index", file_["key"], absolute=True), rel="alternate")
            fe.title(file_["caption"] or file_["name"])
            fe.author(name=file_["login"])
            fe.published(utils.timezone_localize(file_["created_at"]))
            if file_["folder"]:
                fe.category(term=file_["folder"])

        if format_ == "atom.xml":
            self.set_header("Content-Type", "application/xml; charset=UTF-8")
            self.write(fg.atom_str(True))
        elif format_ == "rss.xml":
            self.set_header("Content-Type", "application/xml; charset=UTF-8")
            self.write(fg.rss_str(True))
def gen_rss(items, date_string, atom=False):
    fg = FeedGenerator()
    fg.title('Open Hunt: %s' % date_string)
    fg.id('%s/%s' % (BASE_URL, date_string))

    # setting more fields that we don't really need
    # to appease the atom/rss generators
    fg.author({'name': 'Christopher Su', 'email': '*****@*****.**'})
    fg.description('Open Hunt items for %s' % date_string)
    fg.subtitle('Open Hunt items for %s' % date_string)
    fg.language('en')

    if atom:
        fg.link({'href': '%s/%s/atom' % (BASE_URL, date_string), 'rel': 'self'})
    else:
        fg.link({'href': '%s/%s/rss' % (BASE_URL, date_string), 'rel': 'self'})

    counter = 0
    for item in items:
        fe = fg.add_entry()

        # id is a mandatory field for atom
        # we're using it wrong here, but oh well
        # should probably use open hunt's slugs instead
        if atom:
            fe.id(counter)
            counter += 1

        fe.link({'href': item['href'], 'rel': 'alternate'})
        fe.title(item['title'])
        fe.description(str(item['score']))
        fe.content(item['description'])
        fe.author({'name': item['author'], 'email': '@%s' % item['author']})

    if atom:
        return fg.atom_str(pretty=True)
    return fg.rss_str(pretty=True)
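# Atom ids should be stable string IRIs; the integer counter above (which the
# comments already flag as wrong) is fragile because the value lands in an XML
# text node. A hedged fix sketch using the item's permalink, falling back to a
# stringified counter:
def stable_entry_id(item, counter):
    return item.get('href') or str(counter)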
def hello():
    base = 'http://www.interpressnews.ge/ge/'
    headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; JHR Build/98234) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.95 Mobile Safari/537.36'}
    r = requests.get(base, headers=headers)
    r.encoding = 'utf-8'
    soup = BeautifulSoup(r.text, 'lxml')

    a_list = []
    items = soup.find(id='mobile_topnews').find_all(class_='topnews_content')
    for item in items:
        a = my_item(item, base)
        a_list.append(a)

    fg = FeedGenerator()
    fg.id('http://lernfunk.de/media/654321')
    fg.title('Some Testfeed')
    fg.author({'name': 'John Doe', 'email': '*****@*****.**'})
    fg.link(href='http://goo.gl/OzZCmm', rel='alternate')
    fg.icon('https://goo.gl/vsNdil')
    fg.subtitle('This is a cool feed!')
    fg.link(href='http://larskiesow.de/test.atom', rel='self')
    fg.language('ge')

    for a in a_list:
        fe = fg.add_entry()
        fe.id(str(hash(a.get('title', ''))))
        fe.title(a.get('title', 'EMPTY'))
        fe.content('COMING SOON', type='text')
        fe.summary('COMING SOON')
        fe.link(href=a.get('link', ''), type='text/html')
        fe.author({'name': 'John Doe', 'email': '*****@*****.**'})
        fe.updated(datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%dT%H:%M:%SZ'))

    atomfeed = fg.atom_str(pretty=True)
    return atomfeed
def main(argv=None): private = False catalog_path = realpath(os.path.curdir) # by default htdocs = None catalog = None if argv is None: argv = sys.argv try: try: opts, args = getopt.getopt(argv[1:], "hp", ["help", "catalog=", "htdocs="]) except getopt.error as msg: usage() # option processing for option, value in opts: if option == "-p": private = True if option == "--noindex": indexes = False if option in ("-h", "--help"): usage() if option == "--catalog": catalog = value if option == "--htdocs": htdocs = realpath(value) except Exception as e: print(e) usage() # Try loading catalog if catalog: # specified by user catalog = join(catalog_path, catalog) if not os.path.isfile(catalog): help_message = "could not find catalog file %s" % catalog usage() else: #not specified by user try: catalog_path = realpath(os.path.curdir) while catalog_path != "/" and not catalog: if private: catalog = join(catalog_path, "private.py") else: catalog = join(catalog_path, "catalog.py") # print("Trying to open default catalog file %s" % catalog) if os.path.isfile(catalog): print("Found default catalog file %s" % catalog) else: catalog = None catalog_path = realpath(join(catalog_path, "..")) if not os.path.isfile(catalog): print("Error: could not find defaut catalog file %s" % catalog) usage() except Exception as e: print(e) usage() try: entries = otCMS.otCMSCatalog() entries.fromfile(catalog) print("Catalog read successfully. %d entries loaded" % len(entries)) except Exception as e: print(e) usage() if htdocs: #specified by user if os.path.exists(htdocs): pass else: print("Error: could not find root path for htdocs: %s" % htdocs) usage() else: htdocs=catalog_path print("Writing files with %s as htdocs root directory" % htdocs) # Setup template engine, path is known relative to the script from mako.template import Template from mako.lookup import TemplateLookup mylookup = TemplateLookup(directories=[join(dirname(__file__), "..", "templates")], output_encoding='utf-8', encoding_errors='replace') selection_template = mylookup.get_template("list_entry.html") # template for list of entries, used throughout # 1. 
Preparation for date-based archives / index years = list() # list of all years where there are entries yearly_selection = dict() # dict of entries per year yearly_all_html = '' # html block with all entries, per year yearly_selection_html = dict() # dictionary of html block, per year yearly_lang_selection = dict() yearly_lang_selection['en'] = dict() yearly_lang_selection['fr'] = dict() yearly_lang_html = dict() yearly_lang_html['en'] = '' yearly_lang_html['fr'] = '' for entry in entries: if entry.year != None: if entry.year not in years: years.append(entry.year) yearly_selection[entry.year] = list() yearly_lang_selection['en'][entry.year] = list() yearly_lang_selection['fr'][entry.year] = list() yearly_selection[entry.year].append(entry) yearly_lang_selection[entry.language][entry.year].append(entry) for year in years: yearly_all_html = yearly_all_html + '''<h2 id="y%(year)s">%(year)s</h2>''' % {"year": year} yearly_selection_html[year] = selection_template.render_unicode(selection = yearly_selection[year]) yearly_all_html = yearly_all_html + selection_template.render_unicode(selection = yearly_selection[year]) if year in yearly_lang_selection['en']: if len(yearly_lang_selection['en'][year]) > 0: yearly_lang_html['en'] = yearly_lang_html['en'] + '''<h2 id="y%(year)s">%(year)s</h2>''' % {"year": year} yearly_lang_html['en'] = yearly_lang_html['en']+ selection_template.render_unicode(selection = yearly_lang_selection['en'][year]) if year in yearly_lang_selection['fr']: if len(yearly_lang_selection['fr'][year]) > 0: yearly_lang_html['fr'] = yearly_lang_html['fr'] + '''<h2 id="y%(year)s">%(year)s</h2>''' % {"year": year} yearly_lang_html['fr'] = yearly_lang_html['fr']+ selection_template.render_unicode(selection = yearly_lang_selection['fr'][year]) # 2. Preparation for ocation based archives / index locations = list() # list of all locations. Should be alphabetically sorted location_types = dict() # dictionary of all locations per type (Continent, Country, etc) loc_selection = dict() # dictionary of entries, per location loc_selection_html = dict() # HTML output for entry in entries: # this is a bit of a dance in order to make the catalog flexible. 
# it is possible for each catalog entry to have (or not have) values # for Continent, Country, State, Region, City or Location # and each of these may either be a string or a list if entry.continent != None: continents = list() if type(entry.continent) != type(list()): continents.append(entry.continent) else: continents = entry.continent for continent in continents: if continent not in locations: locations.append(continent) location_types[continent] = "Continent" loc_selection[continent] = list() loc_selection[continent].append(entry) if entry.country != None: countries = list() if type(entry.country) != type(list()): countries.append(entry.country) else: countries = entry.country for country in countries: if country not in locations: locations.append(country) location_types[country] = "Country" loc_selection[country] = list() loc_selection[country].append(entry) if entry.city != None: cities = list() if type(entry.city) != type(list()): cities.append(entry.city) else: cities = entry.city for city in cities: if city not in locations: locations.append(city) location_types[city] = "City" loc_selection[city] = list() loc_selection[city].append(entry) if entry.state != None: states = list() if type(entry.state) != type(list()): states.append(entry.state) else: states = entry.state for state in states: if state not in locations: locations.append(state) location_types[state] = "State" loc_selection[state] = list() loc_selection[state].append(entry) if entry.region != None: regions = list() if type(entry.region) != type(list()): regions.append(entry.region) else: regions = entry.region for region in regions: if region not in locations: locations.append(region) location_types[region] = "Region" loc_selection[region] = list() loc_selection[region].append(entry) if entry.location != None: locs = list() if type(entry.location) != type(list()): locs.append(entry.location) else: locs = entry.location for loc in locs: if loc not in locations: locations.append(loc) location_types[loc] = "Location" loc_selection[loc] = list() loc_selection[loc].append(entry) locations.sort() # 3. 
Generate individual entries from their MarkDown source for entry in entries: i=entries.index(entry) previous = entries[i-1] if i>0 else None # ignoring non-dated entry pages if previous: if previous.year == None: previous = None next = entries[i+1] if i<len(entries)-1 else None # ignoring non-dated entry pages if next: if next.year == None: next = None previous_html_block = selection_template.render_unicode(selection = [previous]) if previous else '' next_html_block = selection_template.render_unicode(selection = [next]) if next else '' mytemplate = mylookup.get_template("prevnext.html") if entry.year == None: # for contact page etc, non-dated stuff; no need for that nav prevnext_html = '' else: prevnext_html = mytemplate.render_unicode( previous_body= previous_html_block, next_body= next_html_block, page_language = entry.language ) mytemplate = mylookup.get_template("nearby.html") nearby_list = list() if entry.city != None: try: nearby_list= nearby_list+loc_selection[entry.city] nearby_list = sorted(set(nearby_list)) nearby_list.remove(entry) except: pass if entry.state != None and len(nearby_list) < 5: try: nearby_list= nearby_list+loc_selection[entry.state] nearby_list = sorted(set(nearby_list)) nearby_list.remove(entry) except: pass if entry.region != None and len(nearby_list) < 5: try: nearby_list= nearby_list+loc_selection[entry.region] nearby_list = sorted(set(nearby_list)) nearby_list.remove(entry) except: pass if entry.country != None and len(nearby_list) < 5: try: nearby_list= nearby_list+loc_selection[entry.country] nearby_list = sorted(set(nearby_list)) nearby_list.remove(entry) except: pass # After a bit of experimentation, going beyond Country feels to broad # if entry.continent != None and len(nearby_list) < 3: # try: # nearby_list= nearby_list+loc_selection[entry.continent] # nearby_list = sorted(set(nearby_list)) # nearby_list.remove(entry) # except: # pass if len(nearby_list)> 0: nearby_list_dedup = list() nearby_list_dedup_index = list() for nearby_entry in nearby_list: if nearby_entry.uri == entry.uri: pass elif nearby_entry.uri in nearby_list_dedup_index: pass else: nearby_list_dedup_index.append(nearby_entry.uri) nearby_list_dedup.append(nearby_entry) nearby_list = nearby_list_dedup if len(nearby_list)>5: nearby_list=random.sample(nearby_list, 5) nearby_html_block = selection_template.render_unicode(selection = nearby_list) if len(nearby_list)>0 else '' nearby_html = '' if nearby_list: nearby_html = mytemplate.render_unicode( nearby_body = nearby_html_block, page_language = entry.language ) source = entry.uri source = re.sub(r"^/", "", source) if re.search(r".*\.html$", source): dest = source source = re.sub(r"\.html$", ".md", source) elif re.search(r".*/$", source): dest = re.sub(r"$", "index.html", source) source = re.sub(r"$", "index.md", source) else: dest = source+".html" source = source+".md" source_fn = join(htdocs, source) page_html = markdown2.markdown_path(source_fn) page_html = page_html.replace("<p></div></p>", "</div>") # workaround for annoying markdown behavior entry.body = page_html dest_fn = join(htdocs, dest) dest_fh = open(dest_fn, "w") mytemplate = mylookup.get_template("page.html") tag_re = re.compile(r'(<!--.*?-->|<[^>]*>)') if entry.abstract: clean_abstract = tag_re.sub('', entry.abstract) clean_abstract = re.sub('[<>]', '', clean_abstract) dest_fh.write( mytemplate.render_unicode( body= page_html, title= entry.title, page_type="Page", page_description = clean_abstract, page_language = entry.language, prevnext_body = prevnext_html, nearby_body = 
nearby_html )) dest_fh.close() if re.search(r"index", source): rdf = re.sub(r"index\..*", "", source)+"meta.rdf" rdf_fn = join(htdocs, rdf) rdf_fh = open(rdf_fn, "w") mytemplate = mylookup.get_template("meta.rdf") rdf_fh.write( mytemplate.render_unicode( uri= entry.uri, title= entry.title) ) rdf_fh.close() # 4. Generate archives pages if private == False: # do not generate archives and indexes for private entries # 4.1 Generate full archive mytemplate = mylookup.get_template("index_all.html") index = open(join(htdocs, 'all.html.tmp'), 'w') index.write( mytemplate.render_unicode( yearly_entries=yearly_all_html, title='Archives', page_type="Index", page_description = "", page_intro = '', page_language = "", page_include_nav = 1 )) os.rename(join(htdocs, 'all.html.tmp'), join(htdocs, 'all.html')) # 4.1 Generate lang-based archive for lang in ['en', 'fr']: if lang == 'en': title= "Archives: in English" else: #fr title= "Archives: en Français" filename = "all_"+lang+'.html' filename_tmp = "all_"+lang+'.html.tmp' desc_template = mylookup.get_template("intro_"+lang+".html") desc_template.render_unicode() mytemplate = mylookup.get_template("index_all.html") index = open(join(htdocs, filename_tmp), 'w') index.write( mytemplate.render_unicode( yearly_entries=yearly_lang_html[lang], title= title, page_type="Index", page_description = '', page_intro = desc_template.render_unicode(), page_language = lang, page_include_nav = None ) ) os.rename(join(htdocs, filename_tmp), join(htdocs, filename)) # 4.2 Generate per-year archive pages for year in years: mytemplate = mylookup.get_template("index_generic.html") index = open(join(htdocs, str(year), 'index.html.tmp'), 'w') index.write( mytemplate.render_unicode( entries=yearly_selection_html[year], title='Archives: ' + str(year) , page_type="Index", intro = '', page_description = "", page_language = "" ) ) os.rename(join(htdocs, str(year), 'index.html.tmp'), join(htdocs, str(year), 'index.html')) # 4.3 Generate main /geo index geo_html = '' reverse_loc_bytype = dict() geo_index_template = mylookup.get_template("index_generic.html") geo_block_template = mylookup.get_template("list_location.html") for loctype in ['Continent', 'Country', 'Region', 'State', 'City', 'Location']: reverse_loc_bytype[loctype] = list() for loc_name in locations: loc_obj = otCMS.otCMSLocation() loc_obj.uri = "/geo/"+re.sub (" ", "_", loc_name.lower()) loc_obj.uri = re.sub (",", "", loc_obj.uri) loc_obj.name = loc_name loc_obj.count = len(loc_selection[loc_name]) reverse_loc_bytype[location_types[loc_name]].append(loc_obj) for loctype in ['Continent', 'Country', 'Region', 'State', 'City', 'Location']: geo_html = geo_html + geo_block_template.render_unicode( loctype= loctype, locations = reverse_loc_bytype[loctype] ) index = open(join(htdocs, "geo", 'index.html.tmp'), 'w') index.write( geo_index_template.render_unicode( entries= '', title='Archives: Around the world', page_type="Index", intro = geo_html, page_description = "", page_language = "" ) ) os.rename(join(htdocs, "geo", 'index.html.tmp'), join(htdocs, "geo", 'index.html')) # 4.4 Generate individual geo pages for loc_name in locations: loc = re.sub (" ", "_", loc_name.lower()) loc = re.sub (",", "", loc) loc_selection_html[loc] = selection_template.render_unicode(selection = loc_selection[loc_name]) mytemplate = mylookup.get_template("index_generic.html") index = open(join(htdocs, "geo", loc+'.html.tmp'), 'w') index.write( mytemplate.render_unicode( entries= loc_selection_html[loc], title='Entries in ' + location_types[loc_name] 
+": "+ loc_name, page_type="Index", intro = '', page_description = "", page_language = "" ) ) os.rename(join(htdocs, "geo", loc+'.html.tmp'), join(htdocs, "geo", loc+'.html')) # 5. Generate the Home Page latest_selection=entries[0:4] entries_featurable = list() for entry in entries: if entry.abstract != None and entry.thumbnail != None and entry not in latest_selection: entries_featurable.append(entry) random_selection=random.sample(entries_featurable, 4) latest_selection_html = selection_template.render_unicode(selection = latest_selection) random_selection_html = selection_template.render_unicode(selection = random_selection) title= "2 Neurones & 1 Camera - by @olivierthereaux" page_description = 'Travelogue, street photography, a bit of poetry, and the simple pleasure of telling stories. Around the world, from Europe to Japan, from Paris to London via Tokyo and Montreal' page_type = "Home" mytemplate = mylookup.get_template("index_main.html") index = open(join(htdocs, 'index.html.tmp'), 'w') index.write( mytemplate.render_unicode( latest_selection=latest_selection_html, random_selection=random_selection_html, title=title, page_description=page_description, page_type=page_type, page_language = "" ) ) index.close() os.rename(join(htdocs, 'index.html.tmp'), join(htdocs, 'index.html')) # 5. Generate the Atom Feed atom_selection=entries[0:20] fg = FeedGenerator() fg.id('tag:olivier.thereaux.net,2000:1337') fg.title('2 Neurones and 1 Camera') # fg.author( {'name':'Olivier Thereaux','uri':'http://olivier.thereaux.net/contact'} ) fg.author( {'name':'Olivier Thereaux'} ) fg.link( href='http://olivier.thereaux.net/', rel='alternate' ) fg.subtitle('Olivier Thereaux') fg.link( href='http://olivier.thereaux.net/atom.xml', rel='self' ) for entry in atom_selection: fe = fg.add_entry() fe.updated(entry.pubdate) entry_id = "http://olivier.thereaux.net"+entry.uri fe.published(entry.pubdate) fe.id(entry_id) fe.author( {'name':'Olivier Thereaux'} ) entry_link = {"rel": "alternate", "type":"text/html", "href": "http://olivier.thereaux.net"+entry.uri} fe.link(entry_link) fe.title(entry.title) if entry.abstract: fe.summary(entry.abstract) if entry.abstract: entry_content = '<p>%s</p>' % entry.abstract if entry.language == "fr": if entry.photos != None: entry_content =entry_content +'<p><a href="%s">À suivre / %s photos</a></p>' % (entry_id, entry.photos) else: entry_content =entry_content +'<p><a href="%s">À suivre</a></p>' % entry_id else: if entry.photos != None: entry_content =entry_content +'<p><a href="%s">À suivre / %s photos</a></p>' % (entry_id, entry.photos) else: entry_content =entry_content +'<p><a href="%s">À suivre</a></p>' % entry_id if entry.thumbnail: entry_thumbnail_big = entry.thumbnail entry_thumbnail_big = re.sub("tn/tn_", "tn/lg_", entry.thumbnail) entry_content =entry_content +'<img src="http://olivier.thereaux.net%s" width="500px" height="500px" />' % entry_thumbnail_big fe.content(entry_content, type="html") # entry.body_abs = entry.body # fe.content(entry.body,type="html") atom_xml = fg.atom_str(pretty=True).decode("utf-8") # Nasty Hack to add a type=html property to the summary element ... atom_fh = open(join(htdocs, 'atom.xml.tmp'), "w") # Write the ATOM feed to a file atom_fh.write(atom_xml) atom_fh.close() os.rename(join(htdocs, 'atom.xml.tmp'), join(htdocs, 'atom.xml'))
import unittest

from lxml import etree

from feedgen.feed import FeedGenerator


class TestExtensionMedia(unittest.TestCase):
    def setUp(self):
        self.fg = FeedGenerator()
        self.fg.load_extension('media')
        self.fg.id('id')
        self.fg.title('title')
        self.fg.link(href='http://example.com', rel='self')
        self.fg.description('description')

    def test_media_content(self):
        fe = self.fg.add_item()
        fe.id('id')
        fe.title('title')
        fe.content('content')
        fe.media.content(url='file1.xy')
        fe.media.content(url='file2.xy')
        fe.media.content(url='file1.xy', group=2)
        fe.media.content(url='file2.xy', group=2)
        fe.media.content(url='file.xy', group=None)

        ns = {'media': 'http://search.yahoo.com/mrss/',
              'a': 'http://www.w3.org/2005/Atom'}
        # Check that we have the item in the resulting RSS
        root = etree.fromstring(self.fg.rss_str())
        url = root.xpath('/rss/channel/item/media:group/media:content[1]/@url',
                         namespaces=ns)
        assert url == ['file1.xy', 'file1.xy']
        # There is one without a group
        url = root.xpath('/rss/channel/item/media:content[1]/@url',
                         namespaces=ns)
        assert url == ['file.xy']
        # Check that we have the item in the resulting Atom feed
        root = etree.fromstring(self.fg.atom_str())
        url = root.xpath('/a:feed/a:entry/media:group/media:content[1]/@url',
                         namespaces=ns)
        assert url == ['file1.xy', 'file1.xy']
        fe.media.content(content=[], replace=True)
        assert fe.media.content() == []

    def test_media_thumbnail(self):
        fe = self.fg.add_item()
        fe.id('id')
        fe.title('title')
        fe.content('content')
        fe.media.thumbnail(url='file1.xy')
        fe.media.thumbnail(url='file2.xy')
        fe.media.thumbnail(url='file1.xy', group=2)
        fe.media.thumbnail(url='file2.xy', group=2)
        fe.media.thumbnail(url='file.xy', group=None)

        ns = {'media': 'http://search.yahoo.com/mrss/',
              'a': 'http://www.w3.org/2005/Atom'}
        # Check that we have the item in the resulting RSS
        root = etree.fromstring(self.fg.rss_str())
        url = root.xpath('/rss/channel/item/media:group/media:thumbnail[1]/@url',
                         namespaces=ns)
        assert url == ['file1.xy', 'file1.xy']
        # There is one without a group
        url = root.xpath('/rss/channel/item/media:thumbnail[1]/@url',
                         namespaces=ns)
        assert url == ['file.xy']
        # Check that we have the item in the resulting Atom feed
        root = etree.fromstring(self.fg.atom_str())
        url = root.xpath('/a:feed/a:entry/media:group/media:thumbnail[1]/@url',
                         namespaces=ns)
        assert url == ['file1.xy', 'file1.xy']
        fe.media.thumbnail(thumbnail=[], replace=True)
        assert fe.media.thumbnail() == []
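For readers unfamiliar with the extension under test, a minimal standalone sketch of producing a feed with a media:content element follows; the URLs and titles are placeholders of mine, and the calls mirror the ones exercised by the tests above:

from feedgen.feed import FeedGenerator

fg = FeedGenerator()
fg.load_extension('media')          # adds fe.media helpers to all entries
fg.id('http://example.com/feed')
fg.title('Media demo')
fg.link(href='http://example.com/feed', rel='self')
fg.description('Demo feed with a media:content element')

fe = fg.add_entry()
fe.id('http://example.com/item/1')
fe.title('An item with an attachment')
fe.media.content(url='http://example.com/video.mp4')

print(fg.rss_str(pretty=True).decode('utf-8'))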
def atom(dat, **cfg):
    "Return XML text of an Atom feed file"
    fg = FeedGenerator()
    siteroot = cfg['siteroot']
    feed_base_id = cfg['feed_base_id']
    feed_path = cfg['feed_path']
    if feed_path.startswith('/'):   # remove any leading slash
        feed_path = feed_path[1:]
    feedurl = os.path.join(siteroot, feed_path)
    fg.id(feed_base_id + '/' + feed_path)
    fg.title(cfg['title'])
    fg.author(name=cfg['author'], email=cfg['email'])
    fg.link(href=cfg['siteroot'], rel='alternate')
    fg.link(href=feedurl, rel='self')
    fg.language('en')
    logo = cfg.get('feed_logo_url', None)
    if logo:
        fg.logo(logo)
    subtitle = cfg.get('feed_subtitle', None) or cfg.get('subtitle', None)
    if subtitle:
        fg.subtitle(subtitle)
    content_type = cfg.get('feed_content_type', 'link')

    for org, meta in zip(dat['org'], dat['meta']):
        fe = fg.add_entry()
        fe.id(feed_base_id + '/' + org['path'])
        title = meta.get('title')
        if not title:
            raise RuntimeError('Org file with no title "%s"'
                               % os.path.join(org['path'], org['name']))
        fe.title(title)
        description = meta.get('description', None)
        if description:
            fe.summary(description)
        category = meta.get('category', None)
        if category:
            fe.category(term=category)
        created = meta.get('timestamp', None)
        if created:
            fe.published(created)
        else:
            created = meta['created']
            fe.published(created.isoformat() + '-00:00')
        revised = meta['revised']
        fe.updated(revised.isoformat() + '-00:00')
        kwds = dict(cfg)
        kwds.update(org)
        kwds.update(meta)
        link = cfg['feed_content_link'].format(**kwds)
        fe.link(href=link, rel="alternate")
        if content_type == 'link':
            fe.content(src=link)
        if content_type in ['text', 'org']:
            fe.content(type="text", content=org['text'])
        if content_type in ['plain', 'ascii']:
            fe.content(type="text", content=org['plain'])
        if content_type in ['html', 'body']:
            fe.content(type="html", content=org['body'])
    return fg.atom_str(pretty=True)
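A hypothetical call to atom() above; every value here is invented for illustration, but the cfg keys and the dat layout mirror exactly what the function reads:

import datetime

cfg = {
    'siteroot': 'https://example.org/',
    'feed_base_id': 'tag:example.org,2020',
    'feed_path': 'feed.atom',
    'title': 'Example notes',
    'author': 'Jane Doe',
    'email': 'jane@example.org',
    'feed_content_link': '{siteroot}{path}.html',
    'feed_content_type': 'link',
}
dat = {
    'org': [{'path': 'notes/hello', 'name': 'hello.org',
             'text': '', 'plain': '', 'body': ''}],
    'meta': [{'title': 'Hello world', 'description': 'A first note',
              'created': datetime.datetime(2020, 1, 1),
              'revised': datetime.datetime(2020, 1, 2)}],
}
print(atom(dat, **cfg))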
def _build_feed(id, lang, url, return_type=None):
    fg = FeedGenerator()
    fg.id(url)
    fg.link(href=url, rel='self')

    # Get the series metadata
    req = urllib.request.Request('%s://%s:%i%sview/series/%s?with_name=true' % (
        app.config['LERNFUNK_CORE_PROTOCOL'],
        app.config['LERNFUNK_CORE_HOST'],
        app.config['LERNFUNK_CORE_PORT'],
        app.config['LERNFUNK_CORE_PATH'],
        id))
    req.add_header('Accept', 'application/json')
    u = urllib.request.urlopen(req)
    try:
        series = json.loads(u.read())
    finally:
        u.close()
    s = series['result']['lf:series'][0]
    fg.title(s['dc:title'])
    fg.language(s['dc:language'])
    for cat in s['dc:subject']:
        fg.category(term=cat.lower(), label=cat)
    fg.description(s['dc:description'] or s['dc:title'])
    for name in s.get('lf:creator') or ['']:
        fg.author(name=name)

    # Get the media
    req = urllib.request.Request('%s://%s:%i%sview/series/%s/media/%s%s' % (
        app.config['LERNFUNK_CORE_PROTOCOL'],
        app.config['LERNFUNK_CORE_HOST'],
        app.config['LERNFUNK_CORE_PORT'],
        app.config['LERNFUNK_CORE_PATH'],
        id,
        lang or '',
        '?with_file=1&with_name=1'))
    req.add_header('Accept', 'application/json')
    u = urllib.request.urlopen(req)
    try:
        media_data = json.loads(u.read())
    finally:
        u.close()

    # Add the media to the feed
    is_av = lambda x: x.startswith('video') or x.startswith('audio')
    for media in media_data['result']['lf:media']:
        fe = fg.add_entry()
        fe.id('%s/%s/%s' % (url, media['dc:identifier'], media['lf:version']))
        fe.title(media['dc:title'])
        for name in media.get('lf:creator') or ['']:
            fe.author(name=name)
            fg.contributor(name=name)
        for name in media.get('lf:contributor') or []:
            fe.contributor(name=name)
            fg.contributor(name=name)
        fe.content(media['dc:description'])
        for file in media['lf:file']:
            fe.link(
                href=file['lf:uri'],
                rel=('enclosure' if is_av(file['dc:format']) else 'alternate'),
                type=file['dc:format'])
        fe.published(media['dc:date'] + ' +0')

    rssfeed = fg.rss_str(pretty=False)
    atomfeed = fg.atom_str(pretty=False)

    # Podcast specific values
    fg.load_extension('podcast')
    podcast = fg.rss_str(pretty=False)

    # Cache the generated feeds in Redis
    r_server = get_redis()
    r_server.set('%slast_update_%s_%s' % (REDIS_NS, id, lang), int(time.time()))
    r_server.set('%srss_%s_%s' % (REDIS_NS, id, lang), rssfeed)
    r_server.set('%satom_%s_%s' % (REDIS_NS, id, lang), atomfeed)
    r_server.set('%spodcast_%s_%s' % (REDIS_NS, id, lang), podcast)

    if return_type == 'rss':
        return rssfeed
    if return_type == 'atom':
        return atomfeed
    if return_type == 'podcast':
        return podcast
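The function caches all three serializations, which implies a read-through lookup on the serving side. A sketch of that read path under the same key scheme; the helper below is my assumption, not part of the original module, and it reuses get_redis() and REDIS_NS from the surrounding code:

def cached_feed(id, lang, url, return_type='atom'):
    """Serve a cached feed if present, rebuilding it on a cache miss."""
    r_server = get_redis()
    feed = r_server.get('%s%s_%s_%s' % (REDIS_NS, return_type, id, lang))
    if feed is None:
        feed = _build_feed(id, lang, url, return_type=return_type)
    return feed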
import feedparser
from feedgen.feed import FeedGenerator

out_feed = FeedGenerator()
url = 'http://www.courantpositif.fr/feed/'
d = feedparser.parse(url)
print("~~ %s ~~" % d.feed.title)
out_feed.title(d.feed.title)
out_feed.subtitle(d.feed.subtitle)
out_feed.id(d.feed.get("id", "no id"))
out_feed.updated(d.feed.updated)
for e in d.entries:
    print(" * [%s] %s" % (e.published, e.title))
    out_entry = out_feed.add_entry()
    out_entry.title(e.title)
    out_entry.published(e.published)
    out_entry.updated(e.updated)
    out_entry.id(e.id)
    out_entry.summary(e.summary)
    for c in e.content:
        out_entry.content(content=c.value, type=c.type)  # , src=c.base
    for l in e.links:
        print("   > [%s] %s" % (l.rel, l.href))
        out_entry.link(link=l)

print("\n\n%s" % out_feed.atom_str(pretty=True).decode('utf-8'))
def main():
    if len(sys.argv) != 2 or not (
            sys.argv[1].endswith('rss') or
            sys.argv[1].endswith('atom') or
            sys.argv[1] == 'torrent' or
            sys.argv[1] == 'podcast'):
        print(USAGE)
        exit()

    arg = sys.argv[1]

    fg = FeedGenerator()
    fg.id('http://lernfunk.de/_MEDIAID_123')
    fg.title('Testfeed')
    fg.author({'name': 'Lars Kiesow', 'email': '*****@*****.**'})
    fg.link(href='http://example.com', rel='alternate')
    fg.category(term='test')
    fg.contributor(name='Lars Kiesow', email='*****@*****.**')
    fg.contributor(name='John Doe', email='*****@*****.**')
    fg.icon('http://ex.com/icon.jpg')
    fg.logo('http://ex.com/logo.jpg')
    fg.rights('cc-by')
    fg.subtitle('This is a cool feed!')
    fg.link(href='http://larskiesow.de/test.atom', rel='self')
    fg.language('de')

    fe = fg.add_entry()
    fe.id('http://lernfunk.de/_MEDIAID_123#1')
    fe.title('First Element')
    fe.content('''Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen
        aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si ista
        mala sunt, placet. Aut etiam, ut vestitum, sic sententiam habeas aliam
        domesticam, aliam forensem, ut in fronte ostentatio sit, intus veritas
        occultetur? Cum id fugiunt, re eadem defendunt, quae Peripatetici,
        verba.''')
    fe.summary(u'Lorem ipsum dolor sit amet, consectetur adipiscing elit…')
    fe.link(href='http://example.com', rel='alternate')
    fe.author(name='Lars Kiesow', email='*****@*****.**')

    if arg == 'atom':
        print_enc(fg.atom_str(pretty=True))
    elif arg == 'rss':
        print_enc(fg.rss_str(pretty=True))
    elif arg == 'podcast':
        # Load the podcast extension. It will automatically be loaded for all
        # entries in the feed, too. Thus also for our “fe”.
        fg.load_extension('podcast')
        fg.podcast.itunes_author('Lars Kiesow')
        fg.podcast.itunes_category('Technology', 'Podcasting')
        fg.podcast.itunes_explicit('no')
        fg.podcast.itunes_complete('no')
        fg.podcast.itunes_new_feed_url('http://example.com/new-feed.rss')
        fg.podcast.itunes_owner('John Doe', '*****@*****.**')
        fg.podcast.itunes_summary('Lorem ipsum dolor sit amet, consectetur '
                                  'adipiscing elit. Verba tu fingas et ea '
                                  'dicas, quae non sentias?')
        fe.podcast.itunes_author('Lars Kiesow')
        print_enc(fg.rss_str(pretty=True))
    elif arg == 'torrent':
        fg.load_extension('torrent')
        fe.link(href='http://example.com/torrent/debian-8-netint.iso.torrent',
                rel='alternate',
                type='application/x-bittorrent, length=1000')
        fe.torrent.filename('debian-8.4.0-i386-netint.iso.torrent')
        fe.torrent.infohash('7661229811ef32014879ceedcdf4a48f256c88ba')
        fe.torrent.contentlength('331350016')
        fe.torrent.seeds('789')
        fe.torrent.peers('456')
        fe.torrent.verified('123')
        print_enc(fg.rss_str(pretty=True))
    elif arg.startswith('dc.'):
        fg.load_extension('dc')
        fg.dc.dc_contributor('Lars Kiesow')
        if arg.endswith('.atom'):
            print_enc(fg.atom_str(pretty=True))
        else:
            print_enc(fg.rss_str(pretty=True))
    elif arg.startswith('syndication'):
        fg.load_extension('syndication')
        fg.syndication.update_period('daily')
        fg.syndication.update_frequency(2)
        fg.syndication.update_base('2000-01-01T12:00+00:00')
        if arg.endswith('.rss'):
            print_enc(fg.rss_str(pretty=True))
        else:
            print_enc(fg.atom_str(pretty=True))
    elif arg.endswith('atom'):
        fg.atom_file(arg)
    elif arg.endswith('rss'):
        fg.rss_file(arg)
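The snippet defines main() but never invokes it; to run it as a script one would add the conventional entry-point guard. The file name in the sample invocations is illustrative:

if __name__ == '__main__':
    # python feedtest.py atom      -> print an Atom feed
    # python feedtest.py podcast   -> print RSS with iTunes podcast tags
    # python feedtest.py out.atom  -> write the feed to the file out.atom
    main()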
def generate_feeds(chat):
    print("Processing: %s" % chat['title'])
    chatid = chat['id']
    cur.execute("""
        SELECT author, body_xml, chatname,
               datetime(timestamp, 'unixepoch', 'localtime') AS timestamp,
               datetime(edited_timestamp, 'unixepoch', 'localtime') AS edited_timestamp,
               fullname
        FROM messages m
        LEFT OUTER JOIN Contacts c ON m.author = c.skypename
        WHERE datetime(timestamp, 'unixepoch', 'localtime') > date(?)
          AND chatname = ?
          AND body_xml NOT NULL
        ORDER BY timestamp DESC;
    """, (start_date, chatid))

    messages = []
    fg = FeedGenerator()
    fg.id('https://chats.fhir.me/feeds/skype/%s.atom' % chat['slug'])
    fg.link(href='https://chats.fhir.me/feeds/skype/%s.atom' % chat['slug'], rel='self')
    fg.link(href='https://chats.fhir.me/feeds/skype/%s.json' % chat['slug'], rel='alternate')
    fg.link(href='https://chats.fhir.me/feeds/skype/%s.html' % chat['slug'], rel='alternate')
    fg.link(href='urn:skypechat:%s' % chatid, rel='related')
    fg.title('FHIR Skype Chat: %s' % chat['title'])
    fg.author({'name': 'FHIR Core Team', 'email': '*****@*****.**'})
    fg.language('en')

    for praw in cur.fetchall():
        p = dict(praw)
        p['timestamp'] = p['timestamp'] + 'Z'
        if p['edited_timestamp']:
            p['edited_timestamp'] = p['edited_timestamp'] + 'Z'
        authorname = p['fullname'] or p['author']
        # Stable per-message id derived from author and timestamp
        m = hashlib.md5()
        m.update(json.dumps({'author': p['author'],
                             'timestamp': p['timestamp']}).encode('utf-8'))
        chathash = m.hexdigest()
        body = escape(p['body_xml'])
        body = re.sub("\n", "\n<br/>", body)
        updated = p['edited_timestamp'] or p['timestamp']
        messages.append({
            'skypename': p['author'],
            'author': authorname,
            'timestamp': p['timestamp'],
            'updated': updated,
            'body': strip_tags(unescape(body))
        })
        fe = fg.add_entry()
        fe.id('https://chats.fhir.me/feeds/skype/%s/messages/%s' % (chat['slug'], chathash))
        fe.author({'name': authorname, 'uri': 'urn:skypename:%s' % p['author']})
        fe.title('Message from %s' % authorname)
        fe.published(p['timestamp'])
        fe.updated(updated)
        fe.content(body, type="html")

    for d in [["feeds"], ["feeds", "skype"]]:
        try:
            os.mkdir(os.path.join(options.output_dir, *d))
        except OSError:
            pass
    chat_path = os.path.join(options.output_dir, "feeds", "skype", chat['slug'])
    with codecs.open(chat_path + '.atom', "w", "utf-8") as fo:
        fo.write(fg.atom_str(pretty=True).decode('utf-8'))
    with codecs.open(chat_path + '.json', "w", "utf-8") as fo:
        fo.write(json.dumps(feed_to_json(fg), indent=2))
    with codecs.open(chat_path + '.html', "w", "utf-8") as fo:
        fo.write(page.render({
            'chat_name': chat['title'],
            'messages': messages,
            'slug': chat['slug'],
            'other_chats': chats
        }))
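strip_tags() and unescape() are used above but not shown; minimal stand-ins built on the standard library (assumptions of mine, not the original helpers) could look like:

import html
import re

def unescape(text):
    # reverse HTML entity escaping (&amp; -> &, etc.)
    return html.unescape(text)

def strip_tags(text):
    # drop anything that looks like a markup tag
    return re.sub(r'<[^>]+>', '', text)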
async def generate_nhk_easy_news_feed(
    entry_count=ENTRY_COUNT,
    return_content_only=False,
):
    fg = FeedGenerator()
    fg.id('https://www.reddit.com/r/NHKEasyNews')
    fg.title('NHK Easy News')
    fg.language('ja')

    feed = feedparser.parse(
        'https://www.reddit.com/r/NHKEasyNews.rss?limit={}'.format(entry_count))

    reddit = praw.Reddit(
        client_id=settings.REDDIT_CLIENT_ID,
        client_secret=settings.REDDIT_CLIENT_SECRET,
        username=settings.REDDIT_CLIENT_USERNAME,
        password=settings.REDDIT_CLIENT_PASSWORD,
        user_agent='Manabi Reader',
    )

    entries = []
    for post in reversed(feed.entries):
        if 'discord server' in post.title.lower():
            continue

        reddit_content = post.content[0].value
        nhk_url_match = re.search(
            r'(http://www3.nhk.or.jp/news/easy/.*?\.html)', reddit_content)
        if nhk_url_match is None:
            continue
        nhk_url = nhk_url_match.group()

        entry = None
        for attempt in range(ATTEMPTS_PER_ENTRY):
            session = AsyncHTMLSession()
            try:
                r = await session.get(nhk_url, timeout=60)
                await r.html.arender(keep_page=True)
                try:
                    entry = await _process_and_add_entry(
                        post, nhk_url, r, fg, reddit)
                except NoArticleBodyError:
                    if attempt < ATTEMPTS_PER_ENTRY - 1:
                        continue  # retry with a fresh session
                    raise
            finally:
                # close the session even on retry, so rendered pages don't leak
                await session.close()
            break
        if entry is None:
            continue
        entries.append(entry)

    if return_content_only:
        html = ''
        for entry in reversed(entries):
            title = entry.title()
            content = entry.content()['content']
            html += f'<h1>{title}</h1>{content}'
        return html

    if len(fg.entry()) == 0:
        raise Exception("Generated zero feed entries from NHK Easy News.")

    return fg.atom_str(pretty=True, encoding='utf-8')
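The generator is a coroutine, so a synchronous caller has to drive it through an event loop; a minimal sketch (the output file name is mine):

import asyncio

atom_bytes = asyncio.run(generate_nhk_easy_news_feed(entry_count=10))
# atom_str(..., encoding='utf-8') returns bytes, so write in binary mode
with open('nhk_easy.atom', 'wb') as fh:
    fh.write(atom_bytes)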