Code example #1
def feed():
    # Entries are added backwards
    articles = BlogPost.query.order_by(asc(BlogPost.added)).all()

    fg = FeedGenerator()
    fg.title("Mempool | Satoshi Nakamoto Institute")
    fg.id("https://nakamotoinstitute.org/mempool/feed/")
    fg.updated(date_to_localized_datetime(articles[0].added))
    fg.link(href="https://nakamotoinstitute.org")
    fg.link(href="https://nakamotoinstitute.org/mempool/feed/", rel="self")
    fg.language("en")

    for article in articles:
        url = url_for("mempool.detail", slug=article.slug, _external=True)
        page = pages.get(f"mempool/{article.slug}")

        fe = fg.add_entry()
        fe.id(url)
        fe.title(article.title)
        fe.link(href=url)
        fe.updated(date_to_localized_datetime(article.added))
        fe.published(date_to_localized_datetime(article.date))
        fe.author(name=str(article.author[0]))
        fe.content(page.html)

    response = make_response(fg.atom_str(encoding="utf-8", pretty=True))
    response.headers.set("Content-Type", "application/atom+xml")
    return response
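Distilled from the example above: the recurring feedgen pattern throughout this collection is to create a FeedGenerator, set the fields an Atom feed needs (id, title, updated, plus links), add entries, and serialize. A minimal, framework-free sketch; the URLs, titles, and author below are placeholders rather than values from any of the projects cited here:

from datetime import datetime, timezone

from feedgen.feed import FeedGenerator

fg = FeedGenerator()
fg.id("https://example.com/feed/")                   # placeholder feed id
fg.title("Example Feed")                             # placeholder title
fg.author({"name": "Example Author"})                # placeholder author
fg.link(href="https://example.com/", rel="alternate")
fg.link(href="https://example.com/feed/", rel="self")
fg.updated(datetime.now(timezone.utc))               # feedgen expects timezone-aware datetimes

fe = fg.add_entry()
fe.id("https://example.com/posts/1")                 # placeholder entry id
fe.title("First post")
fe.link(href="https://example.com/posts/1")
fe.updated(datetime.now(timezone.utc))

atom_bytes = fg.atom_str(pretty=True)                # returns bytes; decode() before printing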
Code example #2
File: gemfeed.py | Project: loopdreams/blogscripts
def build_feed(directory,
               time_func,
               base_url,
               output="atom.xml",
               n=10,
               title="",
               subtitle="",
               author="",
               email="",
               verbose=False):
    """
    Build an Atom feed for all world readable Gemini files in the current
    directory, and write it to atom.xml.
    """
    # If a title hasn't been provided, try to get one from an index page
    if not title:
        title = get_feed_title(directory)

    # Let user know feed title and URL
    feed_url = urljoin(base_url, output)
    if verbose:
        print('Generating feed "{}", which should be served from {}'.format(
            title, feed_url))

    # Setup feed
    feed = FeedGenerator()
    feed.id(base_url)
    feed.title(title)
    if subtitle:
        feed.subtitle(subtitle)
    author_details = {}
    if author:
        author_details["name"] = author
    if email:
        author_details["email"] = email
    if author_details:
        feed.author(author_details)
    feed.link(href=feed_url, rel='self')
    feed.link(href=base_url, rel='alternate')

    # Add one entry per .gmi file
    files = find_files(directory, time_func, n)
    if not files:
        if verbose:
            print("No world-readable Gemini content found! :(")
        return
    for i, filename in enumerate(files):
        entry = feed.add_entry()
        populate_entry_from_file(filename, base_url, entry, time_func)
        if i == 0:
            feed.updated(entry.updated())
        if verbose:
            print("Adding {} with title '{}'...".format(
                os.path.basename(filename), entry.title()))

    # Write file
    output = os.path.join(directory, output)
    feed.atom_file(output, pretty=True)
    if verbose:
        print("Wrote Atom feed to {}.".format(output))
Code example #3
File: blog.py | Project: matrufsc2/matrufsc2
def get_feed(atom=False):
    fg = FeedGenerator()
    domain = get_domain()
    items = get_posts({"limit": "10"}, full=True)["results"]
    fg.id("http://%s/"%domain)
    fg.title("Blog do MatrUFSC2")
    fg.description("Feed do blog do MatrUFSC2, onde noticias e novos recursos sao anunciados primeiro!")
    fg.language('pt-BR')
    fg.link({"href":"/blog/feed","rel":"self"})
    fg.updated(items[0]["posted_at"].replace(tzinfo=pytz.UTC))
    for item in items:
        entry = fg.add_entry()
        entry.title(item["title"])

        tree = html.fromstring(item["summary"])
        cleaner = Cleaner(allow_tags=[])
        tree = cleaner.clean_html(tree)

        text = tree.text_content()
        entry.description(text, True)
        entry.link({"href":item["link"],"rel":"self"})
        entry.content(item["body"])
        entry.published(item["posted_at"].replace(tzinfo=pytz.UTC))
        entry.updated(item["posted_at"].replace(tzinfo=pytz.UTC))
        entry.category({"label": item["category"]["title"], "term": item["category"]["slug"]})
        entry.id(item["id"])
    if atom:
        return fg.atom_str(pretty=True)
    else:
        return fg.rss_str(pretty=True)
Code example #4
File: generate.py | Project: nishad/peterdowns.com
def write_feeds(posts):
    g = FeedGenerator()
    g.id('http://peterdowns.com/blog')
    g.link(href='http://peterdowns.com/blog')
    g.description('incredibly on-brand')
    g.title(u'Peter Downs — Posts')
    first_post = posts[0]
    updated = first_post['updated'] or first_post['date']
    g.updated(_utcstrp(updated, _time_fmt))

    for post in posts:
        e = g.add_entry()
        post_url = os.path.join(_blog_root, post['html_path'])
        e.id(post_url)
        e.link(href=post_url, rel='alternate')
        e.title(post['title'])
        e.author(name=post['author'][0])
        e.published(_utcstrp(post['date'], _time_fmt))
        if post['updated'] is None:
            e.updated(e.published())
        else:
            e.updated(_utcstrp(post['updated'], _time_fmt))

    g.atom_file('{}/atom.xml'.format(_feeds_dir))
    g.rss_file('{}/rss.xml'.format(_feeds_dir))
Code example #5
def build_xml_feed(allchapters, verbose=True):

    if verbose:
        print()
        print("Generating feeds...")

    if len(allchapters) == 0: raise CRMangaFeedException("Empty chapter list")

    crtz = pytz.timezone('America/New_York')

    fg = FeedGenerator()
    fg.id('http://utils.senpai.moe/')
    fg.title('Crunchyroll Manga - Latest Chapters (Unofficial)')
    fg.author({'name': 'Nosgoroth', 'email': '*****@*****.**'})
    fg.link(href='http://utils.senpai.moe/')
    fg.subtitle(
        'Latest manga chapters, updated daily, using undocumented API.')
    fg.language('en')
    fg.ttl(15)

    allchapters = sorted(allchapters,
                         key=itemgetter('updated_t'),
                         reverse=True)

    first = allchapters[0]["updated_t"].replace(tzinfo=crtz)
    fg.updated(first)
    fg.lastBuildDate(first)

    for chapter in allchapters[0:100]:
        fe = fg.add_entry()
        fe.id(chapter["url"])
        fe.link({
            "href": chapter["url"],
            "rel": "alternate",
            "title": "Read online"
        })
        fe.title("%s - %s" % (chapter["series"], chapter["name"]))
        fe.summary("<p>%s has been added to %s in Crunchyroll Manga.</p>" %
                   (chapter["name"], chapter["series"]))
        fe.published(chapter["updated_t"].replace(tzinfo=crtz))

        chapter_serial = chapter.copy()
        chapter_serial.pop("updated_t", None)
        chapter_serial.pop("url", None)
        chapter_serial.pop("thumb", None)
        chapter_serial["chapter_id"] = chapter_serial["guid"]
        chapter_serial.pop("guid", None)

        content = "<p>%s has been added to %s in Crunchyroll Manga.</p><p>Updated: %s</p><img src=\"%s\" />" % (
            chapter["name"], chapter["series"], chapter["updated"],
            chapter["thumb"])
        content += "<!--JSON:[[%s]]-->" % json.dumps(chapter_serial)
        fe.content(content)

    fg.rss_file(os.path.join(DESTINATION_FOLDER, 'updates_rss.xml'),
                pretty=DEBUG)  # Write the RSS feed to a file
    fg.atom_file(os.path.join(DESTINATION_FOLDER, 'updates_atom.xml'),
                 pretty=DEBUG)  # Write the ATOM feed to a file
Code example #6
File: github-rss.py | Project: billryan/github-rss
 def init_fg(self, repo_info):
     fg = FeedGenerator()
     title = 'Recent commits to ' + repo_info['full_name']
     fg.title(title)
     fg.link(href=repo_info['html_url'])
     fg.updated(repo_info['updated_at'])
     fg.id(repo_info['html_url'])
     fg.author(repo_info['author'])
     return fg
Code example #7
File: atom.py | Project: arXiv/arxiv-search
    def _get_feed(cls,
                  query: Optional[ClassicAPIQuery] = None) -> FeedGenerator:
        fg = FeedGenerator()
        fg.generator("")
        fg.register_extension("opensearch", OpenSearchExtension)
        fg.register_extension("arxiv",
                              ArXivExtension,
                              ArXivEntryExtension,
                              rss=False)

        if query:
            if query.phrase is not None:
                query_string = phrase_to_query_string(query.phrase)
            else:
                query_string = ""

            if query.id_list:
                id_list = ",".join(query.id_list)
            else:
                id_list = ""

            fg.title(f"arXiv Query: {query.to_query_string()}")

            # From the Perl documentation of the old site:
            # search_id is calculated by taking the SHA-1 digest of the query
            # string. The digest is in bytes form and is 20 bytes long. It is
            # then base64 encoded, but Perl's version returns only 27
            # characters - it omits the `=` sign at the end.
            search_id = base64.b64encode(
                hashlib.sha1(query.to_query_string().encode(
                    "utf-8")).digest()).decode("utf-8")[:-1]
            fg.id(
                cls._fix_url(
                    url_for("classic_api.query").replace(
                        "/query", f"/{search_id}")))

            fg.link({
                "href":
                cls._fix_url(
                    url_for(
                        "classic_api.query",
                        search_query=query_string,
                        start=query.page_start,
                        max_results=query.size,
                        id_list=id_list,
                    )),
                "type":
                "application/atom+xml",
            })
        else:
            # TODO: Discuss better defaults
            fg.title("arXiv Search Results")
            fg.id("https://arxiv.org/")

        fg.updated(to_utc(datetime.utcnow()))
        return fg
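The search_id construction described in the comment above checks out arithmetically: a SHA-1 digest is 20 bytes, base64 of 20 bytes is 28 characters ending in a single '=' of padding, and dropping that padding leaves the 27-character id. A standalone sketch of the same computation; the query string is illustrative only:

import base64
import hashlib

query_string = "all:electron"  # illustrative query string, not taken from the example above

digest = hashlib.sha1(query_string.encode("utf-8")).digest()   # 20 raw bytes
search_id = base64.b64encode(digest).decode("utf-8")[:-1]      # 28 base64 chars minus the trailing '='
assert len(search_id) == 27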
Code example #8
class Box:
    def __init__(self, config=None):
        if config is None:
            config = {}
        self.fg = FeedGenerator()
        self.fg.title(config['title'] if 'title' in config else 'PyFeed')
        self.fg.description(config['description'] if 'description' in config else 'github.com/BugenZhao/PyFeed')
        self.fg.author({'name': 'Bugen Zhao', 'email': '*****@*****.**'})
        self.fg.link(href=config['url'] if 'url' in config else 'github.com/BugenZhao/PyFeed')
        self.fg.language('zh-CN')

        self.dict = {}
        self.version = 'v0.3'

    def add_article(self, article: Article):
        fe = self.fg.add_entry()
        fe.id(article.link)
        fe.link(href=article.link)
        fe.title(article.title)
        fe.description(article.description)
        fe.pubDate(article.date.replace(tzinfo=gettz("Asia/Shanghai")))

        self.fg.updated()

    def update_article(self, article: Article):
        fe = FeedEntry()
        fe.id(article.link)
        fe.link(href=article.link)
        fe.title(article.title)
        fe.description(article.description)
        fe.pubDate(article.date.replace(tzinfo=gettz("Asia/Shanghai")))
        self.fg.entry(fe, replace=True)

        self.fg.updated()

    def article(self, article: Article):
        if article.link not in self.dict:
            logging.info('New article: ' + article.title)
            self.add_article(article)
            self.dict[article.link] = article
        elif article != self.dict[article.link]:
            logging.info('Update article: ' + article.title)
            self.update_article(article)
            self.dict[article.link] = article
        else:
            logging.info('Article already existed: ' + article.title)

    def rss_file(self, filename):
        self.fg.rss_file(filename, pretty=True)

    def size(self):
        return len(self.dict)
Code example #9
def news_feed():
    feed = FeedGenerator()
    feed.id("https://jazzband.co/news/feed")
    feed.link(href="https://jazzband.co/", rel="alternate")
    feed.title("Jazzband News Feed")
    feed.subtitle("We are all part of this.")
    feed.link(href=full_url(request.url), rel="self")

    # the list of updates of all news for setting the feed's updated value
    updates = []

    for page in news_pages:
        if page.path == "index":
            continue

        # make the datetime timezone aware if needed
        published = page.meta.get("published", None)
        if published and published.tzinfo is None:
            published = pytz.utc.localize(published)
        updated = page.meta.get("updated", published)
        if updated:
            if updated.tzinfo is None:
                updated = pytz.utc.localize(updated)
            updates.append(updated)

        summary = page.meta.get("summary", None)
        author = page.meta.get("author", None)
        author_link = page.meta.get("author_link", None)
        url = full_url(url_for("content.news", path=page.path))

        entry = feed.add_entry()
        entry.id(url)
        entry.title(page.meta["title"])
        entry.summary(summary)
        entry.content(content=str(page.html), type="html")

        if author is not None:
            author = {"name": author}
            if author_link is not None:
                author["uri"] = author_link
            entry.author(author)

        entry.link(href=url)
        entry.updated(updated)
        entry.published(published)

    sorted_updates = sorted(updates)
    feed.updated(sorted_updates and sorted_updates[-1] or datetime.utcnow())

    return Response(feed.atom_str(pretty=True),
                    mimetype="application/atom+xml")
Code example #10
File: builder.py | Project: brandon-rhodes/blog
def render_feed(text_paths, outpath):
    # http://rhodesmill.org/brandon/feed
    # http://rhodesmill.org/brandon/category/python/feed
    # http://rhodesmill.org/brandon/feed/atom/

    t0 = datetime.min.time()

    def fix(d):
        dt = datetime.combine(d, t0)
        return timezone('US/Eastern').localize(dt)

    posts = [post_info(path) for path in text_paths if date_of(path)]
    posts = sorted(posts, key=lambda post: post['date'])
    posts = posts[-1:]
    most_recent_date = max(post['date'] for post in posts)

    def full(url):
        return 'http://rhodesmill.org/' + url.lstrip('/')

    fg = FeedGenerator()
    fg.id(full('/'))
    fg.author({'name': 'Brandon Rhodes'})
    fg.language('en')
    fg.link(href=full('/brandon/'), rel='alternate')
    if 'python' in outpath:
        fg.link(href=full('/brandon/category/python/feed/'), rel='self')
    else:
        fg.link(href=full('/brandon/feed/'), rel='self')
    fg.subtitle('Thoughts and ideas from Brandon Rhodes')
    fg.title("Let's Discuss the Matter Further")
    fg.updated(fix(most_recent_date))

    for post in posts:
        url = full(post['url_path'])
        excerpt = truncate_at_more(post['body_html'], url)

        fe = fg.add_entry()
        fe.content(excerpt, type='html')
        fe.guid(url, permalink=True)
        fe.id(url)
        fe.link({'href': url})
        fe.published(fix(post['date']))
        fe.title(post['title'])
        fe.updated(fix(post['date']))

    rss = fg.rss_str(pretty=True)
    fg.link(href=full('/brandon/feed/atom/'), rel='self', replace=True)
    atom = fg.atom_str(pretty=True)

    return rss, atom
Code example #11
File: feed.py | Project: vanaoff/rssfeed
 def __init__(
     self,
     id_: str,
     links: List[dict],
     title: str,
     updated: Optional[str],
 ):
     fg = FeedGenerator()
     fg.id(id_)
     fg.link(links)
     fg.title(title)
     fg.updated(updated)
     fg.generator("")
     self._feed_generator = fg
Code example #12
def build():
    feed = FeedGenerator()
    call_dict_as_methods(feed, FEED_SETTINGS)

    paths = sorted(glob("editions/*.md"), reverse=True)[:10]

    for i, path in enumerate(paths):
        entry = feed.add_entry(order="append")
        data = create_item(path)
        call_dict_as_methods(entry, data)
        if i == 0:
            feed.updated(data["updated"])

    return feed
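The order="append" argument above relates to an ordering quirk noted elsewhere in this collection: by default feedgen appears to place each new entry at the top of the feed (hence the "added backwards" and "bottom to top" comments in other examples), while order="append" keeps entries in insertion order. A small sketch of the difference; the id and titles are placeholders:

from feedgen.feed import FeedGenerator

fg = FeedGenerator()
fg.id("https://example.com/")                         # placeholder id
fg.title("Ordering demo")                             # placeholder title

fg.add_entry().title("added first")                   # default ordering: pushed down by later entries
fg.add_entry().title("added second")                  # ends up above "added first"
fg.add_entry(order="append").title("added third")     # append keeps this entry at the bottom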
Code example #13
def saveFeed(listings, title, path):

    url = githubRepoURL + title + ".xml"

    # Create a feed generator
    fg = FeedGenerator()

    # Create the feed's title
    fg.id(url)
    fg.title(title)
    fg.author({'name': 'Ben Snell'})
    fg.description("NYC 2BR Apartment Listings in " + title)
    fg.link(href=url, rel='alternate')
    fg.language('en')
    time = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "-05:00"
    fg.pubDate(time)
    fg.updated(time)

    for apt in listings:

        e = fg.add_entry()

        e.id(apt[0])
        e.title("$" + apt[1] + "  //  " + apt[4])
        e.link(href=apt[0])

        text = ""
        if apt[5] != "":
            imgs = apt[5].split(" ")
            for i in range(len(imgs)):
                text += "<img src=\"" + imgs[i] + "\" /> "
                if i == 0:
                    text += "<p>" + apt[8] + "</p>"
        else:
            text += "<p>" + apt[8] + "</p>"
        e.content(type="html", content=text)

        # This doesn't seem to work:
        e.pubDate(datetime2RSSString(clDate(apt[2])))
        e.updated(datetime2RSSString(clDate(apt[2])))

    fg.atom_str(pretty=True)
    fg.atom_file(path)
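Regarding the "This doesn't seem to work" comment above: the examples in this collection that do get dates into their feeds pass timezone-aware datetime objects to pubDate()/updated() rather than pre-formatted strings, and feedgen generally insists on timezone-aware values. A minimal sketch of that variant; the date literal merely stands in for whatever clDate(apt[2]) returns:

from datetime import datetime, timezone

from feedgen.feed import FeedGenerator

fg = FeedGenerator()
e = fg.add_entry()
listing_dt = datetime(2023, 1, 15, 9, 30, tzinfo=timezone.utc)  # stand-in for the parsed listing date
e.pubDate(listing_dt)
e.updated(listing_dt)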
Code example #14
File: get-open-calls.py | Project: bensnell/art-opp
def saveFeed(listings, title, path):

    url = githubRepoURL + title + ".xml"

    # Create a feed generator
    fg = FeedGenerator()

    # Create the feed's title
    fg.id(url)
    fg.title(title)
    fg.author({'name': 'Ben Snell'})
    fg.description("Art Show Open Call Opportunities")
    fg.link(href=url, rel='alternate')
    fg.language('en')
    time = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "-05:00"
    fg.pubDate(time)
    fg.updated(time)

    for item in listings:

        e = fg.add_entry()

        e.id(item["ID"])
        # Get a clearer title
        thisTitle = getShortDate(item["Application Deadline"]) + item["Title"]
        e.title(thisTitle)
        # for key, value in item.items():
        # print(key, value);
        # print(item["url"])
        # if "url" in item:
        e.link(href=item["url"])

        text = getHtmlFormattedListing(item)
        e.content(type="html", content=text)

        # This doesn't seem to work:
        # e.pubDate( datetime2RSSString(clDate(apt[2])) )
        # e.updated( datetime2RSSString(clDate(apt[2])) )

    fg.atom_str(pretty=True)
    fg.atom_file(path)
Code example #15
File: build.py | Project: karlicoss/beepb00p
def feed(posts: Tuple[Post], kind: str) -> FeedGenerator:
    log.debug('generating %s feed', kind)
    fg = FeedGenerator()
    fg.title('beepb00p')
    fg.author(name='karlicoss', email='*****@*****.**')
    # TODO better description?
    fg.description('feed')

    bb = lambda x: f'https://beepb00p.xyz{x}'
    fg.id(bb(f'/{kind}.xml'))
    fg.link(rel='self', href=bb(f'/{kind}.xml'))
    fg.link(href=bb(''))
    if len(posts) > 0:
        dates = (p.date for p in posts)
        fg.updated(
            max(tz.localize(d) if d is not None else throw() for d in dates))

    # eh, apparently it adds items to the feed from bottom to top...
    for post in reversed(posts):
        fe = fg.add_entry()
        # not sure why id() doesn't allow setting permalink=True
        fe.guid(bb(post.url), permalink=True)
        fe.link(href=bb(post.url))
        fe.title(post.title)
        # TODO FIXME meh.
        d = post.date
        assert d is not None
        td = tz.localize(d)
        fe.published(td)
        fe.updated(td)
        # TODO meh, later use proper update date...
        #
        # TODO use type=text/html for comparisons?
        # TODO meh
        if post.upid == 'infra_diagram':
            content = "Sorry, this post contains a huge diagram and isn't RSS friendly. It's best viewed on the website"
        else:
            content = post.body
        fe.content(content, type='html')
    return fg
Code example #16
File: app.py | Project: DavisGoglin/python-web-feed
def display_feed(feed_name):
    if feed_name not in config['feeds']:
        abort(404)

    f = Feed(config['feeds'][feed_name])
    f.load()
    f.parse()

    fg = FeedGenerator()

    fg.generator(**_generator)
    fg.id(request.base_url)
    fg.link(
        href=request.base_url,
        rel='self',
    )
    fg.title(f.properties.get('title', feed_name))
    fg.author(name=f.properties.get('author', ''))
    fg.updated(
        timezone.localize(
            _round_date(max([e['updated'] for e in f.entries]),
                        config.get('date_rounding', None))))

    for entry in f.entries:
        fe = fg.add_entry()
        fe.id(entry['url'])
        fe.title(entry['title'])
        fe.link(href=entry['url'])
        fe.updated(
            timezone.localize(
                _round_date(entry['updated'],
                            config.get('date_rounding', None))))
        fe.content(entry['content'])
    atomfeed = fg.atom_str()

    resp = make_response(atomfeed)
    resp.headers['content-type'] = 'application/xml'
    return resp
Code example #17
File: __main__.py | Project: jml/notebook
def generate_feed(posts):
    fg = FeedGenerator()
    fg.id('%s/' % SITE_URL)
    fg.title("jml's notebook")
    fg.author({'name': 'Jonathan M. Lange', 'email': '*****@*****.**'})
    fg.link(href=SITE_URL, rel='alternate')
    fg.link(href='%s/feed.xml' % (SITE_URL, ), rel='self')
    fg.language('en')

    dates = []

    for post in reversed(posts):
        fe = fg.add_entry()
        fe.id(SITE_URL + post.url)
        fe.link(href=SITE_URL + post.url)
        fe.title(post.title or post.name)
        fe.content(post.body)
        updated = subprocess.check_output([
            "git",
            "log",
            "-1",
            '--date=iso8601',
            '--format="%ad"',
            "--",
            post.original_file,
        ]).decode('ascii').strip().strip('"')
        if updated:
            updated = dateutil.parser.parse(updated)
        else:
            updated = datetime.strptime(post.name.replace(
                '.html', ''), POST_DATE_FORMAT).replace(tzinfo=tz.gettz())
        dates.append(updated)
        fe.updated(updated)

    fg.updated(max(dates))

    fg.atom_file(os.path.join(HTML_ROOT, 'feed.xml'), pretty=True)
Code example #18
def newsfeed():
    def makedate(strdate):
        dt = datetime.datetime.strptime(strdate, '%Y-%m-%d %H:%M')
        return datetime.datetime(
            year=dt.year,
            month=dt.month,
            day=dt.day,
            hour=dt.hour,
            minute=dt.minute,
            tzinfo=datetime.timezone.utc,
        )

    news = load_news()
    feed = FeedGenerator()
    feed.icon(make_external('static/favicon.ico'))
    feed.id(request.url)
    feed.language('en-US')
    feed.link(href=make_external('news'))
    feed.link(href=request.url, rel='self')
    feed.title('The State of Taisei')
    feed.updated(makedate(news[0][0]))

    for article in news:
        date = makedate(article[0])
        url = make_external("/news/" + article[3])

        entry = feed.add_entry()
        entry.author(name='Taisei team')
        entry.content(article[2], type='html')
        entry.id(url)
        entry.link(href=make_external("/news/" + article[3]))
        entry.published(date)
        entry.title(article[1])
        entry.updated(date)

    return Response(feed.atom_str(), mimetype='text/xml')
Code example #19
import os
from datetime import datetime

import pytz
from algoliasearch.search_client import SearchClient
from feedgen.feed import FeedGenerator

if __name__ == '__main__':
    client = SearchClient.create(os.environ['APP_ID'], os.environ['API_KEY'])
    index = client.init_index('interviews_publishedAt_desc')
    articles = index.search('')['hits']

    fg = FeedGenerator()
    fg.title('IH Interviews')
    fg.id('ih-interviews-20201123-205642')
    pubs = []
    for article in articles:
        pub = datetime.fromtimestamp(article['publishedAt'] /
                                     1000).replace(tzinfo=pytz.timezone('UTC'))
        pubs.append(pub)
        fe = fg.add_entry()
        fe.id(article['interviewId'])
        fe.published(pub)
        fe.pubDate(pub)
        fe.updated(pub)
        fe.title(article['title'])
        fe.link(
            href=
            f"https://www.indiehackers.com/interview/{article['interviewId']}")
    fg.updated(max(pubs))
    print(fg.atom_str(pretty=True).decode())
Code example #20
async def playlist(request, playlist_id, return_type='video'):
    log.info(f'Playlist: {playlist_id}')
    playlist_name = f'{playlist_id}/{return_type}'
    if playlist_name in playlist_feed and playlist_feed[playlist_name][
            'expire'] > datetime.now():
        return raw(playlist_feed[playlist_name]['feed'],
                   content_type='application/rss+xml')
    calls = 0
    payload = {'part': 'snippet', 'id': playlist_id, 'key': KEY}
    log.debug('Downloaded Playlist Information')
    response = json.loads(await get(
        'https://www.googleapis.com/youtube/v3/playlists', params=payload))
    calls += 1
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.generator('PodTube', __version__,
                 'https://github.com/aquacash5/PodTube')
    snippet = response['items'][0]['snippet']
    icon = max(snippet['thumbnails'],
               key=lambda x: snippet['thumbnails'][x]['width'])
    fg.title(snippet['title'])
    fg.id(f'http://{request.headers["host"]}{request.url}')
    fg.description(snippet['description'] or ' ')
    fg.author(name=snippet['channelTitle'])
    fg.image(snippet['thumbnails'][icon]['url'])
    fg.link(href=f'https://www.youtube.com/playlist?list={playlist_id}')
    fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
    fg.podcast.itunes_summary(snippet['description'])
    fg.podcast.itunes_category('Technology', 'Podcasting')
    fg.updated(f'{str(datetime.utcnow())}Z')
    response = {'nextPageToken': ''}
    while 'nextPageToken' in response.keys():
        payload = {
            'part': 'snippet',
            'maxResults': 50,
            'playlistId': playlist_id,
            'key': KEY,
            'pageToken': response['nextPageToken']
        }
        response = json.loads(await get(
            'https://www.googleapis.com/youtube/v3/playlistItems',
            params=payload))
        calls += 1
        for item in response['items']:
            snippet = item['snippet']
            current_video = snippet['resourceId']['videoId']
            if 'Private' in snippet['title']:
                continue
            log.debug(f'PlaylistVideo: {current_video} {snippet["title"]}')
            fe = fg.add_entry()
            fe.title(snippet['title'])
            fe.id(current_video)
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fe.updated(snippet['publishedAt'])
            if return_type == 'audio':
                fe.enclosure(
                    url=
                    f'http://{request.headers["host"]}/audio/{current_video}',
                    type="audio/mpeg")
            else:
                fe.enclosure(
                    url=
                    f'http://{request.headers["host"]}/video/{current_video}',
                    type="video/mp4")
            fe.author(name=snippet['channelTitle'])
            fe.podcast.itunes_author(snippet['channelTitle'])
            fe.podcast.itunes_author(snippet['channelTitle'])
            fe.pubdate(snippet['publishedAt'])
            fe.link(href='http://www.youtube.com/watch?v=' + current_video,
                    title=snippet['title'])
            fe.podcast.itunes_summary(snippet['description'])
            fe.description(snippet['description'])
            await sleep(0)
    feed = {
        'feed': fg.rss_str(),
        'expire': datetime.now() + timedelta(hours=calls)
    }
    playlist_feed[playlist_name] = feed
    return raw(feed['feed'], content_type='application/rss+xml')
Code example #21
File: podtube.py | Project: kaesi0/PodTube
 def get(self, playlist):
     playlist = playlist.split('/')
     if len(playlist) < 2:
         playlist.append('video')
     playlist_name = '/'.join(playlist)
     self.set_header('Content-type', 'application/rss+xml')
     if playlist_name in playlist_feed and playlist_feed[playlist_name][
             'expire'] > datetime.datetime.now():
         self.write(playlist_feed[playlist_name]['feed'])
         self.finish()
         return
     calls = 0
     payload = {'part': 'snippet', 'id': playlist[0], 'key': key}
     request = requests.get(
         'https://www.googleapis.com/youtube/v3/playlists', params=payload)
     calls += 1
     response = request.json()
     if request.status_code == 200:
         logging.debug('Downloaded Playlist Information')
     else:
         logging.error('Error Downloading Playlist: %s', request.reason)
         self.send_error(reason='Error Downloading Playlist')
         return
     fg = FeedGenerator()
     fg.load_extension('podcast')
     fg.generator('PodTube (python-feedgen)', __version__,
                  'https://github.com/aquacash5/PodTube')
     snippet = response['items'][0]['snippet']
     icon = max(snippet['thumbnails'],
                key=lambda x: snippet['thumbnails'][x]['width'])
     logging.info('Playlist: %s (%s)', playlist[0], snippet['title'])
     fg.title(snippet['title'])
     fg.id('http://' + self.request.host + self.request.uri)
     fg.description(snippet['description'] or ' ')
     fg.author(name='Podtube',
               email='*****@*****.**',
               uri='https://github.com/aquacash5/PodTube')
     fg.podcast.itunes_author(snippet['channelTitle'])
     fg.image(snippet['thumbnails'][icon]['url'])
     fg.link(href=f'http://youtube.com/playlist/?list={playlist}',
             rel='self')
     fg.language('en-US')
     fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
     fg.podcast.itunes_explicit('no')
     fg.podcast.itunes_owner(name='Podtube',
                             email='*****@*****.**')
     fg.podcast.itunes_summary(snippet['description'])
     fg.podcast.itunes_category(cat='Technology')
     fg.updated(str(datetime.datetime.utcnow()) + 'Z')
     video = None
     response = {'nextPageToken': ''}
     while 'nextPageToken' in response.keys():
         payload = {
             'part': 'snippet',
             'maxResults': 50,
             'playlistId': playlist[0],
             'key': key,
             'pageToken': response['nextPageToken']
         }
         request = requests.get(
             'https://www.googleapis.com/youtube/v3/playlistItems',
             params=payload)
         calls += 1
         response = request.json()
         if request.status_code == 200:
             logging.debug('Downloaded Playlist Information')
         else:
             logging.error('Error Downloading Playlist: %s', request.reason)
             self.send_error(reason='Error Downloading Playlist Items')
             return
         for item in response['items']:
             snippet = item['snippet']
             current_video = snippet['resourceId']['videoId']
             if 'Private' in snippet['title']:
                 continue
             logging.debug('PlaylistVideo: %s (%s)', current_video,
                           snippet['title'])
             fe = fg.add_entry()
             fe.title(snippet['title'])
             fe.id(current_video)
             icon = max(snippet['thumbnails'],
                        key=lambda x: snippet['thumbnails'][x]['width'])
             fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
             fe.updated(snippet['publishedAt'])
             if playlist[1] == 'video':
                 fe.enclosure(
                     url=f'http://{self.request.host}/video/{current_video}',
                     type="video/mp4")
             elif playlist[1] == 'audio':
                 fe.enclosure(
                     url=f'http://{self.request.host}/audio/{current_video}',
                     type="audio/mpeg")
             fe.author(name=snippet['channelTitle'])
             fe.podcast.itunes_author(snippet['channelTitle'])
             fe.pubDate(snippet['publishedAt'])
             fe.link(href=f'http://www.youtube.com/watch?v={current_video}',
                     title=snippet['title'])
             fe.podcast.itunes_summary(snippet['description'])
             fe.description(snippet['description'])
             if not video or video['expire'] < fe.pubDate():
                 video = {'video': fe.id(), 'expire': fe.pubDate()}
     feed = {
         'feed': fg.rss_str(),
         'expire': datetime.datetime.now() + datetime.timedelta(hours=calls)
     }
     playlist_feed[playlist_name] = feed
     self.write(feed['feed'])
     self.finish()
     video = video['video']
     mp3_file = 'audio/{}.mp3'.format(video)
     if playlist[1] == 'audio' and not os.path.exists(
             mp3_file) and video not in conversion_queue.keys():
         conversion_queue[video] = {
             'status': False,
             'added': datetime.datetime.now()
         }
Code example #22
File: podtube.py | Project: kaesi0/PodTube
 def get(self, channel):
     channel = channel.split('/')
     if len(channel) < 2:
         channel.append('video')
     channel_name = ['/'.join(channel)]
     self.set_header('Content-type', 'application/rss+xml')
     if channel_name[0] in channel_feed and channel_feed[
             channel_name[0]]['expire'] > datetime.datetime.now():
         self.write(channel_feed[channel_name[0]]['feed'])
         self.finish()
         return
     fg = None
     video = None
     calls = 0
     response = {'nextPageToken': ''}
     while 'nextPageToken' in response.keys():
         next_page = response['nextPageToken']
         payload = {
             'part': 'snippet,contentDetails',
             'maxResults': 50,
             'channelId': channel[0],
             'key': key,
             'pageToken': next_page
         }
         request = requests.get(
             'https://www.googleapis.com/youtube/v3/activities',
             params=payload)
         calls += 1
         if request.status_code != 200:
             payload = {
                 'part': 'snippet',
                 'maxResults': 1,
                 'forUsername': channel[0],
                 'key': key
             }
             request = requests.get(
                 'https://www.googleapis.com/youtube/v3/channels',
                 params=payload)
             response = request.json()
             channel[0] = response['items'][0]['id']
             channel_name.append('/'.join(channel))
             payload = {
                 'part': 'snippet,contentDetails',
                 'maxResults': 50,
                 'channelId': channel[0],
                 'key': key,
                 'pageToken': next_page
             }
             request = requests.get(
                 'https://www.googleapis.com/youtube/v3/activities',
                 params=payload)
             calls += 2
         response = request.json()
         if request.status_code == 200:
             logging.debug('Downloaded Channel Information')
         else:
             logging.error('Error Downloading Channel: %s', request.reason)
             self.send_error(reason='Error Downloading Channel')
             return
         if not fg:
             fg = FeedGenerator()
             fg.load_extension('podcast')
             fg.generator('PodTube (python-feedgen)', __version__,
                          'https://github.com/aquacash5/PodTube')
             for item in response['items']:
                 if item['snippet']['type'] != 'upload':
                     continue
                 elif 'Private' in item['snippet']['title']:
                     continue
                 else:
                     snippet = item['snippet']
                     break
             logging.info('Channel: %s (%s)', channel[0],
                          snippet['channelTitle'])
             icon = max(snippet['thumbnails'],
                        key=lambda x: snippet['thumbnails'][x]['width'])
             fg.title(snippet['channelTitle'])
             fg.id('http://' + self.request.host + self.request.uri)
             fg.description(snippet['description'] or ' ')
             fg.author(name='Podtube',
                       email='*****@*****.**',
                       uri='https://github.com/aquacash5/PodTube')
             fg.podcast.itunes_author(snippet['channelTitle'])
             fg.image(snippet['thumbnails'][icon]['url'])
             fg.link(href=f'http://youtube.com/channel/{channel}',
                     rel='self')
             fg.language('en-US')
             fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
             fg.podcast.itunes_explicit('no')
             fg.podcast.itunes_owner(name='Podtube',
                                     email='*****@*****.**')
             fg.podcast.itunes_summary(snippet['description'])
             fg.podcast.itunes_category(cat='Technology')
             fg.updated(str(datetime.datetime.utcnow()) + 'Z')
         for item in response['items']:
             snippet = item['snippet']
             if snippet['type'] != 'upload':
                 continue
             if 'private' in snippet['title'].lower():
                 continue
             current_video = item['contentDetails']['upload']['videoId']
             logging.debug('ChannelVideo: %s (%s)', current_video,
                           snippet['title'])
             fe = fg.add_entry()
             fe.title(snippet['title'])
             fe.id(current_video)
             icon = max(snippet['thumbnails'],
                        key=lambda x: snippet['thumbnails'][x]['width'])
             fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
             fe.updated(snippet['publishedAt'])
             if channel[1] == 'video':
                 fe.enclosure(
                     url=f'http://{self.request.host}/video/{current_video}',
                     type="video/mp4")
             elif channel[1] == 'audio':
                 fe.enclosure(
                     url=f'http://{self.request.host}/audio/{current_video}',
                     type="audio/mpeg")
             fe.author(name=snippet['channelTitle'])
             fe.podcast.itunes_author(snippet['channelTitle'])
             fe.pubDate(snippet['publishedAt'])
             fe.link(href=f'http://www.youtube.com/watch?v={current_video}',
                     title=snippet['title'])
             fe.podcast.itunes_summary(snippet['description'])
             fe.description(snippet['description'])
             if not video or video['expire'] < fe.pubDate():
                 video = {'video': fe.id(), 'expire': fe.pubDate()}
     feed = {
         'feed': fg.rss_str(),
         'expire': datetime.datetime.now() + datetime.timedelta(hours=calls)
     }
     for chan in channel_name:
         channel_feed[chan] = feed
     self.write(feed['feed'])
     self.finish()
     video = video['video']
     mp3_file = 'audio/{}.mp3'.format(video)
     if channel[1] == 'audio' and not os.path.exists(
             mp3_file) and video not in conversion_queue.keys():
         conversion_queue[video] = {
             'status': False,
             'added': datetime.datetime.now()
         }
Code example #23
File: __main__.py | Project: DRMacIver/notebook
def do_build(rebuild=False, full=True, name=""):
    only = name

    try:
        os.makedirs(HTML_POSTS)
    except FileExistsError:
        pass

    for name in tqdm(post_names()):
        source = os.path.join(POSTS, name + ".md")
        if not name.startswith(only):
            continue

        dest = os.path.join(HTML_POSTS, name + ".html")

        if not (rebuild or not os.path.exists(dest)
                or os.path.getmtime(source) > os.path.getmtime(dest)):
            continue

        with open(source) as i:
            source_text = i.read()

        with open(dest, "w") as o:
            o.write(post_html(template_cache_key(), name, source_text))

    if not full:
        return

    for post in glob(os.path.join(HTML_POSTS, "*.html")):
        source = os.path.join(POSTS,
                              os.path.basename(post).replace(".html", ".md"))
        if not os.path.exists(source):
            os.unlink(post)

    posts = [post_object(name) for name in tqdm(post_names())]

    posts.sort(key=lambda p: p.name, reverse=True)

    new_count = 0
    new_posts = posts[:new_count]

    old_posts = []

    for post in posts[new_count:]:
        date = dateutil.parser.parse(post.date)
        date = f"{date.year}-{date.month:02d}"

        if not old_posts or date != old_posts[-1][0]:
            old_posts.append((date, []))
        old_posts[-1][-1].append(post)

    with open(INDEX_PAGE, "w") as o:
        o.write(
            TEMPLATE_LOOKUP.get_template("index.html").render(
                new_posts=new_posts,
                old_posts=old_posts,
                title="Thoughts from David R. MacIver",
            ))

    fg = FeedGenerator()
    fg.id("https://notebook.drmaciver.com/")
    fg.title("DRMacIver's notebook")
    fg.author({"name": "David R. MacIver", "email": "*****@*****.**"})
    fg.link(href="https://notebook.drmaciver.com", rel="alternate")
    fg.link(href="https://notebook.drmaciver.com/feed.xml", rel="self")
    fg.language("en")

    dates = []

    for post in sorted(posts, key=lambda p: p.date, reverse=True)[:10]:
        fe = fg.add_entry()
        fe.id("https://notebook.drmaciver.com" + post.url)
        fe.link(href="https://notebook.drmaciver.com" + post.url)
        fe.title(post.title or post.name)
        fe.content(post.body, type="html")
        updated = (subprocess.check_output([
            "git",
            "log",
            "-1",
            "--date=iso8601",
            '--format="%ad"',
            "--",
            post.original_file,
        ]).decode("ascii").strip().strip('"'))
        if updated:
            updated = dateutil.parser.parse(updated)
        else:
            updated = datetime.strptime(post.name.replace(
                ".html", ""), POST_DATE_FORMAT).replace(tzinfo=tz.gettz())
        dates.append(updated)
        fe.updated(updated)

    fg.updated(max(dates))

    fg.atom_file(os.path.join(HTML_ROOT, "feed.xml"), pretty=True)
Code example #24
File: __init__.py | Project: simonw/datasette-atom
def render_atom(datasette, request, sql, columns, rows, database, table,
                query_name, view_name, data):
    from datasette.views.base import DatasetteError

    if not REQUIRED_COLUMNS.issubset(columns):
        raise DatasetteError(
            "SQL query must return columns {}".format(
                ", ".join(REQUIRED_COLUMNS)),
            status=400,
        )
    fg = FeedGenerator()
    fg.generator(
        generator="Datasette",
        version=__version__,
        uri="https://github.com/simonw/datasette",
    )
    fg.id(request.url)
    fg.link(href=request.url, rel="self")
    fg.updated(max(row["atom_updated"] for row in rows))
    title = request.args.get("_feed_title", sql)
    if table:
        title += "/" + table
    if data.get("human_description_en"):
        title += ": " + data["human_description_en"]
    # If this is a canned query, the configured title for it overrides all others
    if query_name:
        try:
            title = datasette.metadata(
                database=database)["queries"][query_name]["title"]
        except (KeyError, TypeError):
            pass
    fg.title(title)

    clean_function = clean
    if query_name:
        # Check allow_unsafe_html_in_canned_queries
        plugin_config = datasette.plugin_config("datasette-atom")
        if plugin_config:
            allow_unsafe_html_in_canned_queries = plugin_config.get(
                "allow_unsafe_html_in_canned_queries")
            if allow_unsafe_html_in_canned_queries is True:
                clean_function = lambda s: s
            elif isinstance(allow_unsafe_html_in_canned_queries, dict):
                allowlist = allow_unsafe_html_in_canned_queries.get(
                    database) or []
                if query_name in allowlist:
                    clean_function = lambda s: s

    # And the rows
    for row in reversed(rows):
        entry = fg.add_entry()
        entry.id(str(row["atom_id"]))
        if "atom_content_html" in columns:
            entry.content(clean_function(row["atom_content_html"]),
                          type="html")
        elif "atom_content" in columns:
            entry.content(row["atom_content"], type="text")
        entry.updated(row["atom_updated"])
        entry.title(str(row["atom_title"]))
        # atom_link is optional
        if "atom_link" in columns:
            entry.link(href=row["atom_link"])
        if "atom_author_name" in columns and row["atom_author_name"]:
            author = {
                "name": row["atom_author_name"],
            }
            for key in ("uri", "email"):
                colname = "atom_author_{}".format(key)
                if colname in columns and row[colname]:
                    author[key] = row[colname]
            entry.author(author)

    return Response(
        fg.atom_str(pretty=True),
        content_type="application/xml; charset=utf-8",
        status=200,
    )
Code example #25
File: test_feed.py | Project: lkiesow/python-feedgen
    def setUp(self):

        fg = FeedGenerator()

        self.nsAtom = "http://www.w3.org/2005/Atom"
        self.nsRss = "http://purl.org/rss/1.0/modules/content/"

        self.feedId = 'http://lernfunk.de/media/654321'
        self.title = 'Some Testfeed'

        self.authorName = 'John Doe'
        self.authorMail = '*****@*****.**'
        self.author = {'name': self.authorName, 'email': self.authorMail}

        self.linkHref = 'http://example.com'
        self.linkRel = 'alternate'

        self.logo = 'http://ex.com/logo.jpg'
        self.subtitle = 'This is a cool feed!'

        self.link2Href = 'http://larskiesow.de/test.atom'
        self.link2Rel = 'self'

        self.language = 'en'

        self.categoryTerm = 'This category term'
        self.categoryScheme = 'This category scheme'
        self.categoryLabel = 'This category label'

        self.cloudDomain = 'example.com'
        self.cloudPort = '4711'
        self.cloudPath = '/ws/example'
        self.cloudRegisterProcedure = 'registerProcedure'
        self.cloudProtocol = 'SOAP 1.1'

        self.icon = "http://example.com/icon.png"
        self.contributor = {'name': "Contributor Name",
                            'uri': "Contributor Uri",
                            'email': 'Contributor email'}
        self.copyright = "The copyright notice"
        self.docs = 'http://www.rssboard.org/rss-specification'
        self.managingEditor = '*****@*****.**'
        self.rating = '(PICS-1.1 "http://www.classify.org/safesurf/" ' + \
            '1 r (SS~~000 1))'
        self.skipDays = 'Tuesday'
        self.skipHours = 23

        self.textInputTitle = "Text input title"
        self.textInputDescription = "Text input description"
        self.textInputName = "Text input name"
        self.textInputLink = "Text input link"

        self.ttl = 900

        self.webMaster = '*****@*****.**'

        fg.id(self.feedId)
        fg.title(self.title)
        fg.author(self.author)
        fg.link(href=self.linkHref, rel=self.linkRel)
        fg.logo(self.logo)
        fg.subtitle(self.subtitle)
        fg.link(href=self.link2Href, rel=self.link2Rel)
        fg.language(self.language)
        fg.cloud(domain=self.cloudDomain, port=self.cloudPort,
                 path=self.cloudPath,
                 registerProcedure=self.cloudRegisterProcedure,
                 protocol=self.cloudProtocol)
        fg.icon(self.icon)
        fg.category(term=self.categoryTerm, scheme=self.categoryScheme,
                    label=self.categoryLabel)
        fg.contributor(self.contributor)
        fg.copyright(self.copyright)
        fg.docs(docs=self.docs)
        fg.managingEditor(self.managingEditor)
        fg.rating(self.rating)
        fg.skipDays(self.skipDays)
        fg.skipHours(self.skipHours)
        fg.textInput(title=self.textInputTitle,
                     description=self.textInputDescription,
                     name=self.textInputName, link=self.textInputLink)
        fg.ttl(self.ttl)
        fg.webMaster(self.webMaster)
        fg.updated('2017-02-05 13:26:58+01:00')
        fg.pubDate('2017-02-05 13:26:58+01:00')
        fg.generator('python-feedgen', 'x', uri='http://github.com/lkie...')
        fg.image(url=self.logo,
                 title=self.title,
                 link=self.link2Href,
                 width='123',
                 height='123',
                 description='Example Image')

        self.fg = fg
Code example #26
File: atom_1_0.py | Project: arXiv/arxiv-feed
    def get_xml(self: Serializer, response: Response) -> Tuple[str, int]:
        """
        Serialize the provided response data into Atom, version 1.0.

        Parameters
        ----------
        response : Response
            The search response data to be serialized.

        Returns
        -------
        data : str
            The serialized XML results.
        status
            The HTTP status code for the operation.

        """
        fg = FeedGenerator()
        fg.register_extension("arxiv",
                              ArxivExtension,
                              ArxivEntryExtension,
                              rss=False)
        fg.id("http://arxiv.org/rss/version=atom_1.0")
        archive = response.hits[0]["primary_classification"]["archive"]
        fg.title(archive["id"] + " updates on arXiv.org")
        fg.link(href='http://arxiv.org/rss/version=atom_1.0',
                rel='self',
                type='application/atom+xml')
        fg.updated(datetime.utcnow().replace(tzinfo=utc))

        # TODO - Try to remove generator element?  This doesn't work - code ignores "None"
        # fg.generator(None)
        # TODO - We don't currently set "subtitle", but could do it like this
        # fg.subtitle(
        #     f"{archive['name']} ({archive['id']}) updates on the arXiv.org e-print archive")

        # Add each search result "hit" to the feed
        for hit in response:
            entry = fg.add_entry()
            entry.id("http://arxiv.org/abs/" + hit['id'])
            entry.title(hit['title'])
            entry.summary(hit['abstract'])
            entry.published(hit['submitted_date'])
            entry.updated(hit['updated_date'])

            entry.link({
                "href": url_for("abs_by_id", paper_id=hit['id']),
                "type": "text/html"
            })
            pdf_link = {
                "title": "pdf",
                "rel": "related",
                "type": "application/pdf"
            }
            pdf_link["href"] = url_for("pdf_by_id", paper_id=hit['id'])
            entry.link(pdf_link)

            # Add categories
            categories = [hit['primary_classification'].to_dict()['category']]
            for classification in hit['secondary_classification']:
                categories += [classification['category'].to_dict()]
            for cat in categories:
                label = cat['name'] + " (" + cat['id'] + ")"
                category = {
                    "term": cat['id'],
                    "scheme": "http://arxiv.org/schemas/atom",
                    "label": label
                }
                entry.category(category)

            # Add arXiv-specific element "comment"
            if hit['comments'].strip():
                entry.arxiv.comment(hit['comments'])

            # Add arXiv-specific element "journal_ref"
            if hit['journal_ref'].strip():
                entry.arxiv.journal_ref(hit['journal_ref'])

            # Add arXiv-specific element "primary_category"
            prim_cat = hit['primary_classification'].to_dict()['category']
            label = prim_cat['name'] + " (" + prim_cat['id'] + ")"
            category = {
                "term": prim_cat['id'],
                "scheme": "http://arxiv.org/schemas/atom",
                "label": label
            }
            entry.arxiv.primary_category(category)

            # Add arXiv-specific element "doi"
            if hit['doi']:
                entry.arxiv.doi(hit['doi'])

            # Add each author
            for author in hit['authors']:
                author_list = {"name": author['full_name']}
                entry.author(author_list)
                # TODO - How can arxiv-specific affiliation elements be added to authors?

        data = fg.atom_str(pretty=True)
        status_code = status.HTTP_200_OK
        return data, status_code
Code example #27
File: nozzle.py | Project: paregorios/zotnozzle
def main (args):
    """
    main functions
    """
    logger = logging.getLogger(sys._getframe().f_code.co_name)

    with open(args.config) as f:
        config = json.load(f)
    logger.debug(pformat(config, indent=4))
    # override any loaded config with items specified on command line
    for arg, argv in vars(args).items():
        if argv is not None:
            config[arg] = argv
    if 'maximum' not in config.keys():
        config['maximum'] = DEFAULTMAXIMUM
    logger.debug(pformat(config, indent=4))

    # get metadata about the collection
    context = '{0}/collections/{1}'.format(config['zotero_account'], config['zotero_collection'])
    url = '/'.join((ZOT_BASE, context, '?format=json'))
    response = zot_get(url)
    
    alt_html = json.loads(response['content'])['links']['alternate']['href']

    # get list of items in collection
    context = '{0}/collections/{1}/items/top'.format(config['zotero_account'], config['zotero_collection'])
    url = '/'.join((ZOT_BASE, context, '?format=keys&sort=dateModified&direction=desc&limit={0}'.format(config['maximum'])))
    logger.debug('fetching: {0}'.format(url))
    response = zot_get(url)
    if int(response['length']) > 0:
        keys = response['content'].split('\n')
    else:
        print("boom")
    if len(keys) > config['maximum']+1:
        logger.error("gigantic: {0}".format(len(keys)))
        raise Exception

    fg = FeedGenerator()
    feed_id = u'tag:{domain},{date}:{slug}'.format(
        domain=config['tag_domain'],
        date=config['tag_date'],
        slug=config['tag_slug'])

    fg.id(feed_id)
    fg.title(config['title'])
    fg.author( {'name':config['author_name'],'email':config['author_email']} )
    fg.link( href=config['self'], rel='self' )
    fg.link( href=alt_html, rel='alternate' )
    fg.logo('https://www.zotero.org/support/_media/logo/zotero_256x256x32.png')
    fg.language('en')
    fg.updated(datetime.now(pytz.utc))
    context = '{0}/items'.format(config['zotero_account'])
    entries = {}
    for key in [k for k in keys if len(k.strip()) > 0]:
        logger.info(u'zotero key: "{0}"'.format(key))
        url = '/'.join((ZOT_BASE, context, key))
        response = zot_get(url)
        data = json.loads(response['content'])
        zot_link_html = data['links']['alternate']['href']
        zot_link_json = data['links']['self']['href']
        data = data['data']
        logger.info(u'zotero itemType: "{0}"'.format(data['itemType']))
        if data['itemType'] == 'note':
            logger.warning('ignored note (key="{0}")'.format(key))
        elif data['itemType'] == 'attachment':
            if data['linkMode'] == u'linked_url':
                fe = entries[data['parentItem']]
                fe.link(href=data['url'], title=data['title'], rel='alternate')
            else:
                raise NotImplementedError('Zotero attachment (key="{0}") with unhandled linkMode="{1}"'.format(key, data['linkMode']))
        else:
            fe = fg.add_entry()
            entries[key] = fe
            entry_id = u'tag:{domain},{date}:{slug}'.format(
                domain='zotero.org',
                date=data['dateAdded'].split('T')[0],
                slug='/'.join((context, key)))

            fe.id(entry_id)
            try:
                fe.title(data['title'])
            except KeyError:
                logger.warning("unexpected lack of title in zotero record")
                logger.debug(pformat(data, indent=2))
                raise
            try:
                creators = data['creators']
            except KeyError:
                pass
            else:
                authors = [c for c in data['creators'] if c['creatorType'] == u'author']
                for a in authors:
                    if 'name' in a.keys():
                        fe.author({'name':a['name']})
                    else:
                        fe.author({'name':u'{0} {1}'.format(a['firstName'], a['lastName']), })
            try:
                fe.link(href=data['url'], rel='alternate', title='link to resource')
            except KeyError:
                pass
            fe.link(href=zot_link_html, rel='alternate', title='link to zotero record (html)')
            #fe.link(href=zot_link_json, rel='alternate', title='link to zotero record (json)')
            try:            
                fe.description(data['abstractNote'], isSummary=True)
            except KeyError:
                pass
            url = '/'.join((ZOT_BASE, context, key, '?format=bib'))
            bib = zot_get(url)
            logger.debug(pformat(bib, indent=4))
            bib = bib['content'].split('\n')[2].strip()
            logger.debug("bib: '{0}'".format(bib))
            fe.content(content=bib, type='xhtml')
            fe.published(data['dateAdded'])
            fe.updated(data['dateModified'])
            #fe.updated(datetime.now(pytz.utc))    
    fg.atom_file(config['out_path'])
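The command-line wiring for main() is not part of the excerpt above; a minimal driver consistent with the config keys it reads might look like this (a sketch: the real argument parser lives elsewhere in nozzle.py, and the option names here are assumptions):

import argparse
import logging

if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser(
        description="Build an Atom feed from a Zotero collection")
    # main() loads args.config as JSON and copies any non-None CLI values over it
    parser.add_argument("config", help="path to JSON config file")
    parser.add_argument("--maximum", type=int, default=None,
                        help="maximum number of items to include in the feed")
    main(parser.parse_args())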
コード例 #29
0
ファイル: main.py プロジェクト: vadviktor/animetorrent-feed
class Spider:
    def __init__(self):
        self.config = toml.load("config.toml")
        self.aws_session = boto3.session.Session()
        self.cloudwatch = self.aws_session.client(
            service_name="cloudwatch",
            region_name=self.config["secretsmanager"]["region"],
        )
        self._report_execution()
        self.metric_retry_count = 0

        self.environment = getenv("APP_ENVIRONMENT", "development")
        with open("version.txt", "r") as f:
            self.version = f.readline().strip()

        loglevel = logging.DEBUG
        if self.environment == "production":
            loglevel = logging.ERROR

        logging.basicConfig(
            stream=stdout,
            level=loglevel,
            format="%(asctime)s - %(levelname)s - %(message)s",
        )

        sentry_sdk.init(
            "https://[email protected]/1509686",
            environment=self.environment,
            release=self.version,
        )

        self.feed = FeedGenerator()
        self.session = HTMLSession()
        self.s3 = self.aws_session.client(service_name="s3")

    def _anti_hammer_sleep(self):
        logging.debug("zzzZZzzzZZZZZzzzzz")
        sleep(randrange(1, self.config["anti_hammer_sleep"]))

    def _secrets(self):
        logging.debug("fetching secrets from AWS")
        try:
            client = self.aws_session.client(
                service_name="secretsmanager",
                region_name=self.config["secretsmanager"]["region"],
            )
            get_secret_value_response = client.get_secret_value(
                SecretId=self.config["secretsmanager"]["secret_name"])
        except ClientError as e:
            capture_exception(e)

            if e.response["Error"]["Code"] == "DecryptionFailureException":
                # Secrets Manager can't decrypt the protected secret text using the provided KMS key.
                # Deal with the exception here, and/or rethrow at your discretion.
                raise e
            elif e.response["Error"][
                    "Code"] == "InternalServiceErrorException":
                # An error occurred on the server side.
                # Deal with the exception here, and/or rethrow at your discretion.
                raise e
            elif e.response["Error"]["Code"] == "InvalidParameterException":
                # You provided an invalid value for a parameter.
                # Deal with the exception here, and/or rethrow at your discretion.
                raise e
            elif e.response["Error"]["Code"] == "InvalidRequestException":
                # You provided a parameter value that is not valid for the current state of the resource.
                # Deal with the exception here, and/or rethrow at your discretion.
                raise e
            elif e.response["Error"]["Code"] == "ResourceNotFoundException":
                # We can't find the resource that you asked for.
                # Deal with the exception here, and/or rethrow at your discretion.
                raise e
        else:
            # Decrypts secret using the associated KMS CMK.
            # Depending on whether the secret is a string or binary, one of these fields will be populated.
            if "SecretString" in get_secret_value_response:
                return json.loads(get_secret_value_response["SecretString"])

    def crawl(self):
        self._login()

        self.feed.id(f"{self.version}.vadviktor.xyz")
        self.feed.updated(datetime.utcnow().isoformat("T") + "Z")
        self.feed.author({
            "name": "Viktor (Ikon) VAD",
            "email": "*****@*****.**",
            "uri": "https://www.github.com/vadviktor",
        })
        self.feed.title("Animetorrents.me feed")
        self.feed.link(
            href=self.config["s3"]["object_url"].format(
                bucket=self.config["s3"]["bucket"],
                region=self.config["s3"]["region"],
                filekey=self.config["s3"][f"feed_filename_{self.environment}"],
            ),
            rel="self",
        )

        for profile_url in self._torrent_profile_links(self._max_pages()):
            profile_data = self._parse_profile(profile_url)
            if profile_data is None:
                continue

            fe = self.feed.add_entry(order="append")
            fe.id(profile_url)
            fe.title(profile_data["title"])
            fe.link(href=profile_url, rel="self")

            cover_image_url = None
            if profile_data["cover_image_src"] is not None:
                cover_image_url = self._cover_image_upload_and_get_url(
                    profile_data["cover_image_src"])

            thumbnail_small_image_urls = self._thumbnail_small_image_upload_and_get_urls(
                profile_data["thumbnail_small_image_srcs"])
            thumbnail_large_image_urls = self._thumbnail_large_image_upload_and_get_urls(
                profile_data["thumbnail_large_image_srcs"])

            torrent_public_url = self._torrent_upload_and_get_url(
                profile_data["torrent_download_url"],
                profile_data["torid"],
                slugify(profile_data["title"]),
                profile_data["publish_date"],
            )

            content_lines = []
            if cover_image_url is not None:
                content_lines.append(f'<p><img src="{cover_image_url}" /></p>')

            content_lines.append(f'<p>[{profile_data["category"]}]</p>')
            content_lines.append(f'<p>Tags: {profile_data["tags"]}</p>')
            content_lines.append(
                f'<p>Published: {profile_data["publish_date"]}</p>')
            content_lines.append(
                f'<p><a href="{profile_url}" target="blank">{profile_url}</a></p>'
            )
            content_lines.append(
                f'<p style="white-space: pre-wrap;">{profile_data["description"]}</p>'
            )

            content_lines.append(f"<p>")
            for k, v in enumerate(thumbnail_small_image_urls):
                content_lines.append(f"""
                    <a href="{thumbnail_large_image_urls[k]}" target="blank">
                        <img src="{v}" width="200" height="100" />
                    </a>""")
            content_lines.append(f"</p>")

            content_lines.append(
                f'<p><a href="{torrent_public_url}" target="blank">Download</a></p>'
            )
            content_lines.append(f'<p>{profile_data["torrent_details"]}</p>')
            content_lines.append(f'<p>{profile_data["file_list"]}</p>')

            if profile_data["media_info"] is not None:
                content_lines.append(f'<p>{profile_data["media_info"]}</p>')

            fe.content(self._valid_xhtml_content(content_lines), type="xhtml")

        self._upload_feed()
        self._report_retry_count()

    @staticmethod
    def _valid_xhtml_content(content_lines: List) -> str:
        broken_html = "".join(content_lines)
        # parse as HTML
        parser = etree.HTMLParser()
        tree = etree.parse(StringIO(broken_html), parser)
        # output as valid XML
        result = etree.tostring(tree.getroot(),
                                pretty_print=True,
                                method="xml")

        return result.decode("utf-8")

    def _upload_feed(self):
        logging.debug("construct and upload feed")

        atomfeed = self.feed.atom_str()
        bucket = self.config["s3"]["bucket"]
        key = self.config["s3"][f"feed_filename_{self.environment}"].format(
            version=getenv("FEED_VERSION", "v0"))
        self.s3.upload_fileobj(BytesIO(atomfeed), bucket, key)
        resp = self.s3.put_object_acl(ACL="public-read",
                                      Bucket=bucket,
                                      Key=key)
        if resp is None:
            capture_message(f"Failed to set object ACL for {bucket}/{key}")

    def _parse_profile(self, profile_url):
        logging.debug(f"processing profile {profile_url}")
        resp = self._get(profile_url)

        if ("Error 404: Torrent not found" in resp.text
                or "Torrent not found" in resp.text):
            msg = f"No torrent found for {profile_url}"
            logging.info(msg)
            capture_message(msg)
            return None

        profile_data = {}
        profile_data["category"] = resp.html.find("h1.headline img",
                                                  first=True).attrs["alt"]
        if any(category in profile_data["category"]
               for category in self.config["exclude_categories"]):
            return None

        profile_data["torid"] = re.match(r".*=(\d+)$", profile_url)[1]

        try:
            profile_data["torrent_download_url"] = next(
                l for l in resp.html.links if "download.php?torid=" in l)
        except StopIteration:
            msg = f"did not find download link for {profile_url}"
            capture_message(msg)
            raise RuntimeError(msg)

        profile_data["hashid"] = re.match(
            r".*torid=([a-z0-9]+)$",
            profile_data["torrent_download_url"]).group(1)

        profile_data["title"] = resp.html.find("h1.headline", first=True).text
        profile_data["description"] = resp.html.find("#torDescription",
                                                     first=True).text
        profile_data["tags"] = resp.html.find("#tagLinks", first=True).text
        profile_data["publish_date"] = self._parse_publish_date(
            resp.html.find("div.ribbon span.blogDate", first=True).text)
        profile_data["torrent_details"] = resp.html.find(
            "#tabs-1 table.dataTable", first=True).html
        profile_data["media_info"] = self._download_media_info(
            profile_data["torid"])
        profile_data["file_list"] = self._download_file_list(
            profile_data["hashid"])

        try:
            profile_data["cover_image_src"] = next(
                link.attrs["src"]
                for link in resp.html.find("div.contentArea img")
                if "imghost/covers/" in link.attrs["src"])
        except StopIteration:
            logging.debug(f"did not find cover image for {profile_url}")
            profile_data["cover_image_src"] = None

        profile_data["thumbnail_small_image_srcs"] = [
            i.attrs["src"] for i in resp.html.find("#torScreens img")
        ]
        profile_data["thumbnail_large_image_srcs"] = [
            i.attrs["href"] for i in resp.html.find("#torScreens a")
        ]

        return profile_data

    @retry((TimeOutException, ConnectionError), tries=5, delay=3, backoff=2)
    def _get(self, url, **kwargs) -> Response:
        self._anti_hammer_sleep()
        resp = self.session.get(url, **kwargs)

        if resp.status_code in TIMEOUT_STATUS_CODES:
            self.metric_retry_count += 1
            raise TimeOutException

        return resp

    @staticmethod
    def _parse_publish_date(text) -> datetime:
        return datetime.fromtimestamp(
            mktime(strptime(text, "%d %b, %Y [%I:%M %p]")))

    def _torrent_profile_links(self, max_pages) -> List:
        links = []
        for page in range(1, self.config["torrent_pages_to_scan"] + 1):
            resp = self._torrent_list_response(page, max_pages)

            links.extend(l for l in resp.html.links
                         if "torrent-details.php?torid=" in l)

        return links

    @retry(TimeOutException, tries=5, delay=3, backoff=2)
    def _torrent_list_response(self, current_page: int,
                               max_pages: int) -> Response:
        logging.debug(f"getting torrent list page no. {current_page}")
        headers = {"X-Requested-With": "XMLHttpRequest"}
        url = self.config["site"]["torrent_list_url"].format(
            max=max_pages, current=current_page)
        resp = self._get(url=url, headers=headers)
        if resp.status_code in TIMEOUT_STATUS_CODES:
            self.metric_retry_count += 1
            raise TimeOutException

        logging.debug(f"response status code {resp.status_code}")
        logging.debug(f"response length {len(resp.text)}")

        if "Access Denied!" in resp.text:
            raise RuntimeError("AJAX request was denied")

        return resp

    @retry(TimeOutException, tries=5, delay=3, backoff=2)
    def _login(self):
        login_url = self.config["site"]["login_url"]
        username = self._secrets()["username"]
        password = self._secrets()["password"]

        self._get(login_url)
        resp = self.session.post(
            login_url,
            data={
                "form": "login",
                "username": username,
                "password": password
            },
        )

        if resp.status_code in TIMEOUT_STATUS_CODES:
            self.metric_retry_count += 1
            raise TimeOutException

        if "Error: Invalid username or password." in resp.text:
            raise RuntimeError("login failed because of invalid credentials")
        else:
            logging.debug("logged in")

    @retry(TimeOutException, tries=5, delay=3, backoff=2)
    def _max_pages(self):
        logging.debug("finding out torrents max page number")

        try:
            resp = self._get(self.config["site"]["torrents_url"])

            if resp.status_code in TIMEOUT_STATUS_CODES:
                self.metric_retry_count += 1
                raise TimeOutException

            if resp.status_code != 200:
                raise RuntimeError(
                    "the torrents page is not responding correctly")

            pattern = r"ajax/torrents_data\.php\?total=(?P<max>\d+)&page=1"
            match = re.search(pattern, resp.text)
            if match is None:
                raise RuntimeError("could not find max page number")

            max_page = match.group("max")
            logging.debug(f"max pages figured out: {max_page}")

            return int(max_page)
        except ConnectionError as e:
            capture_exception(e)
            raise RuntimeError("failed to get the torrents page")

    def _download_media_info(self, torid) -> Optional[str]:
        logging.debug(f"getting torrent media info for {torid}")

        headers = {"X-Requested-With": "XMLHttpRequest"}
        url = self.config["site"]["torrent_techspec_url"].format(torid)
        resp = self._get(url=url, headers=headers)

        logging.debug(f"response status code {resp.status_code}")
        logging.debug(f"response length {len(resp.text)}")

        if len(resp.text) == 0:
            return None

        if "Access Denied!" in resp.text:
            raise RuntimeError("AJAX request was denied")

        return resp.html.html

    def _download_file_list(self, hashid) -> str:
        logging.debug(f"getting torrent file list for {hashid}")

        headers = {"X-Requested-With": "XMLHttpRequest"}
        url = self.config["site"]["torrent_filelist_url"].format(hashid)
        resp = self._get(url=url, headers=headers)

        logging.debug(f"response status code {resp.status_code}")
        logging.debug(f"response length {len(resp.text)}")

        if "Access Denied!" in resp.text:
            raise RuntimeError("AJAX request was denied")

        return resp.html.html

    def _cover_image_upload_and_get_url(self, url) -> str:
        matches = re.match(r".*/covers/(\d{4})/(\d{2})/(.*)", url)
        year = matches[1]
        month = matches[2]
        filename = matches[3]
        key = f"covers/{year}/{month}/{filename}"

        return self._upload(key, url)

    def _upload(self, key, url) -> str:
        """
        Check if key exists in the bucket.
        If not, then download it from url and upload it to S3 as key.
        Set the object ACL to public readable.
        Return the public URL for the object.

        Args:
            key (str): S3 object key
            url (str): source URL to download the data from

        Returns:
            (str): the public URL in S3
        """
        bucket = self.config["s3"]["bucket"]
        try:
            self.s3.head_object(Bucket=bucket, Key=key)
        except ClientError:
            resp = self._get(url)
            self.s3.upload_fileobj(
                BytesIO(resp.content),
                bucket,
                key,
                ExtraArgs={"StorageClass": "STANDARD_IA"},
            )

            resp = self.s3.put_object_acl(ACL="public-read",
                                          Bucket=bucket,
                                          Key=key)
            if resp is None:
                capture_message(f"Failed to set object ACL for {bucket}/{key}")

        return self.config["s3"]["object_url"].format(
            bucket=self.config["s3"]["bucket"],
            region=self.config["s3"]["region"],
            filekey=key,
        )

    def _thumbnail_small_image_upload_and_get_urls(self, urls) -> List:
        pub_urls = []
        for url in urls:
            matches = re.match(r".*/screenthumb/(\d{4})/(\d{2})/(.*)", url)
            year = matches[1]
            month = matches[2]
            filename = matches[3]
            key = f"screenthumbs/small/{year}/{month}/{filename}"
            pub_urls.append(self._upload(key, url))

        return pub_urls

    def _thumbnail_large_image_upload_and_get_urls(self, urls) -> List:
        pub_urls = []
        for url in urls:
            matches = re.match(r".*/screens/(\d{4})/(\d{2})/(.*)", url)
            year = matches[1]
            month = matches[2]
            filename = matches[3]
            key = f"screenthumbs/large/{year}/{month}/{filename}"
            pub_urls.append(self._upload(key, url))

        return pub_urls

    def _torrent_upload_and_get_url(self, url, torid, filename,
                                    publish_date) -> str:
        """

        Args:
            url (str): Source URL to torrent
            torid (str): Torrent ID
            filename (str): The filename to use in the S3 key
            publish_date (datetime): Torrent publish date

        Returns:
            (str) S3 public URL for the file
        """
        key = f"torrents/{publish_date.year}/{publish_date.month}/{filename}_{torid}.torrent"
        return self._upload(key, url)

    def _report_execution(self):
        self.cloudwatch.put_metric_data(
            Namespace="Animetorrents",
            MetricData=[{
                "MetricName": "execution",
                "Value": 0.0
            }],
        )

    def _report_retry_count(self):
        self.cloudwatch.put_metric_data(
            Namespace="Animetorrents",
            MetricData=[{
                "MetricName": "retries",
                "Value": self.metric_retry_count
            }],
        )
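A minimal entry point for the class above might look like this (a sketch; the repository's actual runner is not shown in this excerpt):

if __name__ == "__main__":
    # __init__ reads config.toml and sets up the AWS clients, logging and Sentry;
    # crawl() then logs in, scrapes the listings, builds the feed and uploads it to S3.
    Spider().crawl()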
コード例 #30
0
ファイル: test-feeds.py プロジェクト: cadrian/pyceed
#
#  You should have received a copy of the GNU General Public License
#  along with PyCeed.  If not, see <http://www.gnu.org/licenses/>.

import feedparser
from feedgen.feed import FeedGenerator

out_feed = FeedGenerator()

url = 'http://www.courantpositif.fr/feed/'
d = feedparser.parse(url)
print("~~ %s ~~" % d.feed.title)
out_feed.title(d.feed.title)
out_feed.subtitle(d.feed.subtitle)
out_feed.id(d.feed.get("id", "no id"))
out_feed.updated(d.feed.updated)

for e in d.entries:
	print(" * [%s] %s" % (e.published, e.title))
	out_entry = out_feed.add_entry()
	out_entry.title(e.title)
	out_entry.published(e.published)
	out_entry.updated(e.updated)
	out_entry.id(e.id)
	out_entry.summary(e.summary)
	for c in e.content:
		out_entry.content(content=c.value, type=c.type) #, src=c.base
	for l in e.links:
		print("	  > [%s] %s" % (l.rel, l.href))
		out_entry.link(link=l)
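The script above rebuilds the feed in memory but stops before serializing it; with feedgen the result can be written out afterwards, for example (a sketch; the output filename is hypothetical):

# Serialize the rebuilt feed as Atom, either to a byte string or straight to a file.
atom_bytes = out_feed.atom_str(pretty=True)
out_feed.atom_file("courantpositif.atom", pretty=True)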
コード例 #31
0
    return [arg]


parser = argparse.ArgumentParser()
parser.add_argument('condition', type=condition)
parser.add_argument('currency', type=currency)
parser.add_argument('minimum_discount', type=int)
parser.add_argument('outfile')

args = parser.parse_args()

try:
    fg = FeedGenerator()
    fg.id(FEED_URL)
    fg.title('Discogs Deals')
    fg.updated(now())
    fg.link(href=FEED_URL, rel='self')
    fg.author(FEED_AUTHOR)

    with httpx.Client() as client:

        for deal in get_deals(
                client,
                args.condition,
                args.currency,
                args.minimum_discount
        ):
            fe = fg.add_entry()
            fe.id(deal['id'])
            fe.title(deal['title'])
            fe.updated(deal['updated'])
コード例 #32
0
async def channel(request, channel_id, return_type='video'):
    log.info(f'Channel: {channel_id}')
    channel_name = [f'{channel_id}/{return_type}']
    if channel_name[0] in channel_feed and channel_feed[
            channel_name[0]]['expire'] > datetime.now():
        return raw(channel_feed[channel_name[0]]['feed'],
                   content_type='application/rss+xml')
    fg = None
    calls = 0
    response = {'nextPageToken': ''}
    while 'nextPageToken' in response:
        next_page = response['nextPageToken']
        payload = {
            'part': 'snippet,contentDetails',
            'maxResults': 50,
            'channelId': channel_id,
            'key': KEY,
            'pageToken': next_page
        }
        response = json.loads(
            await get('https://www.googleapis.com/youtube/v3/activities',
                      params=payload))
        calls += 1
        if 'error' in response:
            payload = {
                'part': 'snippet',
                'maxResults': 1,
                'forUsername': channel_id,
                'key': KEY
            }
            response = json.loads(await get(
                'https://www.googleapis.com/youtube/v3/channels',
                params=payload))
            channel_id = response['items'][0]['id']
            channel_name.append(f'{channel_id}/{return_type}')
            payload = {
                'part': 'snippet,contentDetails',
                'maxResults': 50,
                'channelId': channel_id,
                'key': KEY,
                'pageToken': next_page
            }
            response = json.loads(await get(
                'https://www.googleapis.com/youtube/v3/activities',
                params=payload))
            calls += 2
        if not fg:
            fg = FeedGenerator()
            fg.load_extension('podcast')
            fg.generator('PodTube', __version__,
                         'https://github.com/aquacash5/PodTube')
            snippet = response['items'][0]['snippet']
            if 'Private' in snippet['title']:
                continue
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fg.title(snippet['title'])
            fg.id(f'http://{request.headers["host"]}{request.url}')
            fg.description(snippet['description'] or ' ')
            fg.author(name=snippet['channelTitle'])
            fg.image(snippet['thumbnails'][icon]['url'])
            fg.link(href=f'https://www.youtube.com/playlist?list={channel_id}')
            fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fg.podcast.itunes_summary(snippet['description'])
            fg.podcast.itunes_category('Technology', 'Podcasting')
            fg.updated(f'{str(datetime.utcnow())}Z')
        for item in response['items']:
            snippet = item['snippet']
            if snippet['type'] != 'upload':
                continue
            current_video = item['contentDetails']['upload']['videoId']
            log.debug(f'ChannelVideo: {current_video} {snippet["title"]}')
            fe = fg.add_entry()
            fe.title(snippet['title'])
            fe.id(current_video)
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fe.updated(snippet['publishedAt'])
            if return_type == 'audio':
                fe.enclosure(
                    url=
                    f'http://{request.headers["host"]}/audio/{current_video}',
                    type="audio/mpeg")
            else:
                fe.enclosure(
                    url=
                    f'http://{request.headers["host"]}/video/{current_video}',
                    type="video/mp4")
            fe.author(name=snippet['channelTitle'])
            fe.podcast.itunes_author(snippet['channelTitle'])
            fe.pubdate(snippet['publishedAt'])
            fe.link(href=f'http://www.youtube.com/watch?v={current_video}',
                    title=snippet['title'])
            fe.podcast.itunes_summary(snippet['description'])
            fe.description(snippet['description'])
            await sleep(0)
    feed = {
        'feed': fg.rss_str(),
        'expire': datetime.now() + timedelta(hours=calls)
    }
    for _name in channel_name:
        channel_feed[_name] = feed
    return raw(feed['feed'], content_type='application/rss+xml')
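The handler above appears to be a Sanic-style coroutine (it returns raw(...) responses); if so, wiring it into an application might look roughly like this (a sketch under that assumption; the route paths are illustrative, not taken from the project):

from sanic import Sanic

app = Sanic("podtube")
# return_type defaults to 'video' in the handler signature, so both routes work
app.add_route(channel, "/channel/<channel_id>")
app.add_route(channel, "/channel/<channel_id>/<return_type>")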
コード例 #33
0
ファイル: feed.py プロジェクト: deafmute1/refeed
class Feed():
    """ Instanceable class to manage a named feed including storage, retrieval and genration functions.

    :param feed_name: a string containg a feed name present in config.Feed.names() 
    """
    def __init__(self, feed_name: str) -> None:
        self.feed_name = feed_name
        self.alternates = {}
        self.added_mail_uuids = []
        self.written_mail_uuids = None

        # Retrieve fg from the shelf if it exists, otherwise create it using config options
        with shelve.open(
                str(Path(
                    config.paths["data"]).joinpath('feeds.shelf'))) as shelf:
            try:
                self.fg = shelf[self.feed_name]
            except KeyError as e:
                if self.feed_name not in shelf:
                    # Mandatory ATOM values
                    fg_config = config.ParseFeed.info(self.feed_name)
                    self.fg = FeedGenerator()
                    self.fg.id('tag:{},{}/feeds/{}.xml'.format(
                        fg_config['fqdn'], date.today(), feed_name))
                    href_ = '{}{}/feeds/{}.xml'.format(fg_config['protocol'],
                                                       fg_config['fqdn'],
                                                       feed_name)
                    self.fg.link(rel='self',
                                 type='application/atom+xml',
                                 href=href_)
                    self.fg.title(feed_name)
                    self.fg.subtitle(
                        'Feed generated from mail messages received at {} by refeed'
                        .format(config.ParseFeed.account_name(self.feed_name)))
                    self.fg.author(name=fg_config['author-name'])

                    # Optional values
                    try:
                        self.fg.logo(
                            str(
                                Path(config.paths["static"]).joinpath(
                                    fg_config['logo'])))
                    except KeyError:
                        pass

                    try:
                        self.fg.language(fg_config['language'])
                    except KeyError:
                        pass
                else:
                    raise KeyError(e)

    # context manager
    def __enter__(self) -> Feed:
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        self._dump_shelves()

    def add_entries_from_dict_if_new(self, mails: Dict[int,
                                                       MailParser]) -> None:
        try:
            for uuid, mail in mails.items():
                if FeedTools.uuid_not_in_feed(self.feed_name, uuid):
                    self.add_entry((uuid, mail))
        except (TypeError, ValueError):
            logging.error(
                'Given NoneType as mailobject to Feed, some error in mail with IMAP.',
                exc_info=True)
        except Exception:
            logging.error('Unexpected error', exc_info=True)

    def add_entry(self, mail: Tuple[int, MailParser]) -> None:
        random.seed(None, 2)
        fe = self.fg.add_entry(order='prepend')
        fg_config = config.ParseFeed.info(self.feed_name)

        # id
        try:
            fe.id('tag:{},{}/feeds/{}.xml:{}'.format(fg_config['fqdn'],
                                                     date.today(),
                                                     self.feed_name, mail[0]))
        except (AttributeError, MailParserReceivedParsingError):
            fe.id('tag:{},{}/feeds/{}.xml:ID_NOT_FOUND-{}'.format(
                fg_config['fqdn'], date.today(), self.feed_name, ''.join(
                    random.choices(string.ascii_lowercase + string.digits,
                                   k=10))))

        # title
        try:
            fe.title(mail[1].subject)
        except (AttributeError, MailParserReceivedParsingError):
            fe.title('SUBJECT_NOT_FOUND-{}'.format(''.join(
                random.choices(string.ascii_lowercase + string.digits, k=10))))

        # alt link and body contents
        try:
            alt_id = FeedTools.generate_unique_alt_id()
            self.alternates[alt_id] = mail[1].body
            alt_link = '{}{}/alt-html/{}.html'.format(fg_config['protocol'],
                                                      fg_config['fqdn'],
                                                      alt_id)
            fe.link(rel='alternate', type='text/html', href=alt_link)
            fe.content(content=mail[1].body, src=alt_link, type='text/html')
        except (AttributeError, MailParserReceivedParsingError):
            fe.content(content='MAIL_BODY_NOT_FOUND', type='text/plain')

        # update time; the entry and feed timestamps should match exactly, not be a few seconds off
        now = datetime.now()
        fe.updated(now)
        self.fg.updated(now)

        # cache uuids added to feed
        self.added_mail_uuids.append(mail[0])

    def generate_feed(self) -> None:
        # generate htmls
        if self.alternates != {}:
            try:
                for alt_id, body in self.alternates.items():
                    with Path(config.paths["static"]).joinpath(
                            'alt',
                            '{}.html'.format(str(alt_id))).open(mode='w') as f:
                        f.write(body)
            except Exception:  # Exception catches most built-in exceptions, except KeyboardInterrupt, SystemExit and some others which are out of scope
                logging.error(
                    'Failed to write some html alt pages to file for new entries for feed {}'
                    .format(self.feed_name),
                    exc_info=True)
            finally:
                logging.info(
                    'Successfully generated html alt pages: {} for feed {}'.
                    format(list(self.alternates.keys()), self.feed_name))
                FeedTools.cleanup_alts(
                    self.feed_name,
                    config.ParseFeed.alternate_cache(self.feed_name))

        # generate xml
        try:
            self.fg.atom_file(
                str(
                    Path(config.paths["static"]).joinpath(
                        'feed', '{}.xml'.format(self.feed_name))))
        except Exception:  # TODO: Find out which exceptions feedgen actually raises, if any (not documented - check source)
            logging.error(
                'Failed to generate and write new copy of feed {} to file'.
                format(self.feed_name))
        finally:
            self.written_mail_uuids = self.added_mail_uuids

    def _dump_shelves(self) -> None:
        with shelve.open(
                str(Path(
                    config.paths["data"]).joinpath('feeds.shelf'))) as shelf:
            shelf[self.feed_name] = self.fg
            logging.info('Atom data for feed {} stored to disk'.format(
                self.feed_name))

        with shelve.open(
                str(
                    Path(config.paths["data"]).joinpath(
                        'alternate_ids.shelf'))) as shelf:
            try:
                # list.extend() returns None, so build the combined list explicitly
                shelf[self.feed_name] = shelf[self.feed_name] + list(
                    self.alternates.keys())
            except (KeyError,
                    AttributeError):  # feed alternates list does not exist yet
                shelf[self.feed_name] = list(self.alternates.keys())
                logging.info(
                    'Alt id data for feed {} stored to disk for first time'.
                    format(self.feed_name))
            finally:
                logging.info(
                    'Alt id data for feed {} stored back to disk'.format(
                        self.feed_name))

        with shelve.open(
                str(Path(config.paths["data"]).joinpath(
                    'mail_uuids.shelf'))) as shelf:
            try:
                # list.extend() returns None, so concatenate instead (a None value still raises TypeError below)
                shelf[self.feed_name] = shelf[self.feed_name] + self.written_mail_uuids
            except (KeyError,
                    AttributeError):  # feed id list does not exist yet
                shelf[self.feed_name] = self.written_mail_uuids
                logging.info(
                    'Mail UUID data for feed {} stored to disk for first time'.
                    format(self.feed_name))
            except TypeError:
                if self.written_mail_uuids is None:
                    logging.info(
                        'Failed to write mail UUIDs to shelf file for feed {}: Newly written mail UUID data is None. Feed._dump_shelves() was likely called without any new items beeing added to feed'
                        .format(self.feed_name),
                        exc_info=True)
                else:
                    logging.error(
                        'Failed to write mail UUIDs to shelf file for feed {}: Newly written mail UUID is not None, some unexpected error has occured. '
                        .format(self.feed_name),
                        exc_info=True)
            finally:
                logging.info(
                    'Mail UUID data for feed {} stored back to disk'.format(
                        self.feed_name))
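Because the class defines __enter__ and __exit__, it is evidently meant to be driven as a context manager so that _dump_shelves() runs on exit; a usage sketch (the mails dict is hypothetical and would come from the project's IMAP layer):

# mails: Dict[int, MailParser], mapping mail UUIDs to parsed messages
with Feed('myfeed') as feed:
    feed.add_entries_from_dict_if_new(mails)
    feed.generate_feed()  # writes the alt HTML pages and the Atom file, as defined above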
コード例 #34
0
ファイル: test_feed.py プロジェクト: rachmann/python-feedgen
    def setUp(self):

        fg = FeedGenerator()

        self.nsAtom = "http://www.w3.org/2005/Atom"
        self.nsRss = "http://purl.org/rss/1.0/modules/content/"

        self.feedId = 'http://lernfunk.de/media/654321'
        self.title = 'Some Testfeed'

        self.authorName = 'John Doe'
        self.authorMail = '*****@*****.**'
        self.author = {'name': self.authorName, 'email': self.authorMail}

        self.linkHref = 'http://example.com'
        self.linkRel = 'alternate'

        self.logo = 'http://ex.com/logo.jpg'
        self.subtitle = 'This is a cool feed!'

        self.link2Href = 'http://larskiesow.de/test.atom'
        self.link2Rel = 'self'

        self.language = 'en'

        self.categoryTerm = 'This category term'
        self.categoryScheme = 'This category scheme'
        self.categoryLabel = 'This category label'

        self.cloudDomain = 'example.com'
        self.cloudPort = '4711'
        self.cloudPath = '/ws/example'
        self.cloudRegisterProcedure = 'registerProcedure'
        self.cloudProtocol = 'SOAP 1.1'

        self.icon = "http://example.com/icon.png"
        self.contributor = {
            'name': "Contributor Name",
            'uri': "Contributor Uri",
            'email': 'Contributor email'
        }
        self.copyright = "The copyright notice"
        self.docs = 'http://www.rssboard.org/rss-specification'
        self.managingEditor = '*****@*****.**'
        self.rating = '(PICS-1.1 "http://www.classify.org/safesurf/" ' + \
            '1 r (SS~~000 1))'
        self.skipDays = 'Tuesday'
        self.skipHours = 23

        self.textInputTitle = "Text input title"
        self.textInputDescription = "Text input description"
        self.textInputName = "Text input name"
        self.textInputLink = "Text input link"

        self.ttl = 900

        self.webMaster = '*****@*****.**'

        fg.id(self.feedId)
        fg.title(self.title)
        fg.author(self.author)
        fg.link(href=self.linkHref, rel=self.linkRel)
        fg.logo(self.logo)
        fg.subtitle(self.subtitle)
        fg.link(href=self.link2Href, rel=self.link2Rel)
        fg.language(self.language)
        fg.cloud(domain=self.cloudDomain,
                 port=self.cloudPort,
                 path=self.cloudPath,
                 registerProcedure=self.cloudRegisterProcedure,
                 protocol=self.cloudProtocol)
        fg.icon(self.icon)
        fg.category(term=self.categoryTerm,
                    scheme=self.categoryScheme,
                    label=self.categoryLabel)
        fg.contributor(self.contributor)
        fg.copyright(self.copyright)
        fg.docs(docs=self.docs)
        fg.managingEditor(self.managingEditor)
        fg.rating(self.rating)
        fg.skipDays(self.skipDays)
        fg.skipHours(self.skipHours)
        fg.textInput(title=self.textInputTitle,
                     description=self.textInputDescription,
                     name=self.textInputName,
                     link=self.textInputLink)
        fg.ttl(self.ttl)
        fg.webMaster(self.webMaster)
        fg.updated('2017-02-05 13:26:58+01:00')
        fg.pubDate('2017-02-05 13:26:58+01:00')
        fg.generator('python-feedgen', 'x', uri='http://github.com/lkie...')
        fg.image(url=self.logo,
                 title=self.title,
                 link=self.link2Href,
                 width='123',
                 height='123',
                 description='Example Image')

        self.fg = fg
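A test built on this fixture would then serialize the feed and inspect the output; a minimal sketch (the method name and assertion are illustrative, and lxml's etree is assumed to be imported at the top of the test module):

    def test_atom_title(self):
        # Parse the generated Atom document and check a value configured in setUp.
        root = etree.fromstring(self.fg.atom_str(pretty=True))
        title = root.find('atom:title', {'atom': self.nsAtom})
        self.assertEqual(title.text, self.title)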