Code Example #1
File: test_entry.py | Project: lkiesow/python-feedgen
    def setUp(self):
        fg = FeedGenerator()
        self.feedId = 'http://example.com'
        self.title = 'Some Testfeed'

        fg.id(self.feedId)
        fg.title(self.title)
        fg.link(href='http://lkiesow.de', rel='alternate')[0]
        fg.description('...')

        fe = fg.add_entry()
        fe.id('http://lernfunk.de/media/654321/1')
        fe.title('The First Episode')
        fe.content(u'…')

        # Use also the different name add_item
        fe = fg.add_item()
        fe.id('http://lernfunk.de/media/654321/1')
        fe.title('The Second Episode')
        fe.content(u'…')

        fe = fg.add_entry()
        fe.id('http://lernfunk.de/media/654321/1')
        fe.title('The Third Episode')
        fe.content(u'…')

        self.fg = fg
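
The fixture above only assembles the feed object; for reference, a minimal sketch of how that same fg could be serialized with feedgen's standard output methods (the file names here are illustrative, not part of the original test):

atom_bytes = fg.atom_str(pretty=True)  # ATOM feed as a bytes object
rss_bytes = fg.rss_str(pretty=True)    # RSS feed as a bytes object
fg.atom_file('feed.atom')              # write the ATOM feed to disk
fg.rss_file('feed.rss')                # write the RSS feed to disk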
Code Example #2
File: snapfeed.py | Project: matthazinski/snapfeed
def gen_feed(user, base_url, path, debug=False):
    # Create feed
    feed = FeedGenerator()
    feed.id(urlparse.urljoin(base_url, user + '.xml'))
    feed.title('Snapchat story for ' + user)
    feed.link( href=urlparse.urljoin(base_url, user + '.xml'), rel='self' )
    feed.language('en')
    feed.description('Snapchat media')


    # Iterate through files in path, sort by unix timestamp (newest first), then add to feed
    files = sorted(os.listdir(path), reverse=True)

    for filename in files:
        split = filename.split('~')

        if split[0] != user:
            continue
        
        if os.path.splitext(filename)[1] in ['.mp4', '.jpg']:
            entry = feed.add_entry()
            entry.id(urlparse.urljoin(base_url, filename))
            entry.link(href=urlparse.urljoin(base_url, filename))
            entry.title(filename)

    
    # Write feed to disk
    feed.rss_file(os.path.join(path, user + '.xml'))
    date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

    if debug:
        print('{0}  Regenerated {1}'.format(date, urlparse.urljoin(base_url, 
                                                               user + '.xml')))
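
Note that the entries in gen_feed() carry no publication date. Assuming the second '~'-separated token of each filename really is the unix timestamp that the sorting comment refers to (an assumption; the original code never parses it), a date could be attached roughly like this:

import datetime
import pytz

def snap_pubdate(filename):
    # hypothetical helper: treats the second '~'-separated token as a unix timestamp
    return datetime.datetime.fromtimestamp(int(filename.split('~')[1]), tz=pytz.utc)

# inside the loop above, after entry.title(filename):
# entry.published(snap_pubdate(filename))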
Code Example #3
def generateFeeds(buffered, meta):
    utc = pytz.utc
    fg = FeedGenerator()
    fg.id(meta['id'])
    fg.title(meta['title'])
    fg.author(meta['author'])
    fg.subtitle(meta['subtitle'])
    fg.link( href=meta['link'], rel='self' )
    fg.language(meta['language'])

    for tweet in buffered:
        fe = fg.add_entry()
        fe.id(tweet['url'].decode('utf-8'))
        fe.published(utc.localize(tweet['created_at']).astimezone(pytz.timezone(locale)))
        
        #fe.guid(tweet['url'].decode('utf-8'))
        fe.link(href=tweet['url'].decode('utf-8'), rel='alternate')
        fe.title(tweet['readable_title'])
        fe.description(tweet['readable_article'])
                
        try:
            fe.author({'name': '', 'email':tweet['user_name'].decode('utf-8') + ": " + tweet['text'].decode('utf-8')})
        except Exception, e:
            logger.error(e)
            fe.author({'name': 'a', 'email':'*****@*****.**'})
Code Example #4
File: tests.py | Project: etalab/udata-gouvfr
    def feed(self, feed_title, title, content, url, published=None, summary=None,
             enclosure=None, media_thumbnail=None):
        feed = FeedGenerator()
        feed.title(feed_title)
        feed.description(faker.sentence())
        feed.link({'href': WP_FEED_URL})

        entry = feed.add_entry()
        entry.title(title)
        entry.link({'href': url})
        entry.author(name=faker.name())
        entry.content(content, type="cdata")
        if summary:
            entry.description(summary)
        if enclosure:
            entry.enclosure(url=enclosure['url'],
                            type=enclosure['type'],
                            length=str(faker.pyint()))
        if media_thumbnail:
            feed.load_extension('media')
            entry.media.thumbnail({'url': media_thumbnail})
        tz = pytz.timezone(faker.timezone())
        published = published or faker.date_time(tzinfo=tz)
        entry.published(published)
        entry.updated(faker.date_time_between(start_date=published, tzinfo=tz))

        return feed.rss_str().decode('utf8')
Code Example #5
File: models.py | Project: rgegriff/podiddly
  def generate_feed(self):
    fg = FeedGenerator()
    fg.load_extension('podcast')
    for field in self.MAPPINGS:
      value_names = field[0]
      methods = field[1]
      
      values = []
      # collect the values from self
      for value_name in value_names:
        values.append( getattr(self, value_name) )
      # decend the attribute tree
      method = get_method(methods, fg)
      # apply the values to the found method
      method(*values)

      for episode in self.episodes.all():
        # This is the same pattern as above, I wonder if I can DRY this out.
        entry = fg.add_entry()
        value_names, method_names = zip(*episode.MAPPINGS)
        values = []
        for ind, value_name in enumerate(value_names):
          print value_name
          values  = [getattr(episode, v) for v in value_name]
          if None not in values:
            print values
            method = get_method(method_names[ind], entry)
            method(*values)
    print "DONE"
        
    return fg
Code Example #6
File: feed.py | Project: majestrate/pytracker
def generate(app, category, torrents):
    """
    generate an rss feed from category with torrents as results
    if category is None this feed is for all categories
    """
    feed = FeedGenerator()
    if category:
        url = util.fullSiteURL(app, 'feed', '{}.rss'.format(category))
    else:
        url = util.fullSiteURL(app, 'feed', 'all.rss')
    feed.link(href=url, rel="self")
    feed.id(url)
    if category:
        title = "new {} torrents on index ex invisibilis".format(category)
    else:
        title = "new torrents on index ex invisibilis"
    feed.title(title)
    feed.description(title)
    feed.author({"name": "anonymous"})
    feed.language("en")
    for torrent in torrents:
        item = feed.add_entry()
        url = util.fullSiteURL(app, torrent.downloadURL())
        item.id(torrent.infohash)
        item.link(href=url)
        item.title(torrent.title)
        item.description(torrent.summary(100))
    return feed
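
generate() returns the FeedGenerator itself, leaving serialization to the caller. A minimal usage sketch (handler and variable names are illustrative):

feed = generate(app, category, torrents)
rss_bytes = feed.rss_str(pretty=True)  # ready to serve as application/rss+xml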
Code Example #7
File: hypecast.py | Project: blackmad/hypecast
  def makeRss(self):
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.id('http://hypecast.blackmad.com/' + self.mode)
    fg.title('Hype Machine Robot Radio: ' + self.mode)
    fg.author( {'name':'David Blackmad','email':'*****@*****.**'} )
    fg.logo('http://dump.blackmad.com/the-hype-machine.jpg')
    fg.language('en')
    fg.link(href='http://hypecast.blackmad.com/' + self.mode)
    fg.description('Hype Machine Robot Radio: ' + self.mode)

    description = ' <br/>'.join(['%s. %s' % (index + 1, self.mk_song_id(s)) for index, s in enumerate(self.songs)])

    fe = fg.add_entry()
    fe.title(self.track_name)
    fe.description(description)
    fe.id(self.filename)
    # add length
    print(self.relative_dir)
    print(self.filename)
    fe.enclosure(url = 'http://hypecast.blackmad.com/%s' % (self.filename), type="audio/mpeg")

    rss_str = fg.rss_str()
    newItem = ET.fromstring(rss_str)[0].find('item')
    out = open(self.get_filename('xml'), 'w')
    out.write(ET.tostring(newItem))
    out.close()
    self.updateRss()
Code Example #8
File: handler.py | Project: BlogTANG/blog-a
def feed():
    """
    Generate atom feed
    """
    entries = parse_posts(0, C.feed_count)
    fg = FeedGenerator()
    fg.id(str(len(entries)))
    fg.title(C.title)
    fg.subtitle(C.subtitle)
    fg.language(C.language)
    fg.author(dict(name=C.author, email=C.email))
    fg.link(href=C.root_url, rel='alternate')
    fg.link(href=make_abs_url(C.root_url, 'feed'), rel='self')
    for entry in entries:
        fe = fg.add_entry()
        fe.id(entry.get('url'))
        fe.title(entry.get('title'))
        fe.published(entry.get('date'))
        fe.updated(entry.get('updated') or entry.get('date'))
        fe.link(href=make_abs_url(C.root_url, entry.get('url')), rel='alternate')
        fe.author(dict(name=entry.get('author'), email=entry.get('email')))
        fe.content(entry.get('body'))

    atom_feed = fg.atom_str(pretty=True)
    return atom_feed
Code Example #9
File: ming.py | Project: adow/ming
 def render_atom(self):
     fg = FeedGenerator()
     fg.id(self.site_url)
     fg.title(self.site_title)
     fg.link(href = self.site_url,rel = 'alternate')
     fg.link(href = self.site_url + 'atom.xml',rel = 'self')
     fg.language('zh-cn')
     link_list = ArticleManager.sharedManager().link_list()
     for link in link_list:
         article = ArticleManager.sharedManager().article_for_link(link)
         if not article:
             continue
         fe = fg.add_entry()
         fe.id(article.article_link)
         fe.link(link = {'href':self.site_url + article.article_link})
         fe.title(article.article_title)
         fe.description(article.article_subtitle or '')
         fe.author(name = article.author or '',
                 email = article.author_email or '')
         d = datetime.strptime(article.article_publish_date,'%Y-%m-%d') 
         pubdate = datetime(year = d.year, month = d.month, day = d.day,tzinfo = UTC(8))
         fe.pubdate(pubdate) 
         article.render_content_html()
         fe.content(content = article._content_html,
                 type = 'html')
     atom_feed = fg.atom_str(pretty = True)
     return atom_feed
Code Example #10
File: feed.py | Project: d2m/pub-dartlang
 def generate_feed(page=1):
     feed = FeedGenerator()
     feed.id("https://pub.dartlang.org/feed.atom")
     feed.title("Pub Packages for Dart")
     feed.link(href="https://pub.dartlang.org/", rel="alternate")
     feed.link(href="https://pub.dartlang.org/feed.atom", rel="self")
     feed.description("Last Updated Packages")
     feed.author({"name": "Dart Team"})
     i = 1
     pager = QueryPager(int(page), "/feed.atom?page=%d",
                        Package.all().order('-updated'),
                        per_page=10)
     for item in pager.get_items():
         i += 1
         entry = feed.add_entry()
         for author in item.latest_version.pubspec.authors:
             entry.author({"name": author[0]})
         entry.title("v" + item.latest_version.pubspec.get("version") +\
             " of " + item.name)
         entry.link(link={"href": item.url, "rel": "alternate",
             "title": item.name})
         entry.id(
             "https://pub.dartlang.org/packages/" + item.name + "#" +\
             item.latest_version.pubspec.get("version"))
         entry.description(
             item.latest_version.pubspec
             .get("description", "Not Available"))
         readme = item.latest_version.readme
         if not readme is None:
             entry.content(item.latest_version.readme.render(), type='html')
         else:
             entry.content("<p>No README Found</p>", type='html')
     return feed
Code Example #11
File: views.py | Project: jniebuhr/WatchPeopleCode
def podcast_feed():
    logo_url = url_for("static", filename="wpclogo_big.png", _external=True)

    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.podcast.itunes_category('Technology', 'Podcasting')
    fg.podcast.itunes_image(logo_url)
    fg.author({'name': 'Nathan Kellert', 'email': '*****@*****.**'})
    fg.link(href='http://watchpeoplecode.com/podcast_feed.xml', rel='self')
    fg.title('WPC Coders Podcast')
    fg.description('WPC Coders Podcast is a weekly peek into the lives of developers and the WatchPeopleCode community. Our goal is to keep our listeners entertained by giving them new and interesting insights into our industry as well as awesome things happening within our own community. Here, you can expect hear about some of the latest news, tools, and opportunities for developers in nearly every aread of our industry. Most importantly, we hope to have some fun and a few laughs in ways only other nerds know how.')  # NOQA

    episodes = [('ep1.mp3', 'Episode 1', datetime(2015, 02, 21, 23), 'Learn all about the WPC hosts, and where we came from in Episode 1!'),
                ('ep2.mp3', 'Episode 2', datetime(2015, 02, 28, 23), 'This week we cover your news, topics and questions in episode 2!'),
                ('ep3.mp3', 'Episode 3', datetime(2015, 03, 07, 23), "On todays podcast we talk to WatchPeopleCode's founder Alex Putilin. Hear about how the reddit search engine thousands watched him write. Also, hear the inside scoop of how WatchPeopleCode got started!"),  # NOQA
                ('ep4.mp3', 'Episode 4', datetime(2015, 03, 14, 23), "This week we talk to FreeCodeCamps Quincy Larson(http://www.freecodecamp.com) about their project that combines teaching new developers how to code and completing projects for non-profits! Lets find out how this group of streamers code with a cause!")]  # NOQA

    for epfile, eptitle, epdate, epdescription in episodes[::-1]:
        epurl = "https://s3.amazonaws.com/wpcpodcast/{}".format(epfile)
        fe = fg.add_entry()
        fe.id(epurl)
        fe.title(eptitle)
        fe.description(epdescription)
        fe.podcast.itunes_image(logo_url)
        fe.pubdate(epdate.replace(tzinfo=pytz.UTC))
        fe.enclosure(epurl, 0, 'audio/mpeg')

    return Response(response=fg.rss_str(pretty=True),
                    status=200,
                    mimetype='application/rss+xml')
Code Example #12
File: controller.py | Project: alexandreblin/tvshows
def latestRss(userID):
    userID = userID.lower()

    shows = {}
    episodes = []
    today = date.today().strftime('%Y-%m-%d')
    for showID in series.getUserShowList(userID):
        shows[showID] = series.getShowInfo(userID, showID, withEpisodes=True, onlyUnseen=True)
        episodes.extend((showID, episode) for episode in shows[showID]['episodes'] if episode['airdate'] and airdateKey(episode['airdate']) < today)

    episodes.sort(key=episodeAirdateKey, reverse=True)

    feed = FeedGenerator()
    feed.id(userID)
    feed.title('%s\'s shows' % userID)
    feed.description('Unseen episodes')
    feed.link(href=request.url_root)
    feed.language('en')

    for showID, episode in episodes:
        entry = feed.add_entry()
        entry.id('%s/%s' % (showID, episode['episode_id']))
        entry.title('%s S%02dE%02d: %s' % (shows[showID]['name'], episode['season'], episode['episode'], episode['title']))

    return feed.rss_str(pretty=True)
Code Example #13
File: rc2pc.py | Project: dmascialino/rc2pc
def write_podcast(show, podcast_dir, base_public_url, showlocal_tz):
    """Create the podcast file."""
    fg = FeedGenerator()
    fg.load_extension('podcast')

    url = "{}{}.xml".format(base_public_url, show.id)
    fg.id(url.split('.')[0])
    fg.title(show.name)
    fg.image(show.image_url)
    fg.description(show.description)
    fg.link(href=url, rel='self')

    # collect all mp3s for the given show
    all_mp3s = glob.glob(os.path.join(podcast_dir, "{}_*.mp3".format(show.id)))

    for filepath in all_mp3s:
        filename = os.path.basename(filepath)
        mp3_date = _get_date_from_mp3_path(filepath, showlocal_tz)
        mp3_size = os.stat(filepath).st_size
        mp3_url = base_public_url + filename
        mp3_id = filename.split('.')[0]
        title = "Programa del {0:%d}/{0:%m}/{0:%Y}".format(mp3_date)

        # build the rss entry
        fe = fg.add_entry()
        fe.id(mp3_id)
        fe.pubdate(mp3_date)
        fe.title(title)
        fe.enclosure(mp3_url, str(mp3_size), 'audio/mpeg')

    fg.rss_str(pretty=True)
    fg.rss_file(os.path.join(podcast_dir, '{}.xml'.format(show.id)))
Code Example #14
File: feeder.py | Project: moridius/feeder
class Feeder():
    def __init__( self, url, title='', feedURL='' ):
        scraper = None
        if url.startswith( "https://twitter.com/" ):
            scraper = TwitterScraper( url )
            if title == '':
                title = "Twitter: @" + url.split('/')[3]
        elif url.startswith( "http://www.lindwurm-linden.de/termine" ):
            scraper = LindwurmScraper( url )
            if title == '':
                title = "Lindwurm: Termine"
        else:
            raise UnsupportedService( "No scraper found for this URL." )

        self.feed = FeedGenerator()        
        self.feed.id( url )
        self.feed.title( title )
        self.feed.author( { "name": url } )

        if feedURL != '':
            self.feed.link( href=feedURL, rel='self' )

        for entry in scraper.entries:
            fe = self.feed.add_entry()
            fe.id( entry['url'] )
            fe.title( entry['title'] )
            fe.link( href=entry['url'], rel='alternate' )
            fe.content( entry['text'] )

    def GetAtom( self ):
        return self.feed.atom_str( pretty=True ).decode()
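
A usage sketch for the Feeder class above (assumes the TwitterScraper and LindwurmScraper dependencies are importable; the URLs are placeholders):

feeder = Feeder('https://twitter.com/some_account',
                feedURL='https://example.org/feeds/some_account.atom')
print(feeder.GetAtom())  # ATOM document as a str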
Code Example #15
File: feed.py | Project: dyeray/podtube
def get_feed(query, title, description, link, image):
    """Get an RSS feed from the results of a query to the YouTube API."""
    service = _get_youtube_client()
    videos = service.search().list(part='snippet', **query, order='date',
                                   type='video', safeSearch='none').execute()
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.title(title)
    fg.description(description)
    fg.link(href=link, rel='alternate')
    fg.image(image)
    youtube_plugin = get_plugin_from_settings()

    for video in videos['items']:
        try:
            video_url = youtube_plugin.extract_link(
                "https://www.youtube.com/watch?v=" + video['id']['videoId'])
        except PluginException:
            continue
        fe = fg.add_entry()
        fe.id(video['id']['videoId'])
        fe.title(video['snippet']['title'])
        fe.description(video['snippet']['description'])
        fe.pubdate(dateutil.parser.parse(video['snippet']['publishedAt']))
        fe.podcast.itunes_image(video['snippet']['thumbnails']['high']['url'])
        video_info = requests.head(video_url)
        fe.enclosure(video_url, video_info.headers['Content-Length'],
                     video_info.headers['Content-Type'])
    return fg.rss_str(pretty=True)
Code Example #16
File: generate.py | Project: aaearon/lebatard-show-rss
def generate_feed(output_file, exclude_highlights=True):
    # Parse RSS feed
    d = feedparser.parse(ESPN_RSS_FEED)
    IMAGE_URL = d.feed.image["href"]

    # RSS feed generation
    fg = FeedGenerator()
    fg.load_extension("podcast", rss=True)

    ## RSS tags
    # Required
    fg.title(d.feed.title)
    fg.link(href="https://github.com/aaearon/lebatard-show-rss")
    fg.description(d.feed.description)
    # Optional
    fg.language(d.feed.language)
    fg.image(IMAGE_URL)
    fg.subtitle(d.feed.subtitle)
    # iTunes
    fg.podcast.itunes_author(d.feed.author)
    fg.podcast.itunes_category(itunes_category=d.feed.category)
    fg.podcast.itunes_image(itunes_image=IMAGE_URL)
    fg.podcast.itunes_explicit(itunes_explicit="clean")
    fg.podcast.itunes_owner(name=CONTACT["name"], email=CONTACT["email"])

    tz = pytz.timezone("America/Los_Angeles")

    for e in d.entries:

        if exclude_highlights and episode_duration_string_to_int(e["itunes_duration"]) > 3600:
            pass
        else:
            fe = fg.add_entry()

            fe.id(e.id)
            fe.title(e.title)
            fe.description(e.description)
            fe.enclosure(url=e.enclosures[0]["href"], length=e.enclosures[0]["length"], type=e.enclosures[0]["type"])

            fe.podcast.itunes_summary(e.description)
            fe.podcast.itunes_subtitle(e.description)
            fe.podcast.itunes_duration(e["itunes_duration"])

            dt = datetime.fromtimestamp(time.mktime(e.published_parsed))
            date = tz.localize(dt)

            # Local hour
            if "Show: " in e.title:
                fe.published(date)
            elif "Hour 1" in e.title:
                fe.published(date + timedelta(hours=1))
            elif "Hour 2" in e.title:
                fe.published(date + timedelta(hours=2))
            elif "Hour 3" in e.title:
                fe.published(date + timedelta(hours=3))
            else:
                fe.published(date + timedelta(hours=-1))

    fg.rss_str(pretty=True)
    fg.rss_file(output_file)
Code Example #17
File: youtube_feed.py | Project: gju/youtube-podcast
class YoutubeFeed:  
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': '%(id)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }]
    }

    def __init__(self, name):
        self.name = name
        self.ydl = youtube_dl.YoutubeDL(self.ydl_opts)

        self.fg = FeedGenerator()
        self.fg.title(name)
        self.fg.author({"name": "Youtube Audio Feed", "email": ""})
        self.fg.link(href="http://www.foo.bar.baz.com", rel="alternate")
        self.fg.description("Personalized Youtube audio feed")
        self.fg.generator("")
        self.fg.docs("")

    def add_video(self, url):
        info = self.ydl.extract_info(url, download=True)
        entry = self.fg.add_entry()
        entry.id(info['id'])
        entry.title(info['title'])
        entry.description(info['description'])
        entry.enclosure(info['id'] + ".mp3", str(info['duration']), 'audio/mpeg')

    def save(self):
        self.fg.rss_file(self.name + '.xml')  # write <name>.xml ('name' alone is not defined in save())
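
A sketch of how YoutubeFeed would be driven (the video URL is a placeholder; youtube_dl downloads and converts the audio as a side effect of add_video):

yf = YoutubeFeed('my_audio_feed')
yf.add_video('https://www.youtube.com/watch?v=VIDEO_ID')  # placeholder URL
yf.save()  # writes my_audio_feed.xml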
Code Example #18
File: feedTumblr.py | Project: fernand0/scripts
def main():

    client = moduleSocial.connectTumblr()

    posts = client.posts('fernand0')
    
    fg = FeedGenerator()
    fg.id(posts['blog']['url'])
    fg.title(posts['blog']['title'])
    fg.author( {'name':posts['blog']['name'],'email':'*****@*****.**'} )
    fg.link( href=posts['blog']['url'], rel='alternate' )
    fg.subtitle('Alternate feed due to Tumblr GDPR restrictions')
    fg.language('en')

    print(len(posts['posts']))
    for i in range(len(posts['posts'])):
        fe = fg.add_entry()
        print(posts['posts'][i]['post_url'])
        if 'title' in posts['posts'][i]:
            title = posts['posts'][i]['title']
            print('T', posts['posts'][i]['title'])
        else:
            title = posts['posts'][i]['summary'].split('\n')[0]
            print('S', posts['posts'][i]['summary'].split('\n')[0])
        fe.title(title)
        fe.link(href=posts['posts'][i]['post_url'])
        fe.id(posts['posts'][i]['post_url'])

    print(fg.atom_file('/var/www/html/elmundoesimperfecto/tumblr.xml'))

    sys.exit()
Code Example #19
File: blog.py | Project: matrufsc2/matrufsc2
def get_feed(atom=False):
    fg = FeedGenerator()
    domain = get_domain()
    items = get_posts({"limit": "10"}, full=True)["results"]
    fg.id("http://%s/"%domain)
    fg.title("Blog do MatrUFSC2")
    fg.description("Feed do blog do MatrUFSC2, onde noticias e novos recursos sao anunciados primeiro!")
    fg.language('pt-BR')
    fg.link({"href":"/blog/feed","rel":"self"})
    fg.updated(items[0]["posted_at"].replace(tzinfo=pytz.UTC))
    for item in items:
        entry = fg.add_entry()
        entry.title(item["title"])

        tree = html.fromstring(item["summary"])
        cleaner = Cleaner(allow_tags=[])
        tree = cleaner.clean_html(tree)

        text = tree.text_content()
        entry.description(text, True)
        entry.link({"href":item["link"],"rel":"self"})
        entry.content(item["body"])
        entry.published(item["posted_at"].replace(tzinfo=pytz.UTC))
        entry.updated(item["posted_at"].replace(tzinfo=pytz.UTC))
        entry.category({"label": item["category"]["title"], "term": item["category"]["slug"]})
        entry.id(item["id"])
    if atom:
        return fg.atom_str(pretty=True)
    else:
        return fg.rss_str(pretty=True)
Code Example #20
File: main.py | Project: dgomes/imap2rss
	def GET(self):
		cherrypy.response.headers["Access-Control-Allow-Origin"] = "*"
		fg = FeedGenerator()
		#TODO create icon
		# fg.icon('http://www.det.ua.pt')
		fg.id(config.get('rss','id'))
		fg.title(config.get('rss','title'))
		fg.subtitle(config.get('rss','subtitle'))
		fg.description(config.get('rss','description'))
		fg.author({'name': config.get('rss','author_name'), 'email':config.get('rss','author_email')})
		fg.language(config.get('rss','language'))
		fg.link(href=config.get('rss','href'), rel='related')

		client = EmailClient()

		for msgn in reversed(client.listBox(config.get('imap','mailbox'))[:config.getint('rss','maxitems')]):
			cherrypy.log("RSS Entry: "+msgn)
			em = client.getEMail(msgn)
			entry = fg.add_entry()
			entry.title(em['subject'])
			entry.author({'name': em['From']['name'], 'email': em['From']['email']})
			entry.guid(config.get("main","baseurl")+'news/'+msgn)
			entry.link({'href':config.get("main","baseurl")+'news/'+msgn, 'rel':'alternate'})
			entry.pubdate(em['date'])
			entry.content(em['body'])
		return	fg.rss_str(pretty=True)
Code Example #21
File: bassdrive.py | Project: bspeice/elektricity
    def build_feed(self):
        "Build the feed given our existing URL"
        # Get all the episodes
        page_content = str(requests.get(self.url).content)
        parser = BassdriveParser()
        parser.feed(page_content)
        links = parser.get_links()

        # And turn them into something usable
        fg = FeedGenerator()
        fg.id(self.url)
        fg.title(self.title)
        fg.description(self.title)
        fg.author({'name': self.dj})
        fg.language('en')
        fg.link({'href': self.url, 'rel': 'alternate'})
        fg.logo(self.logo)

        for link in links:
            fe = fg.add_entry()
            fe.author({'name': self.dj})
            fe.title(link[0])
            fe.description(link[0])
            fe.enclosure(self.url + link[1], 0, 'audio/mpeg')

            # Bassdrive always uses date strings of
            # [yyyy.mm.dd] with 0 padding on days and months,
            # so that makes our lives easy
            date_start = link[0].find('[')
            date_str = link[0][date_start:date_start+12]
            published = datetime.strptime(date_str, '[%Y.%m.%d]')
            fe.pubdate(UTC.localize(published))
            fe.guid((link[0]))

        return fg
Code Example #22
File: main.py | Project: ryotarai/github_trends_rss
def generate_rss(language, since):
    url = "{0}?since={1}".format(language["url"], since)
    file_name = "github_trends_{0}_{1}.rss".format(language["key"], since)
    title = "GitHub Trends - {0} - {1}".format(language["name"], since.capitalize())

    print(url)
    page = requests.get(url)
    tree = html.fromstring(page.content)
    lis = tree.cssselect("ol.repo-list li")

    fg = FeedGenerator()
    fg.title(title)
    fg.link(href="http://github-trends.ryotarai.info/rss/{0}".format(file_name))
    fg.description(title)
    index = 1
    for li in lis:
        a = li.cssselect("h3 a")[0]
        description = ""
        ps = li.cssselect("p")
        if len(ps) > 0:
            description = ps[0].text_content().strip()

        fe = fg.add_entry()
        fe.link(href="https://github.com{0}".format(a.get("href")))
        fe.title("{0} (#{1} - {2} - {3})".format(
            a.text_content().strip().replace(" / ", "/"),
            index,
            language["name"],
            since.capitalize(),
        ))
        fe.description(description)
        index += 1
    rssfeed = fg.rss_str(pretty=True)
    s3.Object(bucket, 'rss/{0}'.format(file_name)).put(Body=rssfeed, ContentType="application/xml")
Code Example #23
File: rssbook.py | Project: histrio/rssutils
def run(folder, url):
    from feedgen.feed import FeedGenerator

    fg = FeedGenerator()

    head, tail = os.path.split(folder)

    title = tail.decode("utf-8")
    fg.id(str(uuid.uuid4()))
    fg.title(title)

    fg.link(href="{0}/rss.xml".format(url), rel="self")
    fg.description(u"Audiobook `{0}` generated with rssbook".format(title))

    fg.load_extension("podcast")
    for item in sorted(os.listdir(folder)):
        if os.path.splitext(item)[1] == ".mp3":
            get_node(os.path.join(folder, item))

        fullpath = os.path.join(folder, item)
        fe = fg.add_entry()
        fe.id(str(uuid.uuid4()))
        fe.title(title)
        fe.description(item)

        fe.link(
            href="{0}/{1}".format(url, item), rel="enclosure", type="audio/mpeg", length=str(os.stat(fullpath).st_size)
        )

    fg.rss_file(os.path.join(folder, "rss.xml"))
Code Example #24
def _filter_fb_rss_feeed(url):
    parsed_feed = feedparser.parse(url)
    filtered_entries = filter(
        lambda x: ' shared a link: "' in x.title, parsed_feed.entries)

    fg = FeedGenerator()
    fg.id('https://fb-notifications-to-pocket.herokuapp.com/')
    fg.title('Facebook Notifications to Pocket')
    fg.author({'name': 'Pankaj Singh', 'email': '*****@*****.**'})
    fg.description(
        '''Filter FB notifications which contain a link and generate a new rss feed which will be used by IFTTT''')
    fg.link(href='https://fb-notifications-to-pocket.herokuapp.com/')

    for entry in filtered_entries:
        root = etree.HTML(entry.summary_detail.value)
        title = entry.title.split(" shared a link: ")[1].strip()[1:-2]
        author_name = entry.title.split(" shared a link: ")[0].strip()
        url = urlparse.parse_qs(
            urlparse.urlparse(root.findall(".//a")[-1].attrib["href"]).query)["u"][0]

        title = get_title_for_url(url) or title

        fe = fg.add_entry()
        fe.id(entry.id)
        fe.link(href=url)
        fe.published(entry.published)
        fe.author({'name': author_name})
        fe.title(title)

    return fg.atom_str(pretty=True)
Code Example #25
File: blog.py | Project: ivanm11/dataflytest
def rss():    
    config = public_app.config['feed']
    fg = FeedGenerator()
    fg.id('%s/blog' % Config.BASE_URL)
    fg.title(config['title'])
    fg.author( {'name': config['author'],'email': config['email']} )
    fg.description(config['desc'])
    fg.link( href=Config.BASE_URL, rel='alternate' )
    query = {
        'id': { '$regex': 'blog' },
        'current': True,
        'meta.hide': { '$ne': True }
    }
    posts = db.pages.find(query).sort('meta.created', -1)[:20]
    for post in posts:
        fe = fg.add_entry()
        fe.title(post['meta']['title'])
        if 'author' in post['meta']:
            fe.author( {'name': post['meta']['author'],'email': config['email']} )
        else:
            fe.author( {'name': config['author'],'email': config['email']} )
        fe.description(do_truncate(post['content'], 300))
        fe.link(href="%s/%s" % (Config.BASE_URL, post['id']), rel='alternate')
        fe.pubdate(utc.localize(post['meta']['created']))
        fe.content(post['content'])    
    response.headers['Content-Type'] = 'application/rss+xml'
    return fg.rss_str(pretty=True)
Code Example #26
File: technowatch.py | Project: TheBlusky/technowatch
def build():
    global fg
    fg = FeedGenerator()
    fg.title(parser.get('technowatch', 'name'))
    fg.language('en')
    fg.description(parser.get('technowatch', 'name'))
    fg.link(href=parser.get('technowatch', 'link'), rel='alternate')
    # Cleaning stories if too much
    if len(known_stories) > int(parser.get('technowatch', 'cache_max')):
        clean()
    # Sorting stories by crawled date
    for item in sorted(known_stories.values(), key=operator.itemgetter('crawledDate'), reverse=True):
        fe = fg.add_entry()
        fe.link(href=item['url'], rel='alternate')
        fe.title("[" + item['type'] + "] " + item['title'])
        fe.category({
            'label': item['type'],
            'term': item['type']
        })
        fe.author({'name': item['by']})
        fe.description(item['desc'])
        fe.pubdate(item['crawledDate'])
    # Caching RSS building
    pickle.dump(known_stories, open(cust_path + "/technowatch.data", "wb"))
    if parser.get('wsgi', 'activated') == "True":
        fg.rss_file(cust_path + '/static/rss.xml')
    if parser.get('ftp', 'activated') == "True":
        upload()
Code Example #27
def feed(column_id):
    api = Api(column_id)

    with request.urlopen(api.info) as stream:
        result = stream.read().decode('utf-8')

    if not result:
        return '', 404

    info = json.loads(result)

    with request.urlopen(api.posts) as stream:
        result = stream.read().decode('utf-8')
        entries = json.loads(result)

    fg = FeedGenerator()
    fg.id(str(entries[0]['slug']))
    fg.title(info['name'])
    fg.language('zh_CN')
    fg.icon(info['avatar']['template'].replace('{id}', info['avatar']['id']).replace('{size}', 's'))
    fg.logo(info['avatar']['template'].replace('{id}', info['avatar']['id']).replace('{size}', 'l'))
    fg.description(info['intro'])
    fg.author(dict(name=info['creator']['name']))
    fg.link(href=api.base_url + info['url'], rel='alternate')
    for entry in entries:
        fe = fg.add_entry()
        fe.id(entry['url'])
        fe.title(entry['title'])
        fe.published(entry['publishedTime'])
        fe.updated(entry['publishedTime'])
        fe.author(dict(name=entry['author']['name']))
        fe.link(href=api.base_url + entry['url'], rel='alternate')
        fe.content(entry['content'])

    return fg.atom_str(pretty=True)
Code Example #28
File: main.py | Project: crgwbr/wt-podcast
    def export_feed(self, output):
        fg = FeedGenerator()
        fg.load_extension('podcast')
        fg.podcast.itunes_category('Religion & Spirituality', 'Christianity')
        fg.podcast.itunes_image("%s/icon.png" % URL_BASE)

        fg.title('JW.ORG Magazines')
        fg.description('Combined Feed of Watchtower (public), Watchtower (study), and Awake! in English from jw.org.')
        fg.link(href="%s/%s" % (URL_BASE, output), rel='self')

        manifest = self._load()
        entries = []
        for lang, mnemonics in manifest.items():
            for mnemonic, issues in mnemonics.items():
                for issue, data in issues.items():
                    entries.append((issue, data))

        for issue, entry in sorted(entries, key=lambda i: i[0], reverse=True):
            fe = fg.add_entry()

            fe.id( entry['hash'] )
            fe.title( entry['title'] )
            fe.description( entry['title'] )
            fe.published( pytz.utc.localize( entry['created_on'] ) )
            url = "%s/%s" % (URL_BASE, os.path.basename(entry['file']))
            mime = 'audio/mpeg'
            fe.enclosure(url, str(entry['duration']), mime)
            fe.link(href=url, type=mime)
        fg.rss_str(pretty=True)
        fg.rss_file(os.path.join(CACHE_DIR, output))
Code Example #29
File: using_api.py | Project: WarmongeR1/vk2rss
def main():
    session = vk.Session()
    api = vk.API(session)

    group_id = '96469126'

    group_info = api.groups.getById(group_ids=group_id, fields=['description', 'site', 'name', 'photo', 'gid'])

    assert len(group_info) == 1
    group_info = group_info[0]

    url = 'http://vk.com/club{}'.format(group_info['gid'])
    # a = api.wall.get(owner_id=-1 * group_info['gid'])
    #
    # with open('out', 'wb') as fio:
    #     pickle.dump(a, fio)

    with open('out', 'rb') as fio:
        data = pickle.loads(fio.read())

    assert len(data) > 1

    fg = FeedGenerator()
    fg.id(url)
    fg.title(_(group_info['name']))
    fg.description(_(group_info['description']))
    fg.logo(group_info['photo'])
    site_url = group_info.get('site', url) if group_info.get('site', url) else url
    fg.link(href=_(site_url))
    fg.link(href=_(site_url), rel='self')
    fg.link(href=_(site_url), rel='alternate')
    fg.author({'name': 'Alexander Sapronov', 'email': '*****@*****.**'})
    fg.webMaster('[email protected] (Alexander Sapronov)')

    pat = re.compile(r"#(\w+)")

    for x in data[1:]:
        post_link = "{}?w=wall-{}_{}".format(url, group_info['gid'], x['id'])
        e = fg.add_entry()
        # text = x.get('text', '').replace('<br>', '\n')
        text = x.get('text', '')

        e.description(_(text))
        e.author({'name': _(get_author_name(api, x.get('from_id')))})
        e.id(post_link)
        e.link(href=_(post_link))
        e.link(href=_(post_link), rel='alternate')

        tags = pat.findall(text)

        title = x.get('text', '')
        for tag in tags:
            e.category(term=_(tag))
            title = title.replace('#{}'.format(tag), '')

        title = re.sub('<[^<]+?>', ' ', title)
        title = textwrap.wrap(title, width=80)[0]
        e.title(_(title.strip()))

    fg.rss_file('rss.xml')
Code Example #30
File: slack.py | Project: Konubinix/Devel
def rss(conversation,
        url,
        author_name,
        author_email,
        title,
        subtitle,
        language,
        output_path):
    """Export all the links of the conversation in a simple RSS feed"""
    from feedgen.feed import FeedGenerator
    fg = FeedGenerator()
    fg.id(url)
    fg.title(title)
    fg.author(
        {
            'name': author_name,
            'email': author_email,
        }
    )
    fg.link(
        href=url,
        rel='alternate'
    )
    if subtitle:
        fg.subtitle(subtitle)
    fg.language(language)
    for message in conversation.history():
        match = re.search(
            "^.*<(?P<url>[^>|]+)\|?(?P<title>[^>]+)?>.*$",
            message.data["text"],
            flags=re.MULTILINE
        )
        if match is not None:
            fe = fg.add_entry()
            link = match.group("url")
            title = match.group("title") or link
            date = naive_to_local(datetime.datetime.fromtimestamp(float(message.data["ts"])))
            description = message.data["text"]
            if "attachments" in message.data:
                attachment = [a for a in message.data["attachments"] if
                              a["title_link"] == link][0]
                title += " | " + attachment["title"]
                description += """

""" + attachment["text"]
            fe.id(link)
            fe.title(title)
            fe.link(href=link)
            fe.published(date)
            user = config.slack.get_user(message.data["user"])
            author = {
                "name": message.data["username"],
                "email": user.email or "noemail",
            }
            fe.author(author)
            fe.description(description)
    fg.rss_file(output_path, pretty=True)
Code Example #31
File: wrfpodcast.py | Project: canada4663/wrf-podcast
def autogen(*args):
    # Setup Feed
    feed = FeedGenerator()
    feed.id('http://www.whiterock.org/sermons/')
    feed.title('White Rock Fellowship Sermon Podcast')
    feed.description(WRF_DESCRIPTION)
    feed.link(href='https://s3-us-west-1.amazonaws.com/wrf-autogen/wrf-podcast.rss')
    feed.language('en-US')

    user_agent = {'User-agent': 'Mozilla/5.0'}
    html = requests.get(WRF_WEBSITE, headers=user_agent).text
    soup = bs4.BeautifulSoup(html, convertEntities=bs4.BeautifulSoup.HTML_ENTITIES)

    feed.load_extension('podcast')

    feed.podcast.itunes_subtitle('Sermon Recordings from White Rock Followship Dallas, TX')
    feed.podcast.itunes_category('Religion & Spirituality', 'Christianity')
    feed.podcast.itunes_author('White Rock Fellowship')
    feed.podcast.itunes_explicit('no')
    feed.podcast.itunes_owner('Ryan Hoium', '*****@*****.**')
    feed.podcast.itunes_summary(WRF_DESCRIPTION)
    feed.podcast.itunes_subtitle('White Rock Fellowship Sermon Podcast')
    feed.podcast.itunes_image(WRF_LOGO)

    series_pagelinks =[]
    series_sidebar = soup.find('div', {'id':'custom_category-2'})
    for series in series_sidebar.findAll('a'):
        series_link = series['href']
        series_name = series.text
        series_html = requests.get(series_link, headers=user_agent).text
        series_soup = bs4.BeautifulSoup(series_html, 
                                        convertEntities=bs4.BeautifulSoup.HTML_ENTITIES)
        pagination = series_soup.find('ul', {'class':'pagination'})
        series_pagelinks.append({'series_name': series_name,
                                 'series_link': series_link})
        if pagination is not None:
            for series_page in pagination.findAll('a', {'class':''}):
                series_pagelinks.append({'series_name': series_name,
                                         'series_link': series_page['href']})

    postings =[]
    for series_page in series_pagelinks:
        series_link = series_page['series_link']
        series_name = series_page['series_name']
        series_html = requests.get(series_link, headers=user_agent).text
        series_soup = bs4.BeautifulSoup(series_html, convertEntities=bs4.BeautifulSoup.HTML_ENTITIES)
        for posting in series_soup.findAll('article', {'class':'post sermon'}):
            for link in posting.findAll('a', {'data-original-title':'Audio'}):
                try:
                    inner_html = requests.get(link['href'], headers=user_agent).text
                    inner_soup = bs4.BeautifulSoup(inner_html, convertEntities=bs4.BeautifulSoup.HTML_ENTITIES)
                    title = series_name + ' : ' + inner_soup.find('h2', {'class':'post-title'}).text
                    download_link = inner_soup.find('a', {'data-original-title':'Download Audio'})['href']
                    download_link = download_link.split('=')[1]
                    length = requests.head(download_link).headers.get('content-length', None)
                    staff_data = inner_soup.find('div', {'class':'staff-data'}).text
                    author = staff_data.split(' on ')[0]
                    author = author.split('by ')[1]
                    date = staff_data.split(' on ')[1]
                    date = datetime.datetime.strptime(date,'%B %d, %Y') #.strftime('%m/%d/%Y')
                    date = central_tz.localize(datetime.datetime.combine(date,datetime.time.min))
                except:
                    print "Error processing: ", link
                    continue
                try:
                    entry = feed.add_entry()
                    entry.id(download_link)
                    entry.title(title + " by " + author)
                    entry.enclosure(download_link, length, 'audio/mpeg')
                    entry.podcast.itunes_author(itunes_author=author)
                    entry.pubdate(pubDate=date)
                except:
                    print "Error Generating Feed Entry: ", title
                    continue
                postings.append({'title':title, 
                                 'download_link':download_link,
                                 'date': date,
                                 'author': staff_data})

    rssString = feed.rss_str(pretty=True)
    s3 = boto.connect_s3()
    wrf = s3.get_bucket('wrf-autogen')
    s3key = wrf.get_key('wrf-podcast.rss')
    s3key.set_contents_from_string(rssString)
    s3key.make_public()
Code Example #32
def get_rss():
    zen_url = request.args.get('url')
    # set telegram instant view rhash if available
    tg_rhash = request.args.get('tg_rhash')

    limit_description = request.args.get('limit_description', type=int)

    if not zen_url:
        return 'url (?url=https://zen.yandex.ru/media/.../) must be set'
    parsed_url = urlparse(zen_url)
    if parsed_url.netloc != 'zen.yandex.ru':
        return 'Domain must be zen.yandex.ru'

    # validate tg_rhash
    if tg_rhash and not re.match(r'^[a-fA-F\d]+$', tg_rhash):
        return 'Invalid tg_rhash. Please, check rhash value from instant view template'

    if not re.match(r'^/(media/)?(id/[\da-f]+|[a-z\d_]+)/?$', parsed_url.path):
        return 'Url is unsupported. Supported formats:<br>' \
               '• https://zen.yandex.ru/media/id/01234567890abcdef0123456 <br>' \
               '• https://zen.yandex.ru/media/nickname'

    resp = requests.get(
        zen_url, headers={'User-Agent': 'TelegramBot (like TwitterBot)'})
    doc = fromstring(resp.text)

    try:
        text = re.search(
            r'{.+}',
            doc.xpath('.//script[contains(text(), "window.__SERVER_STATE__")]')
            [0].text)[0]
        json_data = json.loads(text)
    except:
        return abort(404)

    items = json_data['feed'].get('items')
    items_order = json_data['feed'].get('itemsOrder')
    publisher = next(iter(json_data.get('sources').values()))

    feed = FeedGenerator()
    feed.id('http://zen.yandex.ru/')
    feed.title(publisher.get('title'))
    feed.subtitle(publisher.get('description').strip())
    feed.language('ru')
    feed.author({'name': '-', 'email': '-'})
    feed.link(href=zen_url, rel='alternate')
    try:
        image_logo_url = publisher.get('logo')
        feed.logo(image_logo_url)
    except:
        pass

    for oItem in items_order:
        item = items.get(oItem)
        if item.get('type') != 'card':
            continue

        entry = feed.add_entry()

        entry.title(item.get('title').strip())

        entry.description(item.get('text').strip()[:limit_description])

        if item.get('image'):
            item_image_url = item.get('image')
            entry.enclosure(url=item_image_url,
                            type='image/webp',
                            length='2048')

        entry_url = item.get('link').split('?')[0]
        # convert to instant view link if tg hash is provided
        if tg_rhash:
            # write original url into author field
            entry.author({'name': '', 'email': entry_url})
            entry.link({
                'href':
                TG_URL.format(url=quote_plus(entry_url), rhash=tg_rhash)
            })

        else:
            entry.link({'href': entry_url})

        try:
            entry.pubdate(
                dateparser.parse(item.get('creationTime'),
                                 settings={'RETURN_AS_TIMEZONE_AWARE': True}))
        except:
            pass

    rss_response = Response(feed.rss_str(pretty=True))
    rss_response.headers.set('Content-Type',
                             'application/rss+xml; charset=utf-8')

    return rss_response
Code Example #33
def lambda_handler(event, context):
    # obtain all entries in database
    response = table.scan(
        FilterExpression=Attr('episode_int').gte(1)
    )

    # save object with the items themselves
    items = response['Items']
    #print(items)
    
    items_sorted = sorted(items, key = lambda i: i['episode_int'])

    # set up overall feed metadata
    fg = FeedGenerator()

    # general feed params
    fg.id('https://r-podcast.org')
    fg.title('Residual Snippets')
    fg.author( {'name':'Eric Nantz', 'email':'*****@*****.**'})
    fg.link(href='https://r-podcast.org', rel='alternate' )
    fg.logo(LOGO_URL)
    fg.subtitle('Musings on R, data science, linux, and life')
    fg.link( href=RSS_URL, rel='self')
    fg.language('en')

    fg.load_extension('podcast')

    # podcast-specific params
    fg.podcast.itunes_category('Technology')
    fg.podcast.itunes_author('Eric Nantz')
    fg.podcast.itunes_explicit('no')
    fg.podcast.itunes_owner('Eric Nantz', '*****@*****.**')
    fg.podcast.itunes_summary('Residual Snippets is an informal, unedited, and free-flowing audio podcast from Eric Nantz.  If you enjoy hearing quick takes from a data scientist on their journey to blend innovative uses of open-source technology, contributing back to their brilliant communities, and juggling the curveballs life throws at them, this podcast is for you!')
    
    for x in range(len(items_sorted)):
        #print(items[x])
        fe = fg.add_entry()
        fe.title(items_sorted[x]['episode_title'])
        fe.author( {'name':'Eric Nantz', 'email':'*****@*****.**'} )
        fe.enclosure(url=items_sorted[x]['episode_url'], type = 'audio/mpeg')

        # process description before adding to feed
        ep_desc = create_summary(items_sorted[x]['episode_summary'])
        #fe.description(items_sorted[x]['episode_summary'])
        fe.description(ep_desc)
 
    # populate xml file for RSS feed    
    feed_string = fg.rss_str(pretty=True)
    fg.rss_file('/tmp/residual_snippets.xml', pretty=True)
    
    # upload xml feed to pcloud and s3
    pc = PyCloud(PCLOUD_USERNAME, PCLOUD_PASS)
    pc.uploadfile(data = feed_string, filename='residual_snippets.xml', folderid=PCLOUD_FOLDER_ID)

    #upload_file("/tmp/residual_snippets.xml", BUCKET_NAME, object_name = 'residual_snippets.xml')
    s3_client.upload_file("/tmp/residual_snippets.xml", BUCKET_NAME, 'residual_snippets.xml')
    
    # create export of dynamodb and upload to s3
    # obtain all entries in database
    response2 = table.scan(
        FilterExpression=Attr('episode_int').gte(1)
    )

    # save object with the items themselves
    items2 = response2['Items']

    items2_sorted = sorted(items2, key = lambda i: i['episode_int'])

    db_export = "/tmp/dbexport.json"
    f = open(db_export, "w")
    f.write(json.dumps(items2_sorted, indent=2, default=decimal_default))
    f.close()
    
    # upload to s3 bucket
    success = s3_client.upload_file(db_export, BUCKET_NAME, 'dbexport.json')
    
    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }
Code Example #34
File: podtube.py | Project: kaesi0/PodTube
 def get(self, channel):
     channel = channel.split('/')
     if len(channel) < 2:
         channel.append('video')
     channel_name = ['/'.join(channel)]
     self.set_header('Content-type', 'application/rss+xml')
     if channel_name[0] in channel_feed and channel_feed[
             channel_name[0]]['expire'] > datetime.datetime.now():
         self.write(channel_feed[channel_name[0]]['feed'])
         self.finish()
         return
     fg = None
     video = None
     calls = 0
     response = {'nextPageToken': ''}
     while 'nextPageToken' in response.keys():
         next_page = response['nextPageToken']
         payload = {
             'part': 'snippet,contentDetails',
             'maxResults': 50,
             'channelId': channel[0],
             'key': key,
             'pageToken': next_page
         }
         request = requests.get(
             'https://www.googleapis.com/youtube/v3/activities',
             params=payload)
         calls += 1
         if request.status_code != 200:
             payload = {
                 'part': 'snippet',
                 'maxResults': 1,
                 'forUsername': channel[0],
                 'key': key
             }
             request = requests.get(
                 'https://www.googleapis.com/youtube/v3/channels',
                 params=payload)
             response = request.json()
             channel[0] = response['items'][0]['id']
             channel_name.append('/'.join(channel))
             payload = {
                 'part': 'snippet,contentDetails',
                 'maxResults': 50,
                 'channelId': channel[0],
                 'key': key,
                 'pageToken': next_page
             }
             request = requests.get(
                 'https://www.googleapis.com/youtube/v3/activities',
                 params=payload)
             calls += 2
         response = request.json()
         if request.status_code == 200:
             logging.debug('Downloaded Channel Information')
         else:
             logging.error('Error Downloading Channel: %s', request.reason)
             self.send_error(reason='Error Downloading Channel')
             return
         if not fg:
             fg = FeedGenerator()
             fg.load_extension('podcast')
             fg.generator('PodTube (python-feedgen)', __version__,
                          'https://github.com/aquacash5/PodTube')
             for item in response['items']:
                 if item['snippet']['type'] != 'upload':
                     continue
                 elif 'Private' in item['snippet']['title']:
                     continue
                 else:
                     snippet = item['snippet']
                     break
             logging.info('Channel: %s (%s)', channel[0],
                          snippet['channelTitle'])
             icon = max(snippet['thumbnails'],
                        key=lambda x: snippet['thumbnails'][x]['width'])
             fg.title(snippet['channelTitle'])
             fg.id('http://' + self.request.host + self.request.uri)
             fg.description(snippet['description'] or ' ')
             fg.author(name='Podtube',
                       email='*****@*****.**',
                       uri='https://github.com/aquacash5/PodTube')
             fg.podcast.itunes_author(snippet['channelTitle'])
             fg.image(snippet['thumbnails'][icon]['url'])
             fg.link(href=f'http://youtube.com/channel/{channel}',
                     rel='self')
             fg.language('en-US')
             fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
             fg.podcast.itunes_explicit('no')
             fg.podcast.itunes_owner(name='Podtube',
                                     email='*****@*****.**')
             fg.podcast.itunes_summary(snippet['description'])
             fg.podcast.itunes_category(cat='Technology')
             fg.updated(str(datetime.datetime.utcnow()) + 'Z')
         for item in response['items']:
             snippet = item['snippet']
             if snippet['type'] != 'upload':
                 continue
             if 'private' in snippet['title'].lower():
                 continue
             current_video = item['contentDetails']['upload']['videoId']
             logging.debug('ChannelVideo: %s (%s)', current_video,
                           snippet['title'])
             fe = fg.add_entry()
             fe.title(snippet['title'])
             fe.id(current_video)
             icon = max(snippet['thumbnails'],
                        key=lambda x: snippet['thumbnails'][x]['width'])
             fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
             fe.updated(snippet['publishedAt'])
             if channel[1] == 'video':
                 fe.enclosure(
                     url=f'http://{self.request.host}/video/{current_video}',
                     type="video/mp4")
             elif channel[1] == 'audio':
                 fe.enclosure(
                     url=f'http://{self.request.host}/audio/{current_video}',
                     type="audio/mpeg")
             fe.author(name=snippet['channelTitle'])
             fe.podcast.itunes_author(snippet['channelTitle'])
             fe.pubDate(snippet['publishedAt'])
             fe.link(href=f'http://www.youtube.com/watch?v={current_video}',
                     title=snippet['title'])
             fe.podcast.itunes_summary(snippet['description'])
             fe.description(snippet['description'])
             if not video or video['expire'] < fe.pubDate():
                 video = {'video': fe.id(), 'expire': fe.pubDate()}
     feed = {
         'feed': fg.rss_str(),
         'expire': datetime.datetime.now() + datetime.timedelta(hours=calls)
     }
     for chan in channel_name:
         channel_feed[chan] = feed
     self.write(feed['feed'])
     self.finish()
     video = video['video']
     mp3_file = 'audio/{}.mp3'.format(video)
     if channel[1] == 'audio' and not os.path.exists(
             mp3_file) and video not in conversion_queue.keys():
         conversion_queue[video] = {
             'status': False,
             'added': datetime.datetime.now()
         }
Code Example #35
import os
from datetime import datetime

import pytz
from algoliasearch.search_client import SearchClient
from feedgen.feed import FeedGenerator

if __name__ == '__main__':
    client = SearchClient.create(os.environ['APP_ID'], os.environ['API_KEY'])
    index = client.init_index('interviews_publishedAt_desc')
    articles = index.search('')['hits']

    fg = FeedGenerator()
    fg.title('IH Interviews')
    fg.id('ih-interviews-20201123-205642')
    pubs = []
    for article in articles:
        pub = datetime.fromtimestamp(article['publishedAt'] /
                                     1000).replace(tzinfo=pytz.timezone('UTC'))
        pubs.append(pub)
        fe = fg.add_entry()
        fe.id(article['interviewId'])
        fe.published(pub)
        fe.pubDate(pub)
        fe.updated(pub)
        fe.title(article['title'])
        fe.link(
            href=
            f"https://www.indiehackers.com/interview/{article['interviewId']}")
    fg.updated(max(pubs))
    print(fg.atom_str(pretty=True).decode())
Code Example #36
def main():
    if len(sys.argv) != 2 or not (
            sys.argv[1].endswith('rss') or sys.argv[1].endswith('atom')
            or sys.argv[1] == 'torrent' or sys.argv[1] == 'podcast'):
        print(USAGE)
        exit()

    arg = sys.argv[1]

    fg = FeedGenerator()
    fg.id('http://lernfunk.de/_MEDIAID_123')
    fg.title('Testfeed')
    fg.author({'name': 'Lars Kiesow', 'email': '*****@*****.**'})
    fg.link(href='http://example.com', rel='alternate')
    fg.category(term='test')
    fg.contributor(name='Lars Kiesow', email='*****@*****.**')
    fg.contributor(name='John Doe', email='*****@*****.**')
    fg.icon('http://ex.com/icon.jpg')
    fg.logo('http://ex.com/logo.jpg')
    fg.rights('cc-by')
    fg.subtitle('This is a cool feed!')
    fg.link(href='http://larskiesow.de/test.atom', rel='self')
    fg.language('de')
    fe = fg.add_entry()
    fe.id('http://lernfunk.de/_MEDIAID_123#1')
    fe.title('First Element')
    fe.content(
        '''Lorem ipsum dolor sit amet, consectetur adipiscing elit. Tamen
            aberramus a proposito, et, ne longius, prorsus, inquam, Piso, si
            ista mala sunt, placet. Aut etiam, ut vestitum, sic sententiam
            habeas aliam domesticam, aliam forensem, ut in fronte ostentatio
            sit, intus veritas occultetur? Cum id fugiunt, re eadem defendunt,
            quae Peripatetici, verba.''')
    fe.summary(u'Lorem ipsum dolor sit amet, consectetur adipiscing elit…')
    fe.link(href='http://example.com', rel='alternate')
    fe.author(name='Lars Kiesow', email='*****@*****.**')

    if arg == 'atom':
        print_enc(fg.atom_str(pretty=True))
    elif arg == 'rss':
        print_enc(fg.rss_str(pretty=True))
    elif arg == 'podcast':
        # Load the podcast extension. It will automatically be loaded for all
        # entries in the feed, too. Thus also for our “fe”.
        fg.load_extension('podcast')
        fg.podcast.itunes_author('Lars Kiesow')
        fg.podcast.itunes_category('Technology', 'Podcasting')
        fg.podcast.itunes_explicit('no')
        fg.podcast.itunes_complete('no')
        fg.podcast.itunes_new_feed_url('http://example.com/new-feed.rss')
        fg.podcast.itunes_owner('John Doe', '*****@*****.**')
        fg.podcast.itunes_summary('Lorem ipsum dolor sit amet, consectetur ' +
                                  'adipiscing elit. Verba tu fingas et ea ' +
                                  'dicas, quae non sentias?')
        fe.podcast.itunes_author('Lars Kiesow')
        print_enc(fg.rss_str(pretty=True))

    elif arg == 'torrent':
        fg.load_extension('torrent')
        fe.link(href='http://example.com/torrent/debian-8-netint.iso.torrent',
                rel='alternate',
                type='application/x-bittorrent, length=1000')
        fe.torrent.filename('debian-8.4.0-i386-netint.iso.torrent')
        fe.torrent.infohash('7661229811ef32014879ceedcdf4a48f256c88ba')
        fe.torrent.contentlength('331350016')
        fe.torrent.seeds('789')
        fe.torrent.peers('456')
        fe.torrent.verified('123')
        print_enc(fg.rss_str(pretty=True))

    elif arg.startswith('dc.'):
        fg.load_extension('dc')
        fg.dc.dc_contributor('Lars Kiesow')
        if arg.endswith('.atom'):
            print_enc(fg.atom_str(pretty=True))
        else:
            print_enc(fg.rss_str(pretty=True))

    elif arg.startswith('syndication'):
        fg.load_extension('syndication')
        fg.syndication.update_period('daily')
        fg.syndication.update_frequency(2)
        fg.syndication.update_base('2000-01-01T12:00+00:00')
        if arg.endswith('.rss'):
            print_enc(fg.rss_str(pretty=True))
        else:
            print_enc(fg.atom_str(pretty=True))

    elif arg.endswith('atom'):
        fg.atom_file(arg)

    elif arg.endswith('rss'):
        fg.rss_file(arg)
Code example #37
    def index():
        limit_tag = request.args.get('tag')
        pause_uuid = request.args.get('pause')

        if pause_uuid:
            try:
                datastore.data['watching'][pause_uuid]['paused'] ^= True
                datastore.needs_write = True

                return redirect(url_for('index', tag=limit_tag))
            except KeyError:
                pass

        # Sort by last_changed and add the uuid which is usually the key..
        sorted_watches = []
        for uuid, watch in datastore.data['watching'].items():

            if limit_tag is not None:
                # Support for comma separated list of tags.
                for tag_in_watch in watch['tag'].split(','):
                    tag_in_watch = tag_in_watch.strip()
                    if tag_in_watch == limit_tag:
                        watch['uuid'] = uuid
                        sorted_watches.append(watch)

            else:
                watch['uuid'] = uuid
                sorted_watches.append(watch)

        sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True)

        existing_tags = datastore.get_all_tags()
        rss = request.args.get('rss')

        if rss:
            fg = FeedGenerator()
            fg.title('changedetection.io')
            fg.description('Feed description')
            fg.link(href='https://changedetection.io')

            for watch in sorted_watches:
                if not watch['viewed']:
                    fe = fg.add_entry()
                    fe.title(watch['url'])
                    fe.link(href=watch['url'])
                    fe.description(watch['url'])
                    fe.guid(watch['uuid'], permalink=False)
                    dt = datetime.datetime.fromtimestamp(
                        int(watch['newest_history_key']))
                    dt = dt.replace(tzinfo=pytz.UTC)
                    fe.pubDate(dt)

            response = make_response(fg.rss_str())
            response.headers.set('Content-Type', 'application/rss+xml')
            return response

        else:
            from backend import forms
            form = forms.quickWatchForm(request.form)

            output = render_template(
                "watch-overview.html",
                form=form,
                watches=sorted_watches,
                tags=existing_tags,
                active_tag=limit_tag,
                has_unviewed=datastore.data['has_unviewed'])

        return output
Code example #38
async def channel(request, channel_id, return_type='video'):
    log.info(f'Channel: {channel_id}')
    channel_name = [f'{channel_id}/{return_type}']
    if channel_name[0] in channel_feed and channel_feed[
            channel_name[0]]['expire'] > datetime.now():
        return raw(channel_feed[channel_name[0]]['feed'],
                   content_type='application/rss+xml')
    fg = None
    calls = 0
    response = {'nextPageToken': ''}
    while 'nextPageToken' in response:
        next_page = response['nextPageToken']
        payload = {
            'part': 'snippet,contentDetails',
            'maxResults': 50,
            'channelId': channel_id,
            'key': KEY,
            'pageToken': next_page
        }
        response = json.loads(
            await get('https://www.googleapis.com/youtube/v3/activities',
                      params=payload))
        calls += 1
        if 'error' in response:
            payload = {
                'part': 'snippet',
                'maxResults': 1,
                'forUsername': channel_id,
                'key': KEY
            }
            response = json.loads(await get(
                'https://www.googleapis.com/youtube/v3/channels',
                params=payload))
            channel_id = response['items'][0]['id']
            channel_name.append(f'{channel_id}/{return_type}')
            payload = {
                'part': 'snippet,contentDetails',
                'maxResults': 50,
                'channelId': channel_id,
                'key': KEY,
                'pageToken': next_page
            }
            response = json.loads(await get(
                'https://www.googleapis.com/youtube/v3/activities',
                params=payload))
            calls += 2
        if not fg:
            fg = FeedGenerator()
            fg.load_extension('podcast')
            fg.generator('PodTube', __version__,
                         'https://github.com/aquacash5/PodTube')
            snippet = response['items'][0]['snippet']
            if 'Private' in snippet['title']:
                continue
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fg.title(snippet['title'])
            fg.id(f'http://{request.headers["host"]}{request.url}')
            fg.description(snippet['description'] or ' ')
            fg.author(name=snippet['channelTitle'])
            fg.image(snippet['thumbnails'][icon]['url'])
            fg.link(href=f'https://www.youtube.com/playlist?list={channel_id}')
            fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fg.podcast.itunes_summary(snippet['description'])
            fg.podcast.itunes_category('Technology', 'Podcasting')
            fg.updated(f'{str(datetime.utcnow())}Z')
        for item in response['items']:
            snippet = item['snippet']
            if snippet['type'] != 'upload':
                continue
            current_video = item['contentDetails']['upload']['videoId']
            log.debug(f'ChannelVideo: {current_video} {snippet["title"]}')
            fe = fg.add_entry()
            fe.title(snippet['title'])
            fe.id(current_video)
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fe.updated(snippet['publishedAt'])
            if return_type == 'audio':
                fe.enclosure(
                    url=
                    f'http://{request.headers["host"]}/audio/{current_video}',
                    type="audio/mpeg")
            else:
                fe.enclosure(
                    url=
                    f'http://{request.headers["host"]}/video/{current_video}',
                    type="video/mp4")
            fe.author(name=snippet['channelTitle'])
            fe.podcast.itunes_author(snippet['channelTitle'])
            fe.pubdate(snippet['publishedAt'])
            fe.link(href=f'http://www.youtube.com/watch?v={current_video}',
                    title=snippet['title'])
            fe.podcast.itunes_summary(snippet['description'])
            fe.description(snippet['description'])
            await sleep(0)
    feed = {
        'feed': fg.rss_str(),
        'expire': datetime.now() + timedelta(hours=calls)
    }
    for _name in channel_name:
        channel_feed[_name] = feed
    return raw(feed['feed'], content_type='application/rss+xml')
Code example #39
File: feedbridge.py Project: mod94/botfriend
class Bridge(object):

    NO_VALUE = object()

    def __init__(self, filelike):
        if isstr(filelike):
            feed = filelike
        else:
            feed = filelike.read()

        self.raw = feed
        self.parsed = feedparser.parse(self.raw)
        self.feed = FeedGenerator()

        # Set feed-level values.
        self.build_feed()
        self.build_entries()

    def build_feed(self):
        f = self.parsed.feed

        for field in [
                'id', 'title', 'subtitle', 'updated', 'rights', 'generator',
                'docs', 'language', ('xml_lang', 'language'),
            ('authors', 'author'), ('links', 'link')
        ]:
            self._copy(f, self.feed, field)

        if f.get('image'):
            image_kwargs = {}
            for image_field in 'url', 'title', 'link', 'width', 'height', 'description':
                ignore, value = self._setter(f.image, self.feed, image_field)
                if value is not self.NO_VALUE:
                    image_kwargs[image_field] = value

            if image_kwargs:
                self.feed.image(**image_kwargs)

    def build_entries(self):
        for entry in self.parsed.entries:
            self.build_entry(entry)

    def build_entry(self, parsed):
        built = self.feed.add_entry(order='append')

        # TODO: 'tag' is not supported in feedgen
        for field in [
                'id', 'title', 'updated', 'summary', 'published',
            ('links', 'link')
        ]:
            self._copy(parsed, built, field)

        permalink = parsed.get('link')
        guid_is_link = parsed['guidislink']
        if permalink:
            built.guid(permalink, guid_is_link)

    def _setter(self, feedparser_obj, feedgen_obj, field):
        if isinstance(field, tuple):
            field, method_name = field
        else:
            method_name = field
        setter = getattr(feedgen_obj, method_name, None)
        value = feedparser_obj.get(field, self.NO_VALUE)
        return setter, value

    def _copy(self, feedparser_obj, feedgen_obj, field):
        setter, value = self._setter(feedparser_obj, feedgen_obj, field)
        if value is self.NO_VALUE:
            return
        if not isinstance(value, list):
            value = [value]
        for v in value:
            setter(v)
        if field in feedparser_obj:  # Temporary cleanup
            del feedparser_obj[field]
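A minimal usage sketch for the bridge above, assuming a feed document on disk (the path is a placeholder): the feedparser fields are copied into the wrapped FeedGenerator, which can then re-serialize the feed.

# Hedged usage sketch; 'original-feed.xml' is a placeholder path.
with open('original-feed.xml') as fh:
    bridge = Bridge(fh)
# The wrapped FeedGenerator can re-emit the parsed feed.
print(bridge.feed.atom_str(pretty=True).decode('utf-8'))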
Code example #40
    def render(self, data, media_type=None, renderer_context=None):
        renderer_context = renderer_context or {}
        response = renderer_context['response']

        if response.exception:
            return f'Error: {response.status_code}'

        # If we get this far, the response is not an error and we can render the RSS feed.

        # Prepare the feed generator object.
        fg = FeedGenerator()
        fg.load_extension('podcast')

        # Playlist wrapper.
        fg.id(data['url'])
        fg.title(data['title'])

        # Description. Feedgen will raise exception if the description is empty
        fg.description(_ensure_non_empty(data['description']))
        fg.podcast.itunes_summary(_ensure_non_empty(data['description']))

        # Self link.
        fg.link(href=data['url'])

        # TODO: Missing fields from playlists: author, contributors, logo, subtitle, and language.

        # Add entries
        for entry in data['entries']:
            fe = fg.add_entry()

            # The item id. We don't set permaLink for the moment because URLs may change during the
            # alpha.
            fe.id(entry['url'])

            # Set basic metadata. Feedgen will raise an exception if the description is empty.
            fe.title(entry['title'])
            fe.description(_ensure_non_empty(entry['description']))
            fe.summary(_ensure_non_empty(entry['description']))

            # RSS only supports one link with nothing but a URL. So for the RSS link element the
            # last link with rel=alternate is used. We link to the UI view even though we use the
            # API endpoint as the id.
            fe.link(href=entry['url'])

            # Publication date.
            fe.pubDate(entry['published_at'])

            # Free-text copyright field.
            fe.rights(entry['rights'])

            # When the item was last updated.
            fe.updated(entry['updated_at'])

            # Image. Note: iTunes *requires* this to end in ".jpg" or ".png" which is annoying.
            fe.podcast.itunes_image(entry['imageUrl'])

            # Duration in seconds.
            fe.podcast.itunes_duration(entry['duration'])

            # The actual downloads themselves.
            for enclosure in entry['enclosures']:
                fe.enclosure(url=enclosure['url'], type=enclosure['mime_type'])

        # Render the feed.
        return fg.rss_str(pretty=True)
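For reference, a hedged sketch of the payload shape this renderer consumes; the keys mirror the lookups in render() above and all values are illustrative:

# Illustrative payload; every key below is read somewhere in render().
data = {
    'url': 'https://example.com/api/playlists/1',
    'title': 'Example playlist',
    'description': 'Videos from an example playlist.',
    'entries': [{
        'url': 'https://example.com/api/media/42',
        'title': 'Episode 42',
        'description': 'An example episode.',
        'published_at': '2020-01-01T00:00:00Z',
        'updated_at': '2020-01-02T00:00:00Z',
        'rights': 'CC BY 4.0',
        'imageUrl': 'https://example.com/media/42.jpg',
        'duration': 1234,
        'enclosures': [{'url': 'https://example.com/media/42.mp4',
                        'mime_type': 'video/mp4'}],
    }],
}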
Code example #41
File: atom_1_0.py Project: arXiv/arxiv-feed
    def get_xml(self: Serializer, response: Response) -> Tuple[str, int]:
        """
        Serialize the provided response data into Atom, version 1.0.

        Parameters
        ----------
        response : Response
            The search response data to be serialized.

        Returns
        -------
        data : str
            The serialized XML results.
        status
            The HTTP status code for the operation.

        """
        fg = FeedGenerator()
        fg.register_extension("arxiv",
                              ArxivExtension,
                              ArxivEntryExtension,
                              rss=False)
        fg.id("http://arxiv.org/rss/version=atom_1.0")
        archive = response.hits[0]["primary_classification"]["archive"]
        fg.title(archive["id"] + " updates on arXiv.org")
        fg.link(href='http://arxiv.org/rss/version=atom_1.0',
                rel='self',
                type='application/atom+xml')
        fg.updated(datetime.utcnow().replace(tzinfo=utc))

        # TODO - Try to remove generator element?  This doesn't work - code ignores "None"
        # fg.generator(None)
        # TODO - We don't currently set "subtitle", but could do it like this
        # fg.subtitle(
        #     f"{archive['name']} ({archive['id']}) updates on the arXiv.org e-print archive")

        # Add each search result "hit" to the feed
        for hit in response:
            entry = fg.add_entry()
            entry.id("http://arxiv.org/abs/" + hit['id'])
            entry.title(hit['title'])
            entry.summary(hit['abstract'])
            entry.published(hit['submitted_date'])
            entry.updated(hit['updated_date'])

            entry.link({
                "href": url_for("abs_by_id", paper_id=hit['id']),
                "type": "text/html"
            })
            pdf_link = {
                "title": "pdf",
                "rel": "related",
                "type": "application/pdf"
            }
            pdf_link["href"] = url_for("pdf_by_id", paper_id=hit['id'])
            entry.link(pdf_link)

            # Add categories
            categories = [hit['primary_classification'].to_dict()['category']]
            for classification in hit['secondary_classification']:
                categories += [classification['category'].to_dict()]
            for cat in categories:
                label = cat['name'] + " (" + cat['id'] + ")"
                category = {
                    "term": cat['id'],
                    "scheme": "http://arxiv.org/schemas/atom",
                    "label": label
                }
                entry.category(category)

            # Add arXiv-specific element "comment"
            if hit['comments'].strip():
                entry.arxiv.comment(hit['comments'])

            # Add arXiv-specific element "journal_ref"
            if hit['journal_ref'].strip():
                entry.arxiv.journal_ref(hit['journal_ref'])

            # Add arXiv-specific element "primary_category"
            prim_cat = hit['primary_classification'].to_dict()['category']
            label = prim_cat['name'] + " (" + prim_cat['id'] + ")"
            category = {
                "term": prim_cat['id'],
                "scheme": "http://arxiv.org/schemas/atom",
                "label": label
            }
            entry.arxiv.primary_category(category)

            # Add arXiv-specific element "doi"
            if hit['doi']:
                entry.arxiv.doi(hit['doi'])

            # Add each author
            for author in hit['authors']:
                author_list = {"name": author['full_name']}
                entry.author(author_list)
                # TODO - How can arxiv-specific affiliation elements be added to authors?

        data = fg.atom_str(pretty=True)
        status_code = status.HTTP_200_OK
        return data, status_code
Code example #42
    def write_rss(self, audio=False):
        """Write podcast feeds to files."""

        print("playlist self.info", flush=True)
        pp.pprint(self.info)

        prefix = "audio-" if audio else ""

        feed_url = self.controller.base_url + self.folder + '/' + prefix + 'podcast.xml'

        feedgen = FeedGenerator()
        feedgen.load_extension('podcast')

        feedgen.generator('Adafruit-Podcast')
        feedgen.id(feed_url)
        feedgen.title(self.info['title'])
        feedgen.subtitle(self.info['itunesSubtitle'])
        feedgen.author({'name': self.info['author']})
        for category in self.info['categories']:
            feedgen.category(term=category)
        feedgen.webMaster(self.info['webMaster'])
        feedgen.managingEditor(self.info['managingEditor'])
        feedgen.link(href=feed_url, rel='self')

        # Link to a chosen URL as an alternate, if set.
        if 'htmlUrl' in self.info:
            feedgen.link(href=self.info['htmlUrl'], rel='alternate')
        else:
            # Otherwise link to the original YouTube playlist as an alternate:
            if isinstance(self.url, list):
                for url in self.url:
                    feedgen.link(href=url, rel='alternate')
            else:
                feedgen.link(href=self.url, rel='alternate')

        feedgen.language('en')

        # feedgen.logo('http://ex.com/logo.jpg')

        # pylint: disable=no-member
        feedgen.podcast.itunes_category(self.info['itunesCategory']['text'])
        feedgen.podcast.itunes_subtitle(self.info['itunesSubtitle'])
        feedgen.podcast.itunes_summary(self.info['description'])
        feedgen.podcast.itunes_owner(email=self.info['itunesOwner']['email'],
                                     name=self.info['itunesOwner']['name'])
        feedgen.podcast.itunes_author(self.info['itunesOwner']['name'])
        feedgen.podcast.itunes_image(self.controller.base_url + self.folder +
                                     '/image.jpg')
        feedgen.podcast.itunes_explicit('clean')

        for vid in self.videos:
            print("vid:\n", flush=True)
            pp.pprint(vid)
            print("\n", flush=True)

            vid_filename = vid['_filename'].split('.')[0] + (".mp3" if audio
                                                             else ".mp4")

            vid_url = self.video_url(vid_filename)

            # Size of enclosed file in bytes:
            vid_size = os.path.getsize(vid_filename)

            # Date of upload (from the youtube-dl JSON data)
            eastern = pytz.timezone('US/Eastern')
            vid_date = eastern.localize(
                datetime.datetime.strptime(vid['upload_date'], '%Y%m%d'))

            entry = feedgen.add_entry()
            entry.id(vid_url)
            entry.title(vid['fulltitle'])
            entry.published(vid_date)
            for category in vid['categories']:
                entry.category(term=category)
            entry.description(vid['description'])
            entry.enclosure(vid_url, str(vid_size),
                            ('audio/mp3' if audio else 'video/mp4'))
            entry.podcast.itunes_image(self.controller.base_url + self.folder +
                                       '/image.jpg')

            entry.podcast.itunes_author(self.info['author'])
            entry.podcast.itunes_summary(vid['description'])
            entry.podcast.itunes_duration(vid['duration'])

        feedgen.rss_str(pretty=True)

        # Ensure output folder for this podcast exists:
        os.makedirs(os.path.join(self.controller.output_dir, self.folder),
                    exist_ok=True)

        # Generate RSS file in output folder:
        feedgen.rss_file(
            os.path.join(self.controller.output_dir, self.folder,
                         prefix + 'podcast.xml'))
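For orientation, a hedged sketch of the self.info metadata write_rss expects; the keys are taken from the lookups above and the values are illustrative:

# Illustrative playlist metadata; every key below is read somewhere in write_rss().
info = {
    'title': 'Example Show',
    'itunesSubtitle': 'A short subtitle',
    'author': 'Example Author',
    'categories': ['Technology'],
    'webMaster': 'webmaster@example.com',
    'managingEditor': 'editor@example.com',
    'description': 'A longer show description.',
    'itunesCategory': {'text': 'Technology'},
    'itunesOwner': {'name': 'Example Owner', 'email': 'owner@example.com'},
    # 'htmlUrl': 'https://example.com/show',  # optional alternate link
}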
Code example #43
File: main.py Project: vadviktor/animetorrent-feed
class Spider:
    def __init__(self):
        self.config = toml.load("config.toml")
        self.aws_session = boto3.session.Session()
        self.cloudwatch = self.aws_session.client(
            service_name="cloudwatch",
            region_name=self.config["secretsmanager"]["region"],
        )
        self._report_execution()
        self.metric_retry_count = 0

        self.environment = getenv("APP_ENVIRONMENT", "development")
        with open("version.txt", "r") as f:
            self.version = f.readline().strip()

        loglevel = logging.DEBUG
        if self.environment == "production":
            loglevel = logging.ERROR

        logging.basicConfig(
            stream=stdout,
            level=loglevel,
            format="%(asctime)s - %(levelname)s - %(message)s",
        )

        sentry_sdk.init(
            "https://[email protected]/1509686",
            environment=self.environment,
            release=self.version,
        )

        self.feed = FeedGenerator()
        self.session = HTMLSession()
        self.s3 = self.aws_session.client(service_name="s3")

    def _anti_hammer_sleep(self):
        logging.debug("zzzZZzzzZZZZZzzzzz")
        sleep(randrange(1, self.config["anti_hammer_sleep"]))

    def _secrets(self):
        logging.debug("fetching secrets from AWS")
        try:
            client = self.aws_session.client(
                service_name="secretsmanager",
                region_name=self.config["secretsmanager"]["region"],
            )
            get_secret_value_response = client.get_secret_value(
                SecretId=self.config["secretsmanager"]["secret_name"])
        except ClientError as e:
            capture_exception(e)

            if e.response["Error"]["Code"] == "DecryptionFailureException":
                # Secrets Manager can't decrypt the protected secret text using the provided KMS key.
                # Deal with the exception here, and/or rethrow at your discretion.
                raise e
            elif e.response["Error"][
                    "Code"] == "InternalServiceErrorException":
                # An error occurred on the server side.
                # Deal with the exception here, and/or rethrow at your discretion.
                raise e
            elif e.response["Error"]["Code"] == "InvalidParameterException":
                # You provided an invalid value for a parameter.
                # Deal with the exception here, and/or rethrow at your discretion.
                raise e
            elif e.response["Error"]["Code"] == "InvalidRequestException":
                # You provided a parameter value that is not valid for the current state of the resource.
                # Deal with the exception here, and/or rethrow at your discretion.
                raise e
            elif e.response["Error"]["Code"] == "ResourceNotFoundException":
                # We can't find the resource that you asked for.
                # Deal with the exception here, and/or rethrow at your discretion.
                raise e
        else:
            # Decrypts secret using the associated KMS CMK.
            # Depending on whether the secret is a string or binary, one of these fields will be populated.
            if "SecretString" in get_secret_value_response:
                return json.loads(get_secret_value_response["SecretString"])

    def crawl(self):
        self._login()

        self.feed.id(f"{self.version}.vadviktor.xyz")
        self.feed.updated(datetime.utcnow().isoformat("T") + "Z")
        self.feed.author({
            "name": "Viktor (Ikon) VAD",
            "email": "*****@*****.**",
            "uri": "https://www.github.com/vadviktor",
        })
        self.feed.title("Animetorrents.me feed")
        self.feed.link(
            href=self.config["s3"]["object_url"].format(
                bucket=self.config["s3"]["bucket"],
                region=self.config["s3"]["region"],
                filekey=self.config["s3"][f"feed_filename_{self.environment}"],
            ),
            rel="self",
        )

        for profile_url in self._torrent_profile_links(self._max_pages()):
            profile_data = self._parse_profile(profile_url)
            if profile_data is None:
                continue

            fe = self.feed.add_entry(order="append")
            fe.id(profile_url)
            fe.title(profile_data["title"])
            fe.link(href=profile_url, rel="self")

            cover_image_url = None
            if profile_data["cover_image_src"] is not None:
                cover_image_url = self._cover_image_upload_and_get_url(
                    profile_data["cover_image_src"])

            thumbnail_small_image_urls = self._thumbnail_small_image_upload_and_get_urls(
                profile_data["thumbnail_small_image_srcs"])
            thumbnail_large_image_urls = self._thumbnail_large_image_upload_and_get_urls(
                profile_data["thumbnail_large_image_srcs"])

            torrent_public_url = self._torrent_upload_and_get_url(
                profile_data["torrent_download_url"],
                profile_data["torid"],
                slugify(profile_data["title"]),
                profile_data["publish_date"],
            )

            content_lines = []
            if cover_image_url is not None:
                content_lines.append(f'<p><img src="{cover_image_url}" /></p>')

            content_lines.append(f'<p>[{profile_data["category"]}]</p>')
            content_lines.append(f'<p>Tags: {profile_data["tags"]}</p>')
            content_lines.append(
                f'<p>Published: {profile_data["publish_date"]}</p>')
            content_lines.append(
                f'<p><a href="{profile_url}" target="blank">{profile_url}</a></p>'
            )
            content_lines.append(
                f'<p style="white-space: pre-wrap;">{profile_data["description"]}</p>'
            )

            content_lines.append(f"<p>")
            for k, v in enumerate(thumbnail_small_image_urls):
                content_lines.append(f"""
                    <a href="{thumbnail_large_image_urls[k]}" target="blank">
                        <img src="{v}" width="200" height="100" />
                    </a>""")
            content_lines.append(f"</p>")

            content_lines.append(
                f'<p><a href="{torrent_public_url}" target="blank">Download</a></p>'
            )
            content_lines.append(f'<p>{profile_data["torrent_details"]}</p>')
            content_lines.append(f'<p>{profile_data["file_list"]}</p>')

            if profile_data["media_info"] is not None:
                content_lines.append(f'<p>{profile_data["media_info"]}</p>')

            fe.content(self._valid_xhtml_content(content_lines), type="xhtml")

        self._upload_feed()
        self._report_retry_count()

    @staticmethod
    def _valid_xhtml_content(content_lines: List) -> str:
        broken_html = "".join(content_lines)
        # parse as HTML
        parser = etree.HTMLParser()
        tree = etree.parse(StringIO(broken_html), parser)
        # output as valid XML
        result = etree.tostring(tree.getroot(),
                                pretty_print=True,
                                method="xml")

        return result.decode("utf-8")

    def _upload_feed(self):
        logging.debug("construct and upload feed")

        atomfeed = self.feed.atom_str()
        bucket = self.config["s3"]["bucket"]
        key = self.config["s3"][f"feed_filename_{self.environment}"].format(
            version=getenv("FEED_VERSION", "v0"))
        self.s3.upload_fileobj(BytesIO(atomfeed), bucket, key)
        resp = self.s3.put_object_acl(ACL="public-read",
                                      Bucket=bucket,
                                      Key=key)
        if resp is None:
            capture_message(f"Failed to set object ACL for {bucket}/{key}")

    def _parse_profile(self, profile_url):
        logging.debug(f"processing profile {profile_url}")
        resp = self._get(profile_url)

        if ("Error 404: Torrent not found" in resp.text
                or "Torrent not found" in resp.text):
            msg = f"No torrent found for {profile_url}"
            logging.info(msg)
            capture_message(msg)
            return None

        profile_data = {}
        profile_data["category"] = resp.html.find("h1.headline img",
                                                  first=True).attrs["alt"]
        if any(category in profile_data["category"]
               for category in self.config["exclude_categories"]):
            return None

        profile_data["torid"] = re.match(r".*=(\d+)$", profile_url)[1]

        try:
            profile_data["torrent_download_url"] = next(
                l for l in resp.html.links if "download.php?torid=" in l)
        except StopIteration:
            msg = f"did not find download link for {profile_url}"
            capture_message(msg)
            raise RuntimeError(msg)

        profile_data["hashid"] = re.match(
            r".*torid=([a-z0-9]+)$",
            profile_data["torrent_download_url"]).group(1)

        profile_data["title"] = resp.html.find("h1.headline", first=True).text
        profile_data["description"] = resp.html.find("#torDescription",
                                                     first=True).text
        profile_data["tags"] = resp.html.find("#tagLinks", first=True).text
        profile_data["publish_date"] = self._parse_publish_date(
            resp.html.find("div.ribbon span.blogDate", first=True).text)
        profile_data["torrent_details"] = resp.html.find(
            "#tabs-1 table.dataTable", first=True).html
        profile_data["media_info"] = self._download_media_info(
            profile_data["torid"])
        profile_data["file_list"] = self._download_file_list(
            profile_data["hashid"])

        try:
            profile_data["cover_image_src"] = next(
                link.attrs["src"]
                for link in resp.html.find("div.contentArea img")
                if "imghost/covers/" in link.attrs["src"])
        except StopIteration:
            logging.debug(f"did not find cover image for {profile_url}")
            profile_data["cover_image_src"] = None

        profile_data["thumbnail_small_image_srcs"] = [
            i.attrs["src"] for i in resp.html.find("#torScreens img")
        ]
        profile_data["thumbnail_large_image_srcs"] = [
            i.attrs["href"] for i in resp.html.find("#torScreens a")
        ]

        return profile_data

    @retry((TimeOutException, ConnectionError), tries=5, delay=3, backoff=2)
    def _get(self, url, **kwargs) -> Response:
        self._anti_hammer_sleep()
        resp = self.session.get(url, **kwargs)

        if resp.status_code in TIMEOUT_STATUS_CODES:
            self.metric_retry_count += 1
            raise TimeOutException

        return resp

    @staticmethod
    def _parse_publish_date(text) -> datetime:
        return datetime.fromtimestamp(
            mktime(strptime(text, "%d %b, %Y [%I:%M %p]")))

    def _torrent_profile_links(self, max_pages) -> List:
        links = []
        for page in range(1, self.config["torrent_pages_to_scan"] + 1):
            resp = self._torrent_list_response(page, max_pages)

            links.extend(l for l in resp.html.links
                         if "torrent-details.php?torid=" in l)

        return links

    @retry(TimeOutException, tries=5, delay=3, backoff=2)
    def _torrent_list_response(self, current_page: int,
                               max_pages: int) -> Response:
        logging.debug(f"getting torrent list page no. {current_page}")
        headers = {"X-Requested-With": "XMLHttpRequest"}
        url = self.config["site"]["torrent_list_url"].format(
            max=max_pages, current=current_page)
        resp = self._get(url=url, headers=headers)
        if resp.status_code in TIMEOUT_STATUS_CODES:
            self.metric_retry_count += 1
            raise TimeOutException

        logging.debug(f"response status code {resp.status_code}")
        logging.debug(f"response length {len(resp.text)}")

        if "Access Denied!" in resp.text:
            raise RuntimeError("AJAX request was denied")

        return resp

    @retry(TimeOutException, tries=5, delay=3, backoff=2)
    def _login(self):
        login_url = self.config["site"]["login_url"]
        username = self._secrets()["username"]
        password = self._secrets()["password"]

        self._get(login_url)
        resp = self.session.post(
            login_url,
            data={
                "form": "login",
                "username": username,
                "password": password
            },
        )

        if resp.status_code in TIMEOUT_STATUS_CODES:
            self.metric_retry_count += 1
            raise TimeOutException

        if "Error: Invalid username or password." in resp.text:
            raise RuntimeError("login failed because of invalid credentials")
        else:
            logging.debug("logged in")

    @retry(TimeOutException, tries=5, delay=3, backoff=2)
    def _max_pages(self):
        logging.debug("finding out torrents max page number")

        try:
            resp = self._get(self.config["site"]["torrents_url"])

            if resp.status_code in TIMEOUT_STATUS_CODES:
                self.metric_retry_count += 1
                raise TimeOutException

            if resp.status_code != 200:
                raise RuntimeError(
                    "the torrents page is not responding correctly")

            pattern = r"ajax/torrents_data\.php\?total=(?P<max>\d+)&page=1"
            match = re.search(pattern, resp.text)
            if match is None:
                raise RuntimeError("could not find max page number")

            max_page = match.group("max")
            logging.debug(f"max pages figured out: {max_page}")

            return int(max_page)
        except ConnectionError as e:
            capture_exception(e)
            raise RuntimeError("failed to get the torrents page")

    def _download_media_info(self, torid) -> Optional[str]:
        logging.debug(f"getting torrent media info for {torid}")

        headers = {"X-Requested-With": "XMLHttpRequest"}
        url = self.config["site"]["torrent_techspec_url"].format(torid)
        resp = self._get(url=url, headers=headers)

        logging.debug(f"response status code {resp.status_code}")
        logging.debug(f"response length {len(resp.text)}")

        if len(resp.text) == 0:
            return None

        if "Access Denied!" in resp.text:
            raise RuntimeError("AJAX request was denied")

        return resp.html.html

    def _download_file_list(self, hashid) -> str:
        logging.debug(f"getting torrent file list for {hashid}")

        headers = {"X-Requested-With": "XMLHttpRequest"}
        url = self.config["site"]["torrent_filelist_url"].format(hashid)
        resp = self._get(url=url, headers=headers)

        logging.debug(f"response status code {resp.status_code}")
        logging.debug(f"response length {len(resp.text)}")

        if "Access Denied!" in resp.text:
            raise RuntimeError("AJAX request was denied")

        return resp.html.html

    def _cover_image_upload_and_get_url(self, url) -> str:
        matches = re.match(r".*/covers/(\d{4})/(\d{2})/(.*)", url)
        year = matches[1]
        month = matches[2]
        filename = matches[3]
        key = f"covers/{year}/{month}/{filename}"

        return self._upload(key, url)

    def _upload(self, key, url) -> str:
        """
        Check if key exists in the bucket.
        If not, then download it from url and upload it to S3 as key.
        Set the object ACL to public readable.
        Return the public URL for the object.

        Args:
            key (str): S3 object key
            url (str): source URL to download the data from

        Returns:
            (str): the public URL in S3
        """
        bucket = self.config["s3"]["bucket"]
        try:
            self.s3.head_object(Bucket=bucket, Key=key)
        except ClientError:
            resp = self._get(url)
            self.s3.upload_fileobj(
                BytesIO(resp.content),
                bucket,
                key,
                ExtraArgs={"StorageClass": "STANDARD_IA"},
            )

            resp = self.s3.put_object_acl(ACL="public-read",
                                          Bucket=bucket,
                                          Key=key)
            if resp is None:
                capture_message(f"Failed to set object ACL for {bucket}/{key}")

        return self.config["s3"]["object_url"].format(
            bucket=self.config["s3"]["bucket"],
            region=self.config["s3"]["region"],
            filekey=key,
        )

    def _thumbnail_small_image_upload_and_get_urls(self, urls) -> List:
        pub_urls = []
        for url in urls:
            matches = re.match(r".*/screenthumb/(\d{4})/(\d{2})/(.*)", url)
            year = matches[1]
            month = matches[2]
            filename = matches[3]
            key = f"screenthumbs/small/{year}/{month}/{filename}"
            pub_urls.append(self._upload(key, url))

        return pub_urls

    def _thumbnail_large_image_upload_and_get_urls(self, urls) -> List:
        pub_urls = []
        for url in urls:
            matches = re.match(r".*/screens/(\d{4})/(\d{2})/(.*)", url)
            year = matches[1]
            month = matches[2]
            filename = matches[3]
            key = f"screenthumbs/large/{year}/{month}/{filename}"
            pub_urls.append(self._upload(key, url))

        return pub_urls

    def _torrent_upload_and_get_url(self, url, torid, filename,
                                    publish_date) -> str:
        """

        Args:
            url (str): Source URL to torrent
            torid (str): Torrent ID
            filename (str): The filename to use in the S3 key
            publish_date (datetime): Torrent publish date

        Returns:
            (str) S3 public URL for the file
        """
        key = f"torrents/{publish_date.year}/{publish_date.month}/{filename}_{torid}.torrent"
        return self._upload(key, url)

    def _report_execution(self):
        self.cloudwatch.put_metric_data(
            Namespace="Animetorrents",
            MetricData=[{
                "MetricName": "execution",
                "Value": 0.0
            }],
        )

    def _report_retry_count(self):
        self.cloudwatch.put_metric_data(
            Namespace="Animetorrents",
            MetricData=[{
                "MetricName": "retries",
                "Value": self.metric_retry_count
            }],
        )
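Driving the spider is a single call. A hedged entry-point sketch, assuming config.toml, version.txt and the AWS credentials the constructor expects are available:

# Hedged entry point: one crawl logs in, builds the Atom feed and uploads it to S3.
if __name__ == "__main__":
    Spider().crawl()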
Code example #44
File: rss.py Project: ye11ow/tohsaka
class Outputter(BaseOutputter):
    @property
    def REQUIRED_FIELDS(self):
        return ['description', 'pubDate', 'title']

    def __init__(self, config):
        BaseOutputter.__init__(self, config)

        self.file = config.get('filename', 'output') + '.xml'
        self.title = config.get('title', 'Sample RSS')
        self.description = config.get('description', 'Sample')
        self.cache = config.get('cache', False)
        self.base_link = config.get('host')
        self.temp_dir = file_util.get_temp_dir()

        self.fg = FeedGenerator()
        self._create_feed()

        self.item_count = 0
        self.filtered_count = 0

    def _create_feed(self):
        fg = self.fg
        fg.id(self.base_link)
        fg.title(self.title)
        fg.language('zh-CN')
        fg.link(href=self.base_link, rel='self')
        fg.description(self.description)
        fg.author(name='Tohsaka')

    def _valid(self, item):
        self.item_count += 1
        valid = BaseOutputter._valid(self, item)

        if not self.cache:
            return valid

        if valid:
            filename = hashlib.md5(
                item.get('link').encode('utf-8')).hexdigest()
            valid = not file_util.touch(os.path.join(self.temp_dir, filename))
            if not valid:
                logger.debug('%s is filtered', item.get('title'))
                self.filtered_count += 1

        return valid

    def _clear_obsolete_cache(self, days):
        files = os.listdir(self.temp_dir)
        now = time.time()
        removed_count = 0

        for f in files:
            filename = os.path.join(self.temp_dir, f)
            diff = now - os.path.getmtime(filename)
            if diff > SECONDS_OF_DAY * days:
                os.remove(filename)
                removed_count += 1

        if removed_count > 0:
            logger.info(f'Removed {removed_count} obsolete cache files')

    def _output(self):
        filename = os.path.join(self.output_folder, self.file)
        logger.info(
            f'Output to file {filename}. Total items {self.item_count}, filtered {self.filtered_count}'
        )
        self.fg.atom_file(filename)

        if self.cache:
            self._clear_obsolete_cache(14)

    def _add_item(self, item):
        title = item.get('title')
        description = item.get('description')
        link = item.get('link')
        pub_date = item.get('pubDate')
        guid = item.get('id', link)

        entry = self.fg.add_entry()

        entry.title(title)
        entry.link(href=link)
        entry.content(content=description, type='html')
        entry.guid(guid)
        try:
            pub_date = parser.parse(pub_date).replace(
                tzinfo=pytz.timezone('Asia/Shanghai'))
        except Exception:
            pub_date = datetime.now(pytz.utc).isoformat()
        entry.pubDate(pub_date)
        entry.updated(pub_date)
Code example #45
def rss_feed_for_group(api, group, reposts=True):
    """
    Create rss feed based on the group posts

    :param api: VkApiMethod instance, to initialise it,
          api = vk_api.VkApi(USERNAME, PASSWORD).get_api();
    :param group: string, short name of a group, for instance,
            'club1' in https://vk.com/club1/;
    :param reposts: boolean, False if we do not want to add reposts
              to the feed
    :returns: FeedGenerator instance, ready for writing XML
    """

    # The VK API allows 10000 wall.get calls per day, so if we are going to
    # refresh a feed every 20 minutes (that's 72 times a day), we should be
    # OK with about 138 groups (if I get it right)

    # Get the first 60 (should be enough) posts from a group
    vargs = {'domain': group, 'count': 60}

    # If a group doesn't have a short name, its URL looks like, for example,
    # vk.com/club526452694; but, in general, a group can also have a short
    # name beginning with 'club'. The problem is that the VK API doesn't
    # allow getting the posts from the group 'club526452694' if we use that
    # as the short name (it returns an empty list), therefore we have to
    # check for this case
    if group[:4] == 'club':
        # If the part after 'club' is numeric, use it as a negative owner_id;
        # otherwise int() raises ValueError and we keep using it as a domain
        try:
            owner_id = -1 * int(group[4:])
            vargs['owner_id'] = owner_id
            del vargs['domain']
        except ValueError:
            pass

    try:
        posts = api.wall.get(**vargs)['items']
        # Get the name of a group
        group_name = api.groups.getById(group_id=group)[0]['name']
    except VkApiError as error_msg:
        print(error_msg)

    # Generate the feed
    fg = FeedGenerator()
    fg.title(group_name)
    fg.link(href='https://vk.com/{}/'.format(group))
    fg.description("Vk feed - {}".format(group_name))
    # Get the local timezone object
    local_tz = get_localzone()
    # The feedgen lib really wants timezone info in every date
    fg.lastBuildDate(datetime.now(local_tz))

    # Go through the posts...
    for post in posts:
        # We do not need ads, right?
        if post['marked_as_ads']:
            continue
        # If the post is not a repost
        if post.get('copy_history') is None:
            post_data = post_parsing(post, group_name)
        # If it is, pass to post_parsing function the dictionary
        # post['copy_history'][0] representing the post
        # which the repost are made from (if we want reposts)
        elif reposts:
            post_data = post_parsing(post['copy_history'][0], group_name)
        else:
            continue

        # ...and create RSS items
        fe = fg.add_entry()
        fe.title(post_data['title'])
        fe.link(href=post_data['link'])
        fe.description(post_data['description'])
        fe.guid(post_data['guid'])
        fe.pubdate(post_data['pubDate'])

    return fg
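A hedged usage sketch based on the docstring above; the credentials and output path are placeholders:

# Minimal usage sketch; credentials and the output file name are placeholders.
import vk_api

USERNAME, PASSWORD = 'login', 'password'
api = vk_api.VkApi(USERNAME, PASSWORD).get_api()
fg = rss_feed_for_group(api, 'club1', reposts=False)
fg.rss_file('club1.xml')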
Code example #46
File: build.py Project: jimmyhmiller/eatonphil.com
def main():
    all_tags = {}
    post_data = []
    for post in get_posts():
        out_file = post[len('posts/'):]
        output, title = get_post_data(post)
        header, date, tags_raw = title[1], title[2], title.get(6, "")

        tags = tags_raw.split(",")
        tags_html = get_html_tags(tags)

        post_data.append((out_file, title[1], title[2], post, output))
        for tag in tags:
            if tag not in all_tags:
                all_tags[tag] = []

            all_tags[tag].append((out_file, title[1], title[2]))

        title = title[1]
        with open('dist/' + out_file, 'w') as f:
            f.write(
                TEMPLATE.format(post=output,
                                title=title,
                                subtitle=date,
                                tag=title,
                                tags=tags_html))

    post_data.sort(key=lambda post: datetime.strptime(post[2], '%B %d, %Y'))
    post_data.reverse()
    home_page = HOME_PAGE
    home_page += "\n".join([POST_SUMMARY.format(*args) for args in post_data])
    with open('dist/index.html', 'w') as f:
        f.write(
            TEMPLATE.format(post=home_page,
                            title="",
                            tag=TAG,
                            subtitle="",
                            tags=""))

    with open('dist/style.css', 'w') as fw:
        with open('style.css') as fr:
            fw.write(fr.read())

    fg = FeedGenerator()
    for url, title, date, post, content in reversed(post_data):
        fe = fg.add_entry()
        fe.id('http://notes.eatonphil.com/' + url)
        fe.title(title)
        fe.link(href='http://notes.eatonphil.com/' + url)
        fe.pubDate(
            datetime.strptime(date, '%B %d, %Y').replace(tzinfo=timezone.utc))
        fe.content(content)

    fg.id('http://notes.eatonphil.com/')
    fg.link(href='http://notes.eatonphil.com/')
    fg.title(TAG)
    fg.description(TAG)
    fg.author(name='Phil Eaton', email='*****@*****.**')
    fg.language('en')
    fg.rss_file('dist/rss.xml')

    if not os.path.exists('dist/tags'):
        os.makedirs('dist/tags')
    for tag in all_tags:
        posts = all_tags[tag]
        with open('dist/tags/%s.html' % tag, 'w') as f:
            posts.sort(
                key=lambda post: datetime.strptime(post[2], '%B %d, %Y'))
            posts.reverse()
            tag_page = TAG_PAGE.format(tag)
            tag_page += "\n".join(
                [POST_SUMMARY.format(*args) for args in posts])
            f.write(
                TEMPLATE.format(post=tag_page,
                                title="",
                                tag=TAG,
                                subtitle="",
                                tags=""))
Code example #47
    "href": "https://github.com/OWASP/CheatSheetSeries",
    "rel": "alternate"
})
feed_generator.language("en")
feed_generator.pubDate(current_date)
feed_generator.lastBuildDate(current_date)
for pull_request in pull_requests:
    # Take only merged PR
    if pull_request["merged_at"] is None:
        continue
    # Convert merge date from 2019-08-25T06:36:35Z To Sun, 19 May 2002 15:21:36 GMT
    merge_date_src = pull_request["merged_at"]
    merge_date_dst = datetime.strptime(
        merge_date_src,
        "%Y-%m-%dT%H:%M:%SZ").strftime("%a, %d %B %Y %H:%M:%S GMT")
    feed_entry = feed_generator.add_entry()
    feed_entry.id(pull_request["html_url"])
    feed_entry.title(pull_request["title"])
    feed_entry.link({"href": pull_request["html_url"], "rel": "self"})
    feed_entry.link({"href": pull_request["url"], "rel": "alternate"})
    feed_entry.pubDate(merge_date_dst)
    feed_entry.updated(merge_date_dst)
    contributors = []
    for assignee in pull_request["assignees"]:
        contributors.append({
            "name": assignee["login"],
            "uri": f"https://github.com/{assignee['login']}"
        })
    feed_entry.contributor(contributors)

# Save the feed to an XML file
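The actual write call is cut off above; with feedgen it would typically be a single rss_file call. A hedged completion with a placeholder file name:

# Hypothetical completion of the truncated save step; the real output path is not shown above.
feed_generator.rss_file('feed.xml', pretty=True)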
Code example #48
File: rss.py Project: whyouare111/granary
def from_activities(activities,
                    actor=None,
                    title=None,
                    feed_url=None,
                    home_page_url=None,
                    hfeed=None):
    """Converts ActivityStreams activities to an RSS 2.0 feed.

  Args:
    activities: sequence of ActivityStreams activity dicts
    actor: ActivityStreams actor dict, the author of the feed
    title: string, the feed title
    feed_url: string, the URL for this RSS feed
    home_page_url: string, the home page URL
    hfeed: dict, parsed mf2 h-feed, if available

  Returns:
    unicode string with RSS 2.0 XML
  """
    try:
        iter(activities)
    except TypeError:
        raise TypeError('activities must be iterable')

    if isinstance(activities, (dict, str)):
        raise TypeError('activities may not be a dict or string')

    fg = FeedGenerator()
    fg.id(feed_url)
    assert feed_url
    fg.link(href=feed_url, rel='self')
    if home_page_url:
        fg.link(href=home_page_url, rel='alternate')
    # TODO: parse language from lang attribute:
    # https://github.com/microformats/mf2py/issues/150
    fg.language('en')
    fg.generator('granary', uri='https://granary.io/')

    hfeed = hfeed or {}
    actor = actor or {}
    image = (util.get_url(hfeed.get('properties', {}), 'photo')
             or util.get_url(actor, 'image'))
    if image:
        fg.image(image)

    props = hfeed.get('properties') or {}
    content = microformats2.get_text(util.get_first(props, 'content', ''))
    summary = util.get_first(props, 'summary', '')
    desc = content or summary or '-'
    fg.description(desc)  # required
    fg.title(title or util.ellipsize(desc))  # required

    latest = None
    feed_has_enclosure = False
    for activity in activities:
        obj = activity.get('object') or activity
        if obj.get('objectType') == 'person':
            continue

        item = fg.add_entry()
        url = obj.get('url')
        id = obj.get('id') or url
        item.id(id)
        item.link(href=url)
        item.guid(url, permalink=True)

        # title (required)
        title = (obj.get('title') or obj.get('displayName')
                 or util.ellipsize(obj.get('content', '-')))
        # strip HTML tags
        title = util.parse_html(title).get_text('').strip()
        item.title(title)

        content = microformats2.render_content(obj,
                                               include_location=True,
                                               render_attachments=True,
                                               render_image=True)
        if not content:
            content = obj.get('summary')
        if content:
            item.content(content, type='CDATA')

        categories = [
            {
                'term': t['displayName']
            } for t in obj.get('tags', [])
            if t.get('displayName') and t.get('verb') not in ('like', 'react',
                                                              'share')
            and t.get('objectType') not in ('article', 'person', 'mention')
        ]
        item.category(categories)

        author = obj.get('author', {})
        author = {
            'name': author.get('displayName') or author.get('username'),
            'uri': author.get('url'),
            'email': author.get('email') or '-',
        }
        item.author(author)

        published = obj.get('published') or obj.get('updated')
        if published and isinstance(published, str):
            try:
                dt = mf2util.parse_datetime(published)
                if not isinstance(dt, datetime):
                    dt = datetime.combine(dt, time.min)
                if not dt.tzinfo:
                    dt = dt.replace(tzinfo=util.UTC)
                item.published(dt)
                if not latest or dt > latest:
                    latest = dt
            except ValueError:  # bad datetime string
                pass

        item_has_enclosure = False
        for att in obj.get('attachments', []):
            stream = util.get_first(att, 'stream') or att
            if not stream:
                continue

            url = stream.get('url') or ''
            mime = mimetypes.guess_type(url)[0] or ''
            if (att.get('objectType') in ENCLOSURE_TYPES
                    or mime and mime.split('/')[0] in ENCLOSURE_TYPES):
                if item_has_enclosure:
                    logging.info(
                        'Warning: item %s already has an RSS enclosure, skipping additional enclosure %s',
                        id, url)
                    continue

                item_has_enclosure = feed_has_enclosure = True
                item.enclosure(url=url,
                               type=mime,
                               length=str(stream.get('size', '')))
                item.load_extension('podcast')
                duration = stream.get('duration')
                if duration:
                    item.podcast.itunes_duration(duration)

    if feed_has_enclosure:
        fg.load_extension('podcast')
        fg.podcast.itunes_author(
            actor.get('displayName') or actor.get('username'))
        if summary:
            fg.podcast.itunes_summary(summary)
        fg.podcast.itunes_explicit('no')
        fg.podcast.itunes_block(False)
        name = author.get('name')
        if name:
            fg.podcast.itunes_author(name)
        if image:
            fg.podcast.itunes_image(image)
        fg.podcast.itunes_category(categories)

    if latest:
        fg.lastBuildDate(latest)

    return fg.rss_str(pretty=True).decode('utf-8')
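
ENCLOSURE_TYPES is defined elsewhere in the same module; judging from the way it is compared both against an attachment's objectType and against the leading part of the guessed MIME type above, a plausible definition would be the following sketch (an assumption, not copied from the source):

# Media kinds treated as RSS enclosures (assumed definition).
ENCLOSURE_TYPES = frozenset(('audio', 'video'))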
コード例 #49
0
    def _construct_feeds(self) -> Dict[str, Dict[str, str]]:
        """
        Takes the current content and returns a constructed dictionary
        of atom-formatted feeds. This method should only be
        called by the background thread.

        :return: A dictionary with string keys, one for each board
                 command and one for ``master``. The values are
                 XML-formatted feeds.
        """
        def id_generator(name, ts):
            return ('tag:{feed_domain},{date}:{name}'.format(
                feed_domain=self.feed_domain,
                date=datetime.fromtimestamp(ts).strftime('%Y-%m-%d'),
                name=name))

        def translate_content_to_xhtml(content):
            """Try to render a board post as faithfully as possible in xhtml."""
            # Unfortunately, most readers I've found strip the style attribute, so we'll probably have to work on this.
            return '<p style="white-space:pre-wrap;">{}</p>'.format(
                escape(content).replace('\n', '<br />'))

        # TODO(hyena): It would be more useful if these links were absolute.
        # Consider adding that if we ever make the web-app aware of its own
        # url.

        new_feeds = {}
        master_feedgen = FeedGenerator()
        master_feedgen.title("SpinDizzy Boards Master")
        master_feedgen.link({'href': '/sdb/atom', 'rel': 'self'})
        master_feedgen.description("All posts as scraped from SpinDizzy")
        master_feedgen.id(id_generator('master', 0))

        master_entry_list = []
        for board_command in self.current_content:
            board_feedgen = FeedGenerator()
            board_feedgen.title("SpinDizzy {}".format(
                self.board_names[board_command]))
            board_feedgen.link({
                'href': '/sdb/{}/atom'.format(board_command),
                'rel': 'self'
            })
            board_feedgen.description("Posts scraped from {}".format(
                self.board_names[board_command]))
            board_feedgen.id(id_generator(board_command, 0))
            for post in sorted(self.current_content[board_command].values(),
                               key=lambda p: -p['time']):
                entry = board_feedgen.add_entry()
                entry.title(post['title'])
                # RSS insists on an email which is annoying.
                entry.author({'name': post['owner_name']})
                entry.updated(datetime.fromtimestamp(post['time'], tz=self.tz))
                entry.link({
                    'href':
                    '/sdb/{}/{}'.format(board_command, post['time']),
                    'rel':
                    'alternate'
                })
                entry.content(translate_content_to_xhtml(post['content']),
                              type='xhtml')
                entry.id(
                    id_generator(name='/sdb/{}/{}'.format(
                        board_command, post['time']),
                                 ts=post['time']))
                master_entry_list.append(entry)
            new_feeds[board_command] = board_feedgen.atom_str(pretty=True)

        # Add the entries to the master feed in the right order.
        for entry in sorted(master_entry_list,
                            key=lambda e: -e.updated().timestamp()):
            master_feedgen.add_entry(feedEntry=entry)
        new_feeds['master'] = master_feedgen.atom_str(pretty=True)

        return new_feeds
コード例 #50
0
ファイル: podshrinker.py プロジェクト: cstorey/podshrinker
def feed(uri, verif):
    uri = base64.urlsafe_b64decode(uri.encode('utf8'))
    verif = base64.urlsafe_b64decode(verif.encode('utf8'))
    mac = hmac.new(HMAC_KEY, uri, digestmod=pyblake2.blake2s).digest()
    if not hmac.compare_digest(verif, mac):
        abort(403)

    uri = uri.decode('utf8')
    verify_uri(uri)

    cachefile = pathfor(uri, '.picklejson', FEED_DIR)
    modified = etag = None
    cached = None
    if os.path.isfile(cachefile):
        try:
            with open(cachefile, 'rb') as f:
                cached = jsonpickle.decode(f.read())
                app.logger.debug("Loaded cache from cachefile:%r", cachefile)
                etag = cached.etag if 'etag' in cached else None
                modified = cached.modified if 'modified' in cached else None
        except Exception as e:
            app.logger.warn("Could not load cache:%r", e)

    app.logger.debug("Parse feed: %r; etag:%r; modified:%r", uri, etag,
                     modified)
    parsed = feedparser.parse(uri, etag=etag, modified=modified)

    app.logger.debug("Parsed feed: %r; %r", uri, 'status' in parsed
                     and parsed.status)
    if parsed.status < 200 or parsed.status >= 400:
        app.logger.warn("Non okay status code, 404?")
        abort(404)

    if cached and not parsed.entries:
        parsed = cached

    def save_to_cache():
        with tempfile.NamedTemporaryFile(delete=False, dir=FEED_DIR,
                                         mode='w') as f:
            encoded = jsonpickle.encode(parsed)
            f.write(encoded)
            f.flush()
            os.rename(f.name, cachefile)
            os.chmod(cachefile, 0o644)
            app.logger.debug("Saved cache to cachefile:%r", cachefile)

    def done(fut):
        try:
            fut.result()
        except Exception:
            app.logger.exception("Error saving feed cache")

    pool.submit(save_to_cache).add_done_callback(done)

    feed = FeedGenerator()
    feed.id(uri)
    feed.title(parsed.feed.get('title', None) or '???')
    feed.link(href=parsed.feed.get('link', None) or 'about:blank')
    feed.description(parsed.feed.get('description', None) or '???')
    if 'image' in parsed.feed and 'href' in parsed.feed.image:
        feed.image(parsed.feed.image.href)

    for e in parsed.entries:
        try:
            entry = feed.add_entry(order='append')
            id = e.id if 'id' in e else None

            for l in (e.links if 'links' in e else []):
                if l.rel == 'enclosure' and 'href' in l:
                    if not id:
                        id = l.href
                    storename = transcoded_href(l.href)
                    entry.enclosure(urljoin(request.url, storename),
                                    l.get('size', None),
                                    l.get('type', OPUS_TYPE))
                elif l.rel == 'alternate' and 'href' in l:
                    entry.link(**l)

            for c in (e.content if 'content' in e else []):
                if 'type' in c and c.type.startswith('text/html'):
                    entry.content(content=c.value, type='html')
                else:
                    entry.content(content=c.value, type='text')

            entry.id(id)
            entry.title(e.get('title', None) or '???')
            entry.description(e.get('description', None) or '???')
            if 'updated_parsed' in e and e.updated_parsed:
                entry.updated(
                    datetime.fromtimestamp(mktime(e.updated_parsed), pytz.UTC))
            if 'published_parsed' in e and e.published_parsed:
                entry.published(
                    datetime.fromtimestamp(mktime(e.published_parsed),
                                           pytz.UTC))
        finally:
            pass

    try:
        resp = make_response(feed.rss_str(pretty=True))
        resp.headers['content-type'] = 'application/xml'
        return resp
    except BaseException as e:
        raise e
コード例 #51
0
ファイル: podtube.py プロジェクト: kaesi0/PodTube
 def get(self, playlist):
     playlist = playlist.split('/')
     if len(playlist) < 2:
         playlist.append('video')
     playlist_name = '/'.join(playlist)
     self.set_header('Content-type', 'application/rss+xml')
     if playlist_name in playlist_feed and playlist_feed[playlist_name][
             'expire'] > datetime.datetime.now():
         self.write(playlist_feed[playlist_name]['feed'])
         self.finish()
         return
     calls = 0
     payload = {'part': 'snippet', 'id': playlist[0], 'key': key}
     request = requests.get(
         'https://www.googleapis.com/youtube/v3/playlists', params=payload)
     calls += 1
     response = request.json()
     if request.status_code == 200:
         logging.debug('Downloaded Playlist Information')
     else:
         logging.error('Error Downloading Playlist: %s', request.reason)
         self.send_error(reason='Error Downloading Playlist')
         return
     fg = FeedGenerator()
     fg.load_extension('podcast')
     fg.generator('PodTube (python-feedgen)', __version__,
                  'https://github.com/aquacash5/PodTube')
     snippet = response['items'][0]['snippet']
     icon = max(snippet['thumbnails'],
                key=lambda x: snippet['thumbnails'][x]['width'])
     logging.info('Playlist: %s (%s)', playlist[0], snippet['title'])
     fg.title(snippet['title'])
     fg.id('http://' + self.request.host + self.request.uri)
     fg.description(snippet['description'] or ' ')
     fg.author(name='Podtube',
               email='*****@*****.**',
               uri='https://github.com/aquacash5/PodTube')
     fg.podcast.itunes_author(snippet['channelTitle'])
     fg.image(snippet['thumbnails'][icon]['url'])
     fg.link(href=f'http://youtube.com/playlist/?list={playlist}',
             rel='self')
     fg.language('en-US')
     fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
     fg.podcast.itunes_explicit('no')
     fg.podcast.itunes_owner(name='Podtube',
                             email='*****@*****.**')
     fg.podcast.itunes_summary(snippet['description'])
     fg.podcast.itunes_category(cat='Technology')
     fg.updated(str(datetime.datetime.utcnow()) + 'Z')
     video = None
     response = {'nextPageToken': ''}
     while 'nextPageToken' in response.keys():
         payload = {
             'part': 'snippet',
             'maxResults': 50,
             'playlistId': playlist[0],
             'key': key,
             'pageToken': response['nextPageToken']
         }
         request = requests.get(
             'https://www.googleapis.com/youtube/v3/playlistItems',
             params=payload)
         calls += 1
         response = request.json()
         if request.status_code == 200:
             logging.debug('Downloaded Playlist Information')
         else:
             logging.error('Error Downloading Playlist: %s', request.reason)
             self.send_error(reason='Error Downloading Playlist Items')
             return
         for item in response['items']:
             snippet = item['snippet']
             current_video = snippet['resourceId']['videoId']
             if 'Private' in snippet['title']:
                 continue
             logging.debug('PlaylistVideo: %s (%s)', current_video,
                           snippet['title'])
             fe = fg.add_entry()
             fe.title(snippet['title'])
             fe.id(current_video)
             icon = max(snippet['thumbnails'],
                        key=lambda x: snippet['thumbnails'][x]['width'])
             fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
             fe.updated(snippet['publishedAt'])
             if playlist[1] == 'video':
                 fe.enclosure(
                     url=f'http://{self.request.host}/video/{current_video}',
                     type="video/mp4")
             elif playlist[1] == 'audio':
                 fe.enclosure(
                     url=f'http://{self.request.host}/audio/{current_video}',
                     type="audio/mpeg")
             fe.author(name=snippet['channelTitle'])
             fe.podcast.itunes_author(snippet['channelTitle'])
             fe.pubDate(snippet['publishedAt'])
             fe.link(href=f'http://www.youtube.com/watch?v={current_video}',
                     title=snippet['title'])
             fe.podcast.itunes_summary(snippet['description'])
             fe.description(snippet['description'])
             if not video or video['expire'] < fe.pubDate():
                 video = {'video': fe.id(), 'expire': fe.pubDate()}
     feed = {
         'feed': fg.rss_str(),
         'expire': datetime.datetime.now() + datetime.timedelta(hours=calls)
     }
     playlist_feed[playlist_name] = feed
     self.write(feed['feed'])
     self.finish()
     video = video['video']
     mp3_file = 'audio/{}.mp3'.format(video)
     if playlist[1] == 'audio' and not os.path.exists(
             mp3_file) and video not in conversion_queue.keys():
         conversion_queue[video] = {
             'status': False,
             'added': datetime.datetime.now()
         }
コード例 #52
0
ファイル: __init__.py プロジェクト: simonw/datasette-atom
def render_atom(datasette, request, sql, columns, rows, database, table,
                query_name, view_name, data):
    from datasette.views.base import DatasetteError

    if not REQUIRED_COLUMNS.issubset(columns):
        raise DatasetteError(
            "SQL query must return columns {}".format(
                ", ".join(REQUIRED_COLUMNS)),
            status=400,
        )
    fg = FeedGenerator()
    fg.generator(
        generator="Datasette",
        version=__version__,
        uri="https://github.com/simonw/datasette",
    )
    fg.id(request.url)
    fg.link(href=request.url, rel="self")
    fg.updated(max(row["atom_updated"] for row in rows))
    title = request.args.get("_feed_title", sql)
    if table:
        title += "/" + table
    if data.get("human_description_en"):
        title += ": " + data["human_description_en"]
    # If this is a canned query, the configured title for it overrides all others
    if query_name:
        try:
            title = datasette.metadata(
                database=database)["queries"][query_name]["title"]
        except (KeyError, TypeError):
            pass
    fg.title(title)

    clean_function = clean
    if query_name:
        # Check allow_unsafe_html_in_canned_queries
        plugin_config = datasette.plugin_config("datasette-atom")
        if plugin_config:
            allow_unsafe_html_in_canned_queries = plugin_config.get(
                "allow_unsafe_html_in_canned_queries")
            if allow_unsafe_html_in_canned_queries is True:
                clean_function = lambda s: s
            elif isinstance(allow_unsafe_html_in_canned_queries, dict):
                allowlist = allow_unsafe_html_in_canned_queries.get(
                    database) or []
                if query_name in allowlist:
                    clean_function = lambda s: s

    # And the rows
    for row in reversed(rows):
        entry = fg.add_entry()
        entry.id(str(row["atom_id"]))
        if "atom_content_html" in columns:
            entry.content(clean_function(row["atom_content_html"]),
                          type="html")
        elif "atom_content" in columns:
            entry.content(row["atom_content"], type="text")
        entry.updated(row["atom_updated"])
        entry.title(str(row["atom_title"]))
        # atom_link is optional
        if "atom_link" in columns:
            entry.link(href=row["atom_link"])
        if "atom_author_name" in columns and row["atom_author_name"]:
            author = {
                "name": row["atom_author_name"],
            }
            for key in ("uri", "email"):
                colname = "atom_author_{}".format(key)
                if colname in columns and row[colname]:
                    author[key] = row[colname]
            entry.author(author)

    return Response(
        fg.atom_str(pretty=True),
        content_type="application/xml; charset=utf-8",
        status=200,
    )
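
The code above reads atom_id, atom_updated and atom_title from every row unconditionally, so REQUIRED_COLUMNS presumably covers at least those three; atom_content_html, atom_content, atom_link and the atom_author_* columns are handled as optional. A minimal sketch of a query that would satisfy render_atom (the posts table and its columns are hypothetical):

# Hypothetical query for datasette-atom; only the column aliases matter here.
ATOM_QUERY = """
select
  'post:' || id                      as atom_id,
  created_at                         as atom_updated,
  title                              as atom_title,
  body_html                          as atom_content_html,
  'https://example.com/posts/' || id as atom_link
from posts
order by created_at desc
limit 50
"""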
コード例 #53
0
ファイル: files.py プロジェクト: ttrefren/gensite
def gensite(rootdir):
    """ reads the site config, loads the template, and processes each file it finds """
    site_config = siteconfig.SiteConfig(rootdir)

    template = GenSiteTemplate(os.path.join(rootdir, site_config.template))
    destdir = os.path.join(rootdir, site_config.destination_dir)
    sourcedir = os.path.join(rootdir, site_config.source_dir)

    files = gather_source_files(sourcedir, [".md"], site_config)

    articles, unpublished_articles = get_articles(files)

    files_to_be_regenerated = [
        x for x in articles if needs_to_be_regenerated(destdir, x)
    ]
    print("Will generate ", str(len(files_to_be_regenerated)), "files")

    article_menu = generate_navigation_header(site_config)

    for f in files_to_be_regenerated:
        extra_article_mustache_tags = {"article_menu": article_menu}
        template.process_source_file(
            f,
            destdir,
            site_config,
            additional_mustache_tags=extra_article_mustache_tags)

    static_pages = [
        e for e in files
        if (e.template_type() == "static_page" and e.publish() == True)
    ]
    static_pages_to_be_regenerated = [
        x for x in static_pages if needs_to_be_regenerated(destdir, x)
    ]

    if (len(static_pages_to_be_regenerated) != 0):
        print("Will generate ", str(len(static_pages_to_be_regenerated)),
              " static pages")

    for f in static_pages_to_be_regenerated:
        extra_article_mustache_tags = {"article_menu": article_menu}
        template.process_source_file(
            f,
            destdir,
            site_config,
            additional_mustache_tags=extra_article_mustache_tags)

    template.copy_template_files(destdir)
    """ generate feed """
    fg = FeedGenerator()
    fg.id(site_config.blog_name)
    fg.language("en")
    fg.title(site_config.blog_name)
    fg.link(href=site_config.root_url, rel='alternate')
    fg.description(site_config.blog_description)

    for entry in articles:
        dest_file_name = entry.dest_file_name()
        fe = fg.add_entry()
        link = site_config.root_url + dest_file_name
        fe.id(link)
        fe.title(entry.title())
        fe.link(link={"href": link})

        if (entry.summary == ""):
            fe.summary(entry.title())
        else:
            fe.summary(entry.summary)

        date = datetime.datetime.fromtimestamp(
            time.mktime(entry.original_date), UTC())
        fe.published(date)
        fe.updated(date)

    fg.rss_file(os.path.join(destdir, 'rss.xml'), pretty=True)
    fg.atom_file(os.path.join(destdir, 'atom.xml'), pretty=True)

    index_element = template.generate_index(articles)
    index = [e for e in files if e.template_type() == "index"][0]
    i = str(lxml.etree.tostring(index_element, pretty_print=True), "utf-8")
    template.process_source_file(index,
                                 destdir,
                                 site_config,
                                 additional_mustache_tags={"index_content": i},
                                 force_write=True)
    """ tag cloud stuff """
    tag_cloud_template = [
        e for e in files if e.template_type() == "tag_cloud"
    ][0]
    print(tag_cloud_template)
    tag_cloud_json = json.dumps(build_tagging_data(site_config, articles),
                                indent=2,
                                sort_keys=True)
    template.process_source_file(tag_cloud_template,
                                 destdir,
                                 site_config,
                                 additional_mustache_tags={
                                     "tag_json": tag_cloud_json,
                                     "article_menu": article_menu
                                 },
                                 force_write=True)
    """ copy static files """
    static_files = get_files_in_dir(sourcedir)
    num_static_files = 0
    for s in static_files:
        t = os.path.join(sourcedir, s)
        if (os.path.splitext(s)[1] == ".md"):
            continue
        if (s == "config.js"):
            continue
        f = FileDef(os.path.join(sourcedir, s),
                    cache=False,
                    relative_path=os.path.split(s)[0])
        if f.copy_if_required(destdir):
            num_static_files += 1
    print("Copied " + str(num_static_files) + " static files")
    if (len(unpublished_articles) != 0):
        print(
            "The following files are marked as unpublished and were not processed: "
        )
        for u in unpublished_articles:
            print("  ", u.file_name)
コード例 #54
0
async def playlist(request, playlist_id, return_type='video'):
    log.info(f'Playlist: {playlist_id}')
    playlist_name = f'{playlist_id}/{return_type}'
    if playlist_name in playlist_feed and playlist_feed[playlist_name][
            'expire'] > datetime.now():
        return raw(playlist_feed[playlist_name]['feed'],
                   content_type='application/rss+xml')
    calls = 0
    payload = {'part': 'snippet', 'id': playlist_id, 'key': KEY}
    log.debug('Downloaded Playlist Information')
    response = json.loads(await get(
        'https://www.googleapis.com/youtube/v3/playlists', params=payload))
    calls += 1
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.generator('PodTube', __version__,
                 'https://github.com/aquacash5/PodTube')
    snippet = response['items'][0]['snippet']
    icon = max(snippet['thumbnails'],
               key=lambda x: snippet['thumbnails'][x]['width'])
    fg.title(snippet['title'])
    fg.id(f'http://{request.headers["host"]}{request.url}')
    fg.description(snippet['description'] or ' ')
    fg.author(name=snippet['channelTitle'])
    fg.image(snippet['thumbnails'][icon]['url'])
    fg.link(href=f'https://www.youtube.com/playlist?list={playlist_id}')
    fg.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
    fg.podcast.itunes_summary(snippet['description'])
    fg.podcast.itunes_category('Technology', 'Podcasting')
    fg.updated(f'{str(datetime.utcnow())}Z')
    response = {'nextPageToken': ''}
    while 'nextPageToken' in response.keys():
        payload = {
            'part': 'snippet',
            'maxResults': 50,
            'playlistId': playlist_id,
            'key': KEY,
            'pageToken': response['nextPageToken']
        }
        response = json.loads(await get(
            'https://www.googleapis.com/youtube/v3/playlistItems',
            params=payload))
        calls += 1
        for item in response['items']:
            snippet = item['snippet']
            current_video = snippet['resourceId']['videoId']
            if 'Private' in snippet['title']:
                continue
            log.debug(f'PlaylistVideo: {current_video} {snippet["title"]}')
            fe = fg.add_entry()
            fe.title(snippet['title'])
            fe.id(current_video)
            icon = max(snippet['thumbnails'],
                       key=lambda x: snippet['thumbnails'][x]['width'])
            fe.podcast.itunes_image(snippet['thumbnails'][icon]['url'])
            fe.updated(snippet['publishedAt'])
            if return_type == 'audio':
                fe.enclosure(
                    url=
                    f'http://{request.headers["host"]}/audio/{current_video}',
                    type="audio/mpeg")
            else:
                fe.enclosure(
                    url=
                    f'http://{request.headers["host"]}/video/{current_video}',
                    type="video/mp4")
            fe.author(name=snippet['channelTitle'])
            fe.podcast.itunes_author(snippet['channelTitle'])
            fe.pubdate(snippet['publishedAt'])
            fe.link(href='http://www.youtube.com/watch?v=' + current_video,
                    title=snippet['title'])
            fe.podcast.itunes_summary(snippet['description'])
            fe.description(snippet['description'])
            await sleep(0)
    feed = {
        'feed': fg.rss_str(),
        'expire': datetime.now() + timedelta(hours=calls)
    }
    playlist_feed[playlist_name] = feed
    return raw(feed['feed'], content_type='application/rss+xml')
コード例 #55
0
ファイル: feed.py プロジェクト: fionn/feed
class Feed:
    def __init__(self,
                 url: str,
                 name: str,
                 email: str,
                 title: str = None,
                 generator: str = None,
                 generator_version: str = None,
                 logo: str = None,
                 icon: str = None,
                 description: str = None,
                 language: str = None) -> None:
        self.name = name
        self.email = email

        self.fg = FeedGenerator()
        self.fg.id(url + "feed.atom")
        self.fg.link(href=url + "feed.xml", rel="self")
        self.fg.link(href=url, rel="alternate")
        self.fg.author(name=name, email=email)
        self.fg.contributor(name=name, email=email)
        self.fg.managingEditor(email)
        self.fg.webMaster(email)

        self.fg.title(title)
        self.fg.generator(generator=generator, version=generator_version)
        self.fg.logo(logo)
        self.fg.icon(icon)
        self.fg.description(description)
        self.fg.language(language)

    def add(self, article: Article) -> None:
        feed_entry = self.fg.add_entry()
        feed_entry.id(article.url)
        feed_entry.title(article.title)
        feed_entry.link(href=article.url)
        feed_entry.guid(guid=article.url, permalink=True)
        feed_entry.author(name=self.name, email=self.email)
        feed_entry.summary(article.description or article.snippet)
        feed_entry.content(content=article.content, type="CDATA")
        feed_entry.published(article.date)
        if article.date:
            feed_entry.published(article.date)
            feed_entry.updated(article.date)
        else:
            epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
            feed_entry.published(epoch)
            feed_entry.updated(epoch)

    def add_from_blog(self, url: str) -> None:
        blog = Blog(url)
        if not self.fg.title():
            self.fg.title(blog.title)
        for article in blog.articles:
            self.add(article)

    def atom(self) -> bytes:
        return self.fg.atom_str(pretty=True)

    def rss(self) -> bytes:
        return self.fg.rss_str(pretty=True)

    def atom_file(self, filename: str = "feed.atom") -> None:
        self.fg.atom_file(filename, pretty=True)

    def rss_file(self, filename: str = "feed.xml") -> None:
        self.fg.rss_file(filename, pretty=True)
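
A minimal usage sketch of the Feed class above; the URLs and metadata are placeholders, and Blog/Article are assumed to come from the same project:

# Build a feed, pull articles from a blog, and write both formats to disk.
feed = Feed(url='https://example.org/',
            name='Example Author',
            email='feed@example.org',
            title='Example feed',
            generator='feed', generator_version='0.1',
            logo='https://example.org/logo.png',
            icon='https://example.org/icon.png',
            description='Articles from example.org',
            language='en')
feed.add_from_blog('https://example.org/blog/')
feed.atom_file('feed.atom')
feed.rss_file('feed.xml')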
コード例 #56
0
ファイル: app.py プロジェクト: askvictor/pbsrss
def pbs_show(slug):
    print('processing %s' % slug)

    cache_glob = list(p.glob(slug + "*"))
    if cache_glob:
        recent_cache_path = sorted(cache_glob)[-1]
        cache_time_str = re.search(slug + '.([^\.]+).xml',
                                   recent_cache_path.name).group(1)
        cache_time = datetime.datetime.strptime(cache_time_str,
                                                '%Y-%m-%dT%H:%M:%S')
        print(cache_time_str)
        print(datetime.datetime.now().isoformat())
        if cache_time + datetime.timedelta(days=7) > datetime.datetime.now():
            # cached file is still valid; return that
            return send_file(recent_cache_path.open(),
                             mimetype='application/rss+xml')

    show_url = show_format.format(slug=slug)
    show_info = requests.get(show_url).json()
    show_title = show_info['name']

    feed = FeedGenerator()
    feed.load_extension('podcast')
    feed.podcast.itunes_category('Music')
    feed.id(show_url)
    feed.link(href=show_website_format.format(slug=slug), rel='alternate')
    feed.title(show_title)
    desc = show_info['description']
    presenters = show_info['broadcasters']
    if presenters:
        feed.author(name=presenters)
        feed.description(desc + "Presented by " + presenters + ".")
    else:
        feed.description(desc)

    feed.logo(show_info['profileImageUrl'])
    feed.language('en')

    episodes = requests.get(show_info['episodesRestUrl']).json()
    episode_times = []
    for episode in reversed(episodes):
        start_time = datetime.datetime.strptime(episode['start'],
                                                '%Y-%m-%d %H:%M:%S')
        episode_times.append(start_time)
        title = "{} {}".format(show_title, start_time.date())
        media_url = media_format.format(
            slug=slug, timestamp=start_time.strftime("%Y%m%d%H%M"))

        feed_entry = feed.add_entry()
        feed_entry.id(media_url)
        feed_entry.title(title)
        feed_entry.author(name=presenters)
        feed_entry.enclosure(media_url, 0, 'audio/mp4')
        try:
            ep_data = requests.get(episode['episodeRestUrl']).json()
            tracklist_data = requests.get(ep_data['playlistRestUrl']).json()
            tracklist = "<h3>Tracklist</h3>" + "<br>".join(
                [track['title'] for track in tracklist_data])
            feed_entry.description(tracklist)
        except:
            feed_entry.description(title)
    if episode_times:
        # remove all old cache files for this program
        for cachefile in p.glob(slug + "*"):
            cachefile.unlink()

        recent_ep_time = sorted(episode_times)[-1].isoformat()
        feed.rss_file(CACHE_DIR + "/" + slug + " " + recent_ep_time + ".xml",
                      pretty=True)
    return Response(feed.rss_str(pretty=True), mimetype='application/rss+xml')
コード例 #57
0
ファイル: main.py プロジェクト: eddwebster/wfpi
def generate_json_and_rss(event):
    bsnh = Airtable(at_base_key,
                    "Brazilian shirtname holders",
                    api_key=airtable_api_key).get_all(max_records=1000)
    simple_bsnh_unmapped = {b['id']: b['fields'] for b in bsnh}
    simple_bsnh = map_fields(simple_bsnh_unmapped, {
        'Name': "name",
        'Awarded': "awarded",
        'Awardee': "awardee"
    })

    episodes = Airtable(at_base_key, "Episodes",
                        api_key=airtable_api_key).get_all(max_records=1000)
    simple_episodes_unsorted = {e['id']: e['fields'] for e in episodes}
    simple_episodes_unmapped = {
        k: v
        for (k, v) in sorted(simple_episodes_unsorted.items(),
                             key=lambda x: x[1]['Date'],
                             reverse=True)
    }
    simple_episodes = map_fields(
        simple_episodes_unmapped, {
            'Date': 'd',
            'Podcast in archive': 'a',
            'Show took place': 't',
            'Presenter': 'p',
            'Experts': 'e',
            'Reason for show not airing': 'na',
            'Show particularities': 'sp'
        })
    for f in simple_episodes.values():
        if 'p' in f:
            f['p'] = f.pop('p')[0]

    experts = Airtable(at_base_key, "Experts",
                       api_key=airtable_api_key).get_all()
    simple_experts_unmapped = {
        e['id']: prune_field(e['fields'], "Episodes")
        for e in experts
    }
    simple_experts = map_fields(
        simple_experts_unmapped, {
            'Active': 'active',
            'Bio': 'bio',
            'Brazilian shirtname': 'bsn',
            'Instagram': 'instagram',
            'Name': 'name',
            'Region': 'region',
            'Twitter': 'twitter',
            'Website': 'website'
        })

    presenters = Airtable(at_base_key, "Presenters",
                          api_key=airtable_api_key).get_all()
    simple_presenters_unmapped = {
        p['id']: prune_field(p['fields'], "Episodes")
        for p in presenters
    }
    simple_presenters = map_fields(
        simple_presenters_unmapped, {
            'Name': 'name',
            'Brazilian shirtname': 'bsn',
            'Bio': 'bio',
            'Twitter': 'twitter'
        })

    full_data = {
        'bsnh': simple_bsnh,
        'episodes': simple_episodes,
        'experts': simple_experts,
        'presenters': simple_presenters
    }

    # Generate the RSS feed
    fg = FeedGenerator()
    fg.load_extension('podcast')
    fg.title('World Football Phone In Podcast Archive')
    fg.link(href='http://worldfootballphonein.com', rel='alternate')
    fg.logo('http://worldfootballphonein.com/img/profile.jpg')
    fg.subtitle(
        'Unofficial, fan-curated archive of World Football Phone In show.')
    fg.link(href='http://worldfootballphonein.com/podcasts/rss.xml',
            rel='self')
    fg.language('en')

    for e in simple_episodes.values():
        if 'a' not in e:
            continue

        e_url = f"https:// worldfootballphonein.com/podcasts/{e['d'].replace('-', '')}.mp3"
        fe = fg.add_entry()
        fe.id(e_url)
        fe.title('WFPI episode for ' + e['d'])
        fe.description('Footy talk.')
        fe.enclosure(e_url, 0, 'audio/mpeg')

    db_js = "var wfpiDB=" + json.dumps(full_data, separators=(',', ':')) + ";"

    if "GCP_PROJECT" in os.environ:
        storage_client = storage.Client()
        bucket = storage_client.bucket("wfpi-podcasts-archive")
        db = bucket.blob("db.js")
        db.upload_from_string(db_js)
        rss = bucket.blob("rss.xml")
        rss.upload_from_string(fg.rss_str())
    else:
        with open('db.js', 'w') as outfile:
            outfile.write(db_js)
        fg.rss_file('rss.xml')

    return "OK"
コード例 #58
0
    def index():
        global messages

        limit_tag = request.args.get('tag')

        # Sort by last_changed and add the uuid which is usually the key..
        sorted_watches = []
        for uuid, watch in datastore.data['watching'].items():

            if limit_tag != None:
                # Support for a comma-separated list of tags.
                for tag_in_watch in watch['tag'].split(','):
                    tag_in_watch = tag_in_watch.strip()
                    if tag_in_watch == limit_tag:
                        watch['uuid'] = uuid
                        sorted_watches.append(watch)

            else:
                watch['uuid'] = uuid
                sorted_watches.append(watch)

        sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True)

        existing_tags = datastore.get_all_tags()
        rss = request.args.get('rss')

        if rss:
            fg = FeedGenerator()
            fg.title('changedetection.io')
            fg.description('Feed description')
            fg.link(href='https://changedetection.io')

            for watch in sorted_watches:
                if not watch['viewed']:
                    fe = fg.add_entry()
                    fe.title(watch['url'])
                    fe.link(href=watch['url'])
                    fe.description(watch['url'])
                    fe.guid(watch['uuid'], permalink=False)
                    dt = datetime.datetime.fromtimestamp(
                        int(watch['newest_history_key']))
                    dt = dt.replace(tzinfo=pytz.UTC)
                    fe.pubDate(dt)

            response = make_response(fg.rss_str())
            response.headers.set('Content-Type', 'application/rss+xml')
            return response

        else:
            output = render_template(
                "watch-overview.html",
                watches=sorted_watches,
                messages=messages,
                tags=existing_tags,
                active_tag=limit_tag,
                has_unviewed=datastore.data['has_unviewed'])

            # Show messages but once.
            messages = []

        return output
コード例 #59
0
def weather(request, latitudelongitude):
	# API
	apiKey = 'db88a36f0e9a8b4b62252702452bab42'
	url = 'https://api.darksky.net/forecast/'
	googleMapsApiKey = 'AIzaSyDg9DQ1jVpiyjJtAhy01KhDOgWgYBy6tOw'

	# parse input
	positionParsed = parseQueryToPosition(latitudelongitude)
	positionObject = Position(positionParsed)

	# build URL call
	callURL = url + apiKey + '/' + positionObject.langitude + ',' + positionObject.longitude

	# Call API
	response = requests.get(callURL)

	# DEBUG: convert to Str type (which is json string) for debug purposes
	#json_string = json.dumps(response.json())
	#print (type(json_string))

	# Convert response string into JSON type
	weatherData = response.json()
	#print (weatherData.keys())

	# Get necessary info
	currentWeather = weatherData['currently']
	temperature = str(int(currentWeather['temperature']))
	summary = currentWeather['summary']

	# Get alert if any
	if 'alerts' in weatherData:
		alerts = weatherData['alerts'][0]
		alertsTitle = alerts['title']
		alertsURI = alerts['uri']
	else:
		alertsTitle = ''
	#print (alertsTitle + ' -- ' + alertsURI)

	# Weather hyperlink
	weatherHyperLink = 'http://forecast.weather.gov/MapClick.php?'
	query = { 'lat' : positionObject.langitude, 'lon' : positionObject.longitude}
	weatherHyperLink = weatherHyperLink + urllib.parse.urlencode(query)

	# Set RSS title
	title = ' | Temperature: ' + temperature + '°F | '+ 'Overall, it is ' + summary
	if alertsTitle != '':
	    title += ' | Watch out! ' + alertsTitle

	# Get place in GPS
	place = getplace(positionObject.langitude, positionObject.longitude, googleMapsApiKey)

	# Build RSS channel
	feedChannel = FeedGenerator()
	feedChannel.id('weather_' + str(time.time()))
	feedChannel.title('WeatherPy - ' + place)
	feedChannel.author( {'name':'Ben','email':'*****@*****.**'} )
	feedChannel.subtitle('Powered by Dark Sky API')
	feedChannel.link( href=weatherHyperLink, rel='self' )
	feedChannel.language('en')

	# Build RSS weather item/entry
	feedEntry = feedChannel.add_entry()
	feedEntry.id('weather_' + str(time.time()))
	feedEntry.title(place + title)
	feedEntry.link( href=weatherHyperLink, rel='self' )

	# Get the RSS feed as string
	rssFeed  = feedChannel.rss_str(pretty=True) 
	#print (rssfeed)

	# return as HTTPresponse
	return HttpResponse(rssFeed, content_type='application/xhtml+xml,application/xml')
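
parseQueryToPosition and Position are defined elsewhere in this module; a sketch consistent with how they are used above (note that the original attribute really is spelled "langitude"):

def parseQueryToPosition(latitudelongitude):
    # Assumption: the path segment looks like "40.7128,-74.0060".
    return latitudelongitude.split(',')


class Position:
    def __init__(self, parsed):
        # Keep the coordinates as strings so they can be concatenated
        # into the Dark Sky and forecast.weather.gov URLs above.
        self.langitude, self.longitude = parsed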
コード例 #60
0
)
fg.subtitle(
    'This site documents the Ministry of Justice (MoJ) security policies and guidance.'
)
fg.link(href='https://security-guidance.service.justice.gov.uk/', rel='self')
fg.language('en')
fg.contributor(name='Ministry of Justice',
               email='*****@*****.**')

with open('../changeLog.csv') as csvDataFile:
    csvReader = csv.reader(csvDataFile)
    loopCounter = 0
    entryList = []
    ditaEntries = []
    for row in csvReader:
        entryList.append(fg.add_entry())
        entryList[loopCounter].id("" + siteURL + row[0] + "")
        entryList[loopCounter].title("" + row[1] + "")
        entryList[loopCounter].description("" + row[2] + "")
        entryList[loopCounter].link(href="" + siteURL + row[3] + "")
        entryList[loopCounter].pubDate("" + row[4] + "")
        ditaEntries.insert(
            0, "<dlentry><dt>" + row[4] + " <xref href='" + siteURL + row[3] +
            "' format='html' scope='external'>" + row[1] + "</xref></dt><dd>" +
            row[2] + "</dd></dlentry>")
        loopCounter = loopCounter + 1

atomfeed = fg.atom_str(pretty=True)
rssfeed = fg.rss_str(pretty=True)
fg.atom_file('atom.xml')
fg.rss_file('rss.xml')