コード例 #1
0
ファイル: commands.py プロジェクト: wonderpl/dolly-web
def get_youtube_video_data(video_qs, start):
    """Refresh stored metadata for videos not updated since ``start``.

    Works through ``video_qs`` in batches of at most 50 rows whose
    ``date_updated`` is earlier than ``start``.  Each batch is looked up
    with a single ``batch_query`` call, and every returned entry stamps its
    video with the current time (which is what removes the row from the
    next batch's filter, assuming ``start`` is not in the future).

    Per entry, depending on the batch status code:

    * ``'200'``: the author name and publish date are stored; the video is
      hidden when the entry carries a ``noembed`` extension element.
    * ``'404'``: the video is hidden and an empty ``source_username`` is
      replaced with a placeholder.
    * anything else: the reason is logged as a warning and processing
      pauses for one second.

    Changes are committed once per batch.  After a full batch of 50 the
    function sleeps 60 seconds before fetching the next one; it returns
    when no stale rows remain.
    """
    wanted_fields = 'atom:entry(batch:status,atom:id,atom:author(name),atom:published,yt:noembed)'
    while True:
        stale_rows = video_qs.filter(Video.date_updated < start).limit(50)
        by_source_id = {}
        for row in stale_rows:
            by_source_id[row.source_videoid] = row
        if not by_source_id:
            return

        request_ids = [('videos', key) for key in by_source_id]
        response = batch_query(request_ids, {'fields': wanted_fields})
        for entry in response.entry:
            # The trailing 11 characters of the entry id are used as the
            # lookup key into this batch (keyed by source_videoid).
            video_id = entry.id.text[-11:]
            video = by_source_id[video_id]
            video.date_updated = datetime.now()
            status = entry.batch_status
            if status.code == '200':
                video.source_username = entry.author[0].name.text
                video.date_published = _parse_datetime(entry.published.text)
                if any(ext.tag == 'noembed' for ext in entry.extension_elements):
                    app.logger.info('%s: marked not visible: noembed', video_id)
                    video.visible = False
            elif status.code == '404':
                app.logger.info('%s: marked not visible: %s', video_id, status.reason)
                if not video.source_username:
                    video.source_username = '******'
                video.visible = False
            else:
                app.logger.warning('%s: %s', video_id, status.reason)
                time.sleep(1)

        Video.query.session.commit()
        if len(by_source_id) == 50:
            # A full batch suggests more rows are waiting; pause between
            # batches before querying again.
            time.sleep(60)
コード例 #2
0
ファイル: commands.py プロジェクト: wonderpl/dolly-web
def import_google_movies():
    """Import recent trailers found on Google Movies listing pages.

    For every ``(channel id, location)`` pair in the
    ``GOOGLE_MOVIE_LOCATIONS`` setting, the listing at ``GOOGLE_MOVIE_URL``
    is fetched page by page (following the "Next" link) and the 11
    character YouTube ids found in its links are collected.  Ids not
    already present in the channel are looked up with ``batch_query``.

    A returned video is kept only if it was published after the freshness
    cutoff (``GOOGLE_MOVIE_FRESHOLD`` days ago, default 120) and, when its
    title contains a year such as `` (2013)``, that year is not earlier
    than the cutoff's year.  Kept videos are passed to ``Video.add_videos``
    and ``channel.add_videos``; skipped ones are logged at debug level.
    """
    max_age_days = app.config.get('GOOGLE_MOVIE_FRESHOLD', 120)
    freshold = datetime.now() - timedelta(days=max_age_days)

    # Raw strings keep the regex escapes (\(, \d) out of Python's own
    # string-escape processing; the patterns are compiled once because they
    # do not change between pages or locations.
    year_format = re.compile(r' \((20\d\d)\)')
    trailer_link = re.compile(r'youtube\.com/watch%3Fv%3D(.{11})')
    next_page_link = re.compile(r'<a [^>]*start=(\d+)[^>]*><img[^>]*><br>Next</a>')

    for channelid, location in app.config['GOOGLE_MOVIE_LOCATIONS']:
        start = 0
        video_ids = set()
        channel = Channel.query.get(channelid)
        existing = set(v for v, in VideoInstance.query.
                       filter_by(channel=channelid).join(Video).values('source_videoid'))

        # Walk the paginated listing until there is no "Next" link.
        while True:
            url = app.config['GOOGLE_MOVIE_URL'] % (location, start)
            html = get_external_resource(url).read()
            video_ids.update(trailer_link.findall(html))
            next_page = next_page_link.search(html)
            if not next_page:
                break
            start = int(next_page.group(1))
            time.sleep(1)   # Don't get blocked by google

        feed_ids = [('videos', video_id) for video_id in video_ids - existing]
        if feed_ids:
            playlist = batch_query(feed_ids, playlist='Googlemovietrailers/uploads')
            videos = []
            for video in playlist.videos:
                year_match = year_format.search(video.title)
                if video.date_published > freshold and (
                        not year_match or int(year_match.group(1)) >= freshold.year):
                    videos.append(video)
                else:
                    app.logger.debug('Skipped import of trailer "%s" (%s)',
                                     video.title, video.date_published)
            # NOTE(review): the meaning of the second argument (1) is not
            # visible from here -- confirm against Video.add_videos.
            added = Video.add_videos(videos, 1)
            channel.add_videos(videos)
            app.logger.info('Added %d trailers to "%s"', added, channel.title)