Example #1
    def update(self, feedback):
        orig_video = feedback.video()

        if orig_video is None or type(orig_video).__name__ != "Video":
            return False
        readable_id = orig_video.readable_id
        query = Video.all()
        query.filter('readable_id =', readable_id)
        # The database currently contains multiple Video objects for a
        # particular video.  Some are old.  Some are due to a YouTube sync
        # where the youtube urls changed and our code was producing youtube_ids
        # that ended with '_player'.  This hack gets the most recent valid
        # Video object.
        video = None  # stays None if the query yields no valid candidate
        key_id = 0
        for v in query:
            if v.key().id() > key_id and not v.youtube_id.endswith('_player'):
                video = v
                key_id = v.key().id()
        # End of hack
        if video is not None and video.key() != orig_video.key():
            logging.info("Retargeting Feedback %s from Video %s to Video %s",
                         feedback.key().id(),
                         orig_video.key().id(),
                         video.key().id())
            feedback.targets[0] = video.key()
            return True
        else:
            return False
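For context, an updater like this is presumably driven in bulk over Feedback entities. A minimal driver sketch (the FeedbackVideoUpdater name and the batch size are assumptions, not taken from the source):

    def retarget_all_feedback():
        # Hypothetical driver: walk a batch of Feedback entities and
        # re-point any that reference a stale Video, writing back only
        # the entities the updater actually changed.
        updater = FeedbackVideoUpdater()  # assumed owner of update()
        changed = [f for f in Feedback.all().fetch(500)
                   if updater.update(f)]
        if changed:
            db.put(changed)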
Example #2
    def get(self, readable_id=""):
        # This method displays a video in the context of a particular topic.
        # To do that we first need to find the appropriate topic.  If we aren't
        # given the topic title in a query param, we need to find a topic that
        # the video is a part of.  That requires finding the video, given its
        # readable_id or, to support old URLs, its youtube_id.
        video = None
        video_id = self.request.get('v')
        topic_id = self.request_string('topic', default="")
        readable_id = urllib.unquote(readable_id)

        # remove any trailing dashes (see issue 1140)
        readable_id = re.sub('-+$', '', readable_id)

        # If either the readable_id or topic title is missing,
        # redirect to the canonical URL that contains them
        if video_id:  # Support for old links
            query = Video.all()
            query.filter('youtube_id =', video_id)
            video = query.get()

            if not video:
                raise MissingVideoException(
                    "Missing video w/ youtube id '%s'" % video_id)

            readable_id = video.readable_id
            topic = video.first_topic()

            if not topic:
                raise MissingVideoException(
                    "No topic has video w/ youtube id '%s'" % video_id)

        ViewVideo.show_video(self, readable_id, topic_id, True)
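The trailing-dash cleanup above is easy to sanity-check interactively (a quick illustration, not from the source):

    >>> re.sub('-+$', '', 'adding-subtracting-negative-numbers--')
    'adding-subtracting-negative-numbers'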
Example #5
 def test_derive_key_name_from_video(self):
     self._set_responses_xrange(BATCH_SIZE)
     _task_handler('UUID')
     videos = Video.all().fetch(BATCH_SIZE)
     for v in videos:
         key = VideoSubtitles.get_key_name('en', v.youtube_id)
         subs = VideoSubtitles.get_by_key_name(key)
         self.assertIsNotNone(subs)
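The test relies on VideoSubtitles.get_key_name producing a deterministic key per (language, video) pair. Its real implementation isn't shown on this page; a plausible shape, purely as an assumption:

    @staticmethod
    def get_key_name(language, youtube_id):
        # Hypothetical: a deterministic key name lets later fetch jobs
        # overwrite the same subtitles record in place.
        return '%s:%s' % (language, youtube_id)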
Example #7
    def update_video(self, video_youtube_id):
        v = Video.all().filter('youtube_id =', video_youtube_id).get()
        if v is not None:
            if v.key() not in self.videos:
                self.videos.append(v.key())
        else:
            logging.info("Youtube ID %s not in datastore" % video_youtube_id)

        return
Example #9
    def test_process_next_batch_on_nonempty_cursor(self):
        offset = 3

        # these should be skipped, they'll DownloadError
        for i in xrange(0, offset):
            Video(youtube_id=str(i)).put()

        # these should be downloaded
        self._set_responses_xrange(offset, BATCH_SIZE + offset)

        query = Video.all()
        query.fetch(offset)
        cursor = query.cursor()

        _task_handler('UUID', cursor=cursor)
        self.assertEqual(VideoSubtitles.all().count(), BATCH_SIZE)
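The fetch-then-cursor trick in this test is worth seeing in isolation: fetching entities advances the query's cursor past them, so a cursor taken afterwards resumes where the fetch stopped. A sketch using the same datastore API as above:

    query = Video.all()
    skipped = query.fetch(3)        # consume the first 3 videos
    resume = query.cursor()         # opaque string, safe to pass around

    rest = Video.all()
    rest.with_cursor(resume)        # continues after the 3rd video
    next_batch = rest.fetch(10)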
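Example #11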
def youtube_get_video_data_dict(youtube_id):
    yt_service = gdata.youtube.service.YouTubeService()

    # Now that we run these queries from the App Engine servers, we need to
    # explicitly specify our developer_key to avoid being lumped together
    # with the rest of GAE and throttled by YouTube's "Too many requests"
    # quota.
    yt_service.developer_key = "AI39si6ctKTnSR_Vx7o7GpkpeSZAKa6xjbZz6WySzTvKVYRDAO7NHBVwofphk82oP-OSUwIZd0pOJyNuWK8bbOlqzJc9OFozrQ"
    yt_service.client_id = "n/a"

    logging.info("trying to get info for youtube_id: %s" % youtube_id)
    try:
        video = yt_service.GetYouTubeVideoEntry(video_id=youtube_id)
    except Exception:
        # Any lookup failure (bad id, network error) yields None.
        video = None
    if video:
        video_data = {"youtube_id" : youtube_id,
                      "title" : video.media.title.text.decode('utf-8'),
                      "url" : video.media.player.url.decode('utf-8'),
                      "duration" : int(video.media.duration.seconds)}

        if video.statistics:
            video_data["views"] = int(video.statistics.view_count)

        video_data["description"] = (video.media.description.text or '').decode('utf-8')
        video_data["keywords"] = (video.media.keywords.text or '').decode('utf-8')

        potential_id = re.sub('[^a-z0-9]', '-', video_data["title"].lower())
        potential_id = re.sub('-+$', '', potential_id)  # remove trailing dashes (see issue 1140)
        potential_id = re.sub('^-+', '', potential_id)  # remove leading dashes (see issue 1526)

        number_to_add = 0
        current_id = potential_id
        while True:
            query = Video.all()
            query.filter('readable_id =', current_id)
            if query.get() is None:
                # id is unique, so use it and break out
                video_data["readable_id"] = current_id
                break
            else:
                # id is taken; append a numeric suffix and try again
                number_to_add += 1
                current_id = '%s-%i' % (potential_id, number_to_add)

        return video_data

    return None
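A hedged usage sketch for the helper above (the YouTube id is an arbitrary placeholder, and the Video constructor fields are inferred from the properties used elsewhere on this page):

    data = youtube_get_video_data_dict('dQw4w9WgXcQ')
    if data is not None:
        video = Video(youtube_id=data['youtube_id'],
                      title=data['title'],
                      url=data['url'],
                      duration=data['duration'],
                      readable_id=data['readable_id'])
        video.put()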
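Example #13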
    def updateVideoStats(self):
        yt_service = third_party.gdata.youtube.service.YouTubeService()
        # Now that we run these queries from the App Engine servers, we need
        # to explicitly specify our developer_key to avoid being lumped
        # together with the rest of GAE and throttled by YouTube's "Too many
        # requests" quota.
        yt_service.developer_key = "AI39si6ctKTnSR_Vx7o7GpkpeSZAKa6xjbZz6WySzTvKVYRDAO7NHBVwofphk82oP-OSUwIZd0pOJyNuWK8bbOlqzJc9OFozrQ"
        yt_service.client_id = "n/a"

        videos_to_put = set()

        # Fetch now, otherwise the query times out later while doing the
        # YouTube requests.  Theoretically we could also change this code to
        # use the Mapper class:
        # http://code.google.com/appengine/articles/deferred.html
        for i, video in enumerate(Video.all().fetch(100000)):
            entry = None
            youtube_id = video.youtube_id

            # Truncate the youtube_id at 11 characters to handle _DUP_x
            # suffixes; handling the _DUPs makes it easier to detect content
            # problems when duration = 0.
            if re.search(r"_DUP_\d*$", youtube_id):
                youtube_id = youtube_id[0:11]

            try:
                entry = yt_service.GetYouTubeVideoEntry(video_id=youtube_id)

            except Exception, e:
                logging.info("Error trying to get %s: %s" % 
                            (youtube_id, e))
            
            if entry:
                count = int(entry.statistics.view_count)
                if count != video.views:
                    logging.info("%i: Updating %s from %i to %i views" % 
                                (i, video.title, video.views, count)) 
                    video.views = count
                    videos_to_put.add(video)
                
                duration = int(entry.media.duration.seconds)
                if duration != video.duration:
                    video.duration = duration
                    videos_to_put.add(video)
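The excerpt accumulates dirty entities in videos_to_put but ends before writing them; presumably the method finishes with a batch write along these lines (an assumption, since the tail isn't shown):

        # Flush every modified Video entity in one batch write.
        if videos_to_put:
            db.put(list(videos_to_put))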
Example #15
def _task_handler(uid, task_id=0, cursor=None, report=None):
    """Task chain for fetching subtitles from the Universal Subtitles API

    It processes Video models in batches of BATCH_SIZE by fetching the English
    subtitles via an HTTP API call.

    This job runs regularly, so fetch failures from one run are retried on
    the next.  Failures are logged and suppressed as the task marches on.

    Errors include URL fetch timeouts, subtitles put failures, and response
    decoding failures.

    HTTP redirects indicate that the code needs updating to a new API endpoint.
    They are detected and reported separately.
    """

    query = Video.all()
    query.with_cursor(cursor)
    videos = query.fetch(BATCH_SIZE)

    if report is None:
        report = dict(REPORT_TEMPLATE)
        VideoSubtitlesFetchReport(key_name=uid, **report).put()

    report = download_subtitles(videos, report)

    # Generate a report if there is nothing left to process

    if len(videos) < BATCH_SIZE:
        deferred.defer(_task_report_handler, uid, report,
                       _name='%s_report' % uid, _queue=TASK_QUEUE)
    else:
        next_id = task_id + 1
        cursor = query.cursor()
        deferred.defer(_task_handler, uid, next_id, cursor, report,
                       _name='%s_%s' % (uid, next_id),
                       _queue=TASK_QUEUE,
                       _countdown=DEFER_SECONDS)
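For context, a chain like this is typically seeded by a single initial deferred call; a minimal kickoff sketch (the helper itself is an assumption, though the naming mirrors the uid/_name pattern above):

    import uuid

    def start_subtitles_fetch():
        # Seed the chain: task_id and cursor take their defaults, and
        # the uid doubles as the key_name of the eventual report.
        uid = uuid.uuid4().hex
        deferred.defer(_task_handler, uid,
                       _name='%s_start' % uid, _queue=TASK_QUEUE)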
Example #16
    def get(self):
        from exercises import attempt_problem

        login_user = UserData.current()
        exercises_list = [exercise for exercise in Exercise.all()]
        videos_list = [video for video in Video.all()]

        user_count = self.request_int('users', 5)
        for user_id in xrange(0, user_count):
            # Create a new user
            first_name = random.choice(CreateRandomGoalData.first_names)
            last_name = random.choice(CreateRandomGoalData.last_names)
            nickname = "%s %s" % (first_name, last_name)
            email = 'test_%i@automatedrandomdata' % user_id
            user = users.User(email)

            logging.info("Creating user %s: (%i/%i)"
                % (nickname, user_id + 1, user_count))

            user_data = UserData.get_or_insert(
                key_name="test_user_%i" % user_id,
                user=user,
                current_user=user,
                user_id=str(user_id),
                moderator=False,
                last_login=datetime.now(),
                proficient_exercises=[],
                suggested_exercises=[],
                need_to_reassess=True,
                points=0,
                coaches=[login_user.user_email],
                user_email=email,
                user_nickname=nickname,
                )
            user_data.put()

            # Delete user exercise & video progress
            query = UserExercise.all()
            query.filter('user =', user)
            db.delete(query.fetch(1000))

            query = VideoLog.all()
            query.filter('user =', user)
            db.delete(query.fetch(1000))

            for goal_idx in xrange(1, random.randint(2, 5)):
                # Reconstructed from a garbled span in the source: build a
                # random mix of exercise and video objectives per goal.  The
                # deletion calls and goal count above are assumptions.
                obj_descriptors = []

                for objective in xrange(1, random.randint(2, 4)):
                    obj_descriptors.append({
                        'type': 'GoalObjectiveExerciseProficiency',
                        'exercise': random.choice(exercises_list)})

                for objective in xrange(1, random.randint(2, 4)):
                    obj_descriptors.append({
                        'type': 'GoalObjectiveWatchVideo',
                        'video': random.choice(videos_list)})

                title = first_name + "'s Goal #" + str(goal_idx)
                logging.info("Creating goal " + title)

                objectives = GoalObjective.from_descriptors(obj_descriptors,
                    user_data)
                goal = Goal(parent=user_data, title=title,
                    objectives=objectives)
                user_data.save_goal(goal)

                for objective in obj_descriptors:
                    if objective['type'] == 'GoalObjectiveExerciseProficiency':
                        user_exercise = user_data.get_or_insert_exercise(
                            objective['exercise'])
                        chooser = random.randint(1, 120)
                        if chooser >= 60:
                            # ~51%: leave this exercise untouched
                            continue
                        elif chooser > 15:
                            # ~37%: attempt a single problem
                            count = 1
                            hints = 0
                        elif chooser < 7:
                            # 5%: grind through 20 problems
                            count = 20
                            hints = 0
                        else:
                            # ~7.5%: 25 problems, with hints
                            count = 25
                            hints = 1
                        logging.info(
                            "Starting exercise: %s (%i problems, %i hints)" %
                            (objective['exercise'].name, count, hints * count))
                        for i in xrange(1, count):
                            attempt_problem(
                                user_data, user_exercise, i, 1, 'TEST', 'TEST',
                                'TEST', True, hints, 0, False, False, "TEST",
                                '0.0.0.0')

                    elif objective['type'] == 'GoalObjectiveWatchVideo':
                        seconds = random.randint(1, 1200)
                        logging.info("Watching %i seconds of video %s"
                            % (seconds, objective['video'].title))
                        VideoLog.add_entry(user_data, objective['video'],
                            seconds, 0, detect_cheat=False)

        self.response.out.write('OK')
Example #17
 def get(self):
     self.response.out.write('<html>')
     videos = Video.all()
     for video in videos:
         self.response.out.write('<P>Title: ' + video.title)
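As written, the handler above never closes the tag and writes titles unescaped; a slightly more defensive variant of the same debug dump (a sketch, not from the source):

    import cgi

    def get(self):
        self.response.out.write('<html>')
        for video in Video.all():
            self.response.out.write('<p>Title: %s</p>'
                                    % cgi.escape(video.title))
        self.response.out.write('</html>')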
Example #19
 def get(self):
     # The early return below appears to be a deliberate safety latch: it
     # leaves the bulk keys-only delete unreachable until someone removes
     # the return by hand.
     return
     db.delete(Video.all(keys_only=True))
Example #21
def _task_handler(uid, task_id=0, cursor=None, report=None):
    """Task chain for fetching subtitles from the Universal Subtitles API

    It processes Video models in batches of BATCH_SIZE by fetching the English
    subtitles via an HTTP API call.

    This job runs regularly, so fetch failures from one run are retried on
    the next.  Failures are logged and suppressed as the task marches on.

    Errors include URL fetch timeouts, subtitles put failures, and response
    decoding failures.

    HTTP redirects indicate that the code needs updating to a new API endpoint.
    They are detected and reported separately.
    """

    query = Video.all()
    query.with_cursor(cursor)
    videos = query.fetch(BATCH_SIZE)

    if report is None:
        report = dict(REPORT_TEMPLATE)
        VideoSubtitlesFetchReport(key_name=uid, **report).put()

    # Asynchronously fetch. We'll rate-limit by fetching BATCH_SIZE subtitles
    # at each DEFER_SECONDS interval

    rpcs = []
    for video in videos:
        url = UNISUBS_URL % urllib.quote(YOUTUBE_URL % video.youtube_id)
        rpc = urlfetch.create_rpc(deadline=TIMEOUT_SECONDS)
        urlfetch.make_fetch_call(rpc, url)
        rpcs.append((video.youtube_id, rpc))
        report['fetches'] += 1

    # Process asynchronous fetches

    for youtube_id, rpc in rpcs:
        lang = 'en'
        key_name = VideoSubtitles.get_key_name(lang, youtube_id)
        try:
            resp = rpc.get_result()
            if resp.status_code != 200:
                raise RuntimeError('status code: %s' % resp.status_code)

            if resp.final_url:
                logging.warn('%s redirect to %s' % (key_name, resp.final_url))
                report['redirects'] += 1

            json = resp.content.decode('utf-8')

            # Only update stale records

            current = VideoSubtitles.get_by_key_name(key_name)
            if not current or current.json != json:
                new = VideoSubtitles(key_name=key_name, youtube_id=youtube_id,
                                     language=lang, json=json)
                new.put()
                report['writes'] += 1
            else:
                logging.info('%s content already up-to-date' % key_name)
        except Exception, e:
            logging.error('%s subtitles fetch failed: %s' % (key_name, e))
            report['errors'] += 1
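The report dict threaded through the chain is seeded from REPORT_TEMPLATE; given the counters incremented above, the template presumably looks like this (an assumption — only these keys are visible in the handler):

    REPORT_TEMPLATE = {
        'fetches': 0,    # URL fetch RPCs issued
        'writes': 0,     # VideoSubtitles records created or updated
        'errors': 0,     # failed fetches, decodes, or puts
        'redirects': 0,  # responses that landed on a different URL
    }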