Exemple #1
0
 def test_derive_key_name_from_video(self):
     self._set_responses_xrange(BATCH_SIZE)
     _task_handler('UUID')
     videos = Video.all().fetch(BATCH_SIZE)
     for v in videos:
         key = VideoSubtitles.get_key_name('en', v.youtube_id)
         subs = VideoSubtitles.get_by_key_name(key)
         self.assertIsNotNone(subs)
Exemple #2
0
    def test_assume_utf8_encoded_content(self):
        # Universal Subtitles API returns utf-8
        # u'\xc4\xd0' is unicode for the utf-8 byte string '\xc3\x84\xc3\x90'
        utf8_str = '\xc3\x84\xc3\x90'
        unicode_str = u'\xc4\xd0'

        self._set_responses_xrange(1, content=utf8_str)

        _task_handler('UUID')
        self.assertEqual(VideoSubtitles.all().count(), 1)
        subs = VideoSubtitles.all().get()
        self.assertEqual(subs.json, unicode_str)
Exemple #3
0
    def test_process_next_batch_on_nonempty_cursor(self):
        offset = 3

        # these should be skipped, they'll DownloadError
        for i in xrange(0, offset):
            Video(youtube_id=str(i)).put()

        # these should be downloaded
        self._set_responses_xrange(offset, BATCH_SIZE + offset)

        query = Video.all()
        query.fetch(offset)
        cursor = query.cursor()

        _task_handler('UUID', cursor=cursor)
        self.assertEqual(VideoSubtitles.all().count(), BATCH_SIZE)
Exemple #4
0
    def test_should_not_put_duplicate_subtitles(self, info):
        self._set_responses_xrange(BATCH_SIZE, content="some json")

        # first fetch
        _task_handler('UUID', 0)
        self.assertEqual(VideoSubtitles.all().count(), BATCH_SIZE)
        self.assertEqual(info.call_count, 0)

        with patch('unisubs.VideoSubtitles') as MockVideoSubtitles:
            MockVideoSubtitles.get_key_name = VideoSubtitles.get_key_name
            MockVideoSubtitles.get_by_key_name = VideoSubtitles.get_by_key_name
            # second fetch, same content
            _task_handler('UUID', 1)
            self.assertEqual(MockVideoSubtitles.return_value.put.call_count, 0,
                             'duplicate subtitles should not be put()')
            self.assertEqual(info.call_count, BATCH_SIZE,
                             'skipped put should be logged')
Exemple #5
0
def _task_handler(uid, task_id=0, cursor=None, report=None):
    """Task chain for fetching subtitles from the Universal Subtitles API

    It processes Video models in batches of BATCH_SIZE by fetching the English
    subtitles via an HTTP API call.

    This job runs regularly so fetch failures are fixed from run-to-run.  Fetch
    failures are logged and suppressed as the task marches on.

    Errors include URL fetch timeouts, subtitles put failures, and response
    decoding failures.

    HTTP redirects indicate that the code needs updating to a new API endpoint.
    They are detected and reported separately.
    """

    query = Video.all()
    query.with_cursor(cursor)
    videos = query.fetch(BATCH_SIZE)

    if report is None:
        report = dict(REPORT_TEMPLATE)
        VideoSubtitlesFetchReport(key_name=uid, **report).put()

    # Asynchronously fetch. We'll rate-limit by fetching BATCH_SIZE subtitles
    # at each DEFER_SECONDS interval

    rpcs = []
    for video in videos:
        url = UNISUBS_URL % urllib.quote(YOUTUBE_URL % video.youtube_id)
        rpc = urlfetch.create_rpc(deadline=TIMEOUT_SECONDS)
        urlfetch.make_fetch_call(rpc, url)
        rpcs.append((video.youtube_id, rpc))
        report['fetches'] += 1

    # Process asynchronous fetches

    for youtube_id, rpc in rpcs:
        lang = 'en'
        key_name = VideoSubtitles.get_key_name(lang, youtube_id)
        try:
            resp = rpc.get_result()
            if resp.status_code != 200:
                raise RuntimeError('status code: %s' % resp.status_code)

            if resp.final_url:
                logging.warn('%s redirect to %s' % (key_name, resp.final_url))
                report['redirects'] += 1

            json = resp.content.decode('utf-8')

            # Only update stale records

            current = VideoSubtitles.get_by_key_name(key_name)
            if not current or current.json != json:
                new = VideoSubtitles(key_name=key_name,
                                     youtube_id=youtube_id,
                                     language=lang,
                                     json=json)
                new.put()
                report['writes'] += 1
            else:
                logging.info('%s content already up-to-date' % key_name)
        except Exception, e:
            logging.error('%s subtitles fetch failed: %s' % (key_name, e))
            report['errors'] += 1
Exemple #6
0
def _task_handler(uid, task_id=0, cursor=None, report=None):
    """Task chain for fetching subtitles from the Universal Subtitles API

    It processes Video models in batches of BATCH_SIZE by fetching the English
    subtitles via an HTTP API call.

    This job runs regularly so fetch failures are fixed from run-to-run.  Fetch
    failures are logged and suppressed as the task marches on.

    Errors include URL fetch timeouts, subtitles put failures, and response
    decoding failures.

    HTTP redirects indicate that the code needs updating to a new API endpoint.
    They are detected and reported separately.
    """

    query = Video.all()
    query.with_cursor(cursor)
    videos = query.fetch(BATCH_SIZE)

    if report is None:
        report = dict(REPORT_TEMPLATE)
        VideoSubtitlesFetchReport(key_name=uid, **report).put()

    # Asynchronously fetch. We'll rate-limit by fetching BATCH_SIZE subtitles
    # at each DEFER_SECONDS interval

    rpcs = []
    for video in videos:
        url = UNISUBS_URL % urllib.quote(YOUTUBE_URL % video.youtube_id)
        rpc = urlfetch.create_rpc(deadline=TIMEOUT_SECONDS)
        urlfetch.make_fetch_call(rpc, url)
        rpcs.append((video.youtube_id, rpc))
        report['fetches'] += 1

    # Process asynchronous fetches

    for youtube_id, rpc in rpcs:
        lang = 'en'
        key_name = VideoSubtitles.get_key_name(lang, youtube_id)
        try:
            resp = rpc.get_result()
            if resp.status_code != 200:
                raise RuntimeError('status code: %s' % resp.status_code)

            if resp.final_url:
                logging.warn('%s redirect to %s' % (key_name, resp.final_url))
                report['redirects'] += 1

            json = resp.content.decode('utf-8')

            # Only update stale records

            current = VideoSubtitles.get_by_key_name(key_name)
            if not current or current.json != json:
                new = VideoSubtitles(key_name=key_name, youtube_id=youtube_id,
                                     language=lang, json=json)
                new.put()
                report['writes'] += 1
            else:
                logging.info('%s content already up-to-date' % key_name)
        except Exception, e:
            logging.error('%s subtitles fetch failed: %s' % (key_name, e))
            report['errors'] += 1
Exemple #7
0
    def get(self, readable_id=""):

        # This method displays a video in the context of a particular topic.
        # To do that we first need to find the appropriate topic.  If we aren't
        # given the topic title in a query param, we need to find a topic that
        # the video is a part of.  That requires finding the video, given it readable_id
        # or, to support old URLs, it's youtube_id.
        video = None
        topic = None
        video_id = self.request.get('v')
        topic_id = self.request_string('topic', default="")
        readable_id = urllib.unquote(readable_id).decode("utf-8")
        readable_id = re.sub(
            '-+$', '',
            readable_id)  # remove any trailing dashes (see issue 1140)

        # If either the readable_id or topic title is missing,
        # redirect to the canonical URL that contains them
        redirect_to_canonical_url = False
        if video_id:  # Support for old links
            query = Video.all()
            query.filter('youtube_id =', video_id)
            video = query.get()

            if not video:
                raise MissingVideoException(
                    "Missing video w/ youtube id '%s'" % video_id)

            readable_id = video.readable_id
            topic = video.first_topic()

            if not topic:
                raise MissingVideoException(
                    "No topic has video w/ youtube id '%s'" % video_id)

            redirect_to_canonical_url = True

        if topic_id is not None and len(topic_id) > 0:
            topic = Topic.get_by_id(topic_id)
            key_id = 0 if not topic else topic.key().id()

        # If a topic_id wasn't specified or the specified topic wasn't found
        # use the first topic for the requested video.
        if topic is None:
            # Get video by readable_id just to get the first topic for the video
            video = Video.get_for_readable_id(readable_id)
            if video is None:
                raise MissingVideoException("Missing video '%s'" % readable_id)

            topic = video.first_topic()
            if not topic:
                raise MissingVideoException("No topic has video '%s'" %
                                            readable_id)

            redirect_to_canonical_url = True

        exid = self.request_string('exid', default=None)

        if redirect_to_canonical_url:
            qs = {'topic': topic.id}
            if exid:
                qs['exid'] = exid

            urlpath = "/video/%s" % urllib.quote(readable_id)
            url = urlparse.urlunparse(
                ('', '', urlpath, '', urllib.urlencode(qs), ''))
            self.redirect(url, True)
            return

        # If we got here, we have a readable_id and a topic, so we can display
        # the topic and the video in it that has the readable_id.  Note that we don't
        # query the Video entities for one with the requested readable_id because in some
        # cases there are multiple Video objects in the datastore with the same readable_id
        # (e.g. there are 2 "Order of Operations" videos).
        videos = Topic.get_cached_videos_for_topic(topic)
        previous_video = None
        next_video = None
        for v in videos:
            if v.readable_id == readable_id:
                v.selected = 'selected'
                video = v
            elif video is None:
                previous_video = v
            else:
                next_video = v
                break

        # If we're at the beginning or end of a topic, show the adjacent topic.
        # previous_topic/next_topic are the topic to display.
        # previous_video_topic/next_video_topic are the subtopics the videos
        # are actually in.
        previous_topic = None
        previous_video_topic = None
        next_topic = None
        next_video_topic = None

        if not previous_video:
            previous_topic = topic
            while not previous_video:
                previous_topic = previous_topic.get_previous_topic()
                if previous_topic:
                    (previous_video, previous_video_topic
                     ) = previous_topic.get_last_video_and_topic()
                else:
                    break

        if not next_video:
            next_topic = topic
            while not next_video:
                next_topic = next_topic.get_next_topic()
                if next_topic:
                    (next_video, next_video_topic
                     ) = next_topic.get_first_video_and_topic()
                else:
                    break

        if video is None:
            raise MissingVideoException("Missing video '%s'" % readable_id)

        if App.offline_mode:
            video_path = "/videos/" + get_mangled_topic_name(
                topic.id) + "/" + video.readable_id + ".flv"
        else:
            video_path = video.download_video_url()

        if video.description == video.title:
            video.description = None

        related_exercises = video.related_exercises()
        button_top_exercise = None
        if related_exercises:

            def ex_to_dict(exercise):
                return {
                    'name': exercise.display_name,
                    'url': exercise.relative_url,
                }

            button_top_exercise = ex_to_dict(related_exercises[0])

        user_video = UserVideo.get_for_video_and_user_data(
            video, UserData.current(), insert_if_missing=True)

        awarded_points = 0
        if user_video:
            awarded_points = user_video.points

        subtitles_key_name = VideoSubtitles.get_key_name(
            'en', video.youtube_id)
        subtitles = VideoSubtitles.get_by_key_name(subtitles_key_name)
        subtitles_json = None
        if subtitles:
            subtitles_json = subtitles.load_json()

        template_values = {
            'topic': topic,
            'video': video,
            'videos': videos,
            'video_path': video_path,
            'video_points_base': consts.VIDEO_POINTS_BASE,
            'subtitles_json': subtitles_json,
            'button_top_exercise': button_top_exercise,
            'related_exercises': [],  # disabled for now
            'previous_topic': previous_topic,
            'previous_video': previous_video,
            'previous_video_topic': previous_video_topic,
            'next_topic': next_topic,
            'next_video': next_video,
            'next_video_topic': next_video_topic,
            'selected_nav_link': 'watch',
            'awarded_points': awarded_points,
            'issue_labels': ('Component-Videos,Video-%s' % readable_id),
            'author_profile': 'https://plus.google.com/103970106103092409324'
        }
        template_values = qa.add_template_values(template_values, self.request)

        bingo([
            'struggling_videos_landing',
            'suggested_activity_videos_landing',
            'suggested_activity_videos_landing_binary',
        ])
        self.render_jinja2_template('viewvideo.html', template_values)
Exemple #8
0
    def get(self, readable_id=""):

        # This method displays a video in the context of a particular topic.
        # To do that we first need to find the appropriate topic.  If we aren't
        # given the topic title in a query param, we need to find a topic that
        # the video is a part of.  That requires finding the video, given it readable_id
        # or, to support old URLs, it's youtube_id.
        video = None
        topic = None
        video_id = self.request.get('v')
        topic_id = self.request_string('topic', default="")
        readable_id = urllib.unquote(readable_id).decode("utf-8")
        readable_id = re.sub('-+$', '', readable_id)  # remove any trailing dashes (see issue 1140)

        # If either the readable_id or topic title is missing,
        # redirect to the canonical URL that contains them
        redirect_to_canonical_url = False
        if video_id: # Support for old links
            query = Video.all()
            query.filter('youtube_id =', video_id)
            video = query.get()

            if not video:
                raise MissingVideoException("Missing video w/ youtube id '%s'" % video_id)

            readable_id = video.readable_id
            topic = video.first_topic()

            if not topic:
                raise MissingVideoException("No topic has video w/ youtube id '%s'" % video_id)

            redirect_to_canonical_url = True

        if topic_id is not None and len(topic_id) > 0:
            topic = Topic.get_by_id(topic_id)
            key_id = 0 if not topic else topic.key().id()

        # If a topic_id wasn't specified or the specified topic wasn't found
        # use the first topic for the requested video.
        if topic is None:
            # Get video by readable_id just to get the first topic for the video
            video = Video.get_for_readable_id(readable_id)
            if video is None:
                raise MissingVideoException("Missing video '%s'" % readable_id)

            topic = video.first_topic()
            if not topic:
                raise MissingVideoException("No topic has video '%s'" % readable_id)

            redirect_to_canonical_url = True

        exid = self.request_string('exid', default=None)

        if redirect_to_canonical_url:
            qs = {'topic': topic.id}
            if exid:
                qs['exid'] = exid

            urlpath = "/video/%s" % urllib.quote(readable_id)
            url = urlparse.urlunparse(('', '', urlpath, '', urllib.urlencode(qs), ''))
            self.redirect(url, True)
            return

        # If we got here, we have a readable_id and a topic, so we can display
        # the topic and the video in it that has the readable_id.  Note that we don't
        # query the Video entities for one with the requested readable_id because in some
        # cases there are multiple Video objects in the datastore with the same readable_id
        # (e.g. there are 2 "Order of Operations" videos).
        videos = Topic.get_cached_videos_for_topic(topic)
        previous_video = None
        next_video = None
        for v in videos:
            if v.readable_id == readable_id:
                v.selected = 'selected'
                video = v
            elif video is None:
                previous_video = v
            else:
                next_video = v
                break

        # If we're at the beginning or end of a topic, show the adjacent topic.
        # previous_topic/next_topic are the topic to display.
        # previous_video_topic/next_video_topic are the subtopics the videos
        # are actually in.
        previous_topic = None
        previous_video_topic = None
        next_topic = None
        next_video_topic = None

        if not previous_video:
            previous_topic = topic
            while not previous_video:
                previous_topic = previous_topic.get_previous_topic()
                if previous_topic:
                    (previous_video, previous_video_topic) = previous_topic.get_last_video_and_topic()
                else:
                    break

        if not next_video:
            next_topic = topic
            while not next_video:
                next_topic = next_topic.get_next_topic()
                if next_topic:
                    (next_video, next_video_topic) = next_topic.get_first_video_and_topic()
                else:
                    break

        if video is None:
            raise MissingVideoException("Missing video '%s'" % readable_id)

        if App.offline_mode:
            video_path = "/videos/" + get_mangled_topic_name(topic.id) + "/" + video.readable_id + ".flv"
        else:
            video_path = video.download_video_url()

        if video.description == video.title:
            video.description = None

        related_exercises = video.related_exercises()
        button_top_exercise = None
        if related_exercises:
            def ex_to_dict(exercise):
                return {
                    'name': exercise.display_name,
                    'url': exercise.relative_url,
                }
            button_top_exercise = ex_to_dict(related_exercises[0])

        user_video = UserVideo.get_for_video_and_user_data(video, UserData.current(), insert_if_missing=True)

        awarded_points = 0
        if user_video:
            awarded_points = user_video.points

        subtitles_key_name = VideoSubtitles.get_key_name('en', video.youtube_id)
        subtitles = VideoSubtitles.get_by_key_name(subtitles_key_name)
        subtitles_json = None
        if subtitles:
            subtitles_json = subtitles.load_json()

        template_values = {
                            'topic': topic,
                            'video': video,
                            'videos': videos,
                            'video_path': video_path,
                            'video_points_base': consts.VIDEO_POINTS_BASE,
                            'subtitles_json': subtitles_json,
                            'button_top_exercise': button_top_exercise,
                            'related_exercises': [], # disabled for now
                            'previous_topic': previous_topic,
                            'previous_video': previous_video,
                            'previous_video_topic': previous_video_topic,
                            'next_topic': next_topic,
                            'next_video': next_video,
                            'next_video_topic': next_video_topic,
                            'selected_nav_link': 'watch',
                            'awarded_points': awarded_points,
                            'issue_labels': ('Component-Videos,Video-%s' % readable_id),
                            'author_profile': 'https://plus.google.com/103970106103092409324'
                        }
        template_values = qa.add_template_values(template_values, self.request)

        bingo([
            'struggling_videos_landing',
            'suggested_activity_videos_landing',
            'suggested_activity_videos_landing_binary',
        ])
        self.render_jinja2_template('viewvideo.html', template_values)
Exemple #9
0
 def test_process_first_batch_on_empty_cursor(self):
     self._set_responses_xrange(BATCH_SIZE)
     _task_handler('UUID')
     self.assertEqual(VideoSubtitles.all().count(), BATCH_SIZE)