def test_derive_key_name_from_video(self):
    # After one batch has been processed, the subtitles of every fetched
    # video must be retrievable under the key name derived from the
    # language code and the video's youtube_id.
    self._set_responses_xrange(BATCH_SIZE)
    _task_handler('UUID')
    for video in Video.all().fetch(BATCH_SIZE):
        key_name = VideoSubtitles.get_key_name('en', video.youtube_id)
        self.assertIsNotNone(VideoSubtitles.get_by_key_name(key_name))
def test_assume_utf8_encoded_content(self):
    # The Universal Subtitles API responds with UTF-8 encoded bytes; the
    # stored subtitles are expected to hold the decoded unicode text.
    # The byte string '\xc3\x84\xc3\x90' is the UTF-8 encoding of u'\xc4\xd0'.
    raw_bytes = '\xc3\x84\xc3\x90'
    decoded_text = u'\xc4\xd0'

    self._set_responses_xrange(1, content=raw_bytes)
    _task_handler('UUID')

    stored_count = VideoSubtitles.all().count()
    self.assertEqual(stored_count, 1)
    stored = VideoSubtitles.all().get()
    self.assertEqual(stored.json, decoded_text)
def test_process_next_batch_on_nonempty_cursor(self):
    offset = 3

    # Videos stored ahead of the cursor: they have no canned response, so
    # fetching them would fail with a DownloadError. They must be skipped.
    for n in xrange(offset):
        Video(youtube_id=str(n)).put()

    # Videos stored after the cursor: these are the ones to download.
    self._set_responses_xrange(offset, BATCH_SIZE + offset)

    # Advance a query past the first `offset` videos to obtain the cursor.
    skipping_query = Video.all()
    skipping_query.fetch(offset)
    _task_handler('UUID', cursor=skipping_query.cursor())

    stored_count = VideoSubtitles.all().count()
    self.assertEqual(stored_count, BATCH_SIZE)
def test_should_not_put_duplicate_subtitles(self, info):
    # `info` is presumably the mock of the info-level logger injected by a
    # patch decorator that is not visible in this view.
    self._set_responses_xrange(BATCH_SIZE, content="some json")

    # First fetch: every subtitle is new, so all are stored, nothing is logged.
    _task_handler('UUID', 0)
    stored_count = VideoSubtitles.all().count()
    self.assertEqual(stored_count, BATCH_SIZE)
    self.assertEqual(info.call_count, 0)

    with patch('unisubs.VideoSubtitles') as mock_subtitles:
        # Keep the real key derivation and lookup so that the handler still
        # finds the records written by the first fetch.
        mock_subtitles.get_key_name = VideoSubtitles.get_key_name
        mock_subtitles.get_by_key_name = VideoSubtitles.get_by_key_name

        # Second fetch, same content.
        _task_handler('UUID', 1)

        put_calls = mock_subtitles.return_value.put.call_count
        self.assertEqual(put_calls, 0,
                         'duplicate subtitles should not be put()')
        self.assertEqual(info.call_count, BATCH_SIZE,
                         'skipped put should be logged')
def _task_handler(uid, task_id=0, cursor=None, report=None):
    """Task chain for fetching subtitles from the Universal Subtitles API

    It processes Video models in batches of BATCH_SIZE by fetching the English
    subtitles via an HTTP API call.

    This job runs regularly so fetch failures are fixed from run-to-run. Fetch
    failures are logged and suppressed as the task marches on. Errors include
    URL fetch timeouts, subtitles put failures, and response decoding failures.

    HTTP redirects indicate that the code needs updating to a new API endpoint.
    They are detected and reported separately.

    Args:
        uid: key name under which the VideoSubtitlesFetchReport entity is
            stored when a fresh report is started.
        task_id: not read by the code visible here.
        cursor: datastore cursor handed to the Video query; None starts at
            the first Video.
        report: dict of counters ('fetches', 'redirects', 'writes', 'errors')
            updated in place. When None, a copy of REPORT_TEMPLATE is used
            and an initial VideoSubtitlesFetchReport is put().

    NOTE(review): the code visible here neither persists the updated counters
    nor schedules a follow-up task -- presumably that happens elsewhere;
    confirm.
    """
    query = Video.all()
    query.with_cursor(cursor)
    videos = query.fetch(BATCH_SIZE)

    if report is None:
        report = dict(REPORT_TEMPLATE)
        VideoSubtitlesFetchReport(key_name=uid, **report).put()

    # Asynchronously fetch. We'll rate-limit by fetching BATCH_SIZE subtitles
    # at each DEFER_SECONDS interval
    rpcs = []
    for video in videos:
        url = UNISUBS_URL % urllib.quote(YOUTUBE_URL % video.youtube_id)
        rpc = urlfetch.create_rpc(deadline=TIMEOUT_SECONDS)
        urlfetch.make_fetch_call(rpc, url)
        rpcs.append((video.youtube_id, rpc))
        report['fetches'] += 1

    # Only English subtitles are fetched; the language does not vary per video.
    lang = 'en'

    # Process asynchronous fetches
    for youtube_id, rpc in rpcs:
        key_name = VideoSubtitles.get_key_name(lang, youtube_id)
        try:
            resp = rpc.get_result()
            if resp.status_code != 200:
                raise RuntimeError('status code: %s' % resp.status_code)

            if resp.final_url:
                logging.warn('%s redirect to %s' % (key_name, resp.final_url))
                report['redirects'] += 1

            json = resp.content.decode('utf-8')

            # Only update stale records
            current = VideoSubtitles.get_by_key_name(key_name)
            if not current or current.json != json:
                new = VideoSubtitles(key_name=key_name, youtube_id=youtube_id,
                                     language=lang, json=json)
                new.put()
                report['writes'] += 1
            else:
                logging.info('%s content already up-to-date' % key_name)
        except Exception as e:
            # Deliberately broad: one failed video must not abort the batch.
            logging.error('%s subtitles fetch failed: %s' % (key_name, e))
            report['errors'] += 1
def get(self, readable_id=""):
    """Render a video page in the context of one of its topics.

    The topic is taken from the 'topic' query parameter when it resolves to
    an existing Topic; otherwise the video's first topic is used. Requests
    that arrive through an old-style '?v=<youtube_id>' link, or without a
    usable topic, are redirected to the canonical URL
    '/video/<readable_id>?topic=<topic id>' (plus 'exid' when present).

    Args:
        readable_id: URL-quoted, UTF-8 encoded readable id taken from the
            request path. Trailing dashes are ignored.

    Raises:
        MissingVideoException: when no video matches the youtube id or
            readable id, when the video belongs to no topic, or when the
            resolved topic does not contain a video with that readable id.
    """
    # This method displays a video in the context of a particular topic.
    # To do that we first need to find the appropriate topic. If we aren't
    # given the topic title in a query param, we need to find a topic that
    # the video is a part of. That requires finding the video, given its
    # readable_id or, to support old URLs, its youtube_id.
    video = None
    topic = None
    video_id = self.request.get('v')
    topic_id = self.request_string('topic', default="")
    readable_id = urllib.unquote(readable_id).decode("utf-8")
    # remove any trailing dashes (see issue 1140)
    readable_id = re.sub('-+$', '', readable_id)

    # If either the readable_id or topic title is missing,
    # redirect to the canonical URL that contains them
    redirect_to_canonical_url = False

    if video_id:  # Support for old links
        query = Video.all()
        query.filter('youtube_id =', video_id)
        video = query.get()

        if not video:
            raise MissingVideoException(
                "Missing video w/ youtube id '%s'" % video_id)

        readable_id = video.readable_id
        topic = video.first_topic()

        if not topic:
            raise MissingVideoException(
                "No topic has video w/ youtube id '%s'" % video_id)

        redirect_to_canonical_url = True

    if topic_id:
        topic = Topic.get_by_id(topic_id)

    # If a topic_id wasn't specified or the specified topic wasn't found
    # use the first topic for the requested video.
    if topic is None:
        # Get video by readable_id just to get the first topic for the video
        video = Video.get_for_readable_id(readable_id)
        if video is None:
            raise MissingVideoException("Missing video '%s'" % readable_id)

        topic = video.first_topic()
        if not topic:
            raise MissingVideoException(
                "No topic has video '%s'" % readable_id)

        redirect_to_canonical_url = True

    exid = self.request_string('exid', default=None)

    if redirect_to_canonical_url:
        qs = {'topic': topic.id}
        if exid:
            qs['exid'] = exid

        # readable_id is unicode at this point; Python 2's urllib.quote()
        # raises KeyError on non-ASCII unicode input, so hand it UTF-8 bytes.
        urlpath = "/video/%s" % urllib.quote(readable_id.encode("utf-8"))
        url = urlparse.urlunparse(
            ('', '', urlpath, '', urllib.urlencode(qs), ''))
        # Second argument True: presumably requests a permanent redirect.
        self.redirect(url, True)
        return

    # If we got here, we have a readable_id and a topic, so we can display
    # the topic and the video in it that has the readable_id. Note that we
    # don't query the Video entities for one with the requested readable_id
    # because in some cases there are multiple Video objects in the datastore
    # with the same readable_id (e.g. there are 2 "Order of Operations"
    # videos).
    videos = Topic.get_cached_videos_for_topic(topic)

    previous_video = None
    next_video = None
    for v in videos:
        if v.readable_id == readable_id:
            v.selected = 'selected'
            video = v
        elif video is None:
            previous_video = v
        else:
            next_video = v
            break

    # Fail before walking neighbouring topics: there is nothing to render
    # when the topic does not contain the requested video.
    if video is None:
        raise MissingVideoException("Missing video '%s'" % readable_id)

    # If we're at the beginning or end of a topic, show the adjacent topic.
    # previous_topic/next_topic are the topic to display.
    # previous_video_topic/next_video_topic are the subtopics the videos
    # are actually in.
    previous_topic = None
    previous_video_topic = None
    next_topic = None
    next_video_topic = None

    if not previous_video:
        previous_topic = topic
        while not previous_video:
            previous_topic = previous_topic.get_previous_topic()
            if not previous_topic:
                break
            (previous_video, previous_video_topic) = (
                previous_topic.get_last_video_and_topic())

    if not next_video:
        next_topic = topic
        while not next_video:
            next_topic = next_topic.get_next_topic()
            if not next_topic:
                break
            (next_video, next_video_topic) = (
                next_topic.get_first_video_and_topic())

    if App.offline_mode:
        video_path = ("/videos/" + get_mangled_topic_name(topic.id) + "/" +
                      video.readable_id + ".flv")
    else:
        video_path = video.download_video_url()

    # A description that merely repeats the title is not passed on.
    if video.description == video.title:
        video.description = None

    related_exercises = video.related_exercises()
    button_top_exercise = None
    if related_exercises:
        top_exercise = related_exercises[0]
        button_top_exercise = {
            'name': top_exercise.display_name,
            'url': top_exercise.relative_url,
        }

    user_video = UserVideo.get_for_video_and_user_data(
        video, UserData.current(), insert_if_missing=True)

    awarded_points = 0
    if user_video:
        awarded_points = user_video.points

    subtitles_key_name = VideoSubtitles.get_key_name('en', video.youtube_id)
    subtitles = VideoSubtitles.get_by_key_name(subtitles_key_name)
    subtitles_json = None
    if subtitles:
        subtitles_json = subtitles.load_json()

    template_values = {
        'topic': topic,
        'video': video,
        'videos': videos,
        'video_path': video_path,
        'video_points_base': consts.VIDEO_POINTS_BASE,
        'subtitles_json': subtitles_json,
        'button_top_exercise': button_top_exercise,
        'related_exercises': [],  # disabled for now
        'previous_topic': previous_topic,
        'previous_video': previous_video,
        'previous_video_topic': previous_video_topic,
        'next_topic': next_topic,
        'next_video': next_video,
        'next_video_topic': next_video_topic,
        'selected_nav_link': 'watch',
        'awarded_points': awarded_points,
        'issue_labels': ('Component-Videos,Video-%s' % readable_id),
        'author_profile': 'https://plus.google.com/103970106103092409324'
    }
    template_values = qa.add_template_values(template_values, self.request)

    bingo([
        'struggling_videos_landing',
        'suggested_activity_videos_landing',
        'suggested_activity_videos_landing_binary',
    ])
    self.render_jinja2_template('viewvideo.html', template_values)
def get(self, readable_id=""):
    """Render a video page in the context of one of its topics.

    The topic is taken from the 'topic' query parameter when it resolves to
    an existing Topic; otherwise the video's first topic is used. Requests
    that arrive through an old-style '?v=<youtube_id>' link, or without a
    usable topic, are redirected to the canonical URL
    '/video/<readable_id>?topic=<topic id>' (plus 'exid' when present).

    Args:
        readable_id: URL-quoted, UTF-8 encoded readable id taken from the
            request path. Trailing dashes are ignored.

    Raises:
        MissingVideoException: when no video matches the youtube id or
            readable id, when the video belongs to no topic, or when the
            resolved topic does not contain a video with that readable id.
    """
    # This method displays a video in the context of a particular topic.
    # To do that we first need to find the appropriate topic. If we aren't
    # given the topic title in a query param, we need to find a topic that
    # the video is a part of. That requires finding the video, given its
    # readable_id or, to support old URLs, its youtube_id.
    video = None
    topic = None
    video_id = self.request.get('v')
    topic_id = self.request_string('topic', default="")
    readable_id = urllib.unquote(readable_id).decode("utf-8")
    # remove any trailing dashes (see issue 1140)
    readable_id = re.sub('-+$', '', readable_id)

    # If either the readable_id or topic title is missing,
    # redirect to the canonical URL that contains them
    redirect_to_canonical_url = False

    if video_id:  # Support for old links
        query = Video.all()
        query.filter('youtube_id =', video_id)
        video = query.get()

        if not video:
            raise MissingVideoException(
                "Missing video w/ youtube id '%s'" % video_id)

        readable_id = video.readable_id
        topic = video.first_topic()

        if not topic:
            raise MissingVideoException(
                "No topic has video w/ youtube id '%s'" % video_id)

        redirect_to_canonical_url = True

    if topic_id:
        topic = Topic.get_by_id(topic_id)

    # If a topic_id wasn't specified or the specified topic wasn't found
    # use the first topic for the requested video.
    if topic is None:
        # Get video by readable_id just to get the first topic for the video
        video = Video.get_for_readable_id(readable_id)
        if video is None:
            raise MissingVideoException("Missing video '%s'" % readable_id)

        topic = video.first_topic()
        if not topic:
            raise MissingVideoException(
                "No topic has video '%s'" % readable_id)

        redirect_to_canonical_url = True

    exid = self.request_string('exid', default=None)

    if redirect_to_canonical_url:
        qs = {'topic': topic.id}
        if exid:
            qs['exid'] = exid

        # readable_id is unicode at this point; Python 2's urllib.quote()
        # raises KeyError on non-ASCII unicode input, so hand it UTF-8 bytes.
        urlpath = "/video/%s" % urllib.quote(readable_id.encode("utf-8"))
        url = urlparse.urlunparse(
            ('', '', urlpath, '', urllib.urlencode(qs), ''))
        # Second argument True: presumably requests a permanent redirect.
        self.redirect(url, True)
        return

    # If we got here, we have a readable_id and a topic, so we can display
    # the topic and the video in it that has the readable_id. Note that we
    # don't query the Video entities for one with the requested readable_id
    # because in some cases there are multiple Video objects in the datastore
    # with the same readable_id (e.g. there are 2 "Order of Operations"
    # videos).
    videos = Topic.get_cached_videos_for_topic(topic)

    previous_video = None
    next_video = None
    for v in videos:
        if v.readable_id == readable_id:
            v.selected = 'selected'
            video = v
        elif video is None:
            previous_video = v
        else:
            next_video = v
            break

    # Fail before walking neighbouring topics: there is nothing to render
    # when the topic does not contain the requested video.
    if video is None:
        raise MissingVideoException("Missing video '%s'" % readable_id)

    # If we're at the beginning or end of a topic, show the adjacent topic.
    # previous_topic/next_topic are the topic to display.
    # previous_video_topic/next_video_topic are the subtopics the videos
    # are actually in.
    previous_topic = None
    previous_video_topic = None
    next_topic = None
    next_video_topic = None

    if not previous_video:
        previous_topic = topic
        while not previous_video:
            previous_topic = previous_topic.get_previous_topic()
            if not previous_topic:
                break
            (previous_video, previous_video_topic) = (
                previous_topic.get_last_video_and_topic())

    if not next_video:
        next_topic = topic
        while not next_video:
            next_topic = next_topic.get_next_topic()
            if not next_topic:
                break
            (next_video, next_video_topic) = (
                next_topic.get_first_video_and_topic())

    if App.offline_mode:
        video_path = ("/videos/" + get_mangled_topic_name(topic.id) + "/" +
                      video.readable_id + ".flv")
    else:
        video_path = video.download_video_url()

    # A description that merely repeats the title is not passed on.
    if video.description == video.title:
        video.description = None

    related_exercises = video.related_exercises()
    button_top_exercise = None
    if related_exercises:
        top_exercise = related_exercises[0]
        button_top_exercise = {
            'name': top_exercise.display_name,
            'url': top_exercise.relative_url,
        }

    user_video = UserVideo.get_for_video_and_user_data(
        video, UserData.current(), insert_if_missing=True)

    awarded_points = 0
    if user_video:
        awarded_points = user_video.points

    subtitles_key_name = VideoSubtitles.get_key_name('en', video.youtube_id)
    subtitles = VideoSubtitles.get_by_key_name(subtitles_key_name)
    subtitles_json = None
    if subtitles:
        subtitles_json = subtitles.load_json()

    template_values = {
        'topic': topic,
        'video': video,
        'videos': videos,
        'video_path': video_path,
        'video_points_base': consts.VIDEO_POINTS_BASE,
        'subtitles_json': subtitles_json,
        'button_top_exercise': button_top_exercise,
        'related_exercises': [],  # disabled for now
        'previous_topic': previous_topic,
        'previous_video': previous_video,
        'previous_video_topic': previous_video_topic,
        'next_topic': next_topic,
        'next_video': next_video,
        'next_video_topic': next_video_topic,
        'selected_nav_link': 'watch',
        'awarded_points': awarded_points,
        'issue_labels': ('Component-Videos,Video-%s' % readable_id),
        'author_profile': 'https://plus.google.com/103970106103092409324'
    }
    template_values = qa.add_template_values(template_values, self.request)

    bingo([
        'struggling_videos_landing',
        'suggested_activity_videos_landing',
        'suggested_activity_videos_landing_binary',
    ])
    self.render_jinja2_template('viewvideo.html', template_values)
def test_process_first_batch_on_empty_cursor(self):
    # Without a cursor the handler starts at the first video, so one run
    # must store subtitles for exactly one full batch.
    self._set_responses_xrange(BATCH_SIZE)
    _task_handler('UUID')
    stored_count = VideoSubtitles.all().count()
    self.assertEqual(stored_count, BATCH_SIZE)