def _task_report_handler(uid, report):
    """Store the final report of a task chain

    The report dict is stamped (in place) with a 'finished' status and then
    written as a VideoSubtitlesFetchReport whose key name is uid.

    Once scheduled, this task will retry until it succeeds.
    """
    report.update(status='finished')
    finished_report = VideoSubtitlesFetchReport(key_name=uid, **report)
    finished_report.put()
def get(self):
    """Display reports from recent imports

    Request parameters:
      limit: maximum number of reports to render (default 25). Negative
        values are treated as 0.
      _started: passed through to the template as 'started'.

    Renders 'unisubs_report.html' with the reports ordered by descending
    'created' and a 'truncated' flag that is True when more than `limit`
    reports exist.
    """
    # A negative limit would make the fetch / trim below raise, so clamp it.
    limit = max(self.request_int('limit', 25), 0)
    started = self.request_string('_started')

    # all() is a class-level query factory; there is no need to construct a
    # throwaway entity just to call it.
    query = VideoSubtitlesFetchReport.all()
    query.order('-created')

    # Ask for one extra row purely to detect whether more reports exist.
    fetched = query.fetch(limit + 1)
    truncated = len(fetched) > limit
    reports = fetched[:limit]

    self.render_jinja2_template('unisubs_report.html', {
        'reports': reports,
        'truncated': truncated,
        'started': started,
    })
def get(self):
    """Display reports from recent imports

    Reads two request parameters: 'limit' (an int, default 25, clamped to
    be non-negative) and '_started' (a string handed to the template as
    'started').

    The template 'unisubs_report.html' receives at most `limit` reports,
    sorted by descending 'created', plus a 'truncated' boolean that is set
    when the datastore holds more reports than were requested.
    """
    requested = self.request_int('limit', 25)
    # Guard against negative values, which would break the fetch and the
    # trimming of the sentinel row below.
    limit = requested if requested > 0 else 0
    started = self.request_string('_started')

    # Model.all() is callable on the class itself; instantiating an empty
    # entity first is unnecessary.
    query = VideoSubtitlesFetchReport.all()
    query.order('-created')

    # One row beyond the limit acts as a "there is more" sentinel.
    rows = query.fetch(limit + 1)
    truncated = len(rows) > limit
    reports = rows[:limit]

    self.render_jinja2_template('unisubs_report.html', {
        'reports': reports,
        'truncated': truncated,
        'started': started,
    })
def _task_handler(uid, task_id=0, cursor=None, report=None):
    """Task chain for fetching subtitles from the Universal Subtitles API

    It processes Video models in batches of BATCH_SIZE by fetching the English
    subtitles via an HTTP API call.

    This job runs regularly so fetch failures are fixed from run-to-run. Fetch
    failures are logged and suppressed as the task marches on. Errors include
    URL fetch timeouts, subtitles put failures, and response decoding failures.

    HTTP redirects indicate that the code needs updating to a new API endpoint.
    They are detected and reported separately.

    Arguments:
      uid: key name under which the VideoSubtitlesFetchReport is stored.
      task_id: not referenced by the code in this block.
      cursor: datastore cursor handed to the Video query; None starts from
        the beginning.
      report: dict of counters ('fetches', 'redirects', 'writes', 'errors')
        updated in place. When None, a copy of REPORT_TEMPLATE is used and
        an initial report entity is written right away.
    """
    query = Video.all()
    query.with_cursor(cursor)
    videos = query.fetch(BATCH_SIZE)

    if report is None:
        # No report was passed in: start from a fresh copy of the template
        # and persist it immediately so a report entity exists before any
        # fetch has completed.
        report = dict(REPORT_TEMPLATE)
        VideoSubtitlesFetchReport(key_name=uid, **report).put()

    # Asynchronously fetch. We'll rate-limit by fetching BATCH_SIZE subtitles
    # at each DEFER_SECONDS interval
    rpcs = []
    for video in videos:
        url = UNISUBS_URL % urllib.quote(YOUTUBE_URL % video.youtube_id)
        rpc = urlfetch.create_rpc(deadline=TIMEOUT_SECONDS)
        urlfetch.make_fetch_call(rpc, url)
        rpcs.append((video.youtube_id, rpc))
        report['fetches'] += 1

    # Process asynchronous fetches. Only English subtitles are requested.
    lang = 'en'
    for youtube_id, rpc in rpcs:
        key_name = VideoSubtitles.get_key_name(lang, youtube_id)
        try:
            resp = rpc.get_result()
            if resp.status_code != 200:
                raise RuntimeError('status code: %s' % resp.status_code)

            if resp.final_url:
                # The response is still processed; the redirect is only
                # counted and logged.
                logging.warning('%s redirect to %s', key_name, resp.final_url)
                report['redirects'] += 1

            # Named to avoid shadowing the standard `json` module.
            subtitles_json = resp.content.decode('utf-8')

            # Only update stale records
            current = VideoSubtitles.get_by_key_name(key_name)
            if not current or current.json != subtitles_json:
                new = VideoSubtitles(key_name=key_name,
                                     youtube_id=youtube_id,
                                     language=lang,
                                     json=subtitles_json)
                new.put()
                report['writes'] += 1
            else:
                logging.info('%s content already up-to-date', key_name)
        except Exception as e:
            # Deliberately broad: one failed video must not stop the batch.
            logging.error('%s subtitles fetch failed: %s', key_name, e)
            report['errors'] += 1
def test_report_started_immediately(self):
    # Run a single task with no report passed in.
    _task_handler('UUID')
    # Exactly one report entity should exist right after that call.
    stored_reports = VideoSubtitlesFetchReport.all()
    self.assertEqual(stored_reports.count(), 1)