def handle(self, *args, **options):
    """Refresh the language => subtitle mappings (central server only).

    Defaults the cutoff date to `days_since_attempt` days ago when no
    explicit `date_since_attempt` option was supplied, rebuilds all
    mappings, and re-derives the language => srt map if anything changed.
    """
    if not settings.CENTRAL_SERVER:
        raise CommandError("This must only be run on the central server.")

    # Set up the refresh date: fall back to N days before now.
    if not options["date_since_attempt"]:
        cutoff = datetime.datetime.now() - datetime.timedelta(days=options["days_since_attempt"])
        options["date_since_attempt"] = cutoff.strftime("%m/%d/%Y")

    converted_date = convert_date_input(options.get("date_since_attempt"))

    updated_mappings = create_all_mappings(
        force=options.get("force"),
        frequency_to_save=5,
        response_to_check=options.get("response_code"),
        date_to_check=converted_date,
    )
    logging.info("Executed successfully. Updating language => subtitle mapping to record any changes!")

    if updated_mappings:
        language_srt_map = update_language_srt_map()
        print_language_availability_table(language_srt_map)

    logging.info("Process complete.")
def handle(self, *args, **options):
    """Rebuild subtitle mappings; guarded so it only runs centrally.

    When the caller gave no cutoff date, one is synthesized from the
    `days_since_attempt` option. After mapping creation, a changed
    mapping triggers a refresh of the language => srt availability map.
    """
    if not settings.CENTRAL_SERVER:
        raise CommandError("This must only be run on the central server.")

    # Set up the refresh date when the option is missing/empty.
    if not options["date_since_attempt"]:
        delta = datetime.timedelta(days=options["days_since_attempt"])
        date_since_attempt = datetime.datetime.now() - delta
        options["date_since_attempt"] = date_since_attempt.strftime("%m/%d/%Y")

    converted_date = convert_date_input(options.get("date_since_attempt"))

    updated_mappings = create_all_mappings(force=options.get("force"),
                                           frequency_to_save=5,
                                           response_to_check=options.get("response_code"),
                                           date_to_check=converted_date)
    logging.info("Executed successfully. Updating language => subtitle mapping to record any changes!")

    if updated_mappings:
        print_language_availability_table(update_language_srt_map())

    logging.info("Process complete.")
def download_if_criteria_met(videos, lang_code, force, response_code, date_since_attempt, frequency_to_save, *args, **kwargs):
    """Execute download of subtitle if it meets the criteria specified by the command line args

    Note: videos are a dict; keys=youtube_id, values=data (with keys
        "api_response", "last_attempt", "downloaded").
    Note: lang_code is in IETF format.

    Arguments:
        force: redownload subtitles even when already marked downloaded.
        response_code: keep only videos whose recorded api_response matches
            ("all" or falsy disables this filter).
        date_since_attempt: keep only videos never attempted, or last
            attempted before this date.
        frequency_to_save: persist updated srt counts every N downloads.
    """
    date_specified = convert_date_input(date_since_attempt)

    # Filter up front, for efficiency (& reporting's sake)
    n_videos = len(videos)
    logging.info("There are (up to) %s total videos with subtitles for language '%s'. Let's go get them!" % (n_videos, lang_code,))

    # Filter based on response code
    if response_code and response_code != "all":
        logging.info("Filtering based on response code (%s)..." % response_code)
        # Direct comparison; replaces an equivalent partial(lambda ...) filter.
        videos = dict([(k, v) for k, v in videos.iteritems() if v["api_response"] == response_code])
        logging.info("%4d of %4d videos match your specified response code (%s)" % (len(videos), n_videos, response_code,))

    # Filter based on date: keep videos never attempted, or last attempted
    # strictly before the cutoff; delete those attempted on/after it.
    if date_specified:
        logging.info("Filtering based on date...")
        # Py2 keys() returns a list, so deleting during iteration is safe.
        for k in videos.keys():
            last_attempt = videos[k]["last_attempt"]
            # TODO(bcipolli): also check that the output filename exists, as per #1359
            if last_attempt and datetime.datetime.strptime(last_attempt, '%Y-%m-%d') >= date_specified:
                del videos[k]
        # BUGFIX: message previously said "last refresh more recent than",
        # which inverted what the filter actually keeps.
        logging.info("%4d of %4d videos need refreshing (last attempt before %s)" % (len(videos), n_videos, date_specified,))

    # Loop over videos needing refreshing
    n_to_process = len(videos)  # hoisted; videos is not mutated below
    n_loops = 0
    srt_count = None
    for youtube_id, entry in videos.items():
        previously_downloaded = entry.get("downloaded")

        if previously_downloaded and not force:
            logging.info("Already downloaded %s/%s. To redownload, run again with -f." % (lang_code, youtube_id,))
            continue

        logging.debug("Attempting to download subtitle for lang: %s and YouTube ID: %s" % (lang_code, youtube_id,))
        response = download_subtitle(youtube_id, lang_code, format="srt")
        time_of_attempt = unicode(datetime.datetime.now().date())

        if response in ["client-error", "server-error", "unexpected_error"]:
            # Couldn't download; record the error so future runs can filter on it.
            logging.info("%s/%s.srt: Updating JSON file to record error (%s)." % (lang_code, youtube_id, response,))
            update_json(youtube_id, lang_code, previously_downloaded, response, time_of_attempt)
        else:
            # Success: write the srt file, then record the download.
            dirpath = get_srt_path(lang_code)
            fullpath = os.path.join(dirpath, youtube_id + ".srt")
            ensure_dir(dirpath)
            logging.debug("Writing file to %s" % fullpath)
            with open(fullpath, 'w') as fp:
                fp.write(response.encode('UTF-8'))
            logging.info("%s/%s.srt: Updating JSON file to record success." % (lang_code, youtube_id,))
            update_json(youtube_id, lang_code, True, "success", time_of_attempt)

        # Update srt availability mapping periodically, and on the final loop.
        n_loops += 1
        if n_loops % frequency_to_save == 0 or n_loops == n_to_process:
            srt_count = store_new_counts(lang_code=lang_code)
            logging.info("%s: On loop %d / %d, stored: subtitle count = %d." % (lang_code, n_loops, n_to_process, srt_count,))

    # Summarize output
    if srt_count is None:  # only none if nothing was done.
        logging.info("Nothing was done.")
    else:
        logging.info("We now have %d subtitles (amara thought they had %d) for language '%s'!" % (srt_count, n_videos, lang_code,))
def download_if_criteria_met(videos, lang_code, force, response_code, date_since_attempt, frequency_to_save, *args, **kwargs):
    """Execute download of subtitle if it meets the criteria specified by the command line args

    Note: videos are a dict; keys=youtube_id, values=data (with keys
        "api_response", "last_attempt", "downloaded").
    Note: lang_code is in IETF format.

    Arguments:
        force: redownload subtitles even when already marked downloaded.
        response_code: keep only videos whose recorded api_response matches
            ("all" or falsy disables this filter).
        date_since_attempt: keep only videos never attempted, or last
            attempted before this date.
        frequency_to_save: persist updated srt counts every N downloads.
    """
    date_specified = convert_date_input(date_since_attempt)

    # Filter up front, for efficiency (& reporting's sake)
    n_videos = len(videos)
    logging.info("There are (up to) %s total videos with subtitles for language '%s'. Let's go get them!" % (n_videos, lang_code,))

    # Filter based on response code
    if response_code and response_code != "all":
        logging.info("Filtering based on response code (%s)..." % response_code)
        # Direct comparison; replaces an equivalent partial(lambda ...) filter.
        videos = dict([(k, v) for k, v in videos.iteritems() if v["api_response"] == response_code])
        logging.info("%4d of %4d videos match your specified response code (%s)" % (len(videos), n_videos, response_code,))

    # Filter based on date: keep videos never attempted, or last attempted
    # strictly before the cutoff; delete those attempted on/after it.
    if date_specified:
        logging.info("Filtering based on date...")
        # Py2 keys() returns a list, so deleting during iteration is safe.
        for k in videos.keys():
            last_attempt = videos[k]["last_attempt"]
            # TODO(bcipolli): also check that the output filename exists, as per #1359
            if last_attempt and datetime.datetime.strptime(last_attempt, '%Y-%m-%d') >= date_specified:
                del videos[k]
        # BUGFIX: message previously said "last refresh more recent than",
        # which inverted what the filter actually keeps.
        logging.info("%4d of %4d videos need refreshing (last attempt before %s)" % (len(videos), n_videos, date_specified,))

    # Loop over videos needing refreshing
    n_to_process = len(videos)  # hoisted; videos is not mutated below
    n_loops = 0
    srt_count = None
    for youtube_id, entry in videos.items():
        previously_downloaded = entry.get("downloaded")

        if previously_downloaded and not force:
            logging.info("Already downloaded %s/%s. To redownload, run again with -f." % (lang_code, youtube_id,))
            continue

        logging.debug("Attempting to download subtitle for lang: %s and YouTube ID: %s" % (lang_code, youtube_id,))
        response = download_subtitle(youtube_id, lang_code, format="srt")
        time_of_attempt = unicode(datetime.datetime.now().date())

        if response in ["client-error", "server-error", "unexpected_error"]:
            # Couldn't download; record the error so future runs can filter on it.
            logging.info("%s/%s.srt: Updating JSON file to record error (%s)." % (lang_code, youtube_id, response,))
            update_json(youtube_id, lang_code, previously_downloaded, response, time_of_attempt)
        else:
            # Success: write the srt file, then record the download.
            dirpath = get_srt_path(lang_code)
            fullpath = os.path.join(dirpath, youtube_id + ".srt")
            ensure_dir(dirpath)
            logging.debug("Writing file to %s" % fullpath)
            with open(fullpath, 'w') as fp:
                fp.write(response.encode('UTF-8'))
            logging.info("%s/%s.srt: Updating JSON file to record success." % (lang_code, youtube_id,))
            update_json(youtube_id, lang_code, True, "success", time_of_attempt)

        # Update srt availability mapping periodically, and on the final loop.
        n_loops += 1
        if n_loops % frequency_to_save == 0 or n_loops == n_to_process:
            srt_count = store_new_counts(lang_code=lang_code)
            logging.info("%s: On loop %d / %d, stored: subtitle count = %d." % (lang_code, n_loops, n_to_process, srt_count,))

    # Summarize output
    if srt_count is None:  # only none if nothing was done.
        logging.info("Nothing was done.")
    else:
        logging.info("We now have %d subtitles (amara thought they had %d) for language '%s'!" % (srt_count, n_videos, lang_code,))