def update_json(youtube_id, lang_code, downloaded, api_response, time_of_attempt):
    """Update language_srt_map to reflect download status of one video.

    Args:
        youtube_id: video key inside the language's srt map.
        lang_code: language code in IETF format (e.g. "en-us").
        downloaded: whether the srt file was successfully downloaded.
        api_response: API status string (e.g. "success").
        time_of_attempt: timestamp string recording this attempt.

    Returns:
        True when the map file was updated and written back to disk;
        False when the map could not be loaded or youtube_id is missing.
    """
    # Open JSON file
    filepath = get_lang_map_filepath(lang_code)
    language_srt_map = softload_json(filepath, logger=logging.error)
    if not language_srt_map:
        return False

    # BUGFIX: a youtube_id absent from the map used to raise KeyError;
    # report and bail out instead.
    entry = language_srt_map.get(youtube_id)
    if entry is None:
        logging.error("Video %s not found in srt map for %s" % (youtube_id, lang_code))
        return False

    # Update the entry in place; `entry` aliases the dict stored in
    # language_srt_map, so no separate .update() merge is needed.
    entry["downloaded"] = downloaded
    entry["api_response"] = api_response
    entry["last_attempt"] = time_of_attempt
    if api_response == "success":
        entry["last_success"] = time_of_attempt

    # Write it to file. BUGFIX: original opened in "wb" and wrote a str
    # (breaks on Python 3) and never closed the handle on a write error.
    with open(filepath, "w") as json_file:
        json_file.write(json.dumps(language_srt_map))

    logging.debug("File updated.")
    return True
def clear_subtitles_cache(lang_codes=None, locale_root=LOCALE_ROOT):
    """Reset subtitle download status and delete cached srt files.

    Language codes will be converted to django format (e.g. en_US)
    """
    if lang_codes is None:
        lang_codes = get_langs_with_subtitles()

    for raw_code in lang_codes:
        ietf_code = lcode_to_ietf(raw_code)

        # Clear the status file: every video entry goes back to a blank,
        # never-downloaded state.
        status_path = get_lang_map_filepath(ietf_code)
        statuses = softload_json(status_path, raises=True)
        for video_id in statuses:
            statuses[video_id] = {
                u'downloaded': False,
                u'last_success': u'',
                u'last_attempt': u'',
                u'api_response': u'',
            }
        with open(status_path, "w") as fp:
            json.dump(statuses, fp)

        # Delete the language's whole srt directory, if it exists.
        srt_dir = get_srt_path(ietf_code)
        if os.path.exists(srt_dir):
            shutil.rmtree(srt_dir)
def download_srt_from_3rd_party(lang_codes=None, **kwargs):
    """Download subtitles specified by command line args"""
    lang_codes = lang_codes or get_all_prepped_lang_codes()
    bad_languages = {}

    for code in lang_codes:
        # Normalize to IETF, then map to the code Amara knows.
        amara_code = get_supported_language_map(lcode_to_ietf(code))['amara']

        # Load this language's video metadata. A missing file simply means
        # no known subtitles, which maps to an empty video set.
        try:
            map_path = get_lang_map_filepath(amara_code)
            if os.path.exists(map_path):
                with open(map_path, "r") as fp:
                    videos = json.load(fp)
            else:
                videos = {}
        except Exception as e:
            error_msg = "Error in subtitles metadata file for %s: %s" % (amara_code, e)
            logging.error(error_msg)
            bad_languages[amara_code] = error_msg
            continue

        # Best-effort download: record the failure and move on to the
        # next language rather than aborting the whole run.
        try:
            download_if_criteria_met(videos, lang_code=amara_code, **kwargs)
        except Exception as e:
            error_msg = "Error downloading subtitles for %s: %s" % (amara_code, e)
            logging.error(error_msg)
            bad_languages[amara_code] = error_msg
            continue

    # now report final results
    if bad_languages:
        outstr = "Failed to download subtitles for the following languages: %s" % (bad_languages.keys())
        outstr += "\n" + str(bad_languages)
        logging.error(outstr)
class Command(BaseCommand):
    """Management command: refresh the per-language subtitle availability maps."""

    help = "Update the mapping of subtitles available by language for each video. Location: %s" % (
        get_lang_map_filepath("<lang_code>"),
    )

    option_list = BaseCommand.option_list + (
        # Basic options
        make_option(
            '-f', '--force',
            action='store_true',
            dest='force',
            default=False,
            help="Force a new mapping. Cannot be run with other options. Fetches new data for every one of our videos and overwrites current data with fresh data from Amara. Should really only ever be run once, because data can be updated from then on with '-s all'.",
            metavar="FORCE"),
        make_option(
            '-r', '--response-code',
            action='store',
            dest='response_code',
            default=None,
            help="Which api-response code to recheck. Can be combined with -d. USAGE: '-r all', '-r client-error', or '-r server-error' (default: None (only download new video info)).",
            metavar="RESPONSE_CODE"),
        make_option(
            '-d', '--date-since-attempt',
            action='store',
            dest='date_since_attempt',
            default=None,
            help="Setting a date flag will update only those entries which have not been attempted since that date. Can be combined with -r. This could potentially be useful for updating old subtitles. USAGE: '-d MM/DD/YYYY'"),
        make_option(
            '-y', '--days-since-attempt',
            action='store',
            dest='days_since_attempt',
            # BUGFIX: without type='int', a user-supplied "-y 2" arrived as
            # the string "2" and datetime.timedelta(days="2") raised TypeError.
            type='int',
            default=1,
            help="Setting # of days since last attempt; will compute date. USAGE: '-y 1'"),
    )

    def handle(self, *args, **options):
        if not settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the central server.")

        # Set up the refresh date: when no explicit date was given, derive
        # one from the days-since-attempt window.
        if not options["date_since_attempt"]:
            date_since_attempt = datetime.datetime.now() - datetime.timedelta(
                days=options["days_since_attempt"])
            options["date_since_attempt"] = date_since_attempt.strftime("%m/%d/%Y")
        converted_date = convert_date_input(options.get("date_since_attempt"))

        updated_mappings = create_all_mappings(
            force=options.get("force"),
            frequency_to_save=5,
            response_to_check=options.get("response_code"),
            date_to_check=converted_date)
        logging.info(
            "Executed successfully. Updating language => subtitle mapping to record any changes!"
        )

        if updated_mappings:
            language_srt_map = update_language_srt_map()
            print_language_availability_table(language_srt_map)

        logging.info("Process complete.")
def update_language_srt_map(map_file=SRTS_JSON_FILEPATH):
    """Translate the srts_remote_availability dictionary into language specific
    files that can be used by the cache_subtitles command.

    Note: srt map deals with amara, so uses ietf codes (e.g. en-us)

    Returns:
        dict mapping lang_code -> {youtube_id: download-status dict}.
    """
    # Load the current download status
    api_info_map = softload_json(map_file, logger=logging.warn)

    # Next we want to iterate through those and create a big srt dictionary
    # organized by language code
    remote_availability_map = {}
    for youtube_id, data in api_info_map.items():
        languages = data.get("language_codes", [])
        for lang_code in languages:
            lang_code = lcode_to_ietf(lang_code)
            if lang_code not in remote_availability_map:
                remote_availability_map[lang_code] = {}
            # Placeholder entry; merged with on-disk data further below.
            remote_availability_map[lang_code][youtube_id] = {
                "downloaded": False,
                "api_response": "",
                "last_attempt": "",
                "last_success": "",
            }

    # Iterate through that dictionary and create individual files for each
    # language code.
    # BUGFIX: lang_map_filepath was unbound (NameError) in the cleanup step
    # below whenever remote_availability_map was empty; initialize it here.
    lang_map_filepath = None
    for lang_code, new_data in remote_availability_map.items():
        # Try to open previous language file
        lang_map_filepath = get_lang_map_filepath(lang_code)
        if os.path.exists(lang_map_filepath):
            lang_map = softload_json(lang_map_filepath, logger=logging.error)
        else:
            lang_map = {}

        # First, check to see if it's empty (e.g. no subtitles available for
        # any videos)
        if not new_data:
            logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
            if os.path.exists(lang_map_filepath):
                os.remove(lang_map_filepath)
            continue

        # Merge: add IDs Amara now reports, drop IDs it no longer reports.
        # (The unused, mis-named old_yt_ids/new_yt_ids locals were removed.)
        yt_ids_to_add = set(new_data.keys()) - set(lang_map.keys())
        yt_ids_to_delete = set(lang_map.keys()) - set(new_data.keys())
        if yt_ids_to_add:
            logging.info("Adding %d new YouTube IDs to language (%s)" % (len(yt_ids_to_add), lang_code))
            for yt_id in yt_ids_to_add:
                lang_map[yt_id] = new_data.get(yt_id)
        if yt_ids_to_delete:
            logging.info(
                "Deleting %d old YouTube IDs from language (%s) because they are no longer supported."
                % (len(yt_ids_to_delete), lang_code))
            for yt_id in yt_ids_to_delete:
                lang_map.pop(yt_id, None)

        # Write the new file to the correct location
        logging.debug("Writing %s" % lang_map_filepath)
        ensure_dir(os.path.dirname(lang_map_filepath))
        with open(lang_map_filepath, 'w') as outfile:
            json.dump(lang_map, outfile)

        # Update the big mapping with the most accurate numbers
        remote_availability_map[lang_code].update(lang_map)

    # Finally, remove any files not found in the current map at all.
    if lang_map_filepath:
        for filename in os.listdir(os.path.dirname(lang_map_filepath)):
            # NOTE(review): assumes file names start with "<lang_code>_" and
            # the code itself contains no underscore -- confirm naming scheme.
            # (Also renamed from lang_code, which clobbered the loop variable
            # above, and dropped the duplicated `lang_code = lang_code =`.)
            file_lang_code = filename.split("_")[0]
            if file_lang_code not in remote_availability_map:
                file_to_remove = get_lang_map_filepath(file_lang_code)
                logging.info("Subtitle support for %s has been terminated; removing." % file_lang_code)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                else:
                    logging.warn(
                        "Subtitles metadata for %s not found; skipping deletion of non-existent file %s."
                        % (file_lang_code, file_to_remove))

    return remote_availability_map
def update_language_srt_map(map_file=SRTS_JSON_FILEPATH):
    """Translate the srts_remote_availability dictionary into language specific
    files that can be used by the cache_subtitles command.

    Note: srt map deals with amara, so uses ietf codes (e.g. en-us)

    NOTE(review): this is a verbatim duplicate of an identical definition
    earlier in this file; Python keeps only this later one at import time.
    The two copies should be consolidated.

    Returns:
        dict mapping lang_code -> {youtube_id: download-status dict}.
    """
    # Load the current download status
    api_info_map = softload_json(map_file, logger=logging.warn)

    # Next we want to iterate through those and create a big srt dictionary
    # organized by language code
    remote_availability_map = {}
    for youtube_id, data in api_info_map.items():
        languages = data.get("language_codes", [])
        for lang_code in languages:
            lang_code = lcode_to_ietf(lang_code)
            if lang_code not in remote_availability_map:
                remote_availability_map[lang_code] = {}
            # Placeholder entry; merged with on-disk data further below.
            remote_availability_map[lang_code][youtube_id] = {
                "downloaded": False,
                "api_response": "",
                "last_attempt": "",
                "last_success": "",
            }

    # Iterate through that dictionary and create individual files for each
    # language code.
    # BUGFIX: lang_map_filepath was unbound (NameError) in the cleanup step
    # below whenever remote_availability_map was empty; initialize it here.
    lang_map_filepath = None
    for lang_code, new_data in remote_availability_map.items():
        # Try to open previous language file
        lang_map_filepath = get_lang_map_filepath(lang_code)
        if os.path.exists(lang_map_filepath):
            lang_map = softload_json(lang_map_filepath, logger=logging.error)
        else:
            lang_map = {}

        # First, check to see if it's empty (e.g. no subtitles available for
        # any videos)
        if not new_data:
            logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
            if os.path.exists(lang_map_filepath):
                os.remove(lang_map_filepath)
            continue

        # Merge: add IDs Amara now reports, drop IDs it no longer reports.
        # (The unused, mis-named old_yt_ids/new_yt_ids locals were removed.)
        yt_ids_to_add = set(new_data.keys()) - set(lang_map.keys())
        yt_ids_to_delete = set(lang_map.keys()) - set(new_data.keys())
        if yt_ids_to_add:
            logging.info("Adding %d new YouTube IDs to language (%s)" % (len(yt_ids_to_add), lang_code))
            for yt_id in yt_ids_to_add:
                lang_map[yt_id] = new_data.get(yt_id)
        if yt_ids_to_delete:
            logging.info(
                "Deleting %d old YouTube IDs from language (%s) because they are no longer supported."
                % (len(yt_ids_to_delete), lang_code)
            )
            for yt_id in yt_ids_to_delete:
                lang_map.pop(yt_id, None)

        # Write the new file to the correct location
        logging.debug("Writing %s" % lang_map_filepath)
        ensure_dir(os.path.dirname(lang_map_filepath))
        with open(lang_map_filepath, "w") as outfile:
            json.dump(lang_map, outfile)

        # Update the big mapping with the most accurate numbers
        remote_availability_map[lang_code].update(lang_map)

    # Finally, remove any files not found in the current map at all.
    if lang_map_filepath:
        for filename in os.listdir(os.path.dirname(lang_map_filepath)):
            # NOTE(review): assumes file names start with "<lang_code>_" and
            # the code itself contains no underscore -- confirm naming scheme.
            # (Also renamed from lang_code, which clobbered the loop variable
            # above, and dropped the duplicated `lang_code = lang_code =`.)
            file_lang_code = filename.split("_")[0]
            if file_lang_code not in remote_availability_map:
                file_to_remove = get_lang_map_filepath(file_lang_code)
                logging.info("Subtitle support for %s has been terminated; removing." % file_lang_code)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                else:
                    logging.warn(
                        "Subtitles metadata for %s not found; skipping deletion of non-existent file %s."
                        % (file_lang_code, file_to_remove)
                    )

    return remote_availability_map