def languages(request):
    if "default_language" not in request.session:
        return {}  # temporarily skipped middleware, but we'll get back here again.  Tricky Django...
    return {
        "default_language": lcode_to_ietf(request.session["default_language"]),
        "language_choices": request.session["language_choices"],
        "current_language": lcode_to_ietf(request.session["django_language"]),
    }
def handle(self, *args, **options):
    if not settings.CENTRAL_SERVER:
        raise CommandError("This must only be run on the central server.")

    # None represents all languages
    lang_codes = [lcode_to_ietf(options["lang_code"])] if options["lang_code"] else None
    del options["lang_code"]

    if len(args) == 0:
        validate_language_map(lang_codes)

        logging.info("Downloading...")
        download_srt_from_3rd_party(lang_codes=lang_codes, **options)

        validate_language_map(lang_codes)  # run again at the end, so the output is visible

    elif len(args) > 1:
        raise CommandError("Max 1 argument allowed.")

    elif args[0] == "clear":
        logging.info("Clearing subtitles...")
        clear_subtitles_cache(lang_codes)

    else:
        raise CommandError("Unknown argument: %s" % args[0])

    logging.info("Process complete.")
def get_all_prepped_lang_codes():
    """Pre-prepped language codes, for downloading srts"""
    lang_codes = []
    for filename in get_all_download_status_files():
        lang_code = os.path.basename(filename).split("_")[0]
        lang_codes.append(lcode_to_ietf(lang_code))
    return lang_codes
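# A hedged usage sketch for the helper above: it relies on download-status
# filenames beginning with the language code followed by an underscore.  The
# filename below is illustrative, not a real file from this codebase.
def _demo_prepped_lang_code_parsing():
    import os
    filename = "/path/to/status/pt-BR_download_status.json"  # hypothetical name
    assert os.path.basename(filename).split("_")[0] == "pt-BR"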
def handle_po_compile_errors(lang_codes=None, out=None, err=None, rc=None):
    """
    Return a list of languages that should not be rezipped, because their po
    files failed to compile.  Then email the errors to the admins.
    """
    broken_codes = re.findall(r'(?<=ka-lite/locale/)\w+(?=/LC_MESSAGES)', err) or []

    if lang_codes:
        # Only show the errors relevant to the list of language codes passed in.
        lang_codes = set([lcode_to_django_dir(lc) for lc in lang_codes])
        broken_codes = list(set(broken_codes).intersection(lang_codes))

    if broken_codes:
        logging.warning("Found %d errors while compiling in codes %s. Mailing admins report now." % (len(broken_codes), ', '.join(broken_codes)))
        subject = "Error while compiling po files"
        commands = "\n".join(["python manage.py compilemessages -l %s" % lc for lc in broken_codes])
        message = """The following codes had errors when compiling their po files: %s.
Please rerun the following commands to see the specific line numbers
that need to be corrected on CrowdIn, before we can update the language packs.
%s""" % (
            ', '.join([lcode_to_ietf(lc) for lc in broken_codes]),
            commands,
        )
        if not settings.DEBUG:
            mail_admins(subject=subject, message=message)
            logging.info("Report sent.")
        else:
            logging.info("DEBUG is True so not sending email, but would have sent the following:\nSUBJECT: %s\nMESSAGE: %s" % (subject, message))

    return broken_codes
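# A minimal sketch (not output from a real run) of how the regex above pulls
# broken language codes out of `compilemessages` stderr; the sample path and
# error text are illustrative.
def _demo_broken_code_regex():
    import re
    err = "/home/user/ka-lite/locale/pt_BR/LC_MESSAGES/django.po:140: duplicate message definition"
    assert re.findall(r'(?<=ka-lite/locale/)\w+(?=/LC_MESSAGES)', err) == ["pt_BR"]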
def zip_language_packs(lang_codes=None):
    """Zip up and expose all language packs.

    Converts all codes to ietf.
    """
    lang_codes = lang_codes or os.listdir(LOCALE_ROOT)
    lang_codes = [lcode_to_ietf(lc) for lc in lang_codes]
    logging.info("Zipping up %d language pack(s)" % len(lang_codes))

    for lang_code_ietf in lang_codes:
        lang_code_django = lcode_to_django_dir(lang_code_ietf)
        lang_locale_path = os.path.join(LOCALE_ROOT, lang_code_django)

        if not os.path.exists(lang_locale_path):
            logging.warn("Unexpectedly skipping missing directory: %s" % lang_code_django)
            continue  # actually skip, as the log message promises
        elif not os.path.isdir(lang_locale_path):
            logging.error("Skipping language where a file exists where a directory was expected: %s" % lang_code_django)
            continue

        # Create a zipfile for this language
        zip_filepath = get_language_pack_filepath(lang_code_ietf)
        ensure_dir(os.path.dirname(zip_filepath))
        logging.info("Creating zip file in %s" % zip_filepath)
        z = zipfile.ZipFile(zip_filepath, 'w', zipfile.ZIP_DEFLATED)

        # Zip up every metadata file in the locale directory
        # (glob already returns the full path, so no need to rejoin)
        for metadata_filepath in glob.glob('%s/*.json' % lang_locale_path):
            z.write(metadata_filepath, arcname=os.path.basename(metadata_filepath))

        # ... and every srt for the language
        srt_dirpath = get_srt_path(lang_code_django)
        for srt_file in glob.glob(os.path.join(srt_dirpath, "*.srt")):
            z.write(srt_file, arcname=os.path.join("subtitles", os.path.basename(srt_file)))
        z.close()
    logging.info("Done.")
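# For reference, a hedged sketch of the pack layout produced above, inferred
# from the arcnames passed to z.write():
#
#   <lang_code>.zip
#       <name>.json                  # each *.json from the locale directory
#       subtitles/<youtube_id>.srt   # each srt for the language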
def download_latest_translations(project_id=settings.CROWDIN_PROJECT_ID,
                                 project_key=settings.CROWDIN_PROJECT_KEY,
                                 lang_code="all",
                                 zip_file=None,
                                 combine_with_po_file=None,
                                 rebuild=True):
    """
    Download the latest translations from CrowdIn to the corresponding locale directory.

    If zip_file is given, use that as the zip file instead of going through CrowdIn.
    """
    lang_code = lcode_to_ietf(lang_code)

    # Get the zip file of translations
    if zip_file and os.path.exists(zip_file):
        logging.info("Using local zip file at %s" % zip_file)
        z = zipfile.ZipFile(zip_file)
        # use the name of the zip file to infer the language code, if needed
        lang_code = lang_code or os.path.splitext(os.path.basename(zip_file))[0]

    else:
        # Tell CrowdIn to build the latest package
        if rebuild:
            build_translations()

        logging.info("Attempting to download a zip archive of current translations")
        request_url = "http://api.crowdin.net/api/project/%s/download/%s.zip?key=%s" % (project_id, lang_code, project_key)
        resp = None  # so the except block below can't hit a NameError if the GET itself fails
        try:
            resp = requests.get(request_url)
            resp.raise_for_status()
        except Exception as e:
            if resp is not None and resp.status_code == 404:
                logging.info("No translations found for language %s" % lang_code)
                return None  # no translations
            elif resp is not None and resp.status_code == 401:
                raise CommandError("401 Unauthorized while trying to access the CrowdIn API. Be sure to set CROWDIN_PROJECT_ID and CROWDIN_PROJECT_KEY in local_settings.py.")
            else:
                raise CommandError("%s - couldn't connect to CrowdIn API - cannot continue without downloading %s!" % (e, request_url))
        else:
            logging.info("Successfully downloaded zip archive")

        # Unpack into a temp dir
        z = zipfile.ZipFile(StringIO.StringIO(resp.content))

        if zip_file:
            with open(zip_file, "wb") as fp:  # save the zip file
                fp.write(resp.content)

    tmp_dir_path = tempfile.mkdtemp()
    z.extractall(tmp_dir_path)

    # Copy over new translations
    po_file = extract_new_po(tmp_dir_path, combine_with_po_file=combine_with_po_file, lang=lang_code)

    # Clean up after ourselves
    if os.path.exists(tmp_dir_path):
        shutil.rmtree(tmp_dir_path)

    return po_file
def handle(self, *args, **options):
    if settings.CENTRAL_SERVER:
        raise CommandError("This must only be run on distributed servers.")

    lang_code = lcode_to_ietf(options["lang_code"])
    software_version = options["software_version"]

    # Download the language pack
    try:
        self.start("Downloading language pack '%s'" % lang_code)
        zip_file = get_language_pack(lang_code, software_version)

        # Unpack into the locale directory
        self.next_stage("Unpacking language pack '%s'" % lang_code)
        unpack_language(lang_code, zip_file)

        # Update the database with meta info
        self.next_stage("Updating database for language pack '%s'" % lang_code)
        update_database(lang_code)

        # Create static files
        self.next_stage("Creating static files for language pack '%s'" % lang_code)
        update_jsi18n_file(lang_code)

        # move_srts(lang_code)

        self.complete("Finished processing language pack %s" % lang_code)
    except Exception as e:
        self.cancel(stage_status="error", notes="Error: %s" % e)
        raise
def move_srts(lang_code):
    """
    Srts live in the locale directory, but that's not exposed at any URL.  So instead,
    we have to move the srts out to /static/subtitles/[lang_code]/
    """
    lang_code_ietf = lcode_to_ietf(lang_code)
    lang_code_django = lcode_to_django_dir(lang_code)

    subtitles_static_dir = os.path.join(settings.STATIC_ROOT, "subtitles")
    src_dir = os.path.join(LOCALE_ROOT, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" % (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            os.remove(srt_dest_path)
        shutil.move(fil, srt_dest_path)

    if os.listdir(src_dir):
        logging.warn("%s is not empty; will not remove.  Please check that all subtitles were moved." % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
def clear_subtitles_cache(lang_codes=None, locale_root=LOCALE_ROOT):
    """
    Language codes will be converted to IETF format (e.g. en-US)
    """
    lang_codes = lang_codes or os.listdir(locale_root)

    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        # Clear the status file
        lm_file = get_lang_map_filepath(lang_code)
        with open(lm_file, "r") as fp:
            download_status = json.load(fp)
        for key in download_status:
            download_status[key] = {
                u"downloaded": False,
                u"last_success": u"",
                u"last_attempt": u"",
                u"api_response": u"",
            }
        with open(lm_file, "w") as fp:
            json.dump(download_status, fp)

        # Delete all srt files
        srt_path = get_srt_path(lang_code)
        if os.path.exists(srt_path):
            shutil.rmtree(srt_path)
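# For reference, a hedged sketch of the per-language download-status file that
# the loop above resets.  The field names come from the reset dict; the
# youtube id and values are illustrative.
_EXAMPLE_DOWNLOAD_STATUS = {
    "hypothetical_yt_id": {
        "downloaded": True,
        "last_success": "2014-01-01 00:00:00",
        "last_attempt": "2014-01-02 00:00:00",
        "api_response": "success",
    },
}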
def download_srt_from_3rd_party(lang_codes=None, **kwargs):
    """Download subtitles specified by command line args"""

    lang_codes = lang_codes or get_all_prepped_lang_codes()
    bad_languages = {}

    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        try:
            lang_map_filepath = get_lang_map_filepath(lang_code)
            if not os.path.exists(lang_map_filepath):
                videos = {}  # happens if no subtitles are known for this language
            else:
                with open(lang_map_filepath, "r") as fp:
                    videos = json.load(fp)
        except Exception as e:
            error_msg = "Error in subtitles metadata file for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

        try:
            download_if_criteria_met(videos, lang_code=lang_code, **kwargs)
        except Exception as e:
            error_msg = "Error downloading subtitles for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

    # now report the final results
    if bad_languages:
        outstr = "Failed to download subtitles for the following languages: %s" % (bad_languages.keys())
        outstr += "\n" + str(bad_languages)
        logging.error(outstr)
def set_language(self, request, cur_lang):
    # each request can get the language from the querystring, or from the currently set session language
    old_lang = request.session.get("django_language", "")
    if cur_lang != old_lang:
        logging.debug("setting session language to %s" % cur_lang)

    # Set the two variables we care most about
    request.session["django_language"] = cur_lang
    request.session["default_language"] = cur_lang
    request.language = lcode_to_ietf(cur_lang)
def get_language_pack(lang_code, software_version):
    """Download language pack for the specified language"""

    lang_code = lcode_to_ietf(lang_code)
    logging.info("Retrieving language pack: %s" % lang_code)
    request_url = get_language_pack_url(lang_code, software_version)
    r = requests.get(request_url)
    try:
        r.raise_for_status()
    except Exception as e:
        raise CommandError(e)

    return r.content
def update_database(lang_code):
    """Create/update the LanguagePack table in the database, based on the given language's metadata"""

    lang_code = lcode_to_ietf(lang_code)
    with open(get_language_pack_metadata_filepath(lang_code)) as fp:
        metadata = json.load(fp)

    logging.info("Updating database for language pack: %s" % lang_code)

    pack = get_object_or_None(LanguagePack, code=lang_code) or LanguagePack(code=lang_code)
    for key, value in metadata.iteritems():
        setattr(pack, key, value)
    pack.save()

    logging.info("Successfully updated database.")
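# A hedged sketch of the metadata consumed above.  The keys mirror those
# written by generate_metadata() further down; all values are illustrative.
_EXAMPLE_PACK_METADATA = {
    "code": "pt-BR",
    "name": "Portuguese (Brazilian)",
    "percent_translated": 87,
    "phrases": 5000,
    "approved_translations": 4350,
    "software_version": "0.10.3",
    "subtitle_count": 1234,
    "language_pack_version": 2,
}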
def update_language_packs(lang_codes=None, download_ka_translations=True, zip_file=None, ka_zip_file=None, use_local=False):

    package_metadata = {}  # per-language summary info, keyed by ietf code

    # Loop through new UI translations & subtitles, create/update unified meta data
    generate_metadata(lang_codes=lang_codes)

    if use_local:
        for lang_code in lang_codes:
            lang_code = lcode_to_ietf(lang_code)
            package_metadata[lang_code] = {}
            combined_po_file = os.path.join(LOCALE_ROOT, lcode_to_django_dir(lang_code), "LC_MESSAGES", "django.po")
            combined_metadata = get_po_metadata(combined_po_file)
            package_metadata[lang_code]["approved_translations"] = combined_metadata["approved_translations"]
            package_metadata[lang_code]["phrases"] = combined_metadata["phrases"]

    else:
        logging.info("Downloading latest translations for language(s): %s" % lang_codes)

    # Zip
    zip_language_packs(lang_codes=lang_codes)
def handle(self, *args, **options):
    if settings.CENTRAL_SERVER:
        raise CommandError("This must only be run on the distributed server.")
    if not options["lang_code"]:
        raise CommandError("You must specify a language code.")

    # Get the list of exercises
    lang_code = lcode_to_ietf(options["lang_code"])
    exercise_ids = options["exercise_ids"].split(",") if options["exercise_ids"] else None
    exercise_ids = exercise_ids or ([ex["id"] for ex in get_topic_exercises(topic_id=options["topic_id"])] if options["topic_id"] else None)
    exercise_ids = exercise_ids or get_node_cache("Exercise").keys()

    # Download the exercises
    for exercise_id in exercise_ids:
        scrape_exercise(exercise_id=exercise_id, lang_code=lang_code, force=options["force"])

    logging.info("Process complete.")
def validate_language_map(lang_codes):
    """
    This function will tell you about any blockers you'll hit while running this command.

    All srt languages must exist in the language map; missing languages will cause
    errors during the (potentially long-running) command.  Checking up front,
    via this consistency check, avoids that problem.
    """
    lang_codes = lang_codes or get_all_prepped_lang_codes()
    missing_langs = []
    for lang_code in lang_codes:
        try:
            get_language_name(lcode_to_ietf(lang_code), error_on_missing=True)
        except LanguageNotFoundError:
            missing_langs.append(lang_code)

    if missing_langs:
        logging.warn("Please add the following language codes to %s:\n\t%s" % (LANG_LOOKUP_FILEPATH, missing_langs))
def handle(self, *args, **options):
    if settings.CENTRAL_SERVER:
        raise CommandError("This must only be run on the distributed server.")
    if not options["lang_code"]:
        raise CommandError("You must specify a language code.")

    # Get the list of videos
    lang_code = lcode_to_ietf(options["lang_code"])
    video_map = get_dubbed_video_map(lang_code) or {}
    video_ids = options["video_ids"].split(",") if options["video_ids"] else None
    video_ids = video_ids or ([vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])] if options["topic_id"] else None)
    video_ids = video_ids or video_map.keys()

    # Download the videos
    for video_id in video_ids:
        if video_id in video_map:
            youtube_id = video_map[video_id]
        elif video_id in video_map.values():
            # Perhaps they sent in a youtube ID?  We can handle that!
            youtube_id = video_id
        else:
            logging.error("No mapping for video_id=%s; skipping" % video_id)
            continue

        try:
            scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
            # scrape_thumbnail(youtube_id=youtube_id)
            logging.info("Access video %s at %s" % (youtube_id, get_node_cache("Video")[video_id][0]["path"]))
        except Exception as e:
            logging.error("Failed to download video %s: %s" % (youtube_id, e))

    logging.info("Process complete.")
def generate_metadata(lang_codes=None, broken_langs=None, added_ka=False):
    """Loop through the locale folder, create or update language-specific metadata,
    and create or update the master file, skipping broken languages.

    Note: broken_langs must be in django format.
    """
    logging.info("Generating new language pack metadata")

    if broken_langs is None:
        broken_langs = tuple()
    lang_codes = lang_codes or os.listdir(LOCALE_ROOT)

    try:
        with open(get_language_pack_availability_filepath(), "r") as fp:
            master_metadata = json.load(fp)
        if isinstance(master_metadata, list):
            logging.info("Code switched from list to dict to support single-language LanguagePack updates; converting your old list storage to dictionary storage.")
            master_list = master_metadata
            master_metadata = {}
            for lang_meta in master_list:
                master_metadata[lang_meta["code"]] = lang_meta
    except Exception as e:
        logging.warn("Error opening language pack metadata: %s; resetting" % e)
        master_metadata = {}

    # loop through all languages in locale, update the master file
    crowdin_meta_dict = download_crowdin_metadata()
    with open(SUBTITLE_COUNTS_FILEPATH, "r") as fp:
        subtitle_counts = json.load(fp)

    for lc in lang_codes:
        lang_code_django = lcode_to_django_dir(lc)
        lang_code_ietf = lcode_to_ietf(lc)
        lang_name = get_language_name(lang_code_ietf)

        # skip anything that is not a directory, or that has errors
        if not os.path.isdir(os.path.join(LOCALE_ROOT, lang_code_django)):
            logging.info("Skipping item %s because it is not a directory" % lang_code_django)
            continue
        elif lang_code_django in broken_langs:  # broken_langs is in django format
            logging.info("Skipping directory %s because it triggered an error during compilemessages. The admins should have received a report about this and must fix it before this pack will be updated." % lang_code_django)
            continue

        # Gather existing metadata
        crowdin_meta = next((meta for meta in crowdin_meta_dict if meta["code"] == lang_code_ietf), {})
        metadata_filepath = get_language_pack_metadata_filepath(lang_code_ietf)
        try:
            with open(metadata_filepath) as fp:
                local_meta = json.load(fp)
        except Exception as e:
            logging.warn("Error opening language pack metadata (%s): %s; resetting" % (metadata_filepath, e))
            local_meta = {}

        try:
            # update metadata
            updated_meta = {
                "code": lcode_to_ietf(crowdin_meta.get("code") or lang_code_django),  # user-facing code
                "name": (crowdin_meta.get("name") or lang_name),
                "percent_translated": int(crowdin_meta.get("approved_progress", 0)),
                "phrases": int(crowdin_meta.get("phrases", 0)),
                "approved_translations": int(crowdin_meta.get("approved", 0)),
            }

            # Obtain the current number of subtitles
            entry = subtitle_counts.get(lang_name, {})
            srt_count = entry.get("count", 0)

            updated_meta.update({
                "software_version": version.VERSION,
                "subtitle_count": srt_count,
            })

        except LanguageNotFoundError:
            logging.error("Unrecognized language; must skip item %s" % lang_code_django)
            continue

        language_pack_version = increment_language_pack_version(local_meta, updated_meta)
        updated_meta["language_pack_version"] = language_pack_version + int(added_ka)
        local_meta.update(updated_meta)

        # Write locally (this is used on download by the distributed server to update its database)
        with open(metadata_filepath, 'w') as output:
            json.dump(local_meta, output)

        # Update the master (this is used by the central server to handle API requests for data)
        master_metadata[lang_code_ietf] = local_meta

    # Save the updated master
    ensure_dir(os.path.dirname(get_language_pack_availability_filepath()))
    with open(get_language_pack_availability_filepath(), 'w') as output:
        json.dump(master_metadata, output)
    logging.info("Local record of translations updated")
def update_language_srt_map(map_file=SRTS_JSON_FILEPATH):
    """
    Translate the srts_remote_availability dictionary into language-specific
    files that can be used by the cache_subtitles command.

    Note: the srt map deals with amara, so it uses ietf codes (e.g. en-us)
    """
    # Load the current download status
    try:
        with open(map_file) as fp:
            api_info_map = json.load(fp)
    except Exception as e:
        # Must be corrupted; start from scratch!
        logging.warn("Could not open %s for updates; starting from scratch. Error=%s" % (map_file, e))
        api_info_map = {}

    # Next, iterate through the map and create a big srt dictionary, organized by language code
    remote_availability_map = {}
    for youtube_id, data in api_info_map.items():
        languages = data.get("language_codes", [])
        for lang_code in languages:
            lang_code = lcode_to_ietf(lang_code)

            if lang_code not in remote_availability_map:
                #logging.info("Creating language section '%s'" % lang_code)
                remote_availability_map[lang_code] = {}

            # This entry will be valid if it's new, otherwise it will be overwritten later
            remote_availability_map[lang_code][youtube_id] = {
                "downloaded": False,
                "api_response": "",
                "last_attempt": "",
                "last_success": "",
            }

    # Finally, iterate through that dictionary and create individual files for each language code
    lang_map_filepath = None
    for lang_code, new_data in remote_availability_map.items():

        # Try to open the previous language file
        lang_map_filepath = get_lang_map_filepath(lang_code)
        if not os.path.exists(lang_map_filepath):
            lang_map = {}
        else:
            try:
                with open(lang_map_filepath, "r") as fp:
                    lang_map = json.load(fp)
            except Exception as e:
                logging.error("Language download status mapping for (%s) is corrupted (%s), rewriting it." % (lang_code, e))
                lang_map = {}

        # First, check to see if it's empty (e.g. no subtitles available for any videos)
        if not new_data:
            logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
            if os.path.exists(lang_map_filepath):
                os.remove(lang_map_filepath)
            continue

        # Compare the old and new youtube-id sets, and sync the local map with the remote one
        old_yt_ids = set(lang_map.keys())
        new_yt_ids = set(new_data.keys())
        yt_ids_to_add = new_yt_ids - old_yt_ids
        yt_ids_to_delete = old_yt_ids - new_yt_ids

        if yt_ids_to_add:
            logging.info("Adding %d new YouTube IDs to language (%s)" % (len(yt_ids_to_add), lang_code))
            for yt_id in yt_ids_to_add:
                lang_map[yt_id] = new_data.get(yt_id)

        if yt_ids_to_delete:
            logging.info("Deleting %d old YouTube IDs from language (%s) because they are no longer supported." % (len(yt_ids_to_delete), lang_code))
            for yt_id in yt_ids_to_delete:
                lang_map.pop(yt_id, None)

        # Write the new file to the correct location
        logging.debug("Writing %s" % lang_map_filepath)
        ensure_dir(os.path.dirname(lang_map_filepath))
        with open(lang_map_filepath, 'w') as outfile:
            json.dump(lang_map, outfile)

        # Update the big mapping with the most accurate numbers
        remote_availability_map[lang_code].update(lang_map)

    # Finally, remove any files for languages not found in the current map at all.
    if lang_map_filepath:  # i.e. the loop above ran at least once
        for filename in os.listdir(os.path.dirname(lang_map_filepath)):
            lang_code = filename.split("_")[0]
            if lang_code not in remote_availability_map:
                file_to_remove = get_lang_map_filepath(lang_code)
                logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                else:
                    logging.warn("Could not find subtitle metadata file %s; nothing to remove." % file_to_remove)

    return remote_availability_map
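# A hedged sketch of the shape transformation performed above: the master map
# goes from youtube_id -> {"language_codes": [...]} to
# lang_code -> {youtube_id: status_dict}.  Sample data is illustrative.
def _demo_invert_srt_map():
    api_info_map = {"hypothetical_yt_id": {"language_codes": ["pt-BR", "es"]}}
    inverted = {}
    for youtube_id, data in api_info_map.items():
        for lang_code in data.get("language_codes", []):
            inverted.setdefault(lang_code, {})[youtube_id] = {"downloaded": False}
    assert sorted(inverted.keys()) == ["es", "pt-BR"]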