コード例 #1
0
def languages(request):
    if "default_language" not in request.session:
        return {}  # temporarily skipped middleware, but we'll get back here again.  Tricky Django...

    return {
        "default_language": lcode_to_ietf(request.session["default_language"]),
        "language_choices": request.session["language_choices"],
        "current_language": lcode_to_ietf(request.session["django_language"]),
    }
コード例 #2
0
    def handle(self, *args, **options):
        if not settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the central server.")

        # None represents all
        lang_codes = [lcode_to_ietf(options["lang_code"])] if options["lang_code"] else None
        del options["lang_code"]

        if len(args) == 0:
            validate_language_map(lang_codes)

            logging.info("Downloading...")
            download_srt_from_3rd_party(lang_codes=lang_codes, **options)

            validate_language_map(lang_codes)  # again at the end, so output is visible

        elif len(args) > 1:
            raise CommandError("Max 1 arg")

        elif args[0] == "clear":
            logging.info("Clearing subtitles...")
            clear_subtitles_cache(lang_codes)

        else:
            raise CommandError("Unknown argument: %s" % args[0])

        logging.info("Process complete.")
コード例 #3
0
def get_all_prepped_lang_codes():
    """Pre-prepped language codes, for downloading srts"""
    lang_codes = []
    for filename in get_all_download_status_files():
        lang_code = os.path.basename(filename).split("_")[0]
        lang_codes.append(lcode_to_ietf(lang_code))
    return lang_codes
コード例 #4
0
def handle_po_compile_errors(lang_codes=None, out=None, err=None, rc=None):
    """
    Return list of languages to not rezip due to errors in compile process.
    Then email admins errors.
    """

    broken_codes = re.findall(r'(?<=ka-lite/locale/)\w+(?=/LC_MESSAGES)', err) or []

    if lang_codes:
        # Only show the errors relevant to the list of language codes passed in.
        lang_codes = set([lcode_to_django_dir(lc) for lc in lang_codes])
        broken_codes = list(set(broken_codes).intersection(lang_codes))

    if broken_codes:
        logging.warning("Found %d errors while compiling in codes %s. Mailing admins report now."  % (len(broken_codes), ', '.join(broken_codes)))
        subject = "Error while compiling po files"
        commands = "\n".join(["python manage.py compilemessages -l %s" % lc for lc in broken_codes])
        message =  """The following codes had errors when compiling their po files: %s.
                   Please rerun the following commands to see specific line numbers
                   that need to be corrected on CrowdIn, before we can update the language packs.
                   %s""" % (
            ', '.join([lcode_to_ietf(lc) for lc in broken_codes]),
            commands,
        )
        if not settings.DEBUG:
            mail_admins(subject=subject, message=message)
            logging.info("Report sent.")
        else:
            logging.info("DEBUG is True so not sending email, but would have sent the following: SUBJECT: %s; MESSAGE: %s"  % (subject, message))

    return broken_codes
コード例 #5
0
def zip_language_packs(lang_codes=None):
    """Zip up and expose all language packs

    converts all into ietf
    """

    lang_codes = lang_codes or os.listdir(LOCALE_ROOT)
    lang_codes = [lcode_to_ietf(lc) for lc in lang_codes]
    logging.info("Zipping up %d language pack(s)" % len(lang_codes))

    for lang_code_ietf in lang_codes:
        lang_code_django = lcode_to_django_dir(lang_code_ietf)
        lang_locale_path = os.path.join(LOCALE_ROOT, lang_code_django)

        if not os.path.exists(lang_locale_path):
            logging.warn("Unexpectedly skipping missing directory: %s" % lang_code_django)
        elif not os.path.isdir(lang_locale_path):
            logging.error("Skipping language where a file exists where a directory was expected: %s" % lang_code_django)

        # Create a zipfile for this language
        zip_filepath = get_language_pack_filepath(lang_code_ietf)
        ensure_dir(os.path.dirname(zip_filepath))
        logging.info("Creating zip file in %s" % zip_filepath)
        z = zipfile.ZipFile(zip_filepath, 'w', zipfile.ZIP_DEFLATED)

        # Get every single file in the directory and zip it up
        for metadata_file in glob.glob('%s/*.json' % lang_locale_path):
            z.write(os.path.join(lang_locale_path, metadata_file), arcname=os.path.basename(metadata_file))

        srt_dirpath = get_srt_path(lang_code_django)
        for srt_file in glob.glob(os.path.join(srt_dirpath, "*.srt")):
            z.write(srt_file, arcname=os.path.join("subtitles", os.path.basename(srt_file)))
        z.close()
    logging.info("Done.")
コード例 #6
0
def download_latest_translations(project_id=settings.CROWDIN_PROJECT_ID,
                                 project_key=settings.CROWDIN_PROJECT_KEY,
                                 lang_code="all",
                                 zip_file=None,
                                 combine_with_po_file=None,
                                 rebuild=True):
    """
    Download latest translations from CrowdIn to corresponding locale
    directory. If zip_file is given, use that as the zip file
    instead of going through CrowdIn.

    """
    lang_code = lcode_to_ietf(lang_code)

    # Get zip file of translations
    if zip_file and os.path.exists(zip_file):
        logging.info("Using local zip file at %s" % zip_file)
        z = zipfile.ZipFile(zip_file)
        # use the name of the zip file to infer the language code, if needed
        lang_code = lang_code or os.path.splitext(os.path.basename(zip_file))[0]

    else:
        # Tell CrowdIn to Build latest package
        if rebuild:
            build_translations()

        logging.info("Attempting to download a zip archive of current translations")
        request_url = "http://api.crowdin.net/api/project/%s/download/%s.zip?key=%s" % (project_id, lang_code, project_key)
        try:
            resp = requests.get(request_url)
            resp.raise_for_status()
        except Exception as e:
            if resp.status_code == 404:
                logging.info("No translations found for language %s" % lang_code)
                return None  # no translations
            elif resp.status_code == 401:
                raise CommandError("401 Unauthorized while trying to access the CrowdIn API. Be sure to set CROWDIN_PROJECT_ID and CROWDIN_PROJECT_KEY in local_settings.py.")
            else:
                raise CommandError("%s - couldn't connect to CrowdIn API - cannot continue without downloading %s!" % (e, request_url))
        else:
            logging.info("Successfully downloaded zip archive")

        # Unpack into temp dir
        z = zipfile.ZipFile(StringIO.StringIO(resp.content))

        if zip_file:
            with open(zip_file, "wb") as fp:  # save the zip file
                fp.write(resp.content)

    tmp_dir_path = tempfile.mkdtemp()
    z.extractall(tmp_dir_path)

    # Copy over new translations
    po_file = extract_new_po(tmp_dir_path, combine_with_po_file=combine_with_po_file, lang=lang_code)

    # Clean up tracks
    if os.path.exists(tmp_dir_path):
        shutil.rmtree(tmp_dir_path)

    return po_file
コード例 #7
0
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on distributed servers server.")

        lang_code = lcode_to_ietf(options["lang_code"])
        software_version = options["software_version"]

        # Download the language pack
        try:
            self.start("Downloading language pack '%s'" % lang_code)
            zip_file = get_language_pack(lang_code, software_version)

            # Unpack into locale directory
            self.next_stage("Unpacking language pack '%s'" % lang_code)
            unpack_language(lang_code, zip_file)

            # Update database with meta info
            self.next_stage("Updating database for language pack '%s'" % lang_code)
            update_database(lang_code)

            #
            self.next_stage("Creating static files for language pack '%s'" % lang_code)
            update_jsi18n_file(lang_code)

            #
            move_srts(lang_code)
            self.complete("Finished processing language pack %s" % lang_code)
        except Exception as e:
            self.cancel(stage_status="error", notes="Error: %s" % e)
            raise
コード例 #8
0
def move_srts(lang_code):
    """
    Srts live in the locale directory, but that's not exposed at any URL.  So instead,
    we have to move the srts out to /static/subtitles/[lang_code]/
    """
    lang_code_ietf = lcode_to_ietf(lang_code)
    lang_code_django = lcode_to_django_dir(lang_code)

    subtitles_static_dir = os.path.join(settings.STATIC_ROOT, "subtitles")
    src_dir = os.path.join(LOCALE_ROOT, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" % (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            os.remove(srt_dest_path)
        shutil.move(fil, srt_dest_path)

    if os.listdir(src_dir):
        logging.warn("%s is not empty; will not remove.  Please check that all subtitles were moved." % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
コード例 #9
0
def clear_subtitles_cache(lang_codes=None, locale_root=LOCALE_ROOT):
    """
    Language codes will be converted to django format (e.g. en_US)
    """
    lang_codes = lang_codes or os.listdir(locale_root)
    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        # Clear the status file
        lm_file = get_lang_map_filepath(lang_code)
        with open(lm_file, "r") as fp:
            download_status = json.load(fp)
        for key in download_status:
            download_status[key] = {
                u"downloaded": False,
                u"last_success": u"",
                u"last_attempt": u"",
                u"api_response": u"",
            }
        with open(lm_file, "w") as fp:
            json.dump(download_status, fp)

        # Delete all srt files
        srt_path = get_srt_path(lang_code)
        if os.path.exists(srt_path):
            shutil.rmtree(srt_path)
コード例 #10
0
def download_srt_from_3rd_party(lang_codes=None, **kwargs):
    """Download subtitles specified by command line args"""

    lang_codes = lang_codes or get_all_prepped_lang_codes()
    bad_languages = {}

    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        try:
            lang_map_filepath = get_lang_map_filepath(lang_code)
            if not os.path.exists(lang_map_filepath):
                videos = {}  # happens if an unknown set for subtitles.
            else:
                with open(lang_map_filepath, "r") as fp:
                    videos = json.load(fp)
        except Exception as e:
            error_msg = "Error in subtitles metadata file for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

        try:
            download_if_criteria_met(videos, lang_code=lang_code, **kwargs)
        except Exception as e:
            error_msg = "Error downloading subtitles for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

    # now report final results
    if bad_languages:
        outstr = "Failed to download subtitles for the following languages: %s" % (bad_languages.keys())
        outstr += "\n" + str(bad_languages)
        logging.error(outstr)
コード例 #11
0
    def set_language(self, request, cur_lang):
        # each request can get the language from the querystring, or from the currently set session language
        old_lang = request.session.get("django_language", "")
        if cur_lang != old_lang:
            logging.debug("setting session language to %s" % cur_lang)

        # Set the two variables we care most about
        request.session["django_language"] = cur_lang
        request.session["default_language"] = cur_lang
        request.language = lcode_to_ietf(cur_lang)
コード例 #12
0
def get_language_pack(lang_code, software_version):
    """Download language pack for specified language"""

    lang_code = lcode_to_ietf(lang_code)
    logging.info("Retrieving language pack: %s" % lang_code)
    request_url = get_language_pack_url(lang_code, software_version)
    r = requests.get(request_url)
    try:
        r.raise_for_status()
    except Exception as e:
        raise CommandError(e)

    return r.content
コード例 #13
0
def update_database(lang_code):
    """Create/update LanguagePack table in database based on given languages metadata"""

    lang_code = lcode_to_ietf(lang_code)
    with open(get_language_pack_metadata_filepath(lang_code)) as fp:
        metadata = json.load(fp)

    logging.info("Updating database for language pack: %s" % lang_code)

    pack = get_object_or_None(LanguagePack, code=lang_code) or LanguagePack(code=lang_code)
    for key, value in metadata.iteritems():
        setattr(pack, key, value)
    pack.save()

    logging.info("Successfully updated database.")
コード例 #14
0
def update_language_packs(lang_codes=None, download_ka_translations=True, zip_file=None, ka_zip_file=None, use_local=False):

    # Loop through new UI translations & subtitles, create/update unified meta data
    generate_metadata(lang_codes=lang_codes)
    if use_local:
        for lang_code in lang_codes:
            lang_code = lcode_to_ietf(lang_code)
            package_metadata[lang_code] = {}
            combined_po_file = os.path.join(LOCALE_ROOT, lcode_to_django_dir(lang_code), "LC_MESSAGES", "django.po")
            combined_metadata = get_po_metadata(combined_po_file)
            package_metadata[lang_code]["approved_translations"] = combined_metadata["approved_translations"]
            package_metadata[lang_code]["phrases"]               = combined_metadata["phrases"]

    else:
        logging.info("Downloading %s language(s)" % lang_codes)

    # Zip
    zip_language_packs(lang_codes=lang_codes)
コード例 #15
0
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the distributed server.")

        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        # Get list of exercises
        lang_code = lcode_to_ietf(options["lang_code"])
        exercise_ids = options["exercise_ids"].split(",") if options["exercise_ids"] else None
        exercise_ids = exercise_ids or ([ex["id"] for ex in get_topic_exercises(topic_id=options["topic_id"])] if options["topic_id"] else None)
        exercise_ids = exercise_ids or get_node_cache("Exercise").keys()

        # Download the exercises
        for exercise_id in exercise_ids:
            scrape_exercise(exercise_id=exercise_id, lang_code=lang_code, force=options["force"])

        logging.info("Process complete.")
コード例 #16
0
def validate_language_map(lang_codes):
    """
    This function will tell you any blockers that you'll hit while
    running this command.

    All srt languages must exist in the language map; missing languages
    will cause errors during command running (which can be long).
    This function avoids that problem by doing the above consistency check.
    """
    lang_codes = lang_codes or get_all_prepped_lang_codes()
    missing_langs = []
    for lang_code in lang_codes:
        try:
            get_language_name(lcode_to_ietf(lang_code), error_on_missing=True)
        except LanguageNotFoundError:
            missing_langs.append(lang_code)

    if missing_langs:
        logging.warn("Please add the following language codes to %s:\n\t%s" % (LANG_LOOKUP_FILEPATH, missing_langs))
コード例 #17
0
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the distributed server.")

        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        # Get list of videos
        lang_code = lcode_to_ietf(options["lang_code"])
        video_map = get_dubbed_video_map(lang_code) or {}
        video_ids = options["video_ids"].split(",") if options["video_ids"] else None
        video_ids = video_ids or (
            [vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])] if options["topic_id"] else None
        )
        video_ids = video_ids or video_map.keys()

        # Download the videos
        for video_id in video_ids:
            if video_id in video_map:
                youtube_id = video_map[video_id]

            elif video_id in video_map.values():
                # Perhaps they sent in a youtube ID?  We can handle that!
                youtube_id = video_id
            else:
                logging.error("No mapping for video_id=%s; skipping" % video_id)
                continue

            try:
                scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
                # scrape_thumbnail(youtube_id=youtube_id)
                logging.info("Access video %s at %s" % (youtube_id, get_node_cache("Video")[video_id][0]["path"]))
            except Exception as e:
                logging.error("Failed to download video %s: %s" % (youtube_id, e))

        logging.info("Process complete.")
コード例 #18
0
def generate_metadata(lang_codes=None, broken_langs=None, added_ka=False):
    """Loop through locale folder, create or update language specific meta
    and create or update master file, skipping broken languages

    note: broken_langs must be in django format.

    """
    logging.info("Generating new language pack metadata")

    if broken_langs is None:
        broken_langs = tuple()

    lang_codes = lang_codes or os.listdir(LOCALE_ROOT)
    try:
        with open(get_language_pack_availability_filepath(), "r") as fp:
            master_metadata = json.load(fp)
        if isinstance(master_metadata, list):
            logging.info("Code switched from list to dict to support single language LanguagePack updates; converting your old list storage for dictionary storage.")
            master_list = master_metadata
            master_metadata = {}
            for lang_meta in master_list:
                master_metadata[lang_meta["code"]] = lang_meta
    except Exception as e:
        logging.warn("Error opening language pack metadata: %s; resetting" % e)
        master_metadata = {}

    # loop through all languages in locale, update master file
    crowdin_meta_dict = download_crowdin_metadata()
    with open(SUBTITLE_COUNTS_FILEPATH, "r") as fp:
        subtitle_counts = json.load(fp)

    for lc in lang_codes:
        lang_code_django = lcode_to_django_dir(lc)
        lang_code_ietf = lcode_to_ietf(lc)
        lang_name = get_language_name(lang_code_ietf)

        # skips anything not a directory, or with errors
        if not os.path.isdir(os.path.join(LOCALE_ROOT, lang_code_django)):
            logging.info("Skipping item %s because it is not a directory" % lang_code_django)
            continue
        elif lang_code_django in broken_langs:  # broken_langs is django format
            logging.info("Skipping directory %s because it triggered an error during compilemessages. The admins should have received a report about this and must fix it before this pack will be updateed." % lang_code_django)
            continue

        # Gather existing metadata
        crowdin_meta = next((meta for meta in crowdin_meta_dict if meta["code"] == lang_code_ietf), {})
        metadata_filepath = get_language_pack_metadata_filepath(lang_code_ietf)
        try:
            with open(metadata_filepath) as fp:
                local_meta = json.load(fp)
        except Exception as e:
            logging.warn("Error opening language pack metadata (%s): %s; resetting" % (metadata_filepath, e))
            local_meta = {}

        try:
            # update metadata
            updated_meta = {
                "code": lcode_to_ietf(crowdin_meta.get("code") or lang_code_django),  # user-facing code
                "name": (crowdin_meta.get("name") or lang_name),
                "percent_translated": int(crowdin_meta.get("approved_progress", 0)),
                "phrases": int(crowdin_meta.get("phrases", 0)),
                "approved_translations": int(crowdin_meta.get("approved", 0)),
            }

            # Obtain current number of subtitles
            entry = subtitle_counts.get(lang_name, {})
            srt_count = entry.get("count", 0)

            updated_meta.update({
                "software_version": version.VERSION,
                "subtitle_count": srt_count,
            })

        except LanguageNotFoundError:
            logging.error("Unrecognized language; must skip item %s" % lang_code_django)
            continue

        language_pack_version = increment_language_pack_version(local_meta, updated_meta)
        updated_meta["language_pack_version"] = language_pack_version + int(added_ka)
        local_meta.update(updated_meta)

        # Write locally (this is used on download by distributed server to update it's database)
        with open(metadata_filepath, 'w') as output:
            json.dump(local_meta, output)

        # Update master (this is used for central server to handle API requests for data)
        master_metadata[lang_code_ietf] = local_meta

    # Save updated master
    ensure_dir(os.path.dirname(get_language_pack_availability_filepath()))
    with open(get_language_pack_availability_filepath(), 'w') as output:
        json.dump(master_metadata, output)
    logging.info("Local record of translations updated")
コード例 #19
0
def update_language_srt_map(map_file=SRTS_JSON_FILEPATH):
    """
    Translate the srts_remote_availability dictionary into language specific files
    that can be used by the cache_subtitles command.

    Note: srt map deals with amara, so uses ietf codes (e.g. en-us)
    """
    # Load the current download status
    try:
        with open(map_file) as fp:
            api_info_map = json.load(fp)
    except Exception as e:
        # Must be corrupted; start from scratch!
        logging.warn("Could not open %s for updates; starting from scratch.  Error=%s" % (map_file, e))
        api_info_map = {}

    # Next we want to iterate through those and create a big srt dictionary organized by language code
    remote_availability_map = {}
    for youtube_id, data in api_info_map.items():
        languages = data.get("language_codes", [])
        for lang_code in languages:
            lang_code = lcode_to_ietf(lang_code)
            if not lang_code in remote_availability_map:
                #logging.info("Creating language section '%s'" % lang_code)
                remote_availability_map[lang_code] = {}
            # This entry will be valid if it's new, otherwise it will be overwitten later
            remote_availability_map[lang_code][youtube_id] = {
                "downloaded": False,
                "api_response": "",
                "last_attempt": "",
                "last_success": "",
            }

    # Finally we need to iterate through that dictionary and create individual files for each language code
    for lang_code, new_data in remote_availability_map.items():

        # Try to open previous language file
        lang_map_filepath = get_lang_map_filepath(lang_code)
        if not os.path.exists(lang_map_filepath):
            lang_map = {}
        else:
            try:
                with open(lang_map_filepath, "r") as fp:
                    lang_map = json.load(fp)
            except Exception as e:
                logging.error("Language download status mapping for (%s) is corrupted (%s), rewriting it." % (lang_code, e))
                lang_map = {}

        # First, check to see if it's empty (e.g. no subtitles available for any videos)
        if not new_data:
            logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
            if os.path.exists(lang_map_filepath):
                os.remove(lang_map_filepath)
            continue

        # Compare how many empty entries you are adding and add them to master map
        old_yt_ids = set(new_data.keys())
        new_yt_ids = set(lang_map.keys())
        yt_ids_to_add = set(new_data.keys()) - set(lang_map.keys())
        yt_ids_to_delete = set(lang_map.keys()) - set(new_data.keys())

        if yt_ids_to_add:
            logging.info("Adding %d new YouTube IDs to language (%s)" % (len(yt_ids_to_add), lang_code))
            for yt_id in yt_ids_to_add:
                lang_map[yt_id] = new_data.get(yt_id)

        if yt_ids_to_delete:
            logging.info("Deleting %d old YouTube IDs from language (%s) because they are no longer supported." % (len(yt_ids_to_delete), lang_code))
            for yt_id in yt_ids_to_delete:
                lang_map.pop(yt_id, None)

        # Write the new file to the correct location
        logging.debug("Writing %s" % lang_map_filepath)
        ensure_dir(os.path.dirname(lang_map_filepath))
        with open(lang_map_filepath, 'w') as outfile:
            json.dump(lang_map, outfile)

        # Update the big mapping with the most accurate numbers
        remote_availability_map[lang_code].update(lang_map)

    # Finally, remove any files not found in the current map at all.
    if lang_map_filepath:
        for filename in os.listdir(os.path.dirname(lang_map_filepath)):
            lang_code = lang_code = filename.split("_")[0]
            if not lang_code in remote_availability_map:
                file_to_remove = get_lang_map_filepath(lang_code)
                logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                else:
                    logging.warn("Subtitles metadata for %s not found; skipping deletion of non-existent file %s." % (lang_code, file_to_remove))

    return remote_availability_map