Code example #1
    def handle(self, *args, **options):
        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        lang_code = lcode_to_ietf(options["lang_code"])
        if lang_code not in AVAILABLE_EXERCISE_LANGUAGE_CODES:
            logging.info("No exercises available for language %s" % lang_code)

        else:
            # Get list of exercises
            exercise_ids = options["exercise_ids"].split(",") if options["exercise_ids"] else None
            exercise_ids = exercise_ids or ([ex["id"] for ex in get_topic_exercises(topic_id=options["topic_id"])] if options["topic_id"] else None)
            exercise_ids = exercise_ids or get_node_cache("Exercise").keys()

            # Download the exercises
            for exercise_id in exercise_ids:
                scrape_exercise(exercise_id=exercise_id,
                                lang_code=lang_code,
                                force=options["force"])

        logging.info("Process complete.")
Code example #2
def move_srts(lang_code):
    """
    Srts live in the locale directory, but that's not exposed at any URL.  So instead,
    we have to move the srts out to /static/subtitles/[lang_code]/
    """
    lang_code_ietf = lcode_to_ietf(lang_code)
    lang_code_django = lcode_to_django_dir(lang_code)

    subtitles_static_dir = os.path.join(settings.STATIC_ROOT, "subtitles")
    src_dir = os.path.join(LOCALE_ROOT, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" %
                 (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            os.remove(srt_dest_path)  # we're going to replace any srt with a newer version
        shutil.move(fil, srt_dest_path)

    if not os.path.exists(src_dir):
        logging.info("No subtitles for language pack %s" % lang_code)
    elif os.listdir(src_dir):
        logging.warn(
            "%s is not empty; will not remove.  Please check that all subtitles were moved."
            % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
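
move_srts above boils down to: glob the source directory, overwrite-move each match into the static subtitles directory, then remove the source directory only once it is empty. A self-contained sketch of that overwrite-move step, using temporary directories so it can run anywhere (the file name is invented for the demo):

import glob
import os
import shutil
import tempfile

src_dir = tempfile.mkdtemp()
dest_dir = tempfile.mkdtemp()
open(os.path.join(src_dir, "video1.srt"), "w").close()  # stand-in subtitle file

for fil in glob.glob(os.path.join(src_dir, "*.srt")):
    dest_path = os.path.join(dest_dir, os.path.basename(fil))
    if os.path.exists(dest_path):
        os.remove(dest_path)  # replace any existing copy with the newer one
    shutil.move(fil, dest_path)

if not os.listdir(src_dir):
    shutil.rmtree(src_dir)  # clean up the now-empty source directory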
Code example #3
def get_all_prepped_lang_codes():
    """Pre-prepped language codes, for downloading srts"""
    lang_codes = []
    for filename in get_all_download_status_files():
        lang_code = os.path.basename(filename).split("_")[0]
        lang_codes.append(lcode_to_ietf(lang_code))
    return lang_codes
Code example #4
def get_all_prepped_lang_codes():
    """Pre-prepped language codes, for downloading srts"""
    lang_codes = []
    for filename in get_all_download_status_files():
        lang_code = os.path.basename(filename).split("_")[0]
        lang_codes.append(lcode_to_ietf(lang_code))
    return lang_codes
Code example #5
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError(
                "This must only be run on distributed servers server.")

        lang_code = lcode_to_ietf(options["lang_code"])
        lang_name = get_language_name(lang_code)
        software_version = options["software_version"]
        logging.info(
            "Downloading language pack for lang_name=%s, software_version=%s" %
            (lang_name, software_version))

        # Download the language pack
        try:
            if options['file']:
                self.start(
                    _("Using local language pack '%(filepath)s'") %
                    {"filepath": options['file']})
                zip_filepath = options['file']
            else:
                self.start(
                    _("Downloading language pack '%(lang_code)s'") %
                    {"lang_code": lang_code})
                zip_filepath = get_language_pack(lang_code,
                                                 software_version,
                                                 callback=self.cb)

            # Unpack into locale directory
            self.next_stage(
                _("Unpacking language pack '%(lang_code)s'") %
                {"lang_code": lang_code})
            unpack_language(lang_code, zip_filepath=zip_filepath)

            #
            self.next_stage(
                _("Creating static files for language pack '%(lang_code)s'") %
                {"lang_code": lang_code})
            update_jsi18n_file(lang_code)

            self.next_stage(
                _("Moving files to their appropriate local disk locations."))
            move_dubbed_video_map(lang_code)
            move_exercises(lang_code)
            move_srts(lang_code)
            move_video_sizes_file(lang_code)

            self.next_stage()
            call_command("collectstatic", interactive=False)

            self.next_stage(_("Invalidate caches"))
            caching.invalidate_all_caches()

            self.complete(
                _("Finished processing language pack %(lang_name)s.") %
                {"lang_name": get_language_name(lang_code)})
        except Exception as e:
            self.cancel(stage_status="error",
                        notes=_("Error: %(error_msg)s") %
                        {"error_msg": unicode(e)})
            raise
Code example #6
    def handle(self, *args, **options):
        if not settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the central server.")

        # None represents all
        lang_codes = [lcode_to_ietf(options["lang_code"])] if options["lang_code"] else None
        del options["lang_code"]

        if len(args) > 1:
            raise CommandError("Max 1 arg")

        elif len(args) == 1:
            if args[0] == "clear":
                logging.info("Clearing subtitles...")
                clear_subtitles_cache(lang_codes)

            else:
                raise CommandError("Unknown argument: %s" % args[0])

        else:
            validate_language_map(lang_codes)

            logging.info("Downloading...")
            download_srt_from_3rd_party(lang_codes=lang_codes, **options)

            validate_language_map(lang_codes)  # again at the end, so output is visible

            # for compatibility with KA Lite versions less than 0.10.3
            for lang in (lang_codes or get_langs_with_subtitles()):
                generate_srt_availability_file(lang)

        logging.info("Process complete.")
Code example #7
def move_srts(lang_code):
    """
    Srts live in the locale directory, but that's not exposed at any URL.  So instead,
    we have to move the srts out to /static/subtitles/[lang_code]/
    """
    lang_code_ietf = lcode_to_ietf(lang_code)
    lang_code_django = lcode_to_django_dir(lang_code)

    subtitles_static_dir = os.path.join(settings.USER_STATIC_FILES, "subtitles")
    src_dir = os.path.join(settings.USER_WRITABLE_LOCALE_DIR, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" % (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            os.remove(srt_dest_path)  # we're going to replace any srt with a newer version
        shutil.move(fil, srt_dest_path)

    if not os.path.exists(src_dir):
        logging.info("No subtitles for language pack %s" % lang_code)
    elif os.listdir(src_dir):
        logging.warn("%s is not empty; will not remove.  Please check that all subtitles were moved." % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
Code example #8
    def handle(self, *args, **options):
        if not settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the central server.")

        # None represents all
        lang_codes = [lcode_to_ietf(options["lang_code"])] if options["lang_code"] else None
        del options["lang_code"]

        if len(args) > 1:
            raise CommandError("Max 1 arg")

        elif len(args) == 1:
            if args[0] == "clear":
                logging.info("Clearing subtitles...")
                clear_subtitles_cache(lang_codes)

            else:
                raise CommandError("Unknown argument: %s" % args[0])

        else:
            validate_language_map(lang_codes)

            logging.info("Downloading...")
            download_srt_from_3rd_party(lang_codes=lang_codes, **options)

            validate_language_map(lang_codes)  # again at the end, so output is visible

            # for compatibility with KA Lite versions less than 0.10.3
            for lang in (lang_codes or get_langs_with_subtitles()):
                generate_srt_availability_file(lang)

        logging.info("Process complete.")
Code example #9
def clear_subtitles_cache(lang_codes=None, locale_root=LOCALE_ROOT):
    """
    Language codes will be converted to IETF format (e.g. en-us)
    """
    lang_codes = lang_codes or get_langs_with_subtitles()
    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        # Clear the status file
        lm_file = get_lang_map_filepath(lang_code)
        download_status = softload_json(lm_file, raises=True)
        for key in download_status:
            download_status[key] = {
                u'downloaded': False,
                u'last_success': u'',
                u'last_attempt': u'',
                u'api_response': u''
            }
        with open(lm_file, "w") as fp:
            json.dump(download_status, fp)

        # Delete all srt files
        srt_path = get_srt_path(lang_code)
        if os.path.exists(srt_path):
            shutil.rmtree(srt_path)
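
For orientation, the per-language status file that clear_subtitles_cache rewrites appears to be a JSON object keyed by YouTube ID (compare example #35), where each entry carries the four fields reset above. A minimal self-contained sketch of that reset step, with the video key and old values invented for illustration:

import json

# Hypothetical previous contents; the field names mirror those reset in the code above.
status = {
    "some_youtube_id": {
        "downloaded": True,
        "last_success": "2014-01-01",
        "last_attempt": "2014-01-01",
        "api_response": "success",
    },
}

# Reset every entry the same way clear_subtitles_cache does before rewriting the file.
for key in status:
    status[key] = {"downloaded": False, "last_success": "", "last_attempt": "", "api_response": ""}

print(json.dumps(status, indent=2))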
Code example #10
def get_language_pack(lang_code, software_version, callback):
    """Download language pack for specified language"""

    lang_code = lcode_to_ietf(lang_code)
    logging.info("Retrieving language pack: %s" % lang_code)
    request_url = get_language_pack_url(lang_code, software_version)
    logging.debug("Downloading zip from %s" % request_url)
    path, response = download_file(request_url, callback=callback_percent_proxy(callback))
    return path
Code example #11
def get_language_pack(lang_code, software_version, callback):
    """Download language pack for specified language"""

    lang_code = lcode_to_ietf(lang_code)
    logging.info("Retrieving language pack: %s" % lang_code)
    request_url = get_language_pack_url(lang_code, software_version)
    logging.debug("Downloading zip from %s" % request_url)
    path, response = download_file(request_url, callback=callback_percent_proxy(callback))
    return path
Code example #12
File: api_views.py  Project: SG345/ka-lite
def start_languagepack_download(request):
    if not request.method == 'POST':
        raise Exception(_("Must call API endpoint with POST verb."))

    data = json.loads(request.raw_post_data)  # Django has some weird post processing into request.POST, so use raw_post_data
    lang_code = lcode_to_ietf(data['lang'])

    force_job('languagepackdownload', _("Language pack download"), lang_code=lang_code, locale=request.language)

    return JsonResponseMessageSuccess(_("Successfully started language pack download for %(lang_name)s.") % {"lang_name": get_language_name(lang_code)})
Code example #13
def start_languagepack_download(request):
    if not request.POST:
        raise Exception(_("Must call API endpoint with POST verb."));

    data = json.loads(request.raw_post_data)  # Django has some weird post processing into request.POST, so use raw_post_data
    lang_code = lcode_to_ietf(data['lang'])

    force_job('languagepackdownload', _("Language pack download"), lang_code=lang_code, locale=request.language)

    return JsonResponseMessageSuccess(_("Started language pack download for language %(lang_code)s successfully.") % {"lang_code": lang_code})
Code example #14
def start_languagepack_download(request):
    if not request.POST:
        raise Exception(_("Must call API endpoint with POST verb."));

    data = json.loads(request.raw_post_data)  # Django has some weird post processing into request.POST, so use raw_post_data
    lang_code = lcode_to_ietf(data['lang'])

    force_job('languagepackdownload', _("Language pack download"), lang_code=lang_code, locale=request.language)

    return JsonResponseMessageSuccess(_("Started language pack download for language %(lang_code)s successfully.") % {"lang_code": lang_code})
Code example #15
File: __init__.py  Project: jeepurs/ka-lite-central
def get_language_pack_filepath(lang_code, version=VERSION):
    """Returns location on disk of a language pack.

    Args:
        lang_code: string code, ietf format (will be converted)
        version: string (e.g. 0.10.3)

    Returns:
        string: absolute (local) filepath to the requested language pack.
    """
    return os.path.join(LANGUAGE_PACK_ROOT, version, "%s.zip" % lcode_to_ietf(lang_code))
Code example #16
File: __init__.py  Project: 2flcastro/ka-lite-central
def get_language_pack_filepath(lang_code, version=SHORTVERSION):
    """Returns location on disk of a language pack.

    Args:
        lang_code: string code, ietf format (will be converted)
        version: string (e.g. 0.10.3)

    Returns:
        string: absolute (local) filepath to the requested language pack.
    """
    return os.path.join(LANGUAGE_PACK_ROOT, version, "%s.zip" % lcode_to_ietf(lang_code))
Code example #17
File: __init__.py  Project: 2flcastro/ka-lite-central
def get_supported_language_map(lang_code=None):
    lang_code = lcode_to_ietf(lang_code)
    global SUPPORTED_LANGUAGE_MAP
    if not SUPPORTED_LANGUAGE_MAP:
        with open(SUPPORTED_LANGUAGES_FILEPATH) as f:
            SUPPORTED_LANGUAGE_MAP = json.loads(f.read())

    if not lang_code:
        return SUPPORTED_LANGUAGE_MAP
    else:
        lang_map = defaultdict(lambda: lang_code)
        lang_map.update(SUPPORTED_LANGUAGE_MAP.get(lang_code) or {})
        return lang_map
Code example #18
def get_supported_language_map(lang_code=None):
    lang_code = lcode_to_ietf(lang_code)
    global SUPPORTED_LANGUAGE_MAP
    if not SUPPORTED_LANGUAGE_MAP:
        with open(SUPPORTED_LANGUAGES_FILEPATH) as f:
            SUPPORTED_LANGUAGE_MAP = json.loads(f.read())

    if not lang_code:
        return SUPPORTED_LANGUAGE_MAP
    else:
        lang_map = defaultdict(lambda: lang_code)
        lang_map.update(SUPPORTED_LANGUAGE_MAP.get(lang_code) or {})
        return lang_map
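
Because the map is built as defaultdict(lambda: lang_code), looking up any service key that has no explicit entry (example #31 looks up 'amara') simply falls back to the requested code. A standalone illustration of that fallback, with the mapping contents invented for the example:

from collections import defaultdict

def supported_language_map(lang_code, supported_map):
    # Any service without an explicit mapping falls back to the requested code itself.
    lang_map = defaultdict(lambda: lang_code)
    lang_map.update(supported_map.get(lang_code) or {})
    return lang_map

# Hypothetical mapping: only 'pt-br' overrides its Amara code.
SAMPLE_MAP = {"pt-br": {"amara": "pt"}}

assert supported_language_map("pt-br", SAMPLE_MAP)["amara"] == "pt"
assert supported_language_map("sw", SAMPLE_MAP)["amara"] == "sw"  # falls back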
Code example #19
File: __init__.py  Project: jeepurs/ka-lite-central
def get_language_pack_metadata_filepath(lang_code, version=VERSION):
    """Returns the location on disk of the metadata associated with a to-be-built language pack.

    Args:
        lang_code: string, ietf format (will be converted)
        version: string (e.g. 0.10.3)

    Returns:
        string: absolute (local) filepath to the requested metadata file.
    """
    lang_code = lcode_to_ietf(lang_code)
    metadata_filename = "%s_metadata.json" % lang_code

    return os.path.join(get_lp_build_dir(lang_code, version=version), metadata_filename)
Code example #20
File: __init__.py  Project: 2flcastro/ka-lite-central
def get_language_pack_metadata_filepath(lang_code, version=SHORTVERSION):
    """Returns the location on disk of the metadata associated with a to-be-built language pack.

    Args:
        lang_code: string, ietf format (will be converted)
        version: string (e.g. 0.10.3)

    Returns:
        string: absolute (local) filepath to the requested metadata file.
    """
    lang_code = lcode_to_ietf(lang_code)
    metadata_filename = "%s_metadata.json" % lang_code

    return os.path.join(get_lp_build_dir(lang_code, version=version), metadata_filename)
Code example #21
def get_language_pack(lang_code, software_version, callback):
    """Download language pack for specified language"""

    lang_code = lcode_to_ietf(lang_code)
    logging.info("Retrieving language pack: %s" % lang_code)
    request_url = get_language_pack_url(lang_code, software_version)
    logging.debug("Downloading zip from %s" % request_url)

    # aron: hack, download_file uses urllib.urlretrieve, which doesn't
    # return a status code. So before we make the full request, we
    # check first if the said lang pack url exists. If not, error out.
    if requests.head(request_url).status_code == 404:
        raise requests.exceptions.HTTPError("Language pack %s not found. Please double check that it exists." % lang_code)

    path, response = download_file(request_url, callback=callback_percent_proxy(callback))
    return path
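
The comment in the example above explains the extra round trip: download_file is built on urllib.urlretrieve, which does not expose an HTTP status code, so the code issues a HEAD request first and errors out on 404. A generic sketch of the same pre-flight pattern using only the requests library; the helper name and streaming download are illustrative, not the project's download_file:

import requests

def fetch_if_available(url, dest_path):
    """Download url to dest_path, failing early if the resource does not exist."""
    # Cheap existence check before committing to the full download.
    if requests.head(url, allow_redirects=True).status_code == 404:
        raise requests.exceptions.HTTPError("Resource not found: %s" % url)

    resp = requests.get(url, stream=True)
    resp.raise_for_status()
    with open(dest_path, "wb") as fp:
        for chunk in resp.iter_content(chunk_size=8192):
            fp.write(chunk)
    return dest_path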
Code example #22
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError(
                "This must only be run on the distributed server.")

        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        #
        ensure_dir(settings.CONTENT_ROOT)

        # Get list of videos
        lang_code = lcode_to_ietf(options["lang_code"])
        video_map = get_dubbed_video_map(lang_code) or {}
        video_ids = options["video_ids"].split(
            ",") if options["video_ids"] else None
        video_ids = video_ids or ([
            vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])
        ] if options["topic_id"] else None)
        video_ids = video_ids or video_map.keys()

        # Download the videos
        for video_id in video_ids:
            if video_id in video_map:
                youtube_id = video_map[video_id]

            elif video_id in video_map.values():
                # Perhaps they sent in a youtube ID?  We can handle that!
                youtube_id = video_id
            else:
                logging.error("No mapping for video_id=%s; skipping" %
                              video_id)
                continue

            try:
                scrape_video(youtube_id=youtube_id,
                             format=options["format"],
                             force=options["force"])
                #scrape_thumbnail(youtube_id=youtube_id)
                logging.info(
                    "Access video %s at %s" %
                    (youtube_id, get_node_cache("Video")[video_id][0]["path"]))
            except Exception as e:
                logging.error("Failed to download video %s: %s" %
                              (youtube_id, e))

        logging.info("Process complete.")
Code example #23
File: api_views.py  Project: theaverageguy/ka-lite
def start_languagepack_download(request):
    if not request.method == 'POST':
        raise Exception(_("Must call API endpoint with POST verb."))

    data = json.loads(request.raw_post_data)  # Django has some weird post processing into request.POST, so use raw_post_data
    lang_code = lcode_to_ietf(data['lang'])

    force_job('languagepackdownload',
              _("Language pack download"),
              lang_code=lang_code,
              locale=request.language)

    return JsonResponseMessageSuccess(
        _("Successfully started language pack download for %(lang_name)s.") %
        {"lang_name": get_language_name(lang_code)})
Code example #24
def get_language_pack(lang_code, software_version, callback):
    """Download language pack for specified language"""

    lang_code = lcode_to_ietf(lang_code)
    logging.info("Retrieving language pack: %s" % lang_code)
    request_url = get_language_pack_url(lang_code, software_version)
    logging.debug("Downloading zip from %s" % request_url)

    # aron: hack, download_file uses urllib.urlretrieve, which doesn't
    # return a status code. So before we make the full request, we
    # check first if the said lang pack url exists. If not, error out.
    if requests.head(request_url).status_code == 404:
        raise requests.exceptions.HTTPError(
            "Language pack %s not found. Please double check that it exists." %
            lang_code)

    path, response = download_file(request_url,
                                   callback=callback_percent_proxy(callback))
    return path
Code example #25
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on distributed servers server.")

        lang_code = lcode_to_ietf(options["lang_code"])
        lang_name = get_language_name(lang_code)
        software_version = options["software_version"]
        logging.info("Downloading language pack for lang_name=%s, software_version=%s" % (lang_name, software_version))

        # Download the language pack
        try:
            if options['file']:
                self.start(_("Using local language pack '%(filepath)s'") % {"filepath": options['file']})
                zip_filepath = options['file']
            else:
                self.start(_("Downloading language pack '%(lang_code)s'") % {"lang_code": lang_code})
                zip_filepath = get_language_pack(lang_code, software_version, callback=self.cb)

            # Unpack into locale directory
            self.next_stage(_("Unpacking language pack '%(lang_code)s'") % {"lang_code": lang_code})
            unpack_language(lang_code, zip_filepath=zip_filepath)

            #
            self.next_stage(_("Creating static files for language pack '%(lang_code)s'") % {"lang_code": lang_code})
            update_jsi18n_file(lang_code)


            self.next_stage(_("Moving files to their appropriate local disk locations."))
            move_dubbed_video_map(lang_code)
            move_exercises(lang_code)
            move_srts(lang_code)
            move_video_sizes_file(lang_code)

            self.next_stage()
            call_command("collectstatic", interactive=False)

            self.next_stage(_("Invalidate caches"))
            caching.invalidate_all_caches()

            self.complete(_("Finished processing language pack %(lang_name)s.") % {"lang_name": get_language_name(lang_code)})
        except Exception as e:
            self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": unicode(e)})
            raise
Code example #26
    def handle(self, *args, **options):
        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")


        lang_code = lcode_to_ietf(options["lang_code"])
        if lang_code not in AVAILABLE_EXERCISE_LANGUAGE_CODES:
            logging.info("No exercises available for language %s" % lang_code)

        else:
            # Get list of exercises
            exercise_ids = options["exercise_ids"].split(",") if options["exercise_ids"] else None
            exercise_ids = exercise_ids or ([ex["id"] for ex in get_topic_exercises(topic_id=options["topic_id"])] if options["topic_id"] else None)
            exercise_ids = exercise_ids or get_node_cache("Exercise").keys()

            # Download the exercises
            for exercise_id in exercise_ids:
                scrape_exercise(exercise_id=exercise_id, lang_code=lang_code, force=options["force"])

        logging.info("Process complete.")
Code example #27
def clear_subtitles_cache(lang_codes=None, locale_root=LOCALE_ROOT):
    """
    Language codes will be converted to IETF format (e.g. en-us)
    """
    lang_codes = lang_codes or get_langs_with_subtitles()
    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        # Clear the status file
        lm_file = get_lang_map_filepath(lang_code)
        download_status = softload_json(lm_file, raises=True)
        for key in download_status:
            download_status[key] = {u'downloaded': False, u'last_success': u'', u'last_attempt': u'', u'api_response': u''}
        with open(lm_file, "w") as fp:
            json.dump(download_status, fp)

        # Delete all srt files
        srt_path = get_srt_path(lang_code)
        if os.path.exists(srt_path):
            shutil.rmtree(srt_path)
Code example #28
def validate_language_map(lang_codes):
    """
    This function will tell you any blockers that you'll hit while
    running this command.

    All srt languages must exist in the language map; missing languages
    will cause errors during command running (which can be long).
    This function avoids that problem by doing the above consistency check.
    """
    lang_codes = lang_codes or get_all_prepped_lang_codes()
    missing_langs = []
    for lang_code in lang_codes:
        try:
            get_language_name(lcode_to_ietf(lang_code), error_on_missing=True)
        except LanguageNotFoundError:
            missing_langs.append(lang_code)

    if missing_langs:
        logging.warn("Please add the following language codes to %s:\n\t%s" % (
            settings.LANG_LOOKUP_FILEPATH, missing_langs,
        ))
Code example #29
def scrape_exercise(exercise_id, lang_code, force=False):
    ietf_lang_code = lcode_to_ietf(lang_code)

    exercise_dest_filepath = get_exercise_filepath(exercise_id, lang_code=lang_code)
    exercise_localized_root = os.path.dirname(exercise_dest_filepath)

    if os.path.exists(exercise_dest_filepath) and not force:
        return

    exercise_url = "https://es.khanacademy.org/khan-exercises/exercises/%s.html?lang=%s" % (exercise_id, ietf_lang_code)
    logging.info("Retrieving exercise %s from %s" % (exercise_id, exercise_url))

    try:
        ensure_dir(exercise_localized_root)

        resp = requests.get(exercise_url)
        resp.raise_for_status()
        with open(exercise_dest_filepath, "wb") as fp:
            fp.write(resp.content)
    except Exception as e:
        logging.error("Failed to download %s: %s" % (exercise_url, e))
Code example #30
def validate_language_map(lang_codes):
    """
    This function will tell you any blockers that you'll hit while
    running this command.

    All srt languages must exist in the language map; missing languages
    will cause errors during command running (which can be long).
    This function avoids that problem by doing the above consistency check.
    """
    lang_codes = lang_codes or get_all_prepped_lang_codes()
    missing_langs = []
    for lang_code in lang_codes:
        try:
            get_language_name(lcode_to_ietf(lang_code), error_on_missing=True)
        except LanguageNotFoundError:
            missing_langs.append(lang_code)

    if missing_langs:
        logging.warn("Please add the following language codes to %s:\n\t%s" % (
            settings.LANG_LOOKUP_FILEPATH,
            missing_langs,
        ))
Code example #31
def download_srt_from_3rd_party(lang_codes=None, **kwargs):
    """Download subtitles specified by command line args"""

    lang_codes = lang_codes or get_all_prepped_lang_codes()
    bad_languages = {}

    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)
        lang_code = get_supported_language_map(lang_code)['amara']

        try:
            lang_map_filepath = get_lang_map_filepath(lang_code)
            if not os.path.exists(lang_map_filepath):
                videos = {}  # happens when no subtitle metadata file exists for this language
            else:
                with open(lang_map_filepath, "r") as fp:
                    videos = json.load(fp)
        except Exception as e:
            error_msg = "Error in subtitles metadata file for %s: %s" % (
                lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

        try:
            download_if_criteria_met(videos, lang_code=lang_code, **kwargs)
        except Exception as e:
            error_msg = "Error downloading subtitles for %s: %s" % (lang_code,
                                                                    e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

    # now report final results
    if bad_languages:
        outstr = "Failed to download subtitles for the following languages: %s" % (
            bad_languages.keys())
        outstr += "\n" + str(bad_languages)
        logging.error(outstr)
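
Both variants of download_srt_from_3rd_party follow the same shape: keep going on per-language failures, collect the error messages, and report them all at the end. The bare pattern, stripped of the subtitle-specific helpers (process_all and the demo callable are invented for the sketch):

import logging

def process_all(items, process):
    failures = {}
    for item in items:
        try:
            process(item)
        except Exception as e:
            failures[item] = str(e)
            continue  # one failing item should not stop the rest

    if failures:
        logging.error("Failed items: %s\n%s" % (sorted(failures), failures))
    return failures

def demo(code):
    if "bad" in code:
        raise ValueError("no subtitles for %s" % code)

process_all(["en", "bad-lang", "pt"], demo)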
Code example #32
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the distributed server.")

        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        #
        ensure_dir(settings.CONTENT_ROOT)

        # Get list of videos
        lang_code = lcode_to_ietf(options["lang_code"])
        video_map = get_dubbed_video_map(lang_code) or {}
        video_ids = options["video_ids"].split(",") if options["video_ids"] else None
        video_ids = video_ids or (
            [vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])] if options["topic_id"] else None
        )
        video_ids = video_ids or video_map.keys()

        # Download the videos
        for video_id in video_ids:
            if video_id in video_map:
                youtube_id = video_map[video_id]

            elif video_id in video_map.values():
                # Perhaps they sent in a youtube ID?  We can handle that!
                youtube_id = video_id
            else:
                logging.error("No mapping for video_id=%s; skipping" % video_id)
                continue

            try:
                scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
                # scrape_thumbnail(youtube_id=youtube_id)
                logging.info("Access video %s at %s" % (youtube_id, get_node_cache("Video")[video_id][0]["path"]))
            except Exception as e:
                logging.error("Failed to download video %s: %s" % (youtube_id, e))

        logging.info("Process complete.")
Code example #33
def download_srt_from_3rd_party(lang_codes=None, **kwargs):
    """Download subtitles specified by command line args"""

    lang_codes = lang_codes or get_all_prepped_lang_codes()
    bad_languages = {}

    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)
        lang_code = get_supported_language_map(lang_code)['amara']

        try:
            lang_map_filepath = get_lang_map_filepath(lang_code)
            if not os.path.exists(lang_map_filepath):
                videos = {}  # happens when no subtitle metadata file exists for this language
            else:
                with open(lang_map_filepath, "r") as fp:
                    videos = json.load(fp)
        except Exception as e:
            error_msg = "Error in subtitles metadata file for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

        try:
            download_if_criteria_met(videos, lang_code=lang_code, **kwargs)
        except Exception as e:
            error_msg = "Error downloading subtitles for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

    # now report final results
    if bad_languages:
        outstr = "Failed to download subtitles for the following languages: %s" % (bad_languages.keys())
        outstr += "\n" + str(bad_languages)
        logging.error(outstr)
Code example #34
def scrape_exercise(exercise_id, lang_code, force=False):
    ietf_lang_code = lcode_to_ietf(lang_code)

    exercise_dest_filepath = get_exercise_filepath(exercise_id,
                                                   lang_code=lang_code)
    exercise_localized_root = os.path.dirname(exercise_dest_filepath)

    if os.path.exists(exercise_dest_filepath) and not force:
        return

    exercise_url = "https://es.khanacademy.org/khan-exercises/exercises/%s.html?lang=%s" % (
        exercise_id, ietf_lang_code)
    logging.info("Retrieving exercise %s from %s" %
                 (exercise_id, exercise_url))

    try:
        ensure_dir(exercise_localized_root)

        resp = requests.get(exercise_url)
        resp.raise_for_status()
        with open(exercise_dest_filepath, "wb") as fp:
            fp.write(resp.content)
    except Exception as e:
        logging.error("Failed to download %s: %s" % (exercise_url, e))
Code example #35
def update_language_srt_map(map_file=SRTS_JSON_FILEPATH):
    """
    Translate the srts_remote_availability dictionary into language specific files
    that can be used by the cache_subtitles command.

    Note: srt map deals with amara, so uses ietf codes (e.g. en-us)
    """
    # Load the current download status
    api_info_map = softload_json(map_file, logger=logging.warn)

    # Next we want to iterate through those and create a big srt dictionary organized by language code
    remote_availability_map = {}
    for youtube_id, data in api_info_map.items():
        languages = data.get("language_codes", [])
        for lang_code in languages:
            lang_code = lcode_to_ietf(lang_code)
            if lang_code not in remote_availability_map:
                # logging.info("Creating language section '%s'" % lang_code)
                remote_availability_map[lang_code] = {}
            # This entry will be valid if it's new, otherwise it will be overwritten later
            remote_availability_map[lang_code][youtube_id] = {
                "downloaded": False,
                "api_response": "",
                "last_attempt": "",
                "last_success": "",
            }

    # Finally we need to iterate through that dictionary and create individual files for each language code
    for lang_code, new_data in remote_availability_map.items():

        # Try to open previous language file
        lang_map_filepath = get_lang_map_filepath(lang_code)
        if not os.path.exists(lang_map_filepath):
            lang_map = {}
        else:
            lang_map = softload_json(lang_map_filepath, logger=logging.error)

        # First, check to see if it's empty (e.g. no subtitles available for any videos)
        if not new_data:
            logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
            if os.path.exists(lang_map_filepath):
                os.remove(lang_map_filepath)
            continue

        # Compare how many empty entries you are adding and add them to master map
        old_yt_ids = set(new_data.keys())
        new_yt_ids = set(lang_map.keys())
        yt_ids_to_add = set(new_data.keys()) - set(lang_map.keys())
        yt_ids_to_delete = set(lang_map.keys()) - set(new_data.keys())

        if yt_ids_to_add:
            logging.info("Adding %d new YouTube IDs to language (%s)" % (len(yt_ids_to_add), lang_code))
            for yt_id in yt_ids_to_add:
                lang_map[yt_id] = new_data.get(yt_id)

        if yt_ids_to_delete:
            logging.info("Deleting %d old YouTube IDs from language (%s) because they are no longer supported." % (len(yt_ids_to_delete), lang_code))
            for yt_id in yt_ids_to_delete:
                lang_map.pop(yt_id, None)

        # Write the new file to the correct location
        logging.debug("Writing %s" % lang_map_filepath)
        ensure_dir(os.path.dirname(lang_map_filepath))
        with open(lang_map_filepath, 'w') as outfile:
            json.dump(lang_map, outfile)

        # Update the big mapping with the most accurate numbers
        remote_availability_map[lang_code].update(lang_map)

    # Finally, remove any files not found in the current map at all.
    if lang_map_filepath:
        for filename in os.listdir(os.path.dirname(lang_map_filepath)):
            lang_code = filename.split("_")[0]
            if lang_code not in remote_availability_map:
                file_to_remove = get_lang_map_filepath(lang_code)
                logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                else:
                    logging.warn("Subtitles metadata for %s not found; skipping deletion of non-existent file %s." % (lang_code, file_to_remove))

    return remote_availability_map
Code example #36
def update_language_srt_map(map_file=SRTS_JSON_FILEPATH):
    """
    Translate the srts_remote_availability dictionary into language specific files
    that can be used by the cache_subtitles command.

    Note: srt map deals with amara, so uses ietf codes (e.g. en-us)
    """
    # Load the current download status
    api_info_map = softload_json(map_file, logger=logging.warn)

    # Next we want to iterate through those and create a big srt dictionary organized by language code
    remote_availability_map = {}
    for youtube_id, data in api_info_map.items():
        languages = data.get("language_codes", [])
        for lang_code in languages:
            lang_code = lcode_to_ietf(lang_code)
            if lang_code not in remote_availability_map:
                # logging.info("Creating language section '%s'" % lang_code)
                remote_availability_map[lang_code] = {}
            # This entry will be valid if it's new, otherwise it will be overwritten later
            remote_availability_map[lang_code][youtube_id] = {
                "downloaded": False,
                "api_response": "",
                "last_attempt": "",
                "last_success": "",
            }

    # Finally we need to iterate through that dictionary and create individual files for each language code
    for lang_code, new_data in remote_availability_map.items():

        # Try to open previous language file
        lang_map_filepath = get_lang_map_filepath(lang_code)
        if not os.path.exists(lang_map_filepath):
            lang_map = {}
        else:
            lang_map = softload_json(lang_map_filepath, logger=logging.error)

        # First, check to see if it's empty (e.g. no subtitles available for any videos)
        if not new_data:
            logging.info(
                "Subtitle support for %s has been terminated; removing." %
                lang_code)
            if os.path.exists(lang_map_filepath):
                os.remove(lang_map_filepath)
            continue

        # Compare how many empty entries you are adding and add them to master map
        old_yt_ids = set(new_data.keys())
        new_yt_ids = set(lang_map.keys())
        yt_ids_to_add = set(new_data.keys()) - set(lang_map.keys())
        yt_ids_to_delete = set(lang_map.keys()) - set(new_data.keys())

        if yt_ids_to_add:
            logging.info("Adding %d new YouTube IDs to language (%s)" %
                         (len(yt_ids_to_add), lang_code))
            for yt_id in yt_ids_to_add:
                lang_map[yt_id] = new_data.get(yt_id)

        if yt_ids_to_delete:
            logging.info(
                "Deleting %d old YouTube IDs from language (%s) because they are no longer supported."
                % (len(yt_ids_to_delete), lang_code))
            for yt_id in yt_ids_to_delete:
                lang_map.pop(yt_id, None)

        # Write the new file to the correct location
        logging.debug("Writing %s" % lang_map_filepath)
        ensure_dir(os.path.dirname(lang_map_filepath))
        with open(lang_map_filepath, 'w') as outfile:
            json.dump(lang_map, outfile)

        # Update the big mapping with the most accurate numbers
        remote_availability_map[lang_code].update(lang_map)

    # Finally, remove any files not found in the current map at all.
    if lang_map_filepath:
        for filename in os.listdir(os.path.dirname(lang_map_filepath)):
            lang_code = filename.split("_")[0]
            if lang_code not in remote_availability_map:
                file_to_remove = get_lang_map_filepath(lang_code)
                logging.info(
                    "Subtitle support for %s has been terminated; removing." %
                    lang_code)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                else:
                    logging.warn(
                        "Subtitles metadata for %s not found; skipping deletion of non-existent file %s."
                        % (lang_code, file_to_remove))

    return remote_availability_map
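
The add/delete bookkeeping in examples #35 and #36 is a plain set reconciliation between the freshly scraped availability data and the previously written per-language file. The same idea in isolation, with toy dictionaries standing in for new_data and lang_map:

# Toy stand-ins for new_data (fresh availability) and lang_map (previously saved file).
new_data = {"yt_a": {}, "yt_b": {}}
lang_map = {"yt_b": {"downloaded": True}, "yt_c": {}}

yt_ids_to_add = set(new_data) - set(lang_map)      # newly available videos
yt_ids_to_delete = set(lang_map) - set(new_data)   # videos no longer supported

for yt_id in yt_ids_to_add:
    lang_map[yt_id] = new_data[yt_id]
for yt_id in yt_ids_to_delete:
    lang_map.pop(yt_id, None)

assert set(lang_map) == set(new_data)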