def make_language_pack(lang, version, sublangargs, filename, no_assessment_items, no_subtitles, no_assessment_resources):
    """Build and bundle a complete language pack for *lang*.

    Retrieves nodes, subtitles and catalogs, translates/filters them for
    the target language, then writes everything out via bundle_language_pack.
    """
    (node_data, subtitle_data,
     interface_catalog, content_catalog) = retrieve_language_resources(version, sublangargs, no_subtitles)

    subtitles = subtitle_data.keys()
    subtitle_paths = subtitle_data.values()

    nodes = list(translate_nodes(node_data, content_catalog))
    nodes, dubbed_video_count = apply_dubbed_video_map(nodes, subtitles, sublangargs["video_lang"])

    html_exercise_ids, assessment_exercise_ids, nodes = separate_exercise_types(nodes)
    html_exercise_path, translated_html_exercise_ids = retrieve_html_exercises(html_exercise_ids, lang)

    # now include only the assessment item resources that we need
    all_assessment_data, all_assessment_files = retrieve_all_assessment_item_data(
        no_item_data=no_assessment_items,
        no_item_resources=no_assessment_resources,
    )

    if lang == "en":
        # English is the source language: nothing to translate or prune.
        assessment_data = all_assessment_data
    else:
        assessment_data = list(translate_assessment_item_text(all_assessment_data, content_catalog))
        nodes = remove_untranslated_exercises(nodes, translated_html_exercise_ids, assessment_data)

    pack_metadata = generate_kalite_language_pack_metadata(
        lang, version, interface_catalog, content_catalog, subtitles, dubbed_video_count)

    bundle_language_pack(str(filename), nodes, interface_catalog, interface_catalog,
                         pack_metadata, assessment_data, all_assessment_files,
                         subtitle_paths, html_exercise_path)
# --- Example #2 ---
def retrieve_assessment_item_data(assessment_item, lang=None, force=False, no_item_data=False, no_item_resources=False, content_catalog=None) -> (dict, [str]):
    """
    Retrieve assessment item data and images for a single assessment item.

    :param assessment_item: id of assessment item
    :param lang: language to retrieve data in; None fetches the default (English) data
    :param force: refetch assessment item and images even if it exists on disk
    :param no_item_data: if True, skip all fetching and return ({}, [])
    :param no_item_resources: if True, skip downloading image/graphie resources
    :param content_catalog: translation catalog; when given and lang != "en",
        item text is translated before URLs are localized
    :return: tuple of dict of assessment item data and list of paths to files
    :raises requests.RequestException: re-raised (after logging) if the item download fails
    """
    if no_item_data:
        return {}, []

    # Build the URL and cache filename in a single formatting pass
    # (previously the strings were .format()ed twice).
    if lang:
        url = "http://www.khanacademy.org/api/v1/assessment_items/{assessment_item}?lang={lang}".format(
            lang=lang, assessment_item=assessment_item)
        filename = "assessment_items/{assessment_item}_{lang}.json".format(
            lang=lang, assessment_item=assessment_item)
    else:
        url = "http://www.khanacademy.org/api/v1/assessment_items/{assessment_item}".format(
            assessment_item=assessment_item)
        filename = "assessment_items/{assessment_item}.json".format(
            assessment_item=assessment_item)

    try:
        # Keep the try body minimal: only the download can raise RequestException.
        path = download_assessment_item_data(url, filename=filename, lang=lang, force=force)
    except requests.RequestException:
        logging.error("Download failure for assessment item: {assessment_item}".format(assessment_item=assessment_item))
        raise

    # JSON is UTF-8 by specification; be explicit so the platform default
    # encoding can't break decoding.
    with open(path, "r", encoding="utf-8") as f:
        item_data = json.load(f)

    # TEMP HACK: translate the item text here before URLs are localized, because otherwise, later, Crowdin strings no longer match
    if lang != "en" and content_catalog is not None:
        translated = list(translate_assessment_item_text([item_data], content_catalog))
        if not translated:  # if no translation, return empty assessment_item
            return {}, []
        item_data = translated[0]

    image_urls = find_all_image_urls(item_data)
    graphie_urls = find_all_graphie_urls(item_data)
    urls = list(itertools.chain(image_urls, graphie_urls))

    def _download_image_urls(url):
        # The manual mapping disambiguates remote filenames that would collide.
        filename = MANUAL_IMAGE_URL_TO_FILENAME_MAPPING.get(url, os.path.basename(url))
        filepath = _get_subpath_from_filename(filename)
        return download_and_cache_file(url, filename=filepath)

    file_paths = [] if no_item_resources else list(map(_download_image_urls, urls))

    item_data = localize_image_urls(item_data)
    item_data = localize_content_links(item_data)
    item_data = localize_graphie_urls(item_data)

    # Validate assessment item content: KA's API sometimes returns questions
    # with empty content; treat those as missing items.
    for k, v in ujson.loads(item_data["item_data"]).items():
        if k == "question" and not v.get("content"):
            logging.info("Found empty assessment content from KA's API {assessment_item}".format(assessment_item=assessment_item))
            return {}, []

    return item_data, file_paths
    def test_doesnt_returns_all_items(self):
        """All list items are returned regardless of translation status."""
        catalog = generate_catalog()

        sample_data = [
            {"id": item_id, "item_data": text}
            for item_id, text in (
                ("not_in_catalog", '"wala ito sa catalog"'),
                ("not_translated", '"Heart failure"'),
                ("translated", '"Millions"'),
            )
        ]

        returned_ids = [
            entry.get("id")
            for entry in translate_assessment_item_text(sample_data, catalog)
        ]

        assert "translated" in returned_ids
        assert "not_in_catalog" in returned_ids
        assert "not_translated" in returned_ids
def make_language_pack(lang, version, sublangargs, filename, ka_domain,
                       no_assessment_items, no_subtitles,
                       no_assessment_resources, no_dubbed_videos):
    """Assemble and bundle a complete language pack for *lang*.

    Fetches nodes, subtitles and catalogs from *ka_domain*, translates and
    prunes them for the target language, then hands everything to
    bundle_language_pack.
    """
    (node_data, subtitle_data,
     interface_catalog, content_catalog) = retrieve_language_resources(
        version, sublangargs, ka_domain, no_subtitles, no_dubbed_videos)

    subtitles = subtitle_data.keys()
    subtitle_paths = subtitle_data.values()

    nodes = list(translate_nodes(node_data, content_catalog))
    nodes, dubbed_video_count = apply_dubbed_video_map(
        nodes, subtitles, sublangargs["video_lang"])

    html_exercise_ids, assessment_exercise_ids, nodes = separate_exercise_types(nodes)
    html_exercise_path, translated_html_exercise_ids = retrieve_html_exercises(
        html_exercise_ids, lang)

    # now include only the assessment item resources that we need
    all_assessment_data, all_assessment_files = retrieve_all_assessment_item_data(
        no_item_data=no_assessment_items,
        no_item_resources=no_assessment_resources,
        node_data=nodes,
        lang=lang,
    )
    all_assessment_data = list(
        remove_assessment_data_with_empty_widgets(all_assessment_data))
    nodes = remove_nonexistent_assessment_items_from_exercises(
        nodes, all_assessment_data)

    nodes = clean_node_data_items(nodes)

    if lang == "en":
        # English is the source language: nothing to translate or prune.
        assessment_data = all_assessment_data
    else:
        assessment_data = list(translate_assessment_item_text(
            all_assessment_data, content_catalog))
        nodes = remove_untranslated_exercises(
            nodes, translated_html_exercise_ids, assessment_data)

    pack_metadata = generate_kalite_language_pack_metadata(
        lang, version, interface_catalog, content_catalog, subtitles,
        dubbed_video_count)

    bundle_language_pack(str(filename), nodes, interface_catalog,
                         interface_catalog, pack_metadata, assessment_data,
                         all_assessment_files, subtitle_paths,
                         html_exercise_path)
# --- Example #5 ---
def make_language_pack(lang, version, sublangargs, filename, ka_domain,
                       no_assessment_items, no_subtitles,
                       no_assessment_resources, no_dubbed_videos):
    """Prepare node and assessment data for *lang* and pickle both to disk.

    Unlike the bundling variant, this one stops after writing
    node_data_<lang>.pickle and assessment_data_<lang>.pickle.
    """
    node_data, subtitle_data, content_catalog = retrieve_language_resources(
        version, sublangargs, ka_domain, no_subtitles, no_dubbed_videos)

    subtitles = subtitle_data.keys()
    subtitle_paths = subtitle_data.values()

    nodes = list(translate_nodes(node_data, content_catalog))
    nodes, dubbed_video_count = apply_dubbed_video_map(
        nodes, subtitles, sublangargs["video_lang"])

    html_exercise_ids, assessment_exercise_ids, nodes = separate_exercise_types(nodes)
    html_exercise_path, translated_html_exercise_ids = retrieve_html_exercises(
        html_exercise_ids, lang)

    # now include only the assessment item resources that we need
    all_assessment_data, all_assessment_files = retrieve_all_assessment_item_data(
        no_item_data=no_assessment_items,
        no_item_resources=no_assessment_resources,
        node_data=nodes,
        lang=lang,
    )
    all_assessment_data = list(
        remove_assessment_data_with_empty_widgets(all_assessment_data))
    nodes = remove_nonexistent_assessment_items_from_exercises(
        nodes, all_assessment_data)

    nodes = clean_node_data_items(nodes)

    if lang == "en":
        # English is the source language: nothing to translate or prune.
        assessment_data = all_assessment_data
    else:
        assessment_data = list(translate_assessment_item_text(
            all_assessment_data, content_catalog))
        nodes = remove_untranslated_exercises(
            nodes, translated_html_exercise_ids, assessment_data)

    # Fix a deterministic ordering before serializing.
    nodes = sorted(list(nodes), key=lambda node: node.get('sort_order'))

    with open('node_data_{0}.pickle'.format(lang), 'wb') as handle:
        pickle.dump(nodes, handle)

    with open('assessment_data_{0}.pickle'.format(lang), 'wb') as handle:
        pickle.dump(assessment_data, handle)
# --- Example #6 ---
    def test_doesnt_return_untranslated_items(self):
        """Only items whose text has a catalog translation are yielded."""
        catalog = generate_catalog()

        sample_data = {}
        for item_id, text in (
                ("not_in_catalog", '"wala ito sa catalog"'),
                ("not_translated", '"Heart failure"'),
                ("translated", '"Millions"')):
            sample_data[item_id] = {"item_data": text}

        returned_ids = [
            item_id
            for item_id, _ in translate_assessment_item_text(sample_data, catalog)
        ]

        assert "translated" in returned_ids
        assert "not_in_catalog" not in returned_ids
        assert "not_translated" not in returned_ids
# --- Example #7 ---
    def test_doesnt_returns_all_items(self):
        """Every list item is returned, translated or not."""
        catalog = generate_catalog()

        sample_data = [
            {"id": "not_in_catalog", "item_data": '"wala ito sa catalog"'},
            {"id": "not_translated", "item_data": '"Heart failure"'},
            {"id": "translated", "item_data": '"Millions"'},
        ]

        result = translate_assessment_item_text(sample_data, catalog)
        returned_ids = [item.get("id") for item in result]

        for expected_id in ("translated", "not_in_catalog", "not_translated"):
            assert expected_id in returned_ids