def get_khan_topic_tree(lang="en", curr_key=None):
    # The KA API expects the three-letter code "swa" for Swahili ("sw")
    ka_lang = "swa" if lang == "sw" else lang
    response = make_request(V2_API_URL.format(lang=ka_lang,
                                              projection=PROJECTION_KEYS),
                            timeout=120)

    topic_tree = ujson.loads(response.content)
    # if name of lang is passed in, get language code
    if getlang_by_name(lang):
        lang = getlang_by_name(lang).primary_code

    if lang not in SUPPORTED_LANGS:
        global translations
        translations = retrieve_translations(lang_code=lang)

    # Flatten the dict of node lists returned by the API into a single list of nodes
    flattened_tree = [
        node for node_list in topic_tree.values() for node in node_list
    ]

    # convert to dict with ids as keys
    tree_dict = {node["id"]: node for node in flattened_tree}

    return _recurse_create(tree_dict["x00000000"], tree_dict, lang=lang)
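
# Hedged usage sketch (not part of the original source): fetch a localized tree and
# count its top-level children. It assumes the root node built by `_recurse_create`
# exposes a `children` list; adjust to whatever node class the project actually uses.
def print_topic_tree_summary(lang="es"):
    root = get_khan_topic_tree(lang=lang)
    print("Root topic for", lang, "has", len(root.children), "top-level children")
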
def generate_dubbed_video_mappings_from_csv():

    resp = make_request(KA_LITE_DUBBED_LIST, timeout=120)
    csv_data = resp.content.decode("utf-8")

    # This CSV file is in standard format: separated by ",", quoted by '"'
    reader = csv.reader(StringIO(csv_data))
    video_map = {}
    header_row = []

    # Loop through each row in the spreadsheet.
    for row in reader:
        # skip over the header rows
        if row[0].strip() in ["", "UPDATED:"]:
            continue

        elif row[0] == "SERIAL":
            # Read the header row; lowercase all values (including language names).
            header_row = [v.lower() for v in row]
            assert "title id" in header_row, "Video slug column header should be found."
            assert "english" in header_row, "English video column header should be found."
            slug_idx = header_row.index("title id")
            english_idx = header_row.index("english")

        else:
            # Rows 6 and beyond are data.
            assert len(row) == len(header_row), \
                "Row length should match the header row length"

            # Grab the slug and english video ID.
            video_slug = row[slug_idx]
            english_video_id = row[english_idx]
            assert english_video_id, "English Video ID should not be empty"
            assert video_slug, "Slug should not be empty"

            # English video is the first video ID column,
            #   and following columns (until the end) are other languages.
            # Loop through those columns and, if a video exists,
            #   add it to the dictionary.
            for idx in range(english_idx, len(row)):
                if not row[idx]:  # make sure there's a dubbed video
                    continue

                lang = header_row[idx]
                if lang not in video_map:  # add the first level if it doesn't exist
                    video_map[lang] = {}
                dubbed_youtube_id = row[idx]
                if english_video_id == dubbed_youtube_id and lang != "english":
                    print(
                        "Skipping entry for (%s, %s): dubbed and english youtube ID are the same."
                        % (lang, english_video_id))
                else:
                    # Add the dubbed video ID for this language, keyed by the English video ID.
                    video_map[lang][english_video_id] = dubbed_youtube_id
    return video_map
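
# Hypothetical lookup helper (not in the original source) illustrating the shape of the
# mapping built above: {language_name_lowercase: {english_youtube_id: dubbed_youtube_id}}.
def get_dubbed_youtube_id(video_map, lang, english_video_id):
    """Return the dubbed YouTube ID for `english_video_id` in `lang`, or None if missing."""
    return video_map.get(lang.lower(), {}).get(english_video_id)
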
    def get_assessment_items(self):
        items_list = []
        kalang = ASSESSMENT_LANGUAGE_MAPPING.get(self.lang, self.lang)
        for ai_id in self.assessment_items:
            item_url = ASSESSMENT_URL.format(assessment_item=ai_id, kalang=kalang)
            item = make_request(item_url).json()
            # check if assessment item is fully translated, before adding it to list
            if item["isFullyTranslated"]:
                ai = KhanAssessmentItem(item["id"], item["itemData"], self.source_url)
                items_list.append(ai)
        return items_list

def get_video_id_english_mappings():
    projection = json.dumps(
        {"videos": [OrderedDict([("youtubeId", 1), ("id", 1)])]})

    r = make_request(V2_API_URL.format(lang='en', projection=projection),
                     timeout=120)
    english_video_data = r.json()
    english_video_data = english_video_data["videos"]

    mapping = {n["id"]: n["youtubeId"] for n in english_video_data}

    return mapping
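
# Illustrative inversion (an assumption, not from the original source): turn the
# {video_node_id: english_youtube_id} mapping around so a YouTube ID can be resolved
# back to its Khan Academy video node id.
def get_youtube_id_to_video_id_mapping():
    mapping = get_video_id_english_mappings()
    return {youtube_id: video_id for video_id, youtube_id in mapping.items()}
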
    def get_assessment_items(self):
        items_list = []
        lang = ASSESSMENT_LANGUAGE_MAPPING.get(self.lang, self.lang)
        for i in self.assessment_items:
            item_url = ASSESSMENT_URL.format(assessment_item=i["id"],
                                             lang=lang)
            item = make_request(item_url).json()
            # check if assessment item is fully translated, before adding it to list
            if item["is_fully_translated"]:
                items_list.append(
                    KhanAssessmentItem(item["id"], item["item_data"],
                                       self.source_url))

        return items_list
def retrieve_translations(lang_code, includes="*.po"):

    if lang_code in SUPPORTED_LANGS:
        return {}

    lang_code = CROWDIN_LANGUAGE_MAPPING.get(lang_code, lang_code)

    r = make_request(CROWDIN_URL.format(key=os.environ['KA_CROWDIN_SECRET_KEY'], lang_code=lang_code), timeout=180)

    with open('crowdin.zip', "wb") as f:
        for chunk in r.iter_content(1024):
            f.write(chunk)

    zip_extraction_path = tempfile.mkdtemp()

    with zipfile.ZipFile('crowdin.zip') as zf:
        zf.extractall(zip_extraction_path)

    all_filenames = glob.iglob(
        os.path.join(zip_extraction_path, "**"),
        recursive=True
    )
    filenames = fnmatch.filter(all_filenames, includes)

    # Use the polib library, since it's much faster at concatenating
    # po files. It doesn't have a dict interface though, so we'll
    # reread the file using babel.Catalog.
    with tempfile.NamedTemporaryFile() as f:
        main_pofile = polib.POFile(fpath=f.name)

        for filename in filenames:
            pofile = polib.pofile(filename)
            main_pofile.merge(pofile)

        for entry in main_pofile:
            entry.obsolete = False

        main_pofile.save()

    shutil.rmtree(zip_extraction_path)

    msgid_mapping = Catalog(main_pofile)

    return msgid_mapping
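
# Hedged usage sketch (assumption, not from the original source): treat the returned
# msgid mapping like a dict and fall back to the untranslated source string when no
# translation exists. The exact interface of `Catalog` here is assumed, not confirmed.
def translate_string(msgid_mapping, source_text):
    translated = msgid_mapping.get(source_text) if msgid_mapping else None
    return translated or source_text
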
def get_khan_api_json(lang, update=False):
    """
    Get all data for language `lang` from the KA API at /api/v2/topics/topictree
    """
    filename = 'khan_academy_json_{}.json'.format(lang)
    filepath = os.path.join(KHAN_API_CACHE_DIR, filename)
    if os.path.exists(filepath) and not update:
        print('Loaded KA API json from cache', filepath)
        with open(filepath) as jsonf:
            data = json.load(jsonf)
    else:
        print('Downloading KA API json for lang =', lang)
        url = V2_API_URL.format(lang=lang, projection=PROJECTION_KEYS)
        LOGGER.debug('khan API url=' + url)
        response = make_request(url, timeout=120)
        data = response.json()
        os.makedirs(KHAN_API_CACHE_DIR, exist_ok=True)  # exist_ok makes a separate existence check unnecessary
        with open(filepath, 'w') as jsonf:
            json.dump(data, jsonf, ensure_ascii=False, indent=4)
    return data
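
# Quick inspection sketch (not part of the original source): summarize how many nodes of
# each kind (e.g. topics, videos, exercises) the cached /api/v2/topics/topictree payload
# contains, relying only on the fact that its top-level values are lists of nodes.
def summarize_khan_api_json(lang):
    data = get_khan_api_json(lang)
    return {kind: len(nodes) for kind, nodes in data.items() if isinstance(nodes, list)}
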
def generate_common_core_mapping():

    resp = make_request(COMMON_CORE_SPREADSHEET, timeout=120)
    csv_data = resp.content.decode("utf-8")

    # This CSV file is in standard format: separated by ",", quoted by '"'
    reader = csv.reader(StringIO(csv_data))
    slug_standard_map = {}
    header_row = []

    # Loop through each row in the spreadsheet.
    for row in reader:

        if row[0] == "Grade":
            # Read the header row; lowercase all values.
            header_row = [v.lower() for v in row]
            grade_idx = header_row.index("grade")
            common_core_idx = header_row.index("common core area")
            standard_idx = header_row.index("standard")
            skill_name_idx = header_row.index("name of skill on khan academy")
            link_idx = header_row.index("link to skill")
            description_idx = header_row.index("description")
            area_idx = header_row.index("area")
        else:
            # Grab CC standard and link to exercise
            standard_tag = row[standard_idx]
            link = row[link_idx]
            if not link or not standard_tag:
                continue

            # parse out slug from link and set standard tag
            slug = link.split("e/")[1]
            slug_standard_map[slug] = standard_tag

    return slug_standard_map
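
# Illustrative helper (an assumption, not part of the original source): look up the
# Common Core standard tag for an exercise, given either its slug or a full
# ".../e/<slug>" Khan Academy link, using the same "e/" split as above.
def get_common_core_tag(slug_standard_map, slug_or_link):
    slug = slug_or_link.split("e/")[1] if "e/" in slug_or_link else slug_or_link
    return slug_standard_map.get(slug)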