def addin_dubbed_video_mappings(node_data, lang=en_lang_code):
    """Merge dubbed-video substitutions for *lang* into ``node_data``.

    Reads ``build/dubbed_video_mappings.json`` (generating it via ``main()``
    when absent), looks up the language section by its language name, then
    walks the cached ``en_nodes.json`` and appends to ``node_data`` every
    English video that has a dubbed counterpart (with its youtube_id
    substituted) plus every topic not already present.

    :param node_data: list of node dicts (topics/videos) from the Khan API.
    :param lang: language code of the dubbed videos to merge in.
    :returns: ``node_data`` extended with the missing nodes; returned
        unchanged when the language has no dubbed-video mappings.
    """
    build_path = os.path.join(os.getcwd(), "build")
    # Create a dubbed_video_mappings.json, at build folder, unless a
    # previous run already cached it.
    if os.path.exists(os.path.join(build_path, "dubbed_video_mappings.json")):
        logging.info("Dubbed videos json already exist at %s" % (DUBBED_VIDEOS_MAPPING_FILEPATH))
    else:
        main()
    # Get the list of video ids from dubbed video mappings.
    lang_code = get_lang_name(lang).lower()
    dubbed_videos_path = os.path.join(build_path, "dubbed_video_mappings.json")
    with open(dubbed_videos_path, "r") as f:
        dubbed_videos_load = ujson.load(f)
    dubbed_videos_list = dubbed_videos_load.get(lang_code)
    # If dubbed_videos_list is None the language code is not available in
    # the dubbed video mappings, so there is nothing to merge.
    if not dubbed_videos_list:
        return node_data
    # Collect the current youtube_ids and topic paths from the khan api
    # node data so only missing nodes get appended below.
    youtube_ids = []
    topic_paths = []
    for node in node_data:
        node_kind = node.get("kind")
        if node_kind == NodeType.video:
            youtube_ids.append(node.get("youtube_id"))
        if node_kind == NodeType.topic:
            topic_paths.append(node.get("path"))
    en_nodes_path = os.path.join(build_path, "en_nodes.json")
    with open(en_nodes_path, "r") as f:
        en_node_load = ujson.load(f)
    en_node_list = []
    # The en_nodes.json must be the same data structure as the node_data
    # variable from the khan api.
    for node in en_node_load:
        node_kind = node.get("kind")
        if node_kind == NodeType.video:
            youtube_id = node["youtube_id"]
            # PEP 8 idiom: "not in" rather than "not x in y".
            if youtube_id not in youtube_ids:
                if youtube_id in dubbed_videos_list:
                    node["youtube_id"] = dubbed_videos_list[youtube_id]
                    node["translated_youtube_lang"] = lang
                    en_node_list.append(node)
                    youtube_ids.append(youtube_id)
        # Append all topics that are not yet in the topic_paths list.
        if node_kind == NodeType.topic:
            if node["path"] not in topic_paths:
                en_node_list.append(node)
                topic_paths.append(node["path"])
    node_data += en_node_list
    return node_data
def add_dubbed_video_mappings(node_data, lang=EN_LANG_CODE):
    """Merge dubbed videos for *lang* into ``node_data``.

    Ensures ``build/dubbed_video_mappings.json`` exists (running ``main()``
    to create it otherwise), resolves the language section by trying the
    ka_name, the lang_name and finally the native_name, downloads and
    caches ``en_nodes.json``, then appends dubbed video nodes and missing
    topics. English video nodes that gained a dubbed replacement are
    dropped from the original list so only the dubbed node remains.

    :param node_data: list of node dicts (topics/videos) from the Khan API.
    :param lang: language code of the dubbed videos to merge in.
    :returns: the merged node list, or ``node_data`` unchanged when no
        mapping exists for the language.
    """
    build_path = os.path.join(os.getcwd(), "build")
    mapping_path = os.path.join(build_path, "dubbed_video_mappings.json")
    # Only regenerate the mappings file when it is not cached already.
    if os.path.exists(mapping_path):
        logging.info('Dubbed videos json already exist at %s' % (DUBBED_VIDEOS_MAPPING_FILEPATH))
    else:
        main()
    with open(mapping_path, 'r') as mapping_file:
        mappings = ujson.load(mapping_file)
    # Dubbed video mappings may be keyed by the ka_name, lang_name or
    # native_name of the language -- try each in turn.
    dubbed_videos_list = mappings.get(get_lang_ka_name(lang).lower())
    if not dubbed_videos_list:
        dubbed_videos_list = mappings.get(get_lang_name(lang).lower())
    if not dubbed_videos_list:
        dubbed_videos_list = mappings.get(get_lang_native_name(lang).lower())
    if not dubbed_videos_list:
        # Language is not available in the dubbed video mappings at all.
        return node_data
    # Record which youtube ids (already in this language) and topic paths
    # are present, so only missing ones get appended below.
    youtube_ids = []
    topic_path_list = []
    for node in node_data:
        kind = node.get("kind")
        if kind == NodeType.video and node["translated_youtube_lang"] == lang:
            youtube_ids.append(node.get("youtube_id"))
        elif kind == NodeType.topic:
            topic_path_list.append(node.get("path"))
    # Generate and cache `en_nodes.json` for dubbed video mappings.
    url = API_URL.format(projection=json.dumps(PROJECTION_KEYS), lang=EN_LANG_CODE, ka_domain=KA_DOMAIN)
    download_and_clean_kalite_data(url, lang=EN_LANG_CODE, ignorecache=False, filename="en_nodes.json")
    with open(os.path.join(build_path, "en_nodes.json"), 'r') as nodes_file:
        en_nodes = ujson.load(nodes_file)
    translated_node_list = []
    # en_nodes.json shares the node_data structure from the khan api.
    for node in en_nodes:
        kind = node.get("kind")
        if kind == NodeType.topic:
            # Append all topics that are not in the topic path list yet.
            path = node["path"]
            if path not in topic_path_list:
                translated_node_list.append(node)
                topic_path_list.append(path)
        elif kind == NodeType.video:
            youtube_id = node["youtube_id"]
            if youtube_id not in youtube_ids and youtube_id in dubbed_videos_list:
                node["youtube_id"] = dubbed_videos_list[youtube_id]
                node["translated_youtube_lang"] = lang
                translated_node_list.append(node)
                youtube_ids.append(youtube_id)
    # remove all video nodes who have a dubbed video associated with them
    node_data = [node for node in node_data if node.get('youtube_id') not in dubbed_videos_list]
    node_data += translated_node_list
    return node_data
# NOTE(review): this redefines add_dubbed_video_mappings from earlier in the
# file with the same name and signature; at import time this later definition
# wins. Unlike the earlier version it does NOT remove the original English
# video nodes that received a dubbed replacement -- confirm which version is
# intended and delete the other.
def add_dubbed_video_mappings(node_data, lang=EN_LANG_CODE):
    """Merge dubbed videos for *lang* into ``node_data``.

    Ensures ``build/dubbed_video_mappings.json`` exists (running ``main()``
    to create it otherwise), resolves the language section by trying the
    ka_name, the lang_name and finally the native_name, downloads and
    caches ``en_nodes.json``, then appends dubbed video nodes (with their
    youtube ids substituted) and any topics missing from ``node_data``.

    :param node_data: list of node dicts (topics/videos) from the Khan API.
    :param lang: language code of the dubbed videos to merge in.
    :returns: ``node_data`` extended with the missing nodes; returned
        unchanged when no mapping exists for the language.
    """
    build_path = os.path.join(os.getcwd(), "build")
    # Create a dubbed_video_mappings.json, at build folder, unless a
    # previous run already cached it.
    if os.path.exists(os.path.join(build_path, "dubbed_video_mappings.json")):
        logging.info("Dubbed videos json already exist at %s" % (DUBBED_VIDEOS_MAPPING_FILEPATH))
    else:
        main()
    # Get the list of video ids from dubbed video mappings.
    dubbed_videos_path = os.path.join(build_path, "dubbed_video_mappings.json")
    with open(dubbed_videos_path, "r") as f:
        dubbed_videos_load = ujson.load(f)
    # Dubbed video mappings may use the ka_name, lang_name or native_name
    # as reference to get a dictionary of language videos.
    lang_name = get_lang_ka_name(lang).lower()
    dubbed_videos_list = dubbed_videos_load.get(lang_name)
    if not dubbed_videos_list:
        lang_name = get_lang_name(lang).lower()
        dubbed_videos_list = dubbed_videos_load.get(lang_name)
    # If dubbed_videos_list is None the language code is not available in
    # dubbed video mappings; fall back to the native name before giving up.
    if not dubbed_videos_list:
        lang_native_name = get_lang_native_name(lang).lower()
        dubbed_videos_list = dubbed_videos_load.get(lang_native_name)
        if not dubbed_videos_list:
            return node_data
    # Record which youtube ids (already in this language) and topic paths
    # are present, so only missing ones get appended below.
    youtube_ids = []
    topic_path_list = []
    for node in node_data:
        node_kind = node.get("kind")
        if node_kind == NodeType.video:
            if node["translated_youtube_lang"] == lang:
                youtube_ids.append(node.get("youtube_id"))
        if node_kind == NodeType.topic:
            topic_path_list.append(node.get("path"))
    # Generate and cache `en_nodes.json` for dubbed video mappings.
    url = API_URL.format(projection=json.dumps(PROJECTION_KEYS), lang=EN_LANG_CODE, ka_domain=KA_DOMAIN)
    download_and_clean_kalite_data(url, lang=EN_LANG_CODE, ignorecache=False, filename="en_nodes.json")
    en_nodes_path = os.path.join(build_path, "en_nodes.json")
    with open(en_nodes_path, "r") as f:
        en_node_load = ujson.load(f)
    translated_node_list = []
    # The en_nodes.json must be the same data structure as the node_data
    # variable from the khan api.
    for node in en_node_load:
        node_kind = node.get("kind")
        # Append all topics that are not in the topic path list.
        if node_kind == NodeType.topic:
            if node["path"] not in topic_path_list:
                translated_node_list.append(node)
                topic_path_list.append(node["path"])
        if node_kind == NodeType.video:
            youtube_id = node["youtube_id"]
            # PEP 8 idiom: "not in" rather than "not x in y".
            if youtube_id not in youtube_ids:
                if youtube_id in dubbed_videos_list:
                    node["youtube_id"] = dubbed_videos_list[youtube_id]
                    node["translated_youtube_lang"] = lang
                    translated_node_list.append(node)
                    youtube_ids.append(youtube_id)
    node_data += translated_node_list
    return node_data