def test_known_codes():
    """Check that well-known language codes resolve to the expected names."""
    # (code, expected name, expected native name, message when lookup fails)
    expectations = [
        ('en', "English", "English", 'English not found'),
        ('pt-BR', "Portuguese, Brazil", "Português (Brasil)", 'Brazilian Portuguese not found'),
        ('zul', "Zulu", "isiZulu", 'Zulu not found'),
    ]
    for code, expected_name, expected_native, missing_msg in expectations:
        lang_obj = languages.getlang(code)
        assert lang_obj is not None, missing_msg
        assert lang_obj.name == expected_name, 'Wrong name'
        assert lang_obj.native_name == expected_native, 'Wrong native_name'
def _get_lang_native_name(code):
    """
    Return the `native_name` of the language looked up by `code`.

    When the lookup result has no `native_name` attribute (AttributeError),
    a warning is logged and None is returned instead.
    """
    try:
        return languages.getlang(code).native_name
    except AttributeError:
        logger.warning("Did not find language code {} in our le_utils.constants!".format(code))
        return None
def set_language(self, language):
    """
    Store the internal language code for `language` in `self.language`.

    `language` may be a code string or a `languages.Language` object. A
    string that `languages.getlang` cannot resolve raises TypeError. Values
    of any other type leave `self.language` untouched.
    """
    if isinstance(language, str):
        resolved = languages.getlang(language)
        if not resolved:
            raise TypeError("Language code {} not found".format(language))
        self.language = resolved.code
    if isinstance(language, languages.Language):
        self.language = language.code
def get_lang_obj(self):
    """
    Resolve `self.name` to a known language and store it via `set_value`.

    The name is first tried with `getlang`, then with `getlang_by_name`, and
    finally against the `UND_LANG` fallback table.

    Returns:
        True when a language was found and `self.set_value(name, code,
        native_name)` was called; False when `self.name` is empty or no
        match exists anywhere.
    """
    if self.name == "":
        return False

    # Single lookup per helper; fall back to the by-name search only when
    # the direct lookup finds nothing.
    language_obj = getlang(self.name) or getlang_by_name(self.name)
    if language_obj:
        self.set_value(language_obj.name, language_obj.code, language_obj.native_name)
        return True

    # `.get` avoids a KeyError for names missing from the fallback table.
    fallback = UND_LANG.get(self.name)
    if fallback:
        self.set_value(fallback["name"], fallback["code"], fallback["native_name"])
        return True

    # Explicit False instead of silently falling off the end (None).
    return False
def _get_lang_native_name(code):
    """
    Look up `code` with `languages.getlang` and return its native name.

    Returns None, after logging a warning, if reading `native_name` from the
    lookup result raises AttributeError.
    """
    native_name = None
    try:
        native_name = languages.getlang(code).native_name
    except AttributeError:
        message = "Did not find language code {} in our le_utils.constants!".format(code)
        logger.warning(message)
    return native_name
def test_set_language():
    """SubtitleFile accepts a code string or a Language object and stores a code string."""
    from_code = SubtitleFile('path', language='en')
    from_object = SubtitleFile('path', language=languages.getlang('es'))

    assert isinstance(from_code.language, str), "Subtitles must be converted to Language class"
    assert isinstance(from_object.language, str), "Subtitles can be passed as Langauge models"
    assert from_code.language == 'en', "Subtitles must have a language"
    assert from_object.language == 'es', "Subtitles must have a language"

    # An unknown code must be rejected at construction time.
    with pytest.raises(TypeError):
        SubtitleFile('path', language='notalanguage')
def getlang_patched(language):
    """
    Return a language code known to `languages.getlang`, using fallbacks.

    Tried in order: `language` itself, the part before the first '-'
    (e.g. zh-Hans --> zh), and finally a match by language name via
    pycountry (e.g. YouTube might use "zu" while le_utils uses "zul").
    Returns None when pycountry has no entry for the prefix.
    """
    if languages.getlang(language):
        return language

    prefix = language.partition('-')[0]
    if languages.getlang(prefix):
        return prefix

    # Resolve remaining inconsistencies by going through the language name.
    pyc_lang = pycountry.languages.get(alpha_2=prefix)
    return _LANGUAGE_NAME_LOOKUP.get(pyc_lang.name) if pyc_lang else None
def linkAssignment(material):
    """
    Package the web page linked from `material` as an HTML5 app node.

    The page at `material["link"]["url"]` is downloaded, its prettified HTML
    is written as the index of './myzipper.zip', and every script `src` and
    link `href` found in the page is fetched into the zip. The list of
    stylesheet URLs is also written to 'css_files.css' in the working
    directory.

    Returns:
        An HTML5AppNode whose only file is the generated zip.
    """
    url = material["link"]["url"]

    # The session is closed as soon as the page has been fetched.
    with requests.Session() as session:
        session.headers[
            "User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
        html = session.get(url).content
    soup = bs(html, "html.parser")

    # Absolute URLs of every external script and linked resource.
    script_files = [
        urljoin(url, tag.attrs.get("src"))
        for tag in soup.find_all("script")
        if tag.attrs.get("src")
    ]
    css_files = [
        urljoin(url, tag.attrs.get("href"))
        for tag in soup.find_all("link")
        if tag.attrs.get("href")
    ]

    with HTMLWriter('./myzipper.zip') as zipper:
        zipper.write_index_contents(soup.prettify())

        # The original opened "index.html" for writing (truncating it without
        # writing anything) and "javascript_files.js" for reading (raising
        # FileNotFoundError when absent). Neither handle was used, so both
        # opens are gone.
        # NOTE(review): every script is stored under the same name
        # "scripts.js" (and every stylesheet under "style.css") -- confirm
        # whether later entries are meant to replace earlier ones.
        for js_file in script_files:
            zipper.write_url(js_file, "scripts.js", directory="src")

        with open("css_files.css", "w") as f:
            for css_file in css_files:
                print(css_file, file=f)
                zipper.write_url(css_file, "style.css", directory="styles")

    link_file = HTMLZipFile(path='./myzipper.zip')
    link_node = HTML5AppNode(
        source_id=material["link"]["url"],
        title=material["link"]["title"],
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        language=getlang('en').id,
        derive_thumbnail=False,
        thumbnail=None,
        files=[link_file],
    )
    return link_node
def parse_lang_and_variant_from_kwargs(self, kwargs):
    """
    Extract and validate the `lang` and `variant` options from `kwargs`.

    Returns:
        A `(lang, variant)` tuple. `lang` is the code given by the caller and
        must be known to `getlang`; `variant` is the value of the "variant"
        option, or None when it was not given.

    Raises:
        ValueError: when `kwargs` has no "lang" entry.
        AssertionError: when `getlang` does not recognize the code.
    """
    if "lang" not in kwargs:
        raise ValueError('Khan Academy chef must be run with lang=<code>')

    lang_code = kwargs["lang"]
    assert getlang(lang_code), 'Language code ' + lang_code + ' not recognized'
    return lang_code, kwargs.get("variant")
def get_channel_title(lang=None, variant=None):
    """
    Return the KA channel title for le-utils code `lang` and variant `variant`.

    A `(lang, variant)` entry in CHANNEL_TITLE_LOOKUP wins over a plain
    `lang` entry; without either, the title is built from the language's
    `first_native_name`.
    """
    if variant and (lang, variant) in CHANNEL_TITLE_LOOKUP:
        return CHANNEL_TITLE_LOOKUP[(lang, variant)]
    if lang in CHANNEL_TITLE_LOOKUP:
        return CHANNEL_TITLE_LOOKUP[lang]
    return "Khan Academy ({})".format(getlang(lang).first_native_name)
def get_channel_description(lang=None, variant=None):
    """
    Return the KA channel description for le-utils code `lang` and `variant`.

    A `(lang, variant)` entry in CHANNEL_DESCRIPTION_LOOKUP wins over a plain
    `lang` entry; without either, a generic sentence is built from the
    language's `name`.
    """
    if variant and (lang, variant) in CHANNEL_DESCRIPTION_LOOKUP:
        return CHANNEL_DESCRIPTION_LOOKUP[(lang, variant)]
    if lang in CHANNEL_DESCRIPTION_LOOKUP:
        return CHANNEL_DESCRIPTION_LOOKUP[lang]
    return "Khan Academy content for {}.".format(getlang(lang).name)
def __init__(self, youtube_id, language=None, **kwargs):
    """
    Set up a subtitle file for the YouTube video `youtube_id`.

    `language` may be a code string or a `languages.Language` object. The
    code as given is kept in `self.youtube_language`; the code passed on to
    the parent constructor is the internal one.
    """
    self.youtube_url = 'http://www.youtube.com/watch?v={}'.format(youtube_id)

    # Language objects are still accepted for backward compatibility.
    if isinstance(language, languages.Language):
        language = language.code

    # Keep the code as given: YouTube codes can differ from the internal ones.
    self.youtube_language = language

    if languages.getlang(language) is None:
        # Not a valid internal code, so try the two-letter ISO lookup and use
        # the internal code of whatever it finds.
        # NOTE(review): when this lookup finds nothing either, reading
        # `.code` raises AttributeError -- confirm that is acceptable.
        alpha2_match = languages.getlang_by_alpha2(language)
        language = alpha2_match.code

    super(YouTubeSubtitleFile, self).__init__(language=language, **kwargs)
    assert self.language, "Subtitles must have a language"
def should_include_subtitle(youtube_language, target_lang):
    """
    Decide whether a YouTube subtitle track belongs in a chef run.

    Args:
        youtube_language: language code of the subtitle track on YouTube.
        target_lang: internal language code the chef is running for.

    Returns:
        True when both codes resolve to languages with the same
        `primary_code` (a different locale is fine), otherwise False. A
        `youtube_language` that cannot be resolved is never included.
    """
    lang_obj = get_language_with_alpha2_fallback(youtube_language)
    if lang_obj is None:
        # Unknown YouTube code: skip the track instead of crashing on
        # `None.primary_code`.
        return False
    target_lang_obj = getlang(target_lang)
    # Accept the same language code even if the locale differs.
    return bool(lang_obj.primary_code == target_lang_obj.primary_code)
def _get_language_with_alpha2_fallback(language_code):
    """
    Resolve `language_code` (string) to a le-utils Language object.

    The code is first looked up as an internal language code. If that fails,
    it is mapped to the internal representation with the `getlang_by_alpha2`
    helper. Returns None if both lookups fail.
    """
    # 1. A hit here means `language_code` is a valid internal language id.
    internal_match = languages.getlang(language_code)
    if internal_match is not None:
        return internal_match
    # 2. Otherwise try to match by two-letter ISO code.
    return languages.getlang_by_alpha2(language_code)
def get_channel(self, **kwargs):
    """
    Build the test Khan Academy ChannelNode.

    The language comes from the `lang` keyword argument (default "en") and
    is used in the source id, title and description.
    """
    lang_code = kwargs.get("lang", "en")
    lang = getlang(lang_code)
    source_id = "KA ({0})".format(lang_code)
    title = "Khan Academy ({0}) - TEST".format(lang.native_name)
    description = 'Khan Academy content for {}.'.format(lang.name)
    return nodes.ChannelNode(
        source_id=source_id,
        source_domain="khanacademy.org-test",
        title=title,
        description=description,
        thumbnail="https://upload.wikimedia.org/wikipedia/commons/1/15/Khan_Academy_Logo_Old_version_2015.jpg",
    )
def transform_vertical_to_exercise(vertical, parent_title=None):
    """
    Turn an Edraak `test_vertical` or `knowledge_check_vertical` into an exercise dict.

    Args:
        vertical: dict describing the vertical; its `children` are scanned
            for an optional leading `html` item (used as description) and
            for `problem` items (used as questions).
        parent_title: optional title prepended to the vertical's
            `display_name`.

    Returns:
        The exercise dict, or None when the vertical has no children.
    """
    # Covers both a missing key and an empty list; the latter used to raise
    # IndexError on `children[0]`.
    children = vertical.get('children')
    if not children:
        return None

    # Extract an optional description from the first html node.
    description = ''
    first_child = children[0]
    if first_child['kind'] == 'html':
        description = extract_text_from_html_item(first_child, translate_from='ar')

    if parent_title:
        exercise_title = parent_title + ' ' + vertical['display_name']
    else:
        exercise_title = vertical['display_name']

    # By default 5 correct answers count as mastery.
    default_m = 5
    # NOTE(review): no 'n' is supplied alongside 'm' for the M_OF_N model --
    # confirm the consumer fills it in.
    exercise_dict = dict(
        kind=content_kinds.EXERCISE,
        title=exercise_title,
        author='Edraak',
        source_id=vertical['url_name'],
        description=description,
        language=getlang('ar').code,
        license=EDRAAK_LICENSE,
        exercise_data={
            'mastery_model': exercises.M_OF_N,
            'randomize': False,
            'm': default_m,
        },
        questions=[],
    )

    for child in children:
        if child['kind'] == 'problem':
            parsed_problem = parse_questions_from_problem(child)
            exercise_dict['questions'].extend(parsed_problem['questions'])

    # Lower m when the exercise has fewer questions than the default.
    question_count = len(exercise_dict['questions'])
    if question_count < default_m:
        exercise_dict['exercise_data']['m'] = question_count

    return exercise_dict
def get_json_tree_path(self, *args, **kwargs):
    """
    Return the path of the ricecooker json tree file.

    The file name is `self.RICECOOKER_JSON_TREE_TPL` filled with the code of
    the channel language, taken from the `lang` keyword argument ("en" when
    absent) and resolved by code or, failing that, by name. Override this
    method to use a custom filename.
    """
    language_code = kwargs.get("lang", "en")  # default to en if not specified
    lang_obj = getlang(language_code) or getlang_by_name(language_code)
    json_filename = self.RICECOOKER_JSON_TREE_TPL.format(lang_obj.code)
    return os.path.join(self.TREES_DATA_DIR, json_filename)
def videoAssignment(material):
    """
    Build a VideoNode for the YouTube video described by `material`.

    Reads `material["youtubeVideo"]["id"]` and `["title"]`; the node gets a
    low-resolution YouTube video file and English subtitles.
    """
    youtube_id = material["youtubeVideo"]["id"]
    video_title = material["youtubeVideo"]["title"]
    node_license = get_license(licenses.CC_BY,
                               copyright_holder='Copyright holder name')
    video_file = YouTubeVideoFile(youtube_id=youtube_id,
                                  high_resolution=False,
                                  language='en')
    subtitle_file = YouTubeSubtitleFile(youtube_id=youtube_id, language='en')
    return VideoNode(
        source_id=youtube_id,  # the youtube id doubles as source_id
        title=video_title,
        license=node_license,
        language=getlang('en').id,
        derive_thumbnail=True,  # video-specific flag
        thumbnail=None,
        files=[video_file, subtitle_file],
    )
def pre_run(self, args, options):
    """
    Build the ricecooker json tree for the entire channel and write it to disk.

    The channel dict is filled by `self.add_content_nodes` and saved at the
    path returned by `self.get_json_tree_path()`.
    """
    LOGGER.info('in pre_run...')
    channel_tree = {
        'title': 'Edraak Courses (العربيّة)',  # a human-readable title
        'source_domain': EDRAAK_COURSES_DOMAIN,  # content provider's domain
        'source_id': 'continuing-education-courses',  # an alphanumeric channel ID
        'description': EDRAAK_COURSES_CHANNEL_DESCRIPTION,
        'thumbnail': './chefdata/edraak-logo.png',  # logo created from SVG
        'language': getlang('ar').code,  # language code of channel
        'children': [],
    }
    self.add_content_nodes(channel_tree)
    write_tree_to_json_tree(self.get_json_tree_path(), channel_tree)
def make_topic_for_settings(title, ffmpeg_settings):
    """
    Return a TopicNode holding one video node per entry of the global VIDEO_URLS.

    Videos are titled 'Video 1', 'Video 2', ... and each is created with the
    given `ffmpeg_settings`.
    """
    topic = TopicNode(
        source_id=title,
        title=title,
        description='',
        author=None,
        language=getlang('en').id,
        thumbnail=None,
    )
    for vid_number, video_url in enumerate(VIDEO_URLS, start=1):
        child = make_video_node('Video ' + str(vid_number),
                                video_url,
                                ffmpeg_settings=ffmpeg_settings)
        topic.add_child(child)
    return topic
class SampleChef(SushiChef):
    """
    Chef that builds the "uber big" sample channel for Kolibri Studio.

    The command line script is expected to call its `main()` method, which
    is not defined in this class.
    """
    channel_info = {
        # content provider's domain
        'CHANNEL_SOURCE_DOMAIN': 'source.org',
        # an alphanumeric channel ID
        'CHANNEL_SOURCE_ID': 'uber-big-ricecooker-channel',
        # a human-readable title
        'CHANNEL_TITLE': 'UBERRRRRR count of topics',
        # language code of channel
        'CHANNEL_LANGUAGE': getlang('en').id,
        # (optional) local path or url to image file
        'CHANNEL_THUMBNAIL': 'http://quantlabs.net/blog/wp-content/uploads/2015/11/pythonlogo.jpg',
        'CHANNEL_DESCRIPTION': 'This channel was created from the files in the '
                               'content/ dir and the metadata provided in Python',
    }

    def construct_channel(self, *args, **kwargs):
        """Create the ChannelNode, fill its topic tree, validate it and return it."""
        # get_channel creates the ChannelNode from the data in self.channel_info
        new_channel = self.get_channel(*args, **kwargs)
        self.create_content_nodes(new_channel)
        raise_for_invalid_channel(new_channel)
        return new_channel

    def create_content_nodes(self, channel):
        """Populate `channel` by delegating to `make_random_subtree(channel, 2)`."""
        make_random_subtree(channel, 2)
def youtubeNode(url):
    """
    Build a VideoNode for the YouTube watch page at `url`.

    The video id is taken from the `v` query parameter and the title from
    the page's <title> tag. The node gets a low-resolution YouTube video
    file and English subtitles.

    Raises:
        KeyError: when `url` has no `v` query parameter.
    """
    # Imported locally so both names are certain to be the functions: the
    # original called `urlparse(url)` and then `urlparse.parse_qs(...)`,
    # which cannot both work on the same object.
    from urllib.parse import parse_qs, urlparse

    # Pick the YouTube video id out of the URL.
    query = parse_qs(urlparse(url).query)
    videoID = query["v"][0]

    page_html = requests.get(url).text

    # Get the video title.
    soup = bs4.BeautifulSoup(page_html, "html.parser")
    title_tags = soup.find_all('title', limit=1)
    # str() of the result list includes the html tags and the list brackets,
    # so both are stripped.
    # NOTE(review): this also removes brackets that are part of the title.
    newTitle = re.sub('<.*?>', '', str(title_tags))
    newTitle = newTitle.replace("]", '').replace("[", '')

    video_node = VideoNode(
        source_id=videoID,  # the youtube id doubles as source_id
        title=str(newTitle),
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        language=getlang('en').id,
        derive_thumbnail=True,  # video-specific flag
        thumbnail=None,
        files=[
            YouTubeVideoFile(youtube_id=videoID,
                             high_resolution=False,
                             language='en'),
            YouTubeSubtitleFile(youtube_id=videoID, language='en'),
        ])
    return video_node
def get_channel(self, **kwargs):
    """
    Build the PhET ChannelNode for the language given by the `lang` kwarg.

    English (the default) keeps the bare source id; every other language
    gets a '-<code>' suffix. The description comes from CHANNEL_DESCRIPTIONS,
    falling back to the English entry.
    """
    lang_code = kwargs.get("lang", "en")
    lang_obj = getlang(lang_code)

    source_id_suffix = '' if lang_code == "en" else '-{}'.format(lang_code)

    description = CHANNEL_DESCRIPTIONS.get(lang_code, None)
    if description is None:
        description = CHANNEL_DESCRIPTIONS['en']

    return ChannelNode(
        source_domain='phet.colorado.edu',
        source_id='phet-html5-simulations{}'.format(source_id_suffix),
        title='PhET Interactive Simulations ({})'.format(lang_obj.native_name),
        thumbnail='chefdata/phet-logo-TM-partners.png',
        description=description,
        language=lang_obj,
    )
def get_channel(self, **kwargs):
    """
    Build the PhET ChannelNode for the language given by the `lang` kwarg.

    English (the default) keeps the bare source id; every other language
    gets a '-<code>' suffix. The title shows the language code, except for
    Arabic, where the native language name is shown.
    """
    lang_code = kwargs.get("lang", "en")
    lang_obj = getlang(lang_code)

    source_id_suffix = '' if lang_code == "en" else '-{}'.format(lang_code)
    title_id_suffix = lang_obj.native_name if lang_code == "ar" else lang_code

    return ChannelNode(
        source_domain='phet.colorado.edu',
        source_id='phet-html5-simulations{}'.format(source_id_suffix),
        title='PhET Interactive Simulations ({})'.format(title_id_suffix),
        thumbnail='https://phet.colorado.edu/images/phet-social-media-logo.png',
        description='The PhET Interactive Simulations project at the University of Colorado Boulder provides a collection of 140 interactive simulations for teaching and learning science and math for upper middle school and high school students. Most content available supports chemistry and physics learning.',
        language=lang_obj,
    )
def transform_html_vertical(vertical, parent_title=None):
    """
    Convert the `html` children of `vertical` into content node dicts.

    An html item that lists downloadable resources yields one document node
    per pdf resource; its other resources are passed through untouched. An
    html item without downloadable resources is packaged as a standalone
    html5 app node.

    Returns: nodes, downloadable_resources
    """
    if 'children' not in vertical:
        LOGGER.warning('found empty vertical' + str(vertical))
        return [], []

    assert all(ch['kind'] == 'html' for ch in vertical['children']), 'non htmls found'

    nodes = []
    downloadable_resources = []
    html_items = [ch for ch in vertical['children'] if ch['kind'] == 'html']

    for html in html_items:
        if not html.get('downloadable_resources'):
            LOGGER.debug(' packaging html content')
            html5app_dict = dict(
                kind=content_kinds.HTML5,
                title=vertical['display_name'],
                description=html.get('description', ''),
                source_id=html['url_name'],
                license=EDRAAK_LICENSE,
                language=getlang('ar').code,
                files=[],
            )
            zip_path = package_html_content_as_html5_zip_file(html)
            html5app_dict['files'].append(dict(
                file_type=file_types.HTML5,
                path=zip_path,
                language=getlang('ar').code,
            ))
            nodes.append(html5app_dict)
            continue

        LOGGER.debug(' found downloadable_resources')
        for resource in html['downloadable_resources']:
            if resource['ext'] != 'pdf':
                # Anything that is not a pdf is handed back to the caller.
                downloadable_resources.append(resource)
                continue
            pdf_node = dict(
                kind=content_kinds.DOCUMENT,
                title=resource['title'],
                description=resource.get('description', ''),
                source_id=resource['relhref'],
                license=EDRAAK_LICENSE,
                language=getlang('ar').code,
                files=[],
            )
            pdf_node['files'].append(dict(
                file_type=file_types.DOCUMENT,
                path=resource['relhref'],
                language=getlang('ar').code,
            ))
            nodes.append(pdf_node)

    return nodes, downloadable_resources
def create_exercise_nodes(self, channel):
    """
    Add an "Exercise Nodes" folder with two sample exercises to `channel`.

    The first exercise shows the simple question types (multiple select,
    single select, input); the second holds one Perseus question whose raw
    data is read from a JSON file under ./content/.

    TODO: handle exercises with embedded image links + base64 encoded data.
    """
    en = getlang('en').id

    # EXERCISES
    exercices_folder = TopicNode(
        source_id='uniqid011',
        title='Exercise Nodes',
        description='Put folder description here',
        author=None,
        language=en,
        thumbnail=None,
    )
    channel.add_child(exercices_folder)

    exercise1 = ExerciseNode(
        source_id='uniqid012',
        title='Basic questions',
        author='LE content team',
        description='Showcase of the simple exercises supported by Ricecooker and Studio',
        language=en,
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        thumbnail=None,
        exercise_data={
            'mastery_model': exercises.M_OF_N,  # or exercises.DO_ALL
            'randomize': True,
            'm': 2,
            'n': 3,
        },
        questions=[
            MultipleSelectQuestion(
                id='ex2aQ1',
                question="Which numbers are even?\n\nTest local image include: ![](content/ricecooker-channel-files/html5_vuejs.jpg)",
                correct_answers=[
                    "2",
                    "4",
                ],
                all_answers=["1", "2", "3", "4", "5"],
                hints=[
                    "There are two answers.",
                    "Both answers are multiples of two."
                ]),
            SingleSelectQuestion(
                id='ex2aQ2',
                question="What is 2 times 3?",
                correct_answer="6",
                all_answers=["2", "3", "5", "6"],
            ),
            InputQuestion(
                id='ex2aQ3',
                question="Name a factor of 10.",
                answers=["1", "2", "5", "10"],
            )
        ])
    exercices_folder.add_child(exercise1)

    # LOAD JSON DATA (as string) FOR PERSEUS QUESTIONS
    # The context manager closes the file; the original left the handle open.
    with open('./content/ricecooker-channel-files/perseus_graph_question.json',
              'r') as perseus_file:
        SAMPLE_PERSEUS_4_JSON = perseus_file.read()

    exercise2 = ExerciseNode(
        source_id='baszzzs1',
        title='Perseus questions',
        author='LE content team',
        description='An example exercise with Persus questions',
        language=en,
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        thumbnail=None,
        exercise_data={
            'mastery_model': exercises.M_OF_N,  # or exercises.DO_ALL
            'randomize': True,
            'm': 1,
            'n': 1,
        },
        questions=[
            PerseusQuestion(
                id='ex2bQ4',
                raw_data=SAMPLE_PERSEUS_4_JSON,
                source_url='https://github.com/learningequality/sample-channels/blob/master/contentnodes/exercise/sample_perseus04.json'
            ),
        ])
    exercices_folder.add_child(exercise2)
class SampleChef(SushiChef):
    """
    Chef that builds the sample ricecooker channel for Kolibri Studio.

    The channel contains one folder per content kind (audio, document,
    HTML5 app, video) plus a folder of sample exercises. The command line
    script is expected to call the chef's `main()` method, which is not
    defined in this class.
    """
    channel_info = {
        # content provider's domain
        'CHANNEL_SOURCE_DOMAIN': 'source.org',
        # an alphanumeric channel ID
        'CHANNEL_SOURCE_ID': 'sample-ricecooker-channel',
        # a human-readable title
        'CHANNEL_TITLE': 'Sample Ricecooker Channel',
        # language code of channel
        'CHANNEL_LANGUAGE': getlang('en').id,
        # (optional) local path or url to image file
        'CHANNEL_THUMBNAIL': 'http://quantlabs.net/blog/wp-content/uploads/2015/11/pythonlogo.jpg',
        'CHANNEL_DESCRIPTION': 'This channel was created from the files in the '
                               'content/ dir and metadata specified in Python',
    }

    def construct_channel(self, *args, **kwargs):
        """Create the ChannelNode, build its topic tree, validate it and return it."""
        # get_channel creates the ChannelNode from the data in self.channel_info
        channel = self.get_channel(*args, **kwargs)
        self.create_content_nodes(channel)
        self.create_exercise_nodes(channel)
        raise_for_invalid_channel(channel)
        return channel

    def create_content_nodes(self, channel):
        """
        Add the "Content Nodes" folder and its sample content to `channel`.

        Uses `add_child` and `add_file` to build the hierarchy of topic nodes
        (nested folder structure) and content nodes. Every content node is
        associated with one or more files.
        """
        en = getlang('en').id

        content_nodes_folder = TopicNode(
            source_id='uniqid001',
            title='Content Nodes',
            description='Put folder description here',
            author=None,
            language=en,
            thumbnail=None,
        )
        channel.add_child(content_nodes_folder)

        # AUDIO
        audio_nodes_folder = TopicNode(
            source_id='uniqid002',
            title='Audio Files Folder',
            description='Put folder description here',
            author=None,
            language=en,
            thumbnail=None,
        )
        content_nodes_folder.add_child(audio_nodes_folder)

        audio_node = AudioNode(
            source_id='uniqid003',
            title='Whale sounds',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=en,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            files=[],
        )
        audio_nodes_folder.add_child(audio_node)
        audio_file = AudioFile(
            path='./content/ricecooker-channel-files/Whale_sounds.mp3',  # note path can also be a URL
            language=en)
        audio_node.add_file(audio_file)

        # DOCUMENTS
        documents_folder = TopicNode(
            source_id='uniqid004',
            title='Document Nodes',
            description='Put folder description here',
            author=None,
            language=en,
            thumbnail=None,
        )
        content_nodes_folder.add_child(documents_folder)

        document_node = DocumentNode(
            source_id='uniqid005',
            title='The Supreme Court\u2019s Ruling in Brown vs. Board of Education',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=en,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            files=[
                DocumentFile(
                    path='./content/ricecooker-channel-files/brown-vs-board-of-education.pdf',
                    language=en)
            ])
        documents_folder.add_child(document_node)

        # HTML5 APPS
        html5apps_folder = TopicNode(
            source_id='uniqid006',
            title='HTML5App Nodes',
            description='Put folder description here',
            author=None,
            language=en,
            thumbnail=None,
        )
        content_nodes_folder.add_child(html5apps_folder)

        html5_node = HTML5AppNode(
            source_id='uniqid007',
            title='HTMLWeb capabilities test',
            author='First Last (author\'s name)',
            description='Tests different HTML/JS capabilities. What capabilities are allowed and disallowed by the sandboxed iframe used to render HTML5App nodes on Kolibri.',
            language=en,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail='./content/ricecooker-channel-files/html5_tests.jpg',
            files=[
                HTMLZipFile(
                    path='./content/ricecooker-channel-files/html5_tests.zip',
                    language=en)
            ])
        html5apps_folder.add_child(html5_node)

        html5_node2 = HTML5AppNode(
            source_id='uniqid008',
            title='Sample Vue.js app',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=en,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail='./content/ricecooker-channel-files/html5_vuejs.jpg',
            files=[
                HTMLZipFile(
                    path='./content/ricecooker-channel-files/html5_vuejs.zip',
                    language=en)
            ])
        html5apps_folder.add_child(html5_node2)

        # VIDEOS
        videos_folder = TopicNode(
            source_id='uniqid009',
            title='Video Nodes',
            description='Put folder description here',
            author=None,
            language=en,
            thumbnail=None,
        )
        content_nodes_folder.add_child(videos_folder)

        video_node = VideoNode(
            source_id='uniqid010',
            title='Wave particle duality explained in 2 mins',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=en,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[
                VideoFile(
                    path='./content/ricecooker-channel-files/Wave_particle_duality.mp4',
                    language=en)
            ])
        videos_folder.add_child(video_node)

        youtube_id = 'VJyk81HmcZQ'
        video_node2 = VideoNode(
            source_id=youtube_id,  # the youtube id doubles as source_id
            title='Estimating division that results in non whole numbers',
            author='Sal Khan',
            description='Video description would go here',
            language=en,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Khan Academy'),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[
                YouTubeVideoFile(youtube_id=youtube_id,
                                 high_resolution=False,
                                 language='en'),
                YouTubeSubtitleFile(youtube_id=youtube_id, language='ko')
            ])
        videos_folder.add_child(video_node2)

    def create_exercise_nodes(self, channel):
        """
        Add an "Exercise Nodes" folder with two sample exercises to `channel`.

        The first exercise shows the simple question types; the second holds
        one Perseus question whose raw data is read from a JSON file under
        ./content/.

        TODO: handle exercises with embedded image links + base64 encoded data.
        """
        en = getlang('en').id

        # EXERCISES
        exercices_folder = TopicNode(
            source_id='uniqid011',
            title='Exercise Nodes',
            description='Put folder description here',
            author=None,
            language=en,
            thumbnail=None,
        )
        channel.add_child(exercices_folder)

        exercise1 = ExerciseNode(
            source_id='uniqid012',
            title='Basic questions',
            author='LE content team',
            description='Showcase of the simple exercises supported by Ricecooker and Studio',
            language=en,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            exercise_data={
                'mastery_model': exercises.M_OF_N,  # or exercises.DO_ALL
                'randomize': True,
                'm': 2,
                'n': 3,
            },
            questions=[
                MultipleSelectQuestion(
                    id='ex2aQ1',
                    question="Which numbers are even?\n\nTest local image include: ![](content/ricecooker-channel-files/html5_vuejs.jpg)",
                    correct_answers=[
                        "2",
                        "4",
                    ],
                    all_answers=["1", "2", "3", "4", "5"],
                    hints=[
                        "There are two answers.",
                        "Both answers are multiples of two."
                    ]),
                SingleSelectQuestion(
                    id='ex2aQ2',
                    question="What is 2 times 3?",
                    correct_answer="6",
                    all_answers=["2", "3", "5", "6"],
                ),
                InputQuestion(
                    id='ex2aQ3',
                    question="Name a factor of 10.",
                    answers=["1", "2", "5", "10"],
                )
            ])
        exercices_folder.add_child(exercise1)

        # LOAD JSON DATA (as string) FOR PERSEUS QUESTIONS
        # The context manager closes the file; the original left the handle open.
        with open('./content/ricecooker-channel-files/perseus_graph_question.json',
                  'r') as perseus_file:
            SAMPLE_PERSEUS_4_JSON = perseus_file.read()

        exercise2 = ExerciseNode(
            source_id='baszzzs1',
            title='Perseus questions',
            author='LE content team',
            description='An example exercise with Persus questions',
            language=en,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            exercise_data={
                'mastery_model': exercises.M_OF_N,  # or exercises.DO_ALL
                'randomize': True,
                'm': 1,
                'n': 1,
            },
            questions=[
                PerseusQuestion(
                    id='ex2bQ4',
                    raw_data=SAMPLE_PERSEUS_4_JSON,
                    source_url='https://github.com/learningequality/sample-channels/blob/master/contentnodes/exercise/sample_perseus04.json'
                ),
            ])
        exercices_folder.add_child(exercise2)
f.close() return f # returns a closed file descriptor which we use for name attribute def test_bad_subtitles_raises(bad_subtitles_file): subs_file = SubtitleFile(bad_subtitles_file.name, language='en') pytest.raises(ValueError, subs_file.process_file) PRESSURECOOKER_REPO_URL = "https://raw.githubusercontent.com/bjester/pressurecooker/" PRESSURECOOKER_FILES_URL_BASE = PRESSURECOOKER_REPO_URL + "pycaption/tests/files/subtitles/" PRESSURECOOKER_SUBS_FIXTURES = [ { 'srcfilename': 'basic.srt', 'subtitlesformat': 'srt', 'language': languages.getlang('ar'), 'check_words': 'البعض أكثر' }, { 'srcfilename': 'encapsulated.sami', 'subtitlesformat': 'sami', 'language': 'en', 'check_words': 'we have this vision of Einstein', }, { 'srcfilename': 'basic.vtt', 'subtitlesformat': 'vtt', 'language': 'ar', 'check_words': 'البعض أكثر' }, {
def create_node(node, assessment_dict, subtitle_path, vtt_videos, base_path, lite_version, lang_code): kind = node.get('kind') # Exercise node creation if kind == 'Exercise': child_node = ExerciseNode( source_id=node['id'], title=node['title'], exercise_data={ 'mastery_model': node.get('suggested_completion_criteria') }, description='' if node.get("description") is None else node.get( "description", '')[:400], license=licenses.ALL_RIGHTS_RESERVED, thumbnail=node.get('image_url_256'), ) # build exercise urls for previews full_path = base_path + node.get('path').strip('khan') slug = full_path.split('/')[-2] full_path = full_path.replace(slug, 'e') + slug # attach Perseus questions to Exercises for item in node['all_assessment_items']: # we replace all references to assessment images with the local file path to the image for match in re.finditer(FILE_URL_REGEX, assessment_dict[item['id']]["item_data"]): file_path = str(match.group(0)).replace('\\', '') file_path = file_path.replace(REPLACE_STRING, IMAGE_DL_LOCATION) assessment_dict[item['id']]["item_data"] = re.sub( FILE_URL_REGEX, file_path, assessment_dict[item['id']]["item_data"], 1) question = PerseusQuestion( id=item['id'], raw_data=assessment_dict[item['id']]['item_data'], source_url=full_path if not lite_version else None, ) child_node.add_question(question) # Topic node creation elif kind == 'Topic': child_node = TopicNode( source_id=node["id"], title=node["title"], description='' if node.get("description") is None else node.get( "description", '')[:400]) # Video node creation elif kind == 'Video': # standard download url for KA videos download_url = "https://cdn.kastatic.org/KA-youtube-converted/{0}.mp4/{1}.mp4".format( node['youtube_id'], node['youtube_id']) files = [VideoFile(download_url)] if node['youtube_id'] in vtt_videos: files.append( SubtitleFile(subtitle_path + '/{}.vtt'.format(node['youtube_id']), language=getlang(lang_code))) child_node = VideoNode( source_id=node["id"], title=node["title"], 
description='' if node.get("description") is None else node.get( "description", '')[:400], files=files, thumbnail=node.get('image_url'), license=licenses.CC_BY_NC_SA) else: # unknown content file format return None return child_node
def test_unknown_code():
    """An unrecognized language code must resolve to None."""
    result = languages.getlang('unknownd-code')
    failure_msg = 'Unknown lang code returned non-None'
    assert result is None, failure_msg
def _recurse_create(node, tree_dict, lang="en"):
    """Turn one raw tree entry, and recursively its children, into Khan* nodes.

    ``node`` is modified in place: its ``translatedTitle`` and
    ``translatedDescription`` are replaced by their entry in the module-level
    ``translations`` mapping (the original text is kept when there is none),
    and a video may get its ``translatedYoutubeId`` / ``translatedYoutubeLang``
    rewritten from ``video_map``.

    Children listed under ``childData`` are looked up by id in ``tree_dict``
    and converted with the same ``lang``. A ``KeyError`` raised by the lookup
    or anywhere inside the recursive conversion makes that child be skipped.

    NOTE(review): a ``kind`` other than Exercise, Topic, Video or Article
    leaves ``khan_node`` unassigned, so the function fails with
    ``UnboundLocalError`` on first use of it - confirm such kinds cannot occur.
    """
    for field in ("translatedTitle", "translatedDescription"):
        node[field] = translations.get(node[field], node[field])

    kind = node["kind"]
    if kind == "Exercise":
        khan_node = KhanExercise(
            # ID is the name of exercise node, for backwards compatibility
            id=node["name"],
            title=node["translatedTitle"],
            description=node["translatedDescription"],
            slug=node["slug"],
            thumbnail=node["imageUrl"],
            assessment_items=node["allAssessmentItems"],
            mastery_model=node["suggestedCompletionCriteria"],
            source_url=node["kaUrl"],
            lang=lang,
        )
    elif kind == "Topic":
        khan_node = KhanTopic(
            # ID is the slug of topic node, for backwards compatibility
            id=node["slug"],
            title=node["translatedTitle"],
            description=node["translatedDescription"],
            slug=node["slug"],
            lang=lang,
        )
    elif kind == "Video":
        language_name = getlang(lang).name.lower()
        if node["translatedYoutubeLang"] != lang:
            # Swap in the video id registered for this language, if any.
            ids_for_language = video_map.get(language_name)
            if ids_for_language:
                mapped_id = ids_for_language.get(node["translatedYoutubeId"])
                if mapped_id:
                    node["translatedYoutubeId"] = mapped_id
                    node["translatedYoutubeLang"] = lang

        # Prefer the HTML description (converted to text) over the plain one;
        # either is cut to 400 characters.
        html_description = node.get("translatedDescriptionHtml")
        plain_description = node.get("translatedDescription")
        if html_description:
            video_description = html2text(
                translations.get(html_description, html_description))[:400]
        elif plain_description:
            video_description = translations.get(
                plain_description, plain_description)[:400]
        else:
            video_description = ""

        khan_node = KhanVideo(
            id=node["id"],
            title=node["translatedTitle"],
            description=video_description,
            slug=node["slug"],
            thumbnail=node["imageUrl"],
            license=node["licenseName"],
            download_urls=node["downloadUrls"],
            # for backwards compatibility, youtubeId is the source_id for chef
            # video nodes; these should be the english youtubeIds
            # corresponding to the translated youtubeId
            youtube_id=english_video_map.get(node["id"]) or node["youtubeId"],
            translated_youtube_id=node["translatedYoutubeId"],
            lang=node["translatedYoutubeLang"],
        )
    elif kind == "Article":
        khan_node = KhanArticle(
            id=node["id"],
            title=node["translatedTitle"],
            description=node["translatedDescription"],
            slug=node["slug"],
            lang=lang,
        )

    for child_ref in node.get("childData", []):
        try:
            child_entry = tree_dict[child_ref["id"]]
            khan_node.children.append(
                _recurse_create(child_entry, tree_dict, lang=lang))
        except KeyError:
            # a child whose key is missing is left out of the children list
            continue

    return khan_node
def create_content_nodes(self, channel):
    """
    Attach a sample tree of topic folders and content nodes to `channel`.

    A 'Content Nodes' folder is added to the channel and receives four
    sub-folders (audio, documents, HTML5 apps, videos). Each sub-folder gets
    its sample content nodes through `add_child`; files are supplied either
    in the node's `files` list or afterwards through `add_file`.
    """
    english = getlang('en').id
    folder_description = 'Put folder description here'
    file_description = 'Put file description here'
    sample_author = 'First Last (author\'s name)'

    def add_folder(parent, source_id, title):
        # Create a topic folder and hang it under `parent`.
        folder = TopicNode(
            source_id=source_id,
            title=title,
            description=folder_description,
            author=None,
            language=english,
            thumbnail=None,
        )
        parent.add_child(folder)
        return folder

    root_folder = add_folder(channel, 'uniqid001', 'Content Nodes')

    # AUDIO
    audio_folder = add_folder(root_folder, 'uniqid002', 'Audio Files Folder')
    whale_sounds = AudioNode(
        source_id='uniqid003',
        title='Whale sounds',
        author=sample_author,
        description=file_description,
        language=english,
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        thumbnail=None,
        files=[],
    )
    audio_folder.add_child(whale_sounds)
    # the file is attached after the node is created; path can also be a URL
    whale_sounds.add_file(
        AudioFile(
            path='./content/ricecooker-channel-files/Whale_sounds.mp3',
            language=english))

    # DOCUMENTS
    documents_folder = add_folder(root_folder, 'uniqid004', 'Document Nodes')
    documents_folder.add_child(
        DocumentNode(
            source_id='uniqid005',
            title='The Supreme Court\u2019s Ruling in Brown vs. Board of Education',
            author=sample_author,
            description=file_description,
            language=english,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            files=[
                DocumentFile(
                    path='./content/ricecooker-channel-files/brown-vs-board-of-education.pdf',
                    language=english)
            ]))

    # HTML5 APPS
    html5apps_folder = add_folder(root_folder, 'uniqid006', 'HTML5App Nodes')
    html5apps_folder.add_child(
        HTML5AppNode(
            source_id='uniqid007',
            title='HTMLWeb capabilities test',
            author=sample_author,
            description='Tests different HTML/JS capabilities. What capabilities are allowed and disallowed by the sandboxed iframe used to render HTML5App nodes on Kolibri.',
            language=english,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail='./content/ricecooker-channel-files/html5_tests.jpg',
            files=[
                HTMLZipFile(
                    path='./content/ricecooker-channel-files/html5_tests.zip',
                    language=english)
            ]))
    html5apps_folder.add_child(
        HTML5AppNode(
            source_id='uniqid008',
            title='Sample Vue.js app',
            author=sample_author,
            description=file_description,
            language=english,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail='./content/ricecooker-channel-files/html5_vuejs.jpg',
            files=[
                HTMLZipFile(
                    path='./content/ricecooker-channel-files/html5_vuejs.zip',
                    language=english)
            ]))

    # VIDEOS
    videos_folder = add_folder(root_folder, 'uniqid009', 'Video Nodes')
    videos_folder.add_child(
        VideoNode(
            source_id='uniqid010',
            title='Wave particle duality explained in 2 mins',
            author=sample_author,
            description=file_description,
            language=english,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[
                VideoFile(
                    path='./content/ricecooker-channel-files/Wave_particle_duality.mp4',
                    language=english)
            ]))

    youtube_id = 'VJyk81HmcZQ'
    videos_folder.add_child(
        VideoNode(
            source_id=youtube_id,  # usually set source_id to youtube_id
            title='Estimating division that results in non whole numbers',
            author='Sal Khan',
            description='Video description would go here',
            language=english,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Khan Academy'),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[
                YouTubeVideoFile(youtube_id=youtube_id,
                                 high_resolution=False,
                                 language='en'),
                YouTubeSubtitleFile(youtube_id=youtube_id, language='ko')
            ]))
def transform_tree(clean_tree, coursedir):
    """Convert a cleaned course tree into nested topic/content dicts.

    The course becomes a topic dict whose children are the chapters; each
    chapter holds one topic per non-empty sequential, and each sequential
    holds the exercise, video and HTML nodes produced from its verticals.
    Downloadable resources collected while processing a chapter are packaged
    into one HTML5 zip node appended to that chapter.

    Returns the result of ``flatten_transformed_tree`` on the course dict.
    """
    course_id = clean_tree['course']
    course_title = clean_tree['display_name']

    static_dir_image = clean_tree['course_image']
    course_thumbnail = os.path.join(coursedir, 'static', static_dir_image)
    if not os.path.exists(course_thumbnail):
        # fall back to the same file name with spaces instead of underscores
        course_thumbnail = os.path.join(coursedir, 'static',
                                        static_dir_image.replace('_', ' '))

    arabic = getlang('ar').code
    course_dict = {
        'kind': content_kinds.TOPIC,
        'title': course_title,
        'thumbnail': course_thumbnail,
        'source_id': course_id,
        'description': '',
        'language': arabic,
        'license': EDRAAK_LICENSE,
        'children': [],
    }

    for chapter in clean_tree['children']:
        chapter_dict = {
            'kind': content_kinds.TOPIC,
            'title': chapter['display_name'],
            'source_id': chapter['url_name'],
            'description': '',
            'language': arabic,
            'license': EDRAAK_LICENSE,
            'children': [],
        }
        course_dict['children'].append(chapter_dict)
        chapter_downloadable_resources = []

        for sequential in chapter['children']:
            # SPECIAL CASE: skip empty parent nodes of discussions
            if len(sequential['children']) == 0:
                LOGGER.debug('Skipping empty sequential ' + str(sequential))
                continue

            # DEFAULT CASE: process as regular topic node
            sequential_dict = {
                'kind': content_kinds.TOPIC,
                'title': sequential['display_name'],
                'source_id': sequential['url_name'],
                'description': sequential.get('description', ''),
                'language': arabic,
                'license': EDRAAK_LICENSE,
                'children': [],
            }
            chapter_dict['children'].append(sequential_dict)
            sequential_children = sequential_dict['children']

            for vertical in sequential['children']:
                vertical_type = guess_vertical_type(vertical)
                if vertical_type in ('knowledge_check_vertical',
                                     'test_vertical'):
                    exercise_dict = transform_vertical_to_exercise(vertical)
                    if exercise_dict:
                        sequential_children.append(exercise_dict)
                elif vertical_type == 'video_vertical':
                    video_dict, resources = transform_video_vertical(vertical)
                    if video_dict:
                        sequential_children.append(video_dict)
                    chapter_downloadable_resources.extend(resources)
                elif vertical_type == 'html_vertical':
                    nodes, resources = transform_html_vertical(vertical)
                    if nodes:
                        sequential_children.extend(nodes)
                    chapter_downloadable_resources.extend(resources)
                else:
                    LOGGER.debug('skipping ' + vertical_type + ' url_name=' +
                                 vertical['url_name'])

        # Nothing to package for this chapter.
        if not chapter_downloadable_resources:
            continue

        LOGGER.debug(' Packaging chapter_downloadable_resources')
        source_id = chapter['url_name'] + '-downloadable-resources'
        html5app_dict = {
            'kind': content_kinds.HTML5,
            'title': EDRAAK_STRINGS['downloadable_resources'],
            'description': EDRAAK_STRINGS['downloadable_resources_description'],
            'source_id': source_id,
            'license': EDRAAK_LICENSE,
            'language': arabic,
            'files': [],
        }
        zip_path = make_html5zip_from_resources(
            chapter_downloadable_resources, basefilename=source_id + '2')
        html5app_dict['files'].append({
            'file_type': file_types.HTML5,
            'path': zip_path,
            'language': arabic,
        })
        chapter_dict['children'].append(html5app_dict)

    return flatten_transformed_tree(course_dict)