예제 #1
0
def test_known_codes():
    """Check that `languages.getlang` resolves well-known codes to the expected names."""
    cases = [
        # (code, message when missing, expected name, expected native name)
        ('en', 'English not found', "English", "English"),
        ('pt-BR', 'Brazilian Portuguese not found',
         "Portuguese, Brazil", "Português (Brasil)"),
        ('zul', 'Zulu not found', "Zulu", "isiZulu"),
    ]
    for code, missing_msg, expected_name, expected_native_name in cases:
        found = languages.getlang(code)
        assert found is not None, missing_msg
        assert found.name == expected_name, 'Wrong name'
        assert found.native_name == expected_native_name, 'Wrong native_name'
예제 #2
0
    def _get_lang_native_name(code):
        """
        Return the native name of the language identified by `code`.

        A warning is logged and None is returned when the lookup raises
        AttributeError (e.g. `getlang` returned None for an unknown code).
        """
        try:
            return languages.getlang(code).native_name
        except AttributeError:
            logger.warning("Did not find language code {} in our le_utils.constants!".format(code))
            return None
예제 #3
0
 def set_language(self, language):
     """
     Store the internal language code on `self.language`.

     `language` may be a code string (looked up with `languages.getlang`) or
     a `languages.Language` object. A string that cannot be resolved raises
     TypeError; any other input type leaves `self.language` untouched.
     """
     if isinstance(language, str):
         resolved = languages.getlang(language)
         if not resolved:
             raise TypeError("Language code {} not found".format(language))
         self.language = resolved.code
     if isinstance(language, languages.Language):
         self.language = language.code
    def get_lang_obj(self):
        """
        Resolve `self.name` to a known language and update this object's fields.

        The name is first tried with `getlang` and then with
        `getlang_by_name`. If neither finds a match, the `UND_LANG` table is
        consulted using `self.name` as the key.

        Returns:
            True when a match was found and `set_value` was called with its
            name, code and native name; False otherwise (including when
            `self.name` is empty).
        """
        if self.name == "":
            return False

        lang_name = self.name
        # Single lookup per helper: try `getlang` first, then match by name.
        language_obj = getlang(lang_name) or getlang_by_name(lang_name)

        if language_obj:
            self.set_value(language_obj.name, language_obj.code,
                           language_obj.native_name)
            return True

        # `.get` avoids a KeyError for names that are not in UND_LANG.
        fallback = UND_LANG.get(self.name)
        if fallback:
            self.set_value(fallback["name"], fallback["code"],
                           fallback["native_name"])
            return True

        # Always return a bool rather than falling off the end with None.
        return False
예제 #5
0
파일: api.py 프로젝트: chrislun16/kolibri
    def _get_lang_native_name(code):
        """
        Look up `code` with `languages.getlang` and return its native name.

        Returns None, after logging a warning, when the lookup raises
        AttributeError.
        """
        try:
            native_name = languages.getlang(code).native_name
        except AttributeError:
            message = "Did not find language code {} in our le_utils.constants!".format(code)
            logger.warning(message)
            return None
        return native_name
예제 #6
0
def test_set_language():
    """
    SubtitleFile accepts a code string or a Language object and stores the
    code as a str; an unknown code raises TypeError.
    """
    from_code = SubtitleFile('path', language='en')
    from_object = SubtitleFile('path', language=languages.getlang('es'))

    code_is_str = isinstance(from_code.language, str)
    assert code_is_str, "Subtitles must be converted to Language class"
    object_is_str = isinstance(from_object.language, str)
    assert object_is_str, "Subtitles can be passed as Langauge models"

    assert from_code.language == 'en', "Subtitles must have a language"
    assert from_object.language == 'es', "Subtitles must have a language"

    with pytest.raises(TypeError):
        SubtitleFile('path', language='notalanguage')
예제 #7
0
def getlang_patched(language):
    """
    Return a language code known to `languages.getlang`, falling back to the
    closest match when `language` itself is not found.

    Lookup order:
      1. `language` as given;
      2. the part before the first '-' (e.g. zh-Hans --> zh);
      3. the pycountry entry whose alpha_2 equals that prefix, mapped through
         `_LANGUAGE_NAME_LOOKUP` by language name (e.g. YouTube might use
         "zu" while le_utils uses "zul").

    Returns the matching code, or None when nothing matches.
    """
    if languages.getlang(language):
        return language

    prefix = language.partition('-')[0]
    if languages.getlang(prefix):
        return prefix

    pyc_lang = pycountry.languages.get(alpha_2=prefix)
    if not pyc_lang:
        return None
    return _LANGUAGE_NAME_LOOKUP.get(pyc_lang.name)
예제 #8
0
def linkAssignment(material):
    """
    Download the page linked from `material` and wrap it in an HTML5AppNode.

    The page at `material["link"]["url"]` is fetched, written as the index
    of './myzipper.zip' together with the scripts and stylesheets it
    references, and the zip is attached to the returned node. The resolved
    stylesheet URLs are also listed in a local 'css_files.css' file.

    Args:
        material: dict with a "link" entry holding "url" and "title".

    Returns:
        The HTML5AppNode built for the link.
    """
    url = material["link"]["url"]
    session = requests.Session()
    session.headers[
        "User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
    html = session.get(url).content
    soup = bs(html, "html.parser")

    # Absolute URLs of every <script src=...> and <link href=...> on the page.
    script_files = [
        urljoin(url, script.attrs.get("src"))
        for script in soup.find_all("script")
        if script.attrs.get("src")
    ]
    css_files = [
        urljoin(url, css.attrs.get("href"))
        for css in soup.find_all("link")
        if css.attrs.get("href")
    ]

    with HTMLWriter('./myzipper.zip') as zipper:
        # The index goes straight into the zip; no local index.html is needed
        # (the previous code truncated any existing ./index.html for nothing).
        zipper.write_index_contents(soup.prettify())

        # The previous code opened "javascript_files.js" for reading here and
        # crashed with FileNotFoundError when it did not exist; the handle
        # was never used.
        # NOTE(review): every script is written under the same "scripts.js"
        # name and every stylesheet under "style.css" — confirm whether later
        # files are meant to replace earlier ones.
        for js_file in script_files:
            zipper.write_url(js_file, "scripts.js", directory="src")

        with open("css_files.css", "w") as f:
            for css_file in css_files:
                print(css_file, file=f)
                zipper.write_url(css_file, "style.css", directory="styles")

    link_file = HTMLZipFile(path='./myzipper.zip')
    link_node = HTML5AppNode(source_id=material["link"]["url"],
                             title=material["link"]["title"],
                             license=get_license(
                                 licenses.CC_BY,
                                 copyright_holder='Copyright holder name'),
                             language=getlang('en').id,
                             derive_thumbnail=False,
                             thumbnail=None,
                             files=[link_file])
    return link_node
 def parse_lang_and_variant_from_kwargs(self, kwargs):
     """
     Extract and validate the `lang` and `variant` chef options.

     Raises ValueError when `lang` is missing and fails an assertion when
     `getlang` does not recognize it.
     Returns: (lang, variant) where `variant` is None when not supplied.
     """
     if "lang" not in kwargs:
         raise ValueError('Khan Academy chef must be run with lang=<code>')
     lang = kwargs["lang"]
     known = getlang(lang)
     assert known, 'Language code ' + lang + ' not recognized'
     return lang, kwargs.get("variant", None)
def get_channel_title(lang=None, variant=None):
    """
    Return the KA channel title for le-utils code `lang` and variant `variant`.

    CHANNEL_TITLE_LOOKUP is checked for a (lang, variant) entry first, then
    for a plain `lang` entry; otherwise a title is generated from the
    language's `first_native_name`.
    """
    variant_key = (lang, variant)
    if variant and variant_key in CHANNEL_TITLE_LOOKUP:
        return CHANNEL_TITLE_LOOKUP[variant_key]
    if lang in CHANNEL_TITLE_LOOKUP:
        return CHANNEL_TITLE_LOOKUP[lang]
    return "Khan Academy ({})".format(getlang(lang).first_native_name)
def get_channel_description(lang=None, variant=None):
    """
    Return the KA channel description for le-utils code `lang` and `variant`.

    CHANNEL_DESCRIPTION_LOOKUP is checked for a (lang, variant) entry first,
    then for a plain `lang` entry; otherwise a generic description is built
    from the language's name.
    """
    variant_key = (lang, variant)
    if variant and variant_key in CHANNEL_DESCRIPTION_LOOKUP:
        return CHANNEL_DESCRIPTION_LOOKUP[variant_key]
    if lang in CHANNEL_DESCRIPTION_LOOKUP:
        return CHANNEL_DESCRIPTION_LOOKUP[lang]
    return "Khan Academy content for {}.".format(getlang(lang).name)
예제 #12
0
 def __init__(self, youtube_id, language=None, **kwargs):
     """
     Create a subtitle file for the YouTube video `youtube_id`.

     `language` may be a `languages.Language` object or a code string. The
     string as given is kept in `self.youtube_language`; the code passed to
     the parent class is the internal one, found with `getlang` or, failing
     that, with `getlang_by_alpha2`.

     Raises:
         TypeError: if neither lookup recognizes `language`.
     """
     self.youtube_url = 'http://www.youtube.com/watch?v={}'.format(youtube_id)
     if isinstance(language, languages.Language):  # for backward compatibility
         language = language.code
     self.youtube_language = language  # youtube language code (can differ from internal repr.)
     language_obj = languages.getlang(language)   # lookup `language` using internal representation
     # if language_obj not None, we know `language` is a valid language_id in the internal repr.
     if language_obj is None:  # if `language` not found using internal repr.
         language_obj = languages.getlang_by_alpha2(language)  # try to match by two-letter ISO code
         if language_obj is None:
             # Fail with a clear message instead of an AttributeError on None.
             raise TypeError("Language code {} not found".format(language))
         language = language_obj.code   # update `language` argument from internal repr. language_id
     super(YouTubeSubtitleFile, self).__init__(language=language, **kwargs)
     assert self.language, "Subtitles must have a language"
def should_include_subtitle(youtube_language, target_lang):
    """
    Determine whether subtitles with language code `youtube_language` available
    for a YouTube video should be imported as part of the Khan Academy chef run
    for language `target_lang` (internal language code).

    Subtitles are accepted when both codes share the same `primary_code`,
    i.e. the same language even if the locale differs. A `youtube_language`
    that cannot be resolved is rejected.
    """
    lang_obj = get_language_with_alpha2_fallback(youtube_language)
    if lang_obj is None:
        # NOTE(review): assumes the helper returns None for unknown codes
        # — confirm. Without this guard such a code crashed on `.primary_code`.
        return False
    target_lang_obj = getlang(target_lang)
    return lang_obj.primary_code == target_lang_obj.primary_code
예제 #14
0
def _get_language_with_alpha2_fallback(language_code):
    """
    Look up `language_code` (string) among the internal language codes and,
    if that fails, try to map it to the internal representation with the
    `getlang_by_alpha2` helper (two-letter ISO code).
    Returns whatever the successful lookup returned, or the result of the
    alpha-2 lookup when the first one yields None.
    """
    found = languages.getlang(language_code)
    if found is not None:
        # `language_code` is already a valid id in the internal representation.
        return found
    return languages.getlang_by_alpha2(language_code)
예제 #15
0
    def get_channel(self, **kwargs):
        """
        Build the test Khan Academy ChannelNode for the `lang` option
        (defaults to "en").
        """
        lang_code = kwargs.get("lang", "en")
        lang = getlang(lang_code)

        source_id = "KA ({0})".format(lang_code)
        title = "Khan Academy ({0}) - TEST".format(lang.native_name)
        description = 'Khan Academy content for {}.'.format(lang.name)
        thumbnail = "https://upload.wikimedia.org/wikipedia/commons/1/15/Khan_Academy_Logo_Old_version_2015.jpg"

        return nodes.ChannelNode(
            source_id=source_id,
            source_domain="khanacademy.org-test",
            title=title,
            description=description,
            thumbnail=thumbnail,
        )
예제 #16
0
def transform_vertical_to_exercise(vertical, parent_title=None):
    """
    Parse an Edraak `test_vertical` or `knowledge_check_vertical` to exercise.

    Args:
        vertical: dict describing the vertical; reads 'children',
            'display_name' and 'url_name'.
        parent_title: optional title prepended to the vertical's display name.

    Returns:
        A dict describing the exercise node, with the questions parsed from
        every 'problem' child, or None when `vertical` has no 'children' key.
    """
    if 'children' not in vertical:
        return None

    children = vertical['children']

    description = ''
    # Extract an optional description from the first html node.
    # An empty children list simply leaves the description blank instead of
    # raising IndexError.
    if children and children[0]['kind'] == 'html':
        description = extract_text_from_html_item(children[0],
                                                  translate_from='ar')

    if parent_title:
        exercise_title = parent_title + ' ' + vertical['display_name']
    else:
        exercise_title = vertical['display_name']

    # Exercise node
    exercise_dict = dict(
        kind=content_kinds.EXERCISE,
        title=exercise_title,
        author='Edraak',
        source_id=vertical['url_name'],
        description=description,
        language=getlang('ar').code,
        license=EDRAAK_LICENSE,
        exercise_data={
            'mastery_model': exercises.M_OF_N,
            'randomize': False,
            'm': 5,  # By default require 5 to count as mastery
        },
        # thumbnail=
        questions=[],
    )

    for child in children:
        if child['kind'] == 'problem':
            parsed_problem = parse_questions_from_problem(child)
            exercise_dict['questions'].extend(parsed_problem['questions'])

    # Lower m when the exercise has fewer than 5 questions
    if len(exercise_dict['questions']) < 5:
        exercise_dict['exercise_data']['m'] = len(exercise_dict['questions'])

    return exercise_dict
예제 #17
0
    def get_json_tree_path(self, *args, **kwargs):
        """
        Return path to ricecooker json tree file. Override this method to use
        a custom filename, e.g., for channel with multiple languages.

        The file name is RICECOOKER_JSON_TREE_TPL formatted with the code of
        the channel language, which is taken from the `lang` option.
        """
        # default to en if no language specified on command line
        language_code = kwargs.get("lang", "en")

        # Accept either a language code or a language name.
        lang_obj = getlang(language_code) or getlang_by_name(language_code)

        return os.path.join(
            self.TREES_DATA_DIR,
            self.RICECOOKER_JSON_TREE_TPL.format(lang_obj.code),
        )
예제 #18
0
def videoAssignment(material):
    """
    Build a VideoNode for the YouTube video described by
    `material["youtubeVideo"]` (reads its "id" and "title"), attaching the
    video file and English subtitles.
    """
    youtube_id = material["youtubeVideo"]["id"]
    video_title = material["youtubeVideo"]["title"]
    video_license = get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name')
    language_id = getlang('en').id

    video_file = YouTubeVideoFile(youtube_id=youtube_id,
                                  high_resolution=False,
                                  language='en')
    subtitle_file = YouTubeSubtitleFile(youtube_id=youtube_id, language='en')

    return VideoNode(
        source_id=youtube_id,  # usually set source_id to youtube_id
        title=video_title,
        license=video_license,
        language=language_id,
        derive_thumbnail=True,  # video-specific flag
        thumbnail=None,
        files=[video_file, subtitle_file],
    )
예제 #19
0
    def pre_run(self, args, options):
        """
        Build the ricecooker json tree for the entire channel and write it to
        the path returned by `get_json_tree_path`.
        """
        LOGGER.info('in pre_run...')

        ricecooker_json_tree = {
            # a human-readable title
            'title': 'Edraak Courses (العربيّة)',
            # content provider's domain
            'source_domain': EDRAAK_COURSES_DOMAIN,
            # an alphanumeric channel ID
            'source_id': 'continuing-education-courses',
            'description': EDRAAK_COURSES_CHANNEL_DESCRIPTION,
            # logo created from SVG
            'thumbnail': './chefdata/edraak-logo.png',
            # language code of channel
            'language': getlang('ar').code,
            'children': [],
        }
        self.add_content_nodes(ricecooker_json_tree)

        write_tree_to_json_tree(self.get_json_tree_path(),
                                ricecooker_json_tree)
예제 #20
0
def make_topic_for_settings(title, ffmpeg_settings):
    """
    Build a TopicNode named `title` holding one video node per entry of the
    global VIDEO_URLS, each created with the given `ffmpeg_settings`.

    Videos are titled 'Video 1', 'Video 2', ... in VIDEO_URLS order.
    """
    topic = TopicNode(
        source_id=title,
        title=title,
        description='',
        author=None,
        language=getlang('en').id,
        thumbnail=None,
    )
    for vid_number, video_url in enumerate(VIDEO_URLS, start=1):
        topic.add_child(
            make_video_node('Video ' + str(vid_number),
                            video_url,
                            ffmpeg_settings=ffmpeg_settings))
    return topic
예제 #21
0
class SampleChef(SushiChef):
    """
    Chef class responsible for uploading the channel to Kolibri Studio.
    Its `main()` method is what the command line script calls.
    """

    channel_info = {
        # content provider's domain
        'CHANNEL_SOURCE_DOMAIN': 'source.org',
        # an alphanumeric channel ID
        'CHANNEL_SOURCE_ID': 'uber-big-ricecooker-channel',
        # a human-readable title
        'CHANNEL_TITLE': 'UBERRRRRR count of topics',
        # language code of channel
        'CHANNEL_LANGUAGE': getlang('en').id,
        # (optional) local path or url to image file
        'CHANNEL_THUMBNAIL': 'http://quantlabs.net/blog/wp-content/uploads/2015/11/pythonlogo.jpg',
        'CHANNEL_DESCRIPTION': ('This channel was created from the files in the '
                                'content/ dir and the metadata provided in Python'),
    }

    def construct_channel(self, *args, **kwargs):
        """
        Create the ChannelNode, populate its topic tree, validate it and
        return it.
        """
        # create ChannelNode from data in self.channel_info
        channel = self.get_channel(*args, **kwargs)
        self.create_content_nodes(channel)
        raise_for_invalid_channel(channel)
        return channel

    def create_content_nodes(self, channel):
        """
        Fill `channel` by calling `make_random_subtree(channel, 2)`.
        """
        make_random_subtree(channel, 2)
def youtubeNode(url):
    """
    Build a VideoNode for the YouTube watch page at `url`.

    The video ID is read from the `v` query parameter and the node title
    from the page's <title> element; the node gets the YouTube video file
    and English subtitles.

    Raises:
        KeyError: if `url` has no `v` query parameter.
    """
    # Imported locally so the function does not depend on whether the module
    # binds `urlparse` to the function or to a module: the previous code
    # called `urlparse(url)` and then `urlparse.parse_qs`, which cannot both
    # work.
    from urllib.parse import parse_qs, urlparse

    # Picking out youtube video ID from URL
    query = parse_qs(urlparse(url).query)
    videoID = query["v"][0]

    r = requests.get(url).text  # grabs request of the URL

    # Get video title
    soup = bs4.BeautifulSoup(r, "html.parser")
    videoTitle = soup.find_all('title', limit=1)

    # videoTitle is a list of tags; stringify it and strip the html tags
    newTitle = str(re.sub('<.*?>', '', str(videoTitle)))

    # Drop the list brackets left over from str(list).
    # May have to delete if there are brackets in title
    newTitle = newTitle.replace("]", '')
    newTitle = newTitle.replace("[", '')

    # Create Video Node
    video_node = VideoNode(
        source_id=videoID,  # usually set source_id to youtube_id
        title=str(newTitle),
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        language=getlang('en').id,
        derive_thumbnail=True,  # video-specific flag
        thumbnail=None,
        files=[
            YouTubeVideoFile(youtube_id=videoID,
                             high_resolution=False,
                             language='en'),
            YouTubeSubtitleFile(youtube_id=videoID, language='en')
        ])

    # Return Video Node
    return video_node
예제 #23
0
    def get_channel(self, **kwargs):
        """
        Build the PhET ChannelNode for the `lang` option (defaults to "en").

        Non-English channels get a '-<lang>' suffix on their source_id, and
        languages without an entry in CHANNEL_DESCRIPTIONS use the English
        description.
        """
        lang_code = kwargs.get("lang", "en")
        lang_obj = getlang(lang_code)

        source_id_suffix = '' if lang_code == "en" else '-{}'.format(lang_code)

        description = CHANNEL_DESCRIPTIONS.get(lang_code)
        if description is None:
            description = CHANNEL_DESCRIPTIONS['en']

        source_id = 'phet-html5-simulations{}'.format(source_id_suffix)
        title = 'PhET Interactive Simulations ({})'.format(
            lang_obj.native_name)

        return ChannelNode(
            source_domain='phet.colorado.edu',
            source_id=source_id,
            title=title,
            thumbnail='chefdata/phet-logo-TM-partners.png',
            description=description,
            language=lang_obj,
        )
예제 #24
0
    def get_channel(self, **kwargs):
        """
        Build the PhET ChannelNode for the `lang` option (defaults to "en").

        The source_id carries a '-<lang>' suffix except for English. The title
        shows the language code, except for Arabic where the native name is
        used.
        """
        lang_code = kwargs.get("lang", "en")
        lang_obj = getlang(lang_code)

        source_id_suffix = '' if lang_code == "en" else '-{}'.format(lang_code)
        title_id_suffix = lang_obj.native_name if lang_code == "ar" else lang_code

        description = 'The PhET Interactive Simulations project at the University of Colorado Boulder provides a collection of 140 interactive simulations for teaching and learning science and math for upper middle school and high school students. Most content available supports chemistry and physics learning.'

        return ChannelNode(
            source_domain='phet.colorado.edu',
            source_id='phet-html5-simulations{}'.format(source_id_suffix),
            title='PhET Interactive Simulations ({})'.format(title_id_suffix),
            thumbnail='https://phet.colorado.edu/images/phet-social-media-logo.png',
            description=description,
            language=lang_obj,
        )
예제 #25
0
def transform_html_vertical(vertical, parent_title=None):
    """
    Parses the `html` children of the vertical to generate document nodes from
    linked pdfs, extract downloadable resources, or a standalone html5 app node
    of the html content for all other cases.

    Args:
        vertical: dict describing the vertical; every entry of its 'children'
            list must have kind 'html'.
        parent_title: accepted but not used by this function.

    Returns: nodes, downloadable_resources
        `nodes` is a list of node dicts (pdf documents and html5 apps) and
        `downloadable_resources` the non-pdf resources found. Both are empty
        when the vertical has no 'children' key or no children.
    """
    if 'children' not in vertical:
        LOGGER.warning('found empty vertical' + str(vertical))
        return [], []

    assert all(ch['kind'] == 'html'
               for ch in vertical['children']), 'non htmls found'

    nodes = []
    downloadable_resources = []
    htmls = [ch for ch in vertical['children'] if ch['kind'] == 'html']

    for html in htmls:
        if 'downloadable_resources' in html and html['downloadable_resources']:
            LOGGER.debug('    found downloadable_resources')
            resources = html['downloadable_resources']
            for resource in resources:
                ext = resource['ext']
                if ext == 'pdf':
                    # Linked pdfs become standalone document nodes.
                    pdf_node = dict(
                        kind=content_kinds.DOCUMENT,
                        title=resource['title'],
                        description=resource.get('description', ''),
                        source_id=resource['relhref'],
                        license=EDRAAK_LICENSE,
                        language=getlang('ar').code,
                        files=[],
                    )
                    file_dict = dict(
                        file_type=file_types.DOCUMENT,
                        path=resource['relhref'],
                        language=getlang('ar').code,
                    )
                    pdf_node['files'].append(file_dict)
                    nodes.append(pdf_node)
                else:
                    downloadable_resources.append(resource)

        else:
            LOGGER.debug('    packaging html content')
            html5app_dict = dict(
                kind=content_kinds.HTML5,
                title=vertical['display_name'],
                # title=EDRAAK_STRINGS['downloadable_resources'],
                description=html.get('description', ''),
                source_id=html['url_name'],
                license=EDRAAK_LICENSE,
                language=getlang('ar').code,
                files=[],
            )
            zip_path = package_html_content_as_html5_zip_file(html)
            zip_file = dict(
                file_type=file_types.HTML5,
                path=zip_path,
                language=getlang('ar').code,
            )
            html5app_dict['files'].append(zip_file)
            nodes.append(html5app_dict)

    # Return only after every html child has been handled. The return used to
    # sit inside the loop, so just the first child was processed and an empty
    # children list returned None.
    return nodes, downloadable_resources
예제 #26
0
    def create_exercise_nodes(self, channel):
        """
        This function adds a few exercise nodes to the channel content tree.

        An 'Exercise Nodes' topic is added to `channel` with two exercises:
        one made of basic question types and one with a Perseus question
        whose JSON is read from the local content directory.

        TODO: handle exercises with embedded image links + base64 encoded data.
        """

        # EXERCISES
        exercices_folder = TopicNode(
            source_id='uniqid011',
            title='Exercise Nodes',
            description='Put folder description here',
            author=None,
            language=getlang('en').id,
            thumbnail=None,
        )
        channel.add_child(exercices_folder)

        exercise1 = ExerciseNode(
            source_id='uniqid012',
            title='Basic questions',
            author='LE content team',
            description=
            'Showcase of the simple exercises supported by Ricecooker and Studio',
            language=getlang('en').id,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            exercise_data={
                'mastery_model': exercises.M_OF_N,  # or exercises.DO_ALL
                'randomize': True,
                'm': 2,
                'n': 3,
            },
            questions=[
                MultipleSelectQuestion(
                    id='ex2aQ1',
                    question=
                    "Which numbers are even?\n\nTest local image include: ![](content/ricecooker-channel-files/html5_vuejs.jpg)",
                    correct_answers=[
                        "2",
                        "4",
                    ],
                    all_answers=["1", "2", "3", "4", "5"],
                    hints=[
                        "There are two answers.",
                        "Both answers are multiples of two."
                    ]),
                SingleSelectQuestion(
                    id='ex2aQ2',
                    question="What is 2 times 3?",
                    correct_answer="6",
                    all_answers=["2", "3", "5", "6"],
                ),
                InputQuestion(
                    id='ex2aQ3',
                    question="Name a factor of 10.",
                    answers=["1", "2", "5", "10"],
                )
            ])
        exercices_folder.add_child(exercise1)

        # LOAD JSON DATA (as string) FOR PERSEUS QUESTIONS
        # Read inside a `with` block so the file handle is closed right away.
        with open(
                './content/ricecooker-channel-files/perseus_graph_question.json',
                'r') as perseus_file:
            SAMPLE_PERSEUS_4_JSON = perseus_file.read()
        exercise2 = ExerciseNode(
            source_id='baszzzs1',
            title='Perseus questions',
            author='LE content team',
            description='An example exercise with Persus questions',
            language=getlang('en').id,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            exercise_data={
                'mastery_model': exercises.M_OF_N,  # or exercises.DO_ALL
                'randomize': True,
                'm': 1,
                'n': 1,
            },
            questions=[
                PerseusQuestion(
                    id='ex2bQ4',
                    raw_data=SAMPLE_PERSEUS_4_JSON,
                    source_url=
                    'https://github.com/learningequality/sample-channels/blob/master/contentnodes/exercise/sample_perseus04.json'
                ),
            ])
        exercices_folder.add_child(exercise2)
예제 #27
0
class SampleChef(SushiChef):
    """
    The chef class that takes care of uploading channel to Kolibri Studio.
    We'll call its `main()` method from the command line script.

    The channel metadata lives in `channel_info`; `construct_channel` builds
    the channel and fills it with sample content and exercise nodes.
    """

    # Channel-level metadata; construct_channel's inline comment says the
    # ChannelNode is created from this dict.
    channel_info = {
        'CHANNEL_SOURCE_DOMAIN':
        'source.org',  # content provider's domain
        'CHANNEL_SOURCE_ID':
        'sample-ricecooker-channel',  # an alphanumeric channel ID
        'CHANNEL_TITLE':
        'Sample Ricecooker Channel',  # a human-readable title
        'CHANNEL_LANGUAGE':
        getlang('en').id,  # language code of channel
        'CHANNEL_THUMBNAIL':
        'http://quantlabs.net/blog/wp-content/uploads/2015/11/pythonlogo.jpg',  # (optional) local path or url to image file
        'CHANNEL_DESCRIPTION':
        'This channel was created from the files in the '
        'content/ dir and metadata specified in Python'
    }

    def construct_channel(self, *args, **kwargs):
        """
        Create ChannelNode and build topic tree.

        Adds the content nodes and the exercise nodes to the channel, checks
        it with `raise_for_invalid_channel`, and returns it.
        """
        channel = self.get_channel(
            *args,
            **kwargs)  # create ChannelNode from data in self.channel_info
        self.create_content_nodes(channel)
        # NOTE(review): create_exercise_nodes is not defined in the visible
        # part of this class — presumably defined further down; confirm.
        self.create_exercise_nodes(channel)
        raise_for_invalid_channel(channel)
        return channel

    def create_content_nodes(self, channel):
        """
        This function uses the methods `add_child` and `add_file` to build the
        hierarchy of topic nodes (nested folder structure) and content nodes.
        Every content node is associated with one or more files.

        A 'Content Nodes' topic is added to `channel`, containing one
        sub-folder each for audio, documents, HTML5 apps and videos.
        """
        # Top-level folder that holds every sample content node.
        content_nodes_folder = TopicNode(
            source_id='uniqid001',
            title='Content Nodes',
            description='Put folder description here',
            author=None,
            language=getlang('en').id,
            thumbnail=None,
        )
        channel.add_child(content_nodes_folder)

        # AUDIO
        audio_nodes_folder = TopicNode(
            source_id='uniqid002',
            title='Audio Files Folder',
            description='Put folder description here',
            author=None,
            language=getlang('en').id,
            thumbnail=None,
        )
        content_nodes_folder.add_child(audio_nodes_folder)

        # The audio node starts with no files; its file is attached below
        # with add_file.
        audio_node = AudioNode(
            source_id='uniqid003',
            title='Whale sounds',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=getlang('en').id,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            files=[],
        )
        audio_nodes_folder.add_child(audio_node)
        audio_file = AudioFile(
            path=
            './content/ricecooker-channel-files/Whale_sounds.mp3',  # note path can also be a URL
            language=getlang('en').id)
        audio_node.add_file(audio_file)

        # DOCUMENTS
        documents_folder = TopicNode(
            source_id='uniqid004',
            title='Document Nodes',
            description='Put folder description here',
            author=None,
            language=getlang('en').id,
            thumbnail=None,
        )
        content_nodes_folder.add_child(documents_folder)

        # Here the file is passed directly through the `files` argument.
        document_node = DocumentNode(
            source_id='uniqid005',
            title=
            'The Supreme Court\u2019s Ruling in Brown vs. Board of Education',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=getlang('en').id,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            files=[
                DocumentFile(
                    path=
                    './content/ricecooker-channel-files/brown-vs-board-of-education.pdf',
                    language=getlang('en').id)
            ])
        documents_folder.add_child(document_node)

        # HTML5 APPS
        html5apps_folder = TopicNode(
            source_id='uniqid006',
            title='HTML5App Nodes',
            description='Put folder description here',
            author=None,
            language=getlang('en').id,
            thumbnail=None,
        )
        content_nodes_folder.add_child(html5apps_folder)

        html5_node = HTML5AppNode(
            source_id='uniqid007',
            title='HTMLWeb capabilities test',
            author='First Last (author\'s name)',
            description=
            'Tests different HTML/JS capabilities. What capabilities are allowed and disallowed by the sandboxed iframe used to render HTML5App nodes on Kolibri.',
            language=getlang('en').id,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail='./content/ricecooker-channel-files/html5_tests.jpg',
            files=[
                HTMLZipFile(
                    path='./content/ricecooker-channel-files/html5_tests.zip',
                    language=getlang('en').id)
            ])
        html5apps_folder.add_child(html5_node)

        html5_node2 = HTML5AppNode(
            source_id='uniqid008',
            title='Sample Vue.js app',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=getlang('en').id,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail='./content/ricecooker-channel-files/html5_vuejs.jpg',
            files=[
                HTMLZipFile(
                    path='./content/ricecooker-channel-files/html5_vuejs.zip',
                    language=getlang('en').id)
            ])
        html5apps_folder.add_child(html5_node2)

        # VIDEOS
        videos_folder = TopicNode(
            source_id='uniqid009',
            title='Video Nodes',
            description='Put folder description here',
            author=None,
            language=getlang('en').id,
            thumbnail=None,
        )
        content_nodes_folder.add_child(videos_folder)
        # Video backed by a local mp4 file.
        video_node = VideoNode(
            source_id='uniqid010',
            title='Wave particle duality explained in 2 mins',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=getlang('en').id,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[
                VideoFile(
                    path=
                    './content/ricecooker-channel-files/Wave_particle_duality.mp4',
                    language=getlang('en').id)
            ])
        videos_folder.add_child(video_node)

        # Video referenced by YouTube ID, with Korean ('ko') subtitles.
        youtube_id = 'VJyk81HmcZQ'
        video_node2 = VideoNode(
            source_id=youtube_id,  # usually set source_id to youtube_id
            title='Estimating division that results in non whole numbers',
            author='Sal Khan',
            description='Video description would go here',
            language=getlang('en').id,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Khan Academy'),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[
                YouTubeVideoFile(youtube_id=youtube_id,
                                 high_resolution=False,
                                 language='en'),
                YouTubeSubtitleFile(youtube_id=youtube_id, language='ko')
            ])
        videos_folder.add_child(video_node2)

    def create_exercise_nodes(self, channel):
        """
        Add an "Exercise Nodes" folder with two sample exercises to the channel.

        The first exercise is assembled from question objects declared in-line
        (multiple select, single select and input questions). The second one
        wraps a single Perseus question whose raw JSON is read from a file.

        Args:
            channel: node that receives the exercises folder via `add_child`.

        TODO: handle exercises with embedded image links + base64 encoded data.
        """

        # EXERCISES
        exercises_folder = TopicNode(
            source_id='uniqid011',
            title='Exercise Nodes',
            description='Put folder description here',
            author=None,
            language=getlang('en').id,
            thumbnail=None,
        )
        channel.add_child(exercises_folder)

        exercise1 = ExerciseNode(
            source_id='uniqid012',
            title='Basic questions',
            author='LE content team',
            description=
            'Showcase of the simple exercises supported by Ricecooker and Studio',
            language=getlang('en').id,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            exercise_data={
                'mastery_model': exercises.M_OF_N,  # or exercises.DO_ALL
                'randomize': True,
                'm': 2,
                'n': 3,
            },
            questions=[
                MultipleSelectQuestion(
                    id='ex2aQ1',
                    question=
                    "Which numbers are even?\n\nTest local image include: ![](content/ricecooker-channel-files/html5_vuejs.jpg)",
                    correct_answers=[
                        "2",
                        "4",
                    ],
                    all_answers=["1", "2", "3", "4", "5"],
                    hints=[
                        "There are two answers.",
                        "Both answers are multiples of two."
                    ]),
                SingleSelectQuestion(
                    id='ex2aQ2',
                    question="What is 2 times 3?",
                    correct_answer="6",
                    all_answers=["2", "3", "5", "6"],
                ),
                InputQuestion(
                    id='ex2aQ3',
                    question="Name a factor of 10.",
                    answers=["1", "2", "5", "10"],
                )
            ])
        exercises_folder.add_child(exercise1)

        # LOAD JSON DATA (as string) FOR PERSEUS QUESTIONS
        # Read inside a `with` block so the file handle is closed right away
        # instead of being left for the garbage collector.
        with open(
                './content/ricecooker-channel-files/perseus_graph_question.json',
                'r') as perseus_file:
            sample_perseus_json = perseus_file.read()

        exercise2 = ExerciseNode(
            source_id='baszzzs1',
            title='Perseus questions',
            author='LE content team',
            description='An example exercise with Persus questions',
            language=getlang('en').id,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            exercise_data={
                'mastery_model': exercises.M_OF_N,  # or exercises.DO_ALL
                'randomize': True,
                'm': 1,
                'n': 1,
            },
            questions=[
                PerseusQuestion(
                    id='ex2bQ4',
                    raw_data=sample_perseus_json,
                    source_url=
                    'https://github.com/learningequality/sample-channels/blob/master/contentnodes/exercise/sample_perseus04.json'
                ),
            ])
        exercises_folder.add_child(exercise2)
예제 #28
0
        f.close()
    return f  # returns a closed file descriptor which we use for name attribute


def test_bad_subtitles_raises(bad_subtitles_file):
    """Processing the file from the bad-subtitles fixture must raise ValueError."""
    broken_subs = SubtitleFile(bad_subtitles_file.name, language='en')
    with pytest.raises(ValueError):
        broken_subs.process_file()


# raw.githubusercontent.com address of the pressurecooker repository.
PRESSURECOOKER_REPO_URL = "https://raw.githubusercontent.com/bjester/pressurecooker/"
# URL prefix (repository URL + subtitles test-files path) for fixture files.
PRESSURECOOKER_FILES_URL_BASE = "{}pycaption/tests/files/subtitles/".format(
    PRESSURECOOKER_REPO_URL)
PRESSURECOOKER_SUBS_FIXTURES = [
    {
        'srcfilename': 'basic.srt',
        'subtitlesformat': 'srt',
        'language': languages.getlang('ar'),
        'check_words': 'البعض أكثر'
    },
    {
        'srcfilename': 'encapsulated.sami',
        'subtitlesformat': 'sami',
        'language': 'en',
        'check_words': 'we have this vision of Einstein',
    },
    {
        'srcfilename': 'basic.vtt',
        'subtitlesformat': 'vtt',
        'language': 'ar',
        'check_words': 'البعض أكثر'
    },
    {
예제 #29
0
def _clip_description(node):
    """Return the node's description cut to 400 characters ('' if missing or None)."""
    return (node.get("description") or '')[:400]


def create_node(node, assessment_dict, subtitle_path, vtt_videos, base_path,
                lite_version, lang_code):
    """
    Build the content node that corresponds to one entry of the source tree.

    Args:
        node: dict describing the entry; its 'kind' selects the node type
            ('Exercise', 'Topic' or 'Video').
        assessment_dict: mapping from assessment item id to a dict holding an
            'item_data' string. Entries used by an exercise are updated in
            place: matches of FILE_URL_REGEX are rewritten to local paths.
        subtitle_path: directory prefix for the '<youtube_id>.vtt' files.
        vtt_videos: container of youtube ids that have a subtitle file.
        base_path: URL prefix used to build the exercise preview URL.
        lite_version: when truthy, questions get no `source_url`.
        lang_code: language code handed to `getlang` for subtitle files.

    Returns:
        An ExerciseNode, TopicNode or VideoNode, or None for any other kind.
    """
    kind = node.get('kind')

    # Exercise node creation
    if kind == 'Exercise':
        child_node = ExerciseNode(
            source_id=node['id'],
            title=node['title'],
            exercise_data={
                'mastery_model': node.get('suggested_completion_criteria')
            },
            description=_clip_description(node),
            license=licenses.ALL_RIGHTS_RESERVED,
            thumbnail=node.get('image_url_256'),
        )

        # build exercise urls for previews
        # Drop only a leading "khan" prefix. str.strip('khan') would remove
        # any of the characters k/h/a/n from BOTH ends of the path.
        node_path = node.get('path')
        if node_path.startswith('khan'):
            node_path = node_path[len('khan'):]
        full_path = base_path + node_path
        slug = full_path.split('/')[-2]
        full_path = full_path.replace(slug, 'e') + slug

        # attach Perseus questions to Exercises
        for item in node['all_assessment_items']:
            assessment = assessment_dict[item['id']]
            original_data = assessment["item_data"]
            item_data = original_data
            # we replace all references to assessment images with the local file path to the image
            # Matches are taken from the untouched string; each pass rewrites
            # the first remaining match in the working copy.
            for match in re.finditer(FILE_URL_REGEX, original_data):
                file_path = str(match.group(0)).replace('\\', '')
                file_path = file_path.replace(REPLACE_STRING,
                                              IMAGE_DL_LOCATION)
                # `count` is passed by keyword: the positional form is
                # deprecated as of Python 3.13.
                item_data = re.sub(FILE_URL_REGEX, file_path, item_data,
                                   count=1)
            assessment["item_data"] = item_data

            question = PerseusQuestion(
                id=item['id'],
                raw_data=item_data,
                source_url=full_path if not lite_version else None,
            )
            child_node.add_question(question)

    # Topic node creation
    elif kind == 'Topic':
        child_node = TopicNode(
            source_id=node["id"],
            title=node["title"],
            description=_clip_description(node))

    # Video node creation
    elif kind == 'Video':
        youtube_id = node['youtube_id']
        # standard download url for KA videos
        download_url = "https://cdn.kastatic.org/KA-youtube-converted/{0}.mp4/{1}.mp4".format(
            youtube_id, youtube_id)
        files = [VideoFile(download_url)]
        if youtube_id in vtt_videos:
            files.append(
                SubtitleFile(subtitle_path + '/{}.vtt'.format(youtube_id),
                             language=getlang(lang_code)))
        child_node = VideoNode(
            source_id=node["id"],
            title=node["title"],
            description=_clip_description(node),
            files=files,
            thumbnail=node.get('image_url'),
            license=licenses.CC_BY_NC_SA)

    else:  # unknown content file format
        return None

    return child_node
예제 #30
0
def test_unknown_code():
    """`getlang` must return None for a code it does not know."""
    missing_lang = languages.getlang('unknownd-code')
    assert missing_lang is None, 'Unknown lang code returned non-None'
예제 #31
0
def _recurse_create(node, tree_dict, lang="en"):
    """
    Convert `node` and, recursively, its children into Khan* node objects.

    `node` is modified in place: its "translatedTitle" and
    "translatedDescription" are replaced by their entries in `translations`
    when present, and for videos "translatedYoutubeId" and
    "translatedYoutubeLang" are overwritten when `video_map` has a mapping
    for the requested language.

    Args:
        node: dict describing one topic-tree entry; node["kind"] must be one
            of "Exercise", "Topic", "Video" or "Article".
        tree_dict: mapping from node id to node dict, used to resolve the
            entries listed under "childData".
        lang: language code passed down to every created node.

    Returns:
        The KhanExercise, KhanTopic, KhanVideo or KhanArticle built for `node`,
        with its resolved children appended to `.children`.

    Raises:
        ValueError: if node["kind"] is not one of the supported kinds.
    """
    title = node["translatedTitle"]
    node["translatedTitle"] = translations.get(title, title)
    description = node["translatedDescription"]
    node["translatedDescription"] = translations.get(description, description)

    kind = node["kind"]
    if kind == "Exercise":
        khan_node = KhanExercise(
            # ID is the name of exercise node, for backwards compatibility
            id=node["name"],
            title=node["translatedTitle"],
            description=node["translatedDescription"],
            slug=node["slug"],
            thumbnail=node["imageUrl"],
            assessment_items=node["allAssessmentItems"],
            mastery_model=node["suggestedCompletionCriteria"],
            source_url=node["kaUrl"],
            lang=lang,
        )
    elif kind == "Topic":
        khan_node = KhanTopic(
            # ID is the slug of topic node, for backwards compatibility
            id=node["slug"],
            title=node["translatedTitle"],
            description=node["translatedDescription"],
            slug=node["slug"],
            lang=lang,
        )
    elif kind == "Video":
        lang_name = getlang(lang).name.lower()
        if node["translatedYoutubeLang"] != lang:
            # Swap in the video id mapped for this language, if there is one.
            lang_videos = video_map.get(lang_name)
            if lang_videos:
                mapped_id = lang_videos.get(node["translatedYoutubeId"])
                if mapped_id:
                    node["translatedYoutubeId"] = mapped_id
                    node["translatedYoutubeLang"] = lang

        # Prefer the HTML description (converted to text) over the plain one;
        # either way the result is capped at 400 characters.
        if node.get("translatedDescriptionHtml"):
            html_description = node["translatedDescriptionHtml"]
            video_description = html2text(
                translations.get(html_description, html_description))[:400]
        elif node.get("translatedDescription"):
            plain_description = node["translatedDescription"]
            video_description = translations.get(plain_description,
                                                 plain_description)[:400]
        else:
            video_description = ""
        khan_node = KhanVideo(
            id=node["id"],
            title=node["translatedTitle"],
            description=video_description,
            slug=node["slug"],
            thumbnail=node["imageUrl"],
            license=node["licenseName"],
            download_urls=node["downloadUrls"],
            # for backwards compatibility, youtubeId is the source_id for chef video nodes
            # these should be the english youtubeIds corresponding to the translated youtubeId
            youtube_id=english_video_map.get(node["id"]) or node["youtubeId"],
            translated_youtube_id=node["translatedYoutubeId"],
            lang=node["translatedYoutubeLang"],
        )
    elif kind == "Article":
        khan_node = KhanArticle(
            id=node["id"],
            title=node["translatedTitle"],
            description=node["translatedDescription"],
            slug=node["slug"],
            lang=lang,
        )
    else:
        # Without this branch `khan_node` stays unbound and the function fails
        # further down with an UnboundLocalError that hides the real cause.
        raise ValueError("Unsupported node kind: {!r}".format(kind))

    for child_ref in node.get("childData", []):
        # if key is missing, we don't add it to list of children of topic node
        # NOTE: the try block also covers the recursive call, so a KeyError
        # raised while building a descendant drops that child as well.
        try:
            child_node = tree_dict[child_ref["id"]]
            khan_node.children.append(
                _recurse_create(child_node, tree_dict, lang=lang))
        except KeyError:
            pass

    return khan_node
예제 #32
0
    def create_content_nodes(self, channel):
        """
        Build the sample hierarchy of topic folders and content nodes.

        A "Content Nodes" folder is attached to `channel`, with one sub-folder
        each for audio, documents, HTML5 apps and videos. Nodes are attached
        with `add_child`. The audio file is attached afterwards with
        `add_file`; all other files are passed through the `files` argument.
        """
        english = getlang('en').id
        sample_author = "First Last (author's name)"
        files_dir = './content/ricecooker-channel-files/'

        def cc_by(holder='Copyright holder name'):
            # A fresh license object per node, as every node gets its own.
            return get_license(licenses.CC_BY, copyright_holder=holder)

        def new_folder(parent, source_id, title):
            # Create a topic folder and attach it to `parent` right away.
            folder = TopicNode(
                source_id=source_id,
                title=title,
                description='Put folder description here',
                author=None,
                language=english,
                thumbnail=None,
            )
            parent.add_child(folder)
            return folder

        content_nodes_folder = new_folder(channel, 'uniqid001',
                                          'Content Nodes')

        # AUDIO
        audio_nodes_folder = new_folder(content_nodes_folder, 'uniqid002',
                                        'Audio Files Folder')

        audio_node = AudioNode(
            source_id='uniqid003',
            title='Whale sounds',
            author=sample_author,
            description='Put file description here',
            language=english,
            license=cc_by(),
            thumbnail=None,
            files=[],
        )
        audio_nodes_folder.add_child(audio_node)
        # note path can also be a URL
        audio_file = AudioFile(path=files_dir + 'Whale_sounds.mp3',
                               language=english)
        audio_node.add_file(audio_file)

        # DOCUMENTS
        documents_folder = new_folder(content_nodes_folder, 'uniqid004',
                                      'Document Nodes')

        document_node = DocumentNode(
            source_id='uniqid005',
            title=
            'The Supreme Court\u2019s Ruling in Brown vs. Board of Education',
            author=sample_author,
            description='Put file description here',
            language=english,
            license=cc_by(),
            thumbnail=None,
            files=[
                DocumentFile(
                    path=files_dir + 'brown-vs-board-of-education.pdf',
                    language=english),
            ])
        documents_folder.add_child(document_node)

        # HTML5 APPS
        html5apps_folder = new_folder(content_nodes_folder, 'uniqid006',
                                      'HTML5App Nodes')

        html5_node = HTML5AppNode(
            source_id='uniqid007',
            title='HTMLWeb capabilities test',
            author=sample_author,
            description=
            'Tests different HTML/JS capabilities. What capabilities are allowed and disallowed by the sandboxed iframe used to render HTML5App nodes on Kolibri.',
            language=english,
            license=cc_by(),
            thumbnail=files_dir + 'html5_tests.jpg',
            files=[
                HTMLZipFile(path=files_dir + 'html5_tests.zip',
                            language=english),
            ])
        html5apps_folder.add_child(html5_node)

        html5_node2 = HTML5AppNode(
            source_id='uniqid008',
            title='Sample Vue.js app',
            author=sample_author,
            description='Put file description here',
            language=english,
            license=cc_by(),
            thumbnail=files_dir + 'html5_vuejs.jpg',
            files=[
                HTMLZipFile(path=files_dir + 'html5_vuejs.zip',
                            language=english),
            ])
        html5apps_folder.add_child(html5_node2)

        # VIDEOS
        videos_folder = new_folder(content_nodes_folder, 'uniqid009',
                                   'Video Nodes')

        video_node = VideoNode(
            source_id='uniqid010',
            title='Wave particle duality explained in 2 mins',
            author=sample_author,
            description='Put file description here',
            language=english,
            license=cc_by(),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[
                VideoFile(path=files_dir + 'Wave_particle_duality.mp4',
                          language=english),
            ])
        videos_folder.add_child(video_node)

        youtube_id = 'VJyk81HmcZQ'
        video_node2 = VideoNode(
            source_id=youtube_id,  # usually set source_id to youtube_id
            title='Estimating division that results in non whole numbers',
            author='Sal Khan',
            description='Video description would go here',
            language=english,
            license=cc_by('Khan Academy'),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[
                YouTubeVideoFile(youtube_id=youtube_id,
                                 high_resolution=False,
                                 language='en'),
                YouTubeSubtitleFile(youtube_id=youtube_id, language='ko'),
            ])
        videos_folder.add_child(video_node2)
예제 #33
0
def transform_tree(clean_tree, coursedir):
    """
    Turn a cleaned course tree into the flattened node-dict structure.

    The course, each chapter and each non-empty sequential become topic
    dicts. Verticals are converted by the helper that matches the type
    reported by `guess_vertical_type`. Downloadable resources collected
    within a chapter are packaged into one extra HTML5 app node at the end
    of that chapter.

    Args:
        clean_tree: dict with 'course', 'display_name', 'course_image' and
            'children' (chapters, which nest sequentials and verticals).
        coursedir: course directory; the thumbnail is looked up under its
            'static' sub-directory.

    Returns:
        The result of `flatten_transformed_tree` applied to the course dict.
    """
    course_id = clean_tree['course']
    course_title = clean_tree['display_name']

    static_dir_parts = (coursedir, 'static')
    course_thumbnail = os.path.join(*static_dir_parts,
                                    clean_tree['course_image'])
    if not os.path.exists(course_thumbnail):
        # Fall back to the image name with underscores turned into spaces.
        spaced_image_name = clean_tree['course_image'].replace('_', ' ')
        course_thumbnail = os.path.join(*static_dir_parts, spaced_image_name)

    arabic_code = getlang('ar').code

    course_dict = {
        'kind': content_kinds.TOPIC,
        'title': course_title,
        'thumbnail': course_thumbnail,
        'source_id': course_id,
        'description': '',
        'language': arabic_code,
        'license': EDRAAK_LICENSE,
        'children': [],
    }

    for chapter in clean_tree['children']:
        chapter_dict = {
            'kind': content_kinds.TOPIC,
            'title': chapter['display_name'],
            'source_id': chapter['url_name'],
            'description': '',
            'language': arabic_code,
            'license': EDRAAK_LICENSE,
            'children': [],
        }
        course_dict['children'].append(chapter_dict)
        chapter_resources = []

        for sequential in chapter['children']:

            # SPECIAL CASE: skip empty parent nodes of discussions
            if len(sequential['children']) == 0:
                LOGGER.debug('Skipping empty sequential ' + str(sequential))
                continue

            # DEFAULT CASE: process as regular topic node
            sequential_dict = {
                'kind': content_kinds.TOPIC,
                'title': sequential['display_name'],
                'source_id': sequential['url_name'],
                'description': sequential.get('description', ''),
                'language': arabic_code,
                'license': EDRAAK_LICENSE,
                'children': [],
            }
            chapter_dict['children'].append(sequential_dict)
            sequential_children = sequential_dict['children']

            for vertical in sequential['children']:
                vertical_type = guess_vertical_type(vertical)

                if vertical_type in ('knowledge_check_vertical',
                                     'test_vertical'):
                    exercise_dict = transform_vertical_to_exercise(vertical)
                    if exercise_dict:
                        sequential_children.append(exercise_dict)
                elif vertical_type == 'video_vertical':
                    video_dict, resources = transform_video_vertical(vertical)
                    if video_dict:
                        sequential_children.append(video_dict)
                    chapter_resources.extend(resources)
                elif vertical_type == 'html_vertical':
                    nodes, resources = transform_html_vertical(vertical)
                    if nodes:
                        sequential_children.extend(nodes)
                    chapter_resources.extend(resources)
                else:
                    LOGGER.debug('skipping ' + vertical_type + ' url_name=' +
                                 vertical['url_name'])

        # Nothing to package for this chapter: move on to the next one.
        if not chapter_resources:
            continue

        LOGGER.debug('  Packaging chapter_downloadable_resources')
        source_id = chapter['url_name'] + '-downloadable-resources'
        html5app_dict = {
            'kind': content_kinds.HTML5,
            'title': EDRAAK_STRINGS['downloadable_resources'],
            'description': EDRAAK_STRINGS['downloadable_resources_description'],
            'source_id': source_id,
            'license': EDRAAK_LICENSE,
            'language': arabic_code,
            'files': [],
        }
        zip_path = make_html5zip_from_resources(
            chapter_resources, basefilename=source_id + '2')
        zip_file = {
            'file_type': file_types.HTML5,
            'path': zip_path,
            'language': arabic_code,
        }
        html5app_dict['files'].append(zip_file)
        chapter_dict['children'].append(html5app_dict)

    return flatten_transformed_tree(course_dict)