def _build_tree(node, sourcetree):

    for child_source_node in sourcetree:
        try:
            main_file = child_source_node['files'][0] if 'files' in child_source_node else {}
            kind = guess_content_kind(path=main_file.get('path'), web_video_data=main_file.get('youtube_id') or main_file.get('web_url'), questions=child_source_node.get("questions"))
        except UnknownContentKindError:
            continue

        if kind == content_kinds.TOPIC:
            child_node = nodes.TopicNode(
                source_id=child_source_node["id"],
                title=child_source_node["title"],
                author=child_source_node.get("author"),
                description=child_source_node.get("description"),
                thumbnail=child_source_node.get("thumbnail"),
            )
            node.add_child(child_node)

            source_tree_children = child_source_node.get("children", [])

            _build_tree(child_node, source_tree_children)

        elif kind == content_kinds.EXERCISE:
            # node_data = json.dumps(child_source_node)
            if int(len(child_source_node['questions'])) < 5:
                exercise_data = {
                    'mastery_model': exercises.DO_ALL,
                    'randomize': True,
                }
            else:
                exercise_data={
                    'mastery_model': exercises.M_OF_N,
                    'randomize': True,
                    'm': 4,
                    'n': 5,
                }
            child_node = nodes.ExerciseNode(
                source_id=child_source_node["id"],
                title=child_source_node["title"],
                license=child_source_node.get("license"),
                author=child_source_node.get("author"),
                description=child_source_node.get("description"),
                exercise_data=exercise_data,
                copyright_holder='GreyKite Technologies Pvt. Ltd.',
                thumbnail=child_source_node.get("thumbnail"),
            )
    
            add_files(child_node, child_source_node.get("files") or [])
            for q in child_source_node.get("questions"):
                question = create_question(q)
                child_node.add_question(question)
            node.add_child(child_node)

        else:                   # unknown content file format
            continue

    return node
def fetch_assessment_topic_items(driver, topic_node, topic_url,
        topic_short_title, thumbnail=None):
    """Fetch the individual assessment items for a given topic.

    Starting from `topic_url`, each item page is loaded with `driver`, turned
    into a question by `fetch_assessment_item`, and the next-item URL returned
    alongside it is followed until it is falsy. Every QUESTIONS_PER_EXERCISE
    consecutive questions are grouped into one exercise node that is added to
    `topic_node`.

    Args:
        driver: browser driver exposing `get(url)` and `current_url`.
        topic_node: node that receives the exercise nodes via `add_child`.
        topic_url: URL of the first assessment item; when falsy nothing is
            fetched and the function returns without touching `topic_node`.
        topic_short_title: title prefix handed to `_title_exercise`.
        thumbnail: optional thumbnail passed to every exercise node.

    Returns:
        None. The exercises are attached to `topic_node` as a side effect.
    """
    next_item_url = topic_url
    item_count = 0
    # Initialised up front so the final re-title cannot hit unbound names
    # when the loop body never runs (falsy `topic_url`).
    exercise_node = None
    first_item_index_in_exercise = 0

    while next_item_url:
        driver.get(next_item_url)
        current_url = driver.current_url
        item_id = current_url.split('/')[-1]

        print('  Fetching question %s (%s)' % (item_count + 1, current_url))

        # Start a new exercise node every QUESTIONS_PER_EXERCISE questions.
        if item_count % QUESTIONS_PER_EXERCISE == 0:
            first_item_index_in_exercise = item_count
            exercise_title = _title_exercise(topic_short_title, item_count + 1,
                    item_count + QUESTIONS_PER_EXERCISE)
            exercise_node = nodes.ExerciseNode(source_id=item_id,
                    title=exercise_title, license=LICENSE, thumbnail=thumbnail,
                    exercise_data={'randomize': False})
            topic_node.add_child(exercise_node)

        # Try to convert the page HTML into an assessment item, reloading the
        # page and backing off exponentially (1, 2, 4, 8 seconds) on error.
        for attempt in range(4):
            try:
                page_html = get_generated_html_from_driver(driver)
                question, next_item_url = fetch_assessment_item(page_html, item_id)
                break
            except Exception as e:
                wait_time = 2 ** attempt
                print("Got an error, retrying after a wait of %s seconds. "
                        "Error was: %s" % (wait_time, str(e)))
                driver.get(current_url)
                time.sleep(wait_time)
        else:
            # Every retry failed: make one last attempt that tolerates
            # missing images. An error here propagates to the caller.
            print("Going to try skipping any missing images")
            page_html = get_generated_html_from_driver(driver)
            question, next_item_url = fetch_assessment_item(page_html, item_id,
                    skip_missing_images=True)

        exercise_node.add_question(question)
        item_count += 1

    if exercise_node is None:
        # No item was fetched, so there is no exercise to re-title.
        return

    # Re-title the last exercise in the topic, which may hold fewer than
    # QUESTIONS_PER_EXERCISE items (e.g. re-title it "Genetics 10-12").
    exercise_node.title = _title_exercise(topic_short_title,
            first_item_index_in_exercise + 1, item_count)
# Example #3
# 0
def generate_pdf_nodes(data, topic, source=""):
    """
        Build the topic / document / exercise nodes for a split pdf.

        Entries with a 'header' become sub-topics and their 'chapters' are
        processed recursively; entries with a 'chapter' become a document
        node followed by one exercise node per listed exercise. Entries with
        neither key are ignored.

        Args:
            - data (iterable of dict) pdf details (headers, chapters, file
              paths, exercises, etc.)
            - topic (TopicNode) node the generated nodes are added to
            - source (str) unique string for this pdf, used as the prefix of
              every generated source_id
        Returns None
    """
    for entry in data:
        section_title = entry.get('header')
        if section_title:
            # A section: create a sub-topic and descend into its chapters.
            section_id = "{}-{}".format(source, section_title)
            section_node = nodes.TopicNode(title=section_title,
                                           source_id=section_id)
            topic.add_child(section_node)
            generate_pdf_nodes(entry['chapters'], section_node,
                               source=section_id)
            continue

        chapter_title = entry.get("chapter")
        if not chapter_title:
            continue

        # A chapter: one document node for the pdf file itself...
        document_id = "{}-{}".format(source, chapter_title)
        document_node = nodes.DocumentNode(
            title=chapter_title,
            source_id=document_id,
            copyright_holder=COPYRIGHT_HOLDER,
            license=LICENSE,
            files=[files.DocumentFile(entry['path'])],
        )
        topic.add_child(document_node)

        # ...followed by its exercises, added as siblings of the document.
        for position, exercise in enumerate(entry.get("exercises") or []):
            exercise_node = nodes.ExerciseNode(
                title=chapter_title,
                source_id="{} Exercise {}".format(document_id, position),
                description=exercise.get('description'),
                copyright_holder=COPYRIGHT_HOLDER,
                license=LICENSE,
            )
            topic.add_child(exercise_node)
            create_exercise_questions(exercise_node,
                                      exercise.get('questions') or [])
def _build_tree(node, sourcetree):
    """
    Parse nodes given in `sourcetree` and add as children of `node`.
    """
    for child_source_node in sourcetree:
        try:
            main_file = child_source_node['files'][0] if 'files' in child_source_node else {}
            kind = guess_content_kind(path=main_file.get('path'), web_video_data=main_file.get('youtube_id') or main_file.get('web_url'), questions=child_source_node.get("questions"))
        except UnknownContentKindError:
            continue

        if kind == content_kinds.TOPIC:
            child_node = nodes.TopicNode(
                source_id=child_source_node["id"],
                title=child_source_node["title"],
                author=child_source_node.get("author"),
                description=child_source_node.get("description"),
                thumbnail=child_source_node.get("thumbnail"),
            )
            node.add_child(child_node)

            source_tree_children = child_source_node.get("children", [])

            _build_tree(child_node, source_tree_children)

        elif kind == content_kinds.VIDEO:
            child_node = nodes.VideoNode(
                source_id=child_source_node["id"],
                title=child_source_node["title"],
                license=get_license(child_source_node.get("license"), description="Description of license"),
                author=child_source_node.get("author"),
                description=child_source_node.get("description"),
                derive_thumbnail=True, # video-specific data
                thumbnail=child_source_node.get('thumbnail'),
            )
            add_files(child_node, child_source_node.get("files") or [])
            node.add_child(child_node)

        elif kind == content_kinds.AUDIO:
            child_node = nodes.AudioNode(
                source_id=child_source_node["id"],
                title=child_source_node["title"],
                license=child_source_node.get("license"),
                author=child_source_node.get("author"),
                description=child_source_node.get("description"),
                thumbnail=child_source_node.get("thumbnail"),
            )
            add_files(child_node, child_source_node.get("files") or [])
            node.add_child(child_node)

        elif kind == content_kinds.DOCUMENT:
            child_node = nodes.DocumentNode(
                source_id=child_source_node["id"],
                title=child_source_node["title"],
                license=child_source_node.get("license"),
                author=child_source_node.get("author"),
                description=child_source_node.get("description"),
                thumbnail=child_source_node.get("thumbnail"),
            )
            add_files(child_node, child_source_node.get("files") or [])
            node.add_child(child_node)

        elif kind == content_kinds.EXERCISE:
            child_node = nodes.ExerciseNode(
                source_id=child_source_node["id"],
                title=child_source_node["title"],
                license=child_source_node.get("license"),
                author=child_source_node.get("author"),
                description=child_source_node.get("description"),
                exercise_data={}, # Just set to default
                thumbnail=child_source_node.get("thumbnail"),
            )
            add_files(child_node, child_source_node.get("files") or [])
            for q in child_source_node.get("questions"):
                question = create_question(q)
                child_node.add_question(question)
            node.add_child(child_node)

        elif kind == content_kinds.HTML5:
            child_node = nodes.HTML5AppNode(
                source_id=child_source_node["id"],
                title=child_source_node["title"],
                license=child_source_node.get("license"),
                author=child_source_node.get("author"),
                description=child_source_node.get("description"),
                thumbnail=child_source_node.get("thumbnail"),
            )
            add_files(child_node, child_source_node.get("files") or [])
            node.add_child(child_node)

        else:                   # unknown content file format
            continue

    return node
# Example #5
# 0
def build_tree_from_json(parent_node, sourcetree):
    """
    Recursively parse nodes in the list `sourcetree` and add them as children
    to the `parent_node`. Usually called with `parent_node` being a `ChannelNode`.

    Args:
        parent_node: node that receives the children via `add_child`.
        sourcetree: iterable of dict-like source nodes, each with a 'kind'.

    Returns:
        The `parent_node` that was passed in.

    Raises:
        NotImplementedError: if a source node's 'kind' is not one of the
            expected node types.
        KeyError: if a key read with `[...]` ('kind', 'title', and for
            non-topic nodes 'source_id' and 'license') is missing.
    """
    EXPECTED_NODE_TYPES = [
        TOPIC_NODE, VIDEO_NODE, AUDIO_NODE, EXERCISE_NODE, DOCUMENT_NODE,
        HTML5_NODE
    ]

    def shared_fields(source):
        # Metadata that every node kind reads from the source in the same way.
        return dict(
            title=source["title"],
            author=source.get("author"),
            description=source.get("description"),
            language=source.get('language', None),
            thumbnail=source.get("thumbnail"),
        )

    for source_node in sourcetree:
        kind = source_node['kind']
        if kind not in EXPECTED_NODE_TYPES:
            # str() keeps a non-string kind (e.g. None) from raising a
            # TypeError here and hiding the NotImplementedError below.
            LOGGER.critical('Unexpected node type found: ' + str(kind))
            raise NotImplementedError(
                'Unexpected node type found in json data.')

        if kind == TOPIC_NODE:
            child_node = nodes.TopicNode(
                source_id=source_node.get("source_id", None),
                **shared_fields(source_node)
            )
            parent_node.add_child(child_node)
            build_tree_from_json(child_node, source_node.get("children", []))
            continue

        if kind == EXERCISE_NODE:
            child_node = nodes.ExerciseNode(
                source_id=source_node["source_id"],
                license=get_license(**source_node['license']),
                questions=[],
                **shared_fields(source_node)
            )
            add_questions(child_node, source_node.get("questions") or [])
            parent_node.add_child(child_node)
            continue

        # The remaining kinds are file-based and differ only in node class
        # (plus one video-specific option).
        if kind == VIDEO_NODE:
            child_node = nodes.VideoNode(
                source_id=source_node["source_id"],
                license=get_license(**source_node['license']),
                derive_thumbnail=source_node.get(
                    'derive_thumbnail', True),  # video-specific option
                **shared_fields(source_node)
            )
        elif kind == AUDIO_NODE:
            child_node = nodes.AudioNode(
                source_id=source_node["source_id"],
                license=get_license(**source_node['license']),
                **shared_fields(source_node)
            )
        elif kind == DOCUMENT_NODE:
            child_node = nodes.DocumentNode(
                source_id=source_node["source_id"],
                license=get_license(**source_node['license']),
                **shared_fields(source_node)
            )
        else:
            # Only HTML5_NODE is left after the membership check above.
            child_node = nodes.HTML5AppNode(
                source_id=source_node["source_id"],
                license=get_license(**source_node['license']),
                **shared_fields(source_node)
            )
        add_files(child_node, source_node.get("files") or [])
        parent_node.add_child(child_node)

    return parent_node
# Example #6
# 0
def build_tree_from_json(parent_node, sourcetree):
    """
    Recursively parse nodes in the list `sourcetree` and add them as children
    to the `parent_node`. Usually called with `parent_node` being a `ChannelNode`.

    Args:
        parent_node: node that receives the children via `add_child`.
        sourcetree: iterable of dict-like source nodes, each with a 'kind'.

    Returns:
        The `parent_node` that was passed in.

    Raises:
        NotImplementedError: if a source node's 'kind' is not one of the
            expected node kinds.
        KeyError: if a key read with `[...]` ('kind', 'title', and for
            non-topic nodes 'source_id' and 'license') is missing.
    """
    EXPECTED_NODE_TYPES = [TOPIC_NODE, VIDEO_NODE, AUDIO_NODE, EXERCISE_NODE,
                           DOCUMENT_NODE, HTML5_NODE, SLIDESHOW_NODE]

    def content_fields(source):
        # Metadata shared by every non-topic (content) node kind.
        return dict(
            source_id=source['source_id'],
            title=source['title'],
            description=source.get('description'),
            license=get_license(**source['license']),
            author=source.get('author'),
            aggregator=source.get('aggregator'),
            provider=source.get('provider'),
            role=source.get('role', roles.LEARNER),
            language=source.get('language'),
            thumbnail=source.get('thumbnail'),
            tags=source.get('tags'),
        )

    for source_node in sourcetree:
        kind = source_node['kind']
        if kind not in EXPECTED_NODE_TYPES:
            # str() keeps a non-string kind (e.g. None) from raising a
            # TypeError here and hiding the NotImplementedError below.
            LOGGER.critical('Unexpected node kind found: ' + str(kind))
            raise NotImplementedError('Unexpected node kind found in json data.')

        if kind == TOPIC_NODE:
            child_node = nodes.TopicNode(
                source_id=source_node.get('source_id', None),
                title=source_node['title'],
                description=source_node.get('description'),
                author=source_node.get('author'),
                aggregator=source_node.get('aggregator'),
                provider=source_node.get('provider'),
                # no role for topics (computed dynamically from descendants)
                language=source_node.get('language'),
                thumbnail=source_node.get('thumbnail'),
                tags=source_node.get('tags'),
            )
            parent_node.add_child(child_node)
            build_tree_from_json(child_node, source_node.get('children', []))
            continue

        if kind == EXERCISE_NODE:
            child_node = nodes.ExerciseNode(
                exercise_data=source_node.get('exercise_data'),
                questions=[],
                **content_fields(source_node)
            )
            add_questions(child_node, source_node.get('questions') or [])
            parent_node.add_child(child_node)
            continue

        # The remaining kinds are file-based and differ only in node class
        # (plus one video-specific option).
        if kind == VIDEO_NODE:
            child_node = nodes.VideoNode(
                derive_thumbnail=source_node.get('derive_thumbnail', True),  # video-specific option
                **content_fields(source_node)
            )
        elif kind == AUDIO_NODE:
            child_node = nodes.AudioNode(**content_fields(source_node))
        elif kind == DOCUMENT_NODE:
            child_node = nodes.DocumentNode(**content_fields(source_node))
        elif kind == HTML5_NODE:
            child_node = nodes.HTML5AppNode(**content_fields(source_node))
        else:
            # Only SLIDESHOW_NODE is left after the membership check above.
            child_node = nodes.SlideshowNode(**content_fields(source_node))
        add_files(child_node, source_node.get('files') or [])
        parent_node.add_child(child_node)

    return parent_node
# Example #7
# 0
    def upload_content(self, data, access_token, channel):
        """Build the content tree described by `data` and attach it to `channel`.

        `data` is read as nested mappings, one level per loop:
        language -> grade -> subject -> chapter -> topic -> content type.
        Language, subject, chapter and topic names are title-cased before
        being used in node titles and source ids. A topic named
        "Chapter Assessment" becomes an exercise directly under its chapter;
        any other topic becomes a topic node holding videos (content type
        "video") and one exercise per other content type.

        Videos that raise while being fetched or added are printed, recorded
        through `self.add_to_failed` and skipped.

        Args:
            data: nested mapping as described above.
            access_token: token handed to `self.video_node_from_dropbox`.
            channel: node receiving one topic node per language.

        Returns:
            The `channel` that was passed in.
        """
        def make_topic(title, source_id, lang):
            # Every topic level shares the same author/thumbnail boilerplate.
            return nodes.TopicNode(title=title,
                                   source_id=source_id,
                                   author="TicTacLearn",
                                   description='',
                                   thumbnail=TTL_MAIN_LOGO,
                                   language=lang)

        def make_assessment(title, source_id, description, lang, items):
            # Exercise whose M_OF_N parameters both equal the question count.
            questions = self.create_question(items)
            return nodes.ExerciseNode(
                source_id=source_id,
                title=title,
                author="TicTacLearn",
                description=description,
                language=lang,
                license=licenses.CC_BYLicense("TicTacLearn"),
                thumbnail=TTL_MAIN_LOGO,
                exercise_data={
                    "mastery_model": exercises.M_OF_N,
                    "m": len(questions),
                    "n": len(questions),
                    "randomize": True
                },
                questions=questions)

        def make_content_topic(title, source_id, lang, contents):
            # Topic node filled with its videos and assessments.
            topic_node = make_topic(title, source_id, lang)
            for content_type, content in contents.items():
                if content_type == "video":
                    for link, details in content.items():
                        try:
                            topic_node.add_child(
                                self.video_node_from_dropbox(
                                    details, link, access_token))
                        except Exception as e:
                            # Best effort: report, remember the failure and
                            # carry on with the next video.
                            print(e)
                            print(
                                "Error getting video from dropbox with link: {}"
                                .format(link))
                            self.add_to_failed(link, details, content_type)
                else:
                    # Any content type other than "video" is an assessment.
                    # `language` is set here like on every other node; the
                    # original omitted it for this exercise only.
                    assessment_title = "{} Assessment".format(title)
                    topic_node.add_child(
                        make_assessment(assessment_title,
                                        "{}-Assessment".format(source_id),
                                        assessment_title, lang,
                                        content.items()))
            return topic_node

        for language, grades in data.items():
            # convert to title to apply title case for node titles
            language = language.title()
            # Resolved once per language instead of once per node.
            lang = getlang_by_name(language)
            language_node = make_topic(language, language, lang)

            for grade, subjects in grades.items():
                grade_id = "{}-{}".format(language, grade)
                grade_node = make_topic('Grade {}'.format(grade), grade_id,
                                        lang)

                for subject, chapters in subjects.items():
                    subject = subject.title()
                    subject_id = "{}-{}".format(grade_id, subject)
                    subject_node = make_topic(subject, subject_id, lang)

                    for chapter, topics in chapters.items():
                        chapter = chapter.title()
                        chapter_id = "{}-{}".format(subject_id, chapter)
                        chapter_node = make_topic(chapter, chapter_id, lang)

                        for topic, topic_value in topics.items():
                            topic = topic.title()
                            topic_id = "{}-{}".format(chapter_id, topic)
                            if topic == "Chapter Assessment":
                                child = make_assessment(
                                    topic, topic_id, "Chapter Assessment",
                                    lang, topic_value.items())
                            else:
                                child = make_content_topic(
                                    topic, topic_id, lang, topic_value)
                            chapter_node.add_child(child)

                        subject_node.add_child(chapter_node)
                    grade_node.add_child(subject_node)
                language_node.add_child(grade_node)
            channel.add_child(language_node)

        return channel
def convert_ka_node_to_ricecooker_node(ka_node):
    """Convert a Khan Academy node into the corresponding ricecooker node.

    Topics are converted recursively (children that convert to a falsy value
    are dropped), exercises get one PerseusQuestion per assessment item, and
    videos get a video file plus, for English videos, one subtitle file per
    available subtitle language. Descriptions are truncated to 400 characters.

    Args:
        ka_node: a KhanTopic, KhanExercise, KhanVideo or KhanArticle.

    Returns:
        The new TopicNode, ExerciseNode or VideoNode, or None when the node's
        slug is in SLUG_BLACKLIST, the node is a KhanArticle (not implemented
        yet) or it is of any other type.

    Raises:
        Exception: if a video's license is not a key of LICENSE_MAPPING.
    """
    if ka_node.slug in SLUG_BLACKLIST:
        return None

    if isinstance(ka_node, KhanTopic):
        topic = nodes.TopicNode(
            source_id=ka_node.id,
            title=ka_node.title,
            description=ka_node.description[:400],
        )
        for ka_subtopic in ka_node.children:
            subtopic = convert_ka_node_to_ricecooker_node(ka_subtopic)
            if subtopic:
                topic.add_child(subtopic)
        return topic

    if isinstance(ka_node, KhanExercise):
        exercise = nodes.ExerciseNode(
            source_id=ka_node.id,
            title=ka_node.title,
            description=ka_node.description[:400],
            license=licenses.SpecialPermissionsLicense(
                copyright_holder="Khan Academy",
                description=
                "Permission granted to distribute through Kolibri for non-commercial use"
            ),  # need to formalize with KA
            # Was `node.thumbnail`, but no `node` exists in this scope.
            thumbnail=ka_node.thumbnail,
        )
        for ka_assessment_item in ka_node.get_assessment_items():
            # Read from the loop variable; the original read the not yet
            # assigned `assessment_item` and failed on the first iteration.
            assessment_item = PerseusQuestion(
                id=ka_assessment_item.id,
                raw_data=ka_assessment_item.data,
                source_url=ka_assessment_item.source_url,
            )
            exercise.add_question(assessment_item)
        return exercise

    if isinstance(ka_node, KhanVideo):
        # TODO: Use traditional compression here to avoid breaking existing KA downloads?
        # Prefer the "mp4-low" download, falling back to "mp4".
        video_files = [
            VideoFile(
                ka_node.download_urls.get("mp4-low",
                                          ka_node.download_urls.get("mp4")))
        ]

        # if the video is in English, include any subtitles available along with it
        if ka_node.lang == "en":
            for lang_code in ka_node.get_subtitle_languages():
                # NOTE(review): the original passed the undefined `node.id`;
                # `ka_node.id` is assumed to be the id YouTubeSubtitleFile
                # expects - confirm against the KhanVideo definition.
                video_files.append(
                    YouTubeSubtitleFile(ka_node.id, language=lang_code))

        # convert KA's license format into our own license classes
        if ka_node.license not in LICENSE_MAPPING:
            raise Exception("Unknown license on video {}: {}".format(
                ka_node.id, ka_node.license))
        video_license = LICENSE_MAPPING[ka_node.license]

        return nodes.VideoNode(
            source_id=ka_node.id,
            title=ka_node.title,
            description=ka_node.description[:400],
            license=video_license,
            # Was `node.thumbnail`, but no `node` exists in this scope.
            thumbnail=ka_node.thumbnail,
            files=video_files,
        )

    if isinstance(ka_node, KhanArticle):
        # TODO: articles are not converted yet.
        return None

    # Any other node type is not converted.
    return None