Esempio n. 1
0
def html_invalid_zip(html_data, html_invalid_file):
    """Build an HTML5AppNode whose only file is ``html_invalid_file``.

    The node is created from ``html_data`` with its ``files`` entry reset to
    an empty list; the given file is then attached through ``add_file``.
    """
    positional = get_content_node_args(html_data)
    keywords = get_content_node_kwargs(html_data)
    # Start from an empty file list so only the file added below is attached.
    keywords['files'] = []
    node = HTML5AppNode(*positional, **keywords)
    node.add_file(html_invalid_file)
    return node
def add_node_document(booklist, level_topic, as_booklist):
    """Add one HTML5AppNode per downloadable book in ``booklist``.

    Args:
        booklist: iterable of book dicts. The keys read here are
            ``source_id``, ``publisher``, ``title``, ``slug``, ``author``,
            ``description`` and, optionally, ``thumbnail``.
        level_topic: node that receives each book through ``add_child``.
        as_booklist: African Storybook list handed to
            ``check_if_story_in_AS`` together with the book title.

    Books for which ``get_html5_app_zip_path`` returns a falsy path are
    skipped.
    """
    # Both namespaces are constants, so derive them once rather than per book.
    storyweaver_domain = uuid.uuid5(uuid.NAMESPACE_DNS, 'storyweaver.org.in')
    african_storybook_domain = uuid.uuid5(uuid.NAMESPACE_DNS,
                                          'www.africanstorybook.org')

    # Add books according to level, language and publisher
    for item in booklist:
        # Default source domain and content id.
        domain = storyweaver_domain
        book_id = str(item['source_id'])

        # If the publisher is AS and the book is found in the AS list,
        # switch to the AS source domain and the id reported by the lookup.
        if item['publisher'] == 'African Storybook Initiative':
            # check[0] is the "found" flag, check[1] the matching id.
            check = check_if_story_in_AS(as_booklist, item['title'])
            if check[0]:
                domain = african_storybook_domain
                book_id = check[1]

        link = get_html5_app_zip_path(item['slug'])
        if not link:
            continue

        book = HTML5AppNode(
            title=item['title'],
            source_id=book_id,
            author=item['author'],
            files=[HTMLZipFile(path=link)],
            license=get_license(licenses.CC_BY,
                                copyright_holder='Pratham Books'),
            thumbnail=item.get('thumbnail'),
            description=item['description'],
            domain_ns=domain,
        )
        level_topic.add_child(book)
Esempio n. 3
0
    def construct_channel(self, **kwargs):
        """Build the channel: title topics, group topics, then HTML5 apps.

        Entries come from ``index.no_dl_index()``. A new title topic is
        opened whenever the title differs from the previous entry's, and a
        new group topic whenever the group differs (or a new title started).
        The running entry number is appended to every source id.
        """
        channel = self.get_channel(**kwargs)
        previous_title = None
        previous_group = None

        entries = enumerate(index.no_dl_index(), start=1)
        for position, (metadata, zfilename, (title, group)) in entries:
            if title != previous_title:
                previous_title = title
                title_node = TopicNode(
                    source_id=title + str(position),
                    title=replace(title),
                )
                channel.add_child(title_node)
                # Force a fresh group topic under the new title topic.
                previous_group = None

            if group != previous_group:
                previous_group = group
                group_node = TopicNode(
                    source_id=title + group + str(position + 0.5),
                    title=replace(group),
                )
                title_node.add_child(group_node)

            app_zip = HTMLZipFile(path=zfilename)
            group_node.add_child(
                HTML5AppNode(
                    title=metadata.title,
                    description=metadata.description,
                    source_id=zfilename + str(position + 0.9),
                    license=LICENCE,
                    language='es',
                    files=[app_zip],
                ))
        return channel
Esempio n. 4
0
def download_wikipedia_page(url, thumbnail, title):
    """Download one page and return it wrapped as an HTML5AppNode.

    The page is saved as ``index.html`` inside a fresh temporary directory
    (after passing through ``process_wikipedia_page``), the directory is
    zipped, and the zip becomes the node's single file. The node's source id
    is the last ``/``-separated segment of ``url``.
    """
    # Scratch directory that holds the downloaded page.
    workdir = tempfile.mkdtemp()

    # Fetch the main page as index.html, applying the middleware processor.
    _localref, _ = download_file(
        url,
        workdir,
        filename="index.html",
        middleware_callbacks=process_wikipedia_page,
        request_fn=make_request,
    )

    # Zip the scratch directory and hang the zip on a new HTML5 app node.
    archive = HTMLZipFile(create_predictable_zip(workdir))
    page_slug = url.rsplit("/", 1)[-1]
    return HTML5AppNode(
        files=[archive],
        title=title,
        thumbnail=thumbnail,
        source_id=page_slug,
        license=licenses.PublicDomainLicense(),
    )
Esempio n. 5
0
    def construct_channel(self, **kwargs):
        """Build the channel: one topic per category, one HTML5 app per link.

        Categories come from ``catnum`` and links from
        ``crawl.get_all_links``. A ``<zip>_2.jpg`` file next to the zip is
        used as thumbnail when it exists. Each app node and each category
        node is validated.
        """
        channel = self.get_channel(**kwargs)
        for name, _id in catnum.items():
            cat_node = TopicNode(source_id=str(_id), title=name)
            channel.add_child(cat_node)

            for link in list(crawl.get_all_links(_id)):
                zipfilename, title = localise.zip_from_url(link)
                appzip = HTMLZipFile(zipfilename)

                thumb_path = zipfilename + "_2.jpg"
                thumb = (ThumbnailFile(thumb_path)
                         if os.path.exists(thumb_path) else None)

                zipnode = HTML5AppNode(
                    source_id=link,
                    title=title,
                    license=licenses.CC_BY,
                    copyright_holder=CHANNEL_NAME,
                    files=[appzip],
                    author=crawl.author_lookup[link],
                    thumbnail=thumb,
                )
                zipnode.validate()
                cat_node.add_child(zipnode)

            cat_node.validate()

        print("DONE")
        return channel
 def get_things(all_things, parent_node, new_node=True):
     """Attach a video node (when available) and an HTML5 app for each thing.

     Args:
         all_things: iterable of objects exposing ``url``, ``title``,
             ``youtube`` and ``app`` (``app`` is written in binary mode, so
             it is presumably bytes -- confirm against the producer).
         parent_node: node that receives the content.
         new_node: when true, each thing gets its own TopicNode, which is
             added to ``parent_node``; otherwise the content is added to
             ``parent_node`` directly.

     The zipped app is written to ``html/<last url segment>.zip``.
     """
     for thing in all_things:
         _id = thing.url.strip('/').split('/')[-1]  # TODO hash
         if new_node:
             this_node = TopicNode(source_id=thing.url, title=thing.title)
         else:
             this_node = parent_node

         content_node = make_youtube_video(
             thing.youtube,
             "Video: {}".format(thing.title),
             "video__{}".format(thing.url))  # TODO hash
         if content_node is not None:
             this_node.add_child(content_node)

         # Only an already-existing directory is tolerated; real failures
         # (permissions, a plain file named "html") are no longer hidden.
         os.makedirs('html', exist_ok=True)
         fn = "html/{}.zip".format(_id)
         with open(fn, "wb") as f:
             f.write(thing.app)
         app_zip = HTMLZipFile(fn)

         # startswith() keeps an empty title from raising IndexError; an
         # empty title gets the article "a".
         if thing.title.startswith(tuple("AEIOUaeiou")):
             an = "an"
         else:
             an = "a"
         app_node = HTML5AppNode(
             source_id="app_{}".format(thing.url),
             title="Being {} {}".format(an, thing.title),
             license=LICENCE,
             files=[app_zip])

         this_node.add_child(app_node)
         if new_node:
             parent_node.add_child(this_node)
def linkAssignment(linkData):
    """Download the first link attachment and wrap it as an HTML5AppNode.

    Args:
        linkData: JSON-like dict; the first entry of
            ``linkData['attachments']['links']['link']`` supplies ``url``
            and ``title``.

    Returns:
        The HTML5AppNode built around the zipped page.

    Side effects:
        Uses and increments the module-level ``zipId`` counter to name the
        working folder ``myzipper<N>``, creates that folder and
        ``myzipper<N>.zip`` in the current directory, and appends both paths
        to the module-level ``filesCreated`` list.
    """
    global zipId

    # URL and title come from the first link attachment.
    first_link = linkData['attachments']['links']['link'][0]
    url = first_link['url']
    title = first_link['title']

    # Fetch the page. The context manager releases the session's pooled
    # connections once the body has been read.
    with requests.Session() as session:
        session.headers[
            "User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
        html = session.get(url).content

    soup = bs4.BeautifulSoup(html, "html.parser")

    # Working folder for this link's content.
    filename = 'myzipper' + str(zipId)
    print("\n\n\n" + filename + "\n\n\n")
    zipId = zipId + 1

    # Start from a clean folder if a previous run left one behind.
    if os.path.exists(filename):
        shutil.rmtree(filename)

    # Download all assets (html, css, js, ...) referenced by the page.
    doc = download_static_assets(soup,
                                 filename,
                                 url,
                                 request_fn=make_request,
                                 url_blacklist=url_blacklist)

    # Make sure the folder exists even if no asset was written into it.
    os.makedirs(filename, exist_ok=True)

    # Write out the HTML source.
    with open(os.path.join(filename, "index.html"), "w",
              encoding="utf-8") as f:
        f.write(str(doc))

    print("        ... downloaded to %s" % filename)
    filesCreated.append(filename)

    # Zip the folder contents; make_archive appends the ".zip" suffix.
    shutil.make_archive(filename, 'zip', filename)
    filesCreated.append(filename + '.zip')

    link_file = HTMLZipFile(path=(filename + '.zip'))
    link_node = HTML5AppNode(source_id=url,
                             title=title,
                             license=get_license(
                                 licenses.CC_BY,
                                 copyright_holder='Copyright holder name'),
                             language=getlang('en').id,
                             derive_thumbnail=False,
                             thumbnail=None,
                             files=[link_file])
    return link_node
Esempio n. 8
0
 def node(self):
     """Return an HTML5AppNode for this item.

     The node combines this object's url, title, license and thumbnail with
     the zip at ``self.zipname``; every entry of ``self.metadata`` is passed
     through as an extra keyword argument.
     """
     source_id = self.url
     title = self.title()
     node_license = self.get_license()
     holder = copyright_holder
     thumbnail = self.thumb()
     app_zip = HTMLZipFile(self.zipname)
     return HTML5AppNode(
         source_id=source_id,
         title=title,
         license=node_license,
         copyright_holder=holder,
         thumbnail=thumbnail,
         files=[app_zip],
         **self.metadata)
Esempio n. 9
0
def html(html_file, html_data, channel):
    """Create an HTML5AppNode from ``html_data`` and add it to ``channel``.

    ``html_file`` is attached to the node and also appended to
    ``html_data['files']``.
    """
    positional = get_content_node_args(html_data)
    keywords = get_content_node_kwargs(html_data)
    node = HTML5AppNode(*positional, **keywords)
    node.add_file(html_file)
    channel.add_child(node)
    # Remember the file on the shared data so it can be compared later.
    html_data['files'].append(html_file)
    return node
Esempio n. 10
0
def html_invalid_files(html_data, document_file):
    """
    Build an HTML5AppNode that is invalid because a PDF document file is
    added to it.
    """
    positional = get_content_node_args(html_data)
    keywords = get_content_node_kwargs(html_data)
    # Start from an empty file list so only the document below is attached.
    keywords['files'] = []
    node = HTML5AppNode(*positional, **keywords)
    node.add_file(document_file)
    return node
Esempio n. 11
0
 def test_generate_thumbnail_from_html(self, html_file):
     """With thumbnails enabled, processing yields two files and a thumbnail."""
     app_node = HTML5AppNode(
         'html-src-id',
         "HTML5 App",
         licenses.PUBLIC_DOMAIN,
         thumbnail=None,
     )
     app_node.add_file(html_file)
     # Thumbnail generation is switched on through the global config flag.
     config.THUMBNAILS = True
     processed = app_node.process_files()
     assert len(processed) == 2, 'expected two filenames'
     self.check_has_thumbnail(app_node)
Esempio n. 12
0
def linkAssignment(material):
    """Package the page behind ``material["link"]`` as an HTML5AppNode.

    Args:
        material: dict whose ``"link"`` entry provides ``"url"`` and
            ``"title"``.

    Returns:
        The HTML5AppNode wrapping ``./myzipper.zip``.

    Side effects:
        Writes ``./myzipper.zip`` and a ``css_files.css`` listing of the
        stylesheet URLs in the current directory.

    NOTE(review): every script is stored under the same archive name
    (``src/scripts.js``) and every stylesheet under ``styles/style.css``,
    and the index page is not rewritten to reference them. This mirrors the
    previous behaviour; confirm how ``HTMLWriter.write_url`` treats repeated
    names before relying on these assets.
    """
    url = material["link"]["url"]

    # The context manager releases the session's pooled connections.
    with requests.Session() as session:
        session.headers[
            "User-Agent"] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
        html = session.get(url).content
    soup = bs(html, "html.parser")

    # Absolute URLs of every <script src=...> and <link href=...>.
    script_files = [
        urljoin(url, script.attrs.get("src"))
        for script in soup.find_all("script")
        if script.attrs.get("src")
    ]
    css_files = [
        urljoin(url, css.attrs.get("href"))
        for css in soup.find_all("link")
        if css.attrs.get("href")
    ]

    with HTMLWriter('./myzipper.zip') as zipper:
        zipper.write_index_contents(soup.prettify())

        # No local file is opened here any more: the previous read-mode
        # open("javascript_files.js") failed whenever that file was absent
        # and its handle was never used.
        for js_file in script_files:
            zipper.write_url(js_file, "scripts.js", directory="src")

        with open("css_files.css", "w") as f:
            for css_file in css_files:
                print(css_file, file=f)
                zipper.write_url(css_file, "style.css", directory="styles")

    link_file = HTMLZipFile(path='./myzipper.zip')
    link_node = HTML5AppNode(source_id=material["link"]["url"],
                             title=material["link"]["title"],
                             license=get_license(
                                 licenses.CC_BY,
                                 copyright_holder='Copyright holder name'),
                             language=getlang('en').id,
                             derive_thumbnail=False,
                             thumbnail=None,
                             files=[link_file])
    return link_node
Esempio n. 13
0
 def test_non_existent_htmlzip_fails(self):
     """A zip path that does not exist yields one None filename and one failure."""
     doc_node = HTML5AppNode(
         'doc-src-id',
         "Document",
         licenses.PUBLIC_DOMAIN,
         thumbnail=None,
     )
     missing_zip = HTMLZipFile('does/not/exist.zip', language='en')
     doc_node.add_file(missing_zip)
     config.THUMBNAILS = True
     processed = doc_node.process_files()
     assert processed == [
         None
     ], 'expected one None filename (the broken zip)'
     assert len(config.FAILED_FILES) == 1, 'expected one failed file'
def handle_lesson(page):
    """Turn one lesson page into a zipped HTML5 app plus its video nodes.

    The article element is extracted from the page, the "small related" and
    "share" blocks are removed, and each iframe for which ``get_video_node``
    returns a hash is rewritten into a ``<video>`` tag pointing at
    ``/content/storage/...``. The result is rendered through ``template``,
    localised, passed through ``tex_to_svg.html_to_svg`` and zipped.

    Returns:
        ``(zip_node, video_nodes)``: the HTML5AppNode for the lesson and the
        list of video nodes collected from its iframes.
    """
    def makefolder():
        # Callback handed to localise.make_local_html.
        os.mkdir(localise.DOWNLOAD_FOLDER)

    video_nodes = []

    # The response body is bytes; decode it as UTF-8 explicitly before
    # parsing instead of handing lxml the raw bytes.
    page_bytes = requests.get(page.url).content
    markup = page_bytes.decode('utf-8')
    root = lxml.html.fromstring(markup)
    article = root.xpath("//article[@id='article']")[0]

    # Collect the iframes before the unwanted blocks are dropped.
    iframes = article.xpath("//iframe")
    unwanted_blocks = ("//div[@class='small related']", "//div[@id='share']")
    for expression in unwanted_blocks:
        for element in article.xpath(expression):
            element.drop_tree()

    for iframe in iframes:
        node, nodehash = get_video_node(iframe.attrib['src'])
        if not nodehash:
            # No filename: the video is probably broken, so leave it as is.
            continue
        video_nodes.append(node)
        iframe.attrib['src'] = "/content/storage/{}/{}/{}".format(
            nodehash[0], nodehash[1], nodehash)
        iframe.attrib['localise'] = "skip"
        iframe.attrib['controls'] = "True"
        iframe.tag = "video"

    article_decode = lxml.html.tostring(article).decode('utf-8')
    new_html = template.replace("{name}", page.name)
    new_html = new_html.replace("{article}", article_decode)

    local_soup = localise.make_local_html(
        BeautifulSoup(new_html, "html5lib"), page.url, makefolder)
    svg_html = tex_to_svg.html_to_svg(local_soup.prettify())

    with open(localise.DOWNLOAD_FOLDER + "/index.html", "wb") as f:
        f.write(svg_html)

    zip_file = HTMLZipFile(localise.finalise_zip_file(page.url))
    zip_node = HTML5AppNode(
        source_id=page.url,
        title=page.name,
        license=CC_BY_NC_ND,
        copyright_holder='Mathplanet',
        files=[zip_file],
    )
    return zip_node, video_nodes
    def download_content(self, parent, link, params, selected_category, start):
        """
        Parse one result page and add an HTML5AppNode for each content item.

        Args:
            parent: node that receives the content nodes via ``add_child``.
            link: base URL; ``/itemlist/filter`` is appended for the query.
            params: query parameters for the listing request. The dict is
                copied, so the caller's dict is left untouched.
            selected_category: not used by this method.
            start: value sent as the ``start`` query parameter.

        Items whose page yields no zip path are logged and skipped.
        """
        # Work on a copy: mutating the caller's dict made a second call with
        # the same dict fail on the missing "format" key.
        query = dict(params)
        query["start"] = start
        query.pop("format", None)

        # Parse each page of the result
        resp = downloader.make_request("{}/itemlist/filter".format(link),
                                       params=query)
        soup = BeautifulSoup(resp.content, "html.parser")

        # Find all the content in each page
        for item in soup.find("tbody").find_all("a"):
            content_url = "http://proyectodescartes.org{}".format(item["href"])
            title = item.text.strip()
            source_id = item["href"].split("/")[-1]

            # Parse each content's page
            response = downloader.make_request(content_url)
            page = BeautifulSoup(response.content, "html.parser")

            thumbnail_url = "http://proyectodescartes.org{}".format(
                page.find("div", class_="itemFullText").find("img")["src"])
            author = self.get_content_author(page)
            zip_path = self.get_content_zip(page)
            if not zip_path:
                LOGGER.info(
                    "The url for the zip file does not exist in this page: {}".
                    format(content_url))
                continue

            content_node = HTML5AppNode(
                source_id=source_id,
                title=title,
                license=CC_BY_NC_SALicense(
                    copyright_holder="Proyecto Descartes"),
                language=CHANNEL_LANGUAGE,
                files=[files.HTMLZipFile(zip_path)],
                author=author,
                thumbnail=thumbnail_url,
            )

            parent.add_child(content_node)
def get_contents(parent, path):
    """Add a node to ``parent`` for every content tile found on ``path``.

    Each ``div.col-md-3`` tile inside ``div#row-exu`` becomes a VideoNode
    (main file ends in ``mp4``), a DocumentNode (``pdf``) or an HTML5AppNode
    (``html`` main file with a ``zip`` master file). Unsupported
    combinations and per-tile failures are logged and skipped, so one bad
    tile does not stop the rest.
    """
    doc = get_page(path)
    try:
        menu_row = doc.find('div', {'id': 'row-exu'})
    except Exception as e:
        LOGGER.error('get_contents: %s : %s' % (e, doc))
        return
    if menu_row is None:
        # find() returns None when the container is absent; bail out rather
        # than fail on menu_row.find_all below.
        LOGGER.error('get_contents: content row not found : %s' % path)
        return

    for content in menu_row.find_all('div', {'class': 'col-md-3'}):
        try:
            title = content.find('div', {'class': 'txtline'}).get_text()
            thumbnail = content.find('a').find('img')['src']
            thumbnail = get_absolute_path(thumbnail)
            main_file, master_file, source_id = get_content_link(content)
            LOGGER.info('      content: %s: %s' % (source_id, title))
            if main_file.endswith('mp4'):
                video = VideoNode(title=title,
                                  source_id=source_id,
                                  license=licenses.PUBLIC_DOMAIN,
                                  thumbnail=thumbnail,
                                  files=[VideoFile(main_file)])
                parent.add_child(video)
            elif main_file.endswith('pdf'):
                pdf = DocumentNode(title=title,
                                   source_id=source_id,
                                   license=licenses.PUBLIC_DOMAIN,
                                   thumbnail=thumbnail,
                                   files=[DocumentFile(main_file)])
                parent.add_child(pdf)
            elif main_file.endswith('html') and master_file.endswith('zip'):
                zippath = get_zip_file(master_file, main_file)
                # A falsy zip path means no node is added for this tile.
                if zippath:
                    html5app = HTML5AppNode(
                        title=title,
                        source_id=source_id,
                        license=licenses.PUBLIC_DOMAIN,
                        thumbnail=thumbnail,
                        files=[HTMLZipFile(zippath)],
                    )
                    parent.add_child(html5app)
            else:
                LOGGER.error('Content not supported: %s, %s' %
                             (main_file, master_file))
        except Exception as e:
            # Best effort: log the failing tile and continue with the next.
            LOGGER.error('get_contents: %s : %s' % (e, content))
Esempio n. 17
0
def make_random_subtree(parent, depth):
    """Populate ``parent`` with 45 randomly chosen sample nodes.

    Each child is a topic (most likely), video, audio, document or HTML5
    app. Topics are filled recursively while ``depth`` is greater than 0.

    Args:
        parent: node that receives the children via ``add_child``.
        depth: number of further topic levels to generate below ``parent``.
    """
    for i in range(45):
        istr = str(i)
        title = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(10))
        description = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(100))

        # 't' is repeated so topics are picked far more often than content.
        typ = random.choice("tttttttvadh")

        if typ == 't':
            topic = TopicNode(
                source_id=title + istr,
                title=title,
                description=description,
                author=None,
                language=getlang('en').id,
                thumbnail=None,
            )
            parent.add_child(topic)

            if depth > 0:
                make_random_subtree(topic, depth - 1)

        elif typ == 'a':

            content11a = AudioNode(
                source_id='940ac8ff' + istr,
                title='Whale sounds',
                author='First Last (author\'s name)',
                description='Put file description here',
                language=getlang('en').id,
                license=get_license(licenses.CC_BY,
                                    copyright_holder='Copyright holder name'),
                thumbnail=None,
                files=[],
            )
            parent.add_child(content11a)
            audio_file = AudioFile(
                path='./content/ricecooker-channel-files/Whale_sounds.mp3',
                language=getlang('en').id)
            content11a.add_file(audio_file)

        elif typ == 'd':

            content12a = DocumentNode(
                source_id='80b7136f' + istr,
                title=
                'The Supreme Court\u2019s Ruling in Brown vs. Board of Education',
                author='First Last (author\'s name)',
                description='Put file description here',
                language=getlang('en').id,
                license=get_license(licenses.CC_BY,
                                    copyright_holder='Copyright holder name'),
                thumbnail=None,
                files=[
                    DocumentFile(
                        path=
                        './content/ricecooker-channel-files/commonlit_the-supreme-court-s-ruling-in-brown-vs-board-of-education_student.pdf',
                        language=getlang('en').id)
                ])
            parent.add_child(content12a)

        elif typ == 'h':

            content13a = HTML5AppNode(
                source_id='302723b4' + istr,
                title='Sample React app',
                author='First Last (author\'s name)',
                description='Put file description here',
                language=getlang('en').id,
                license=get_license(licenses.CC_BY,
                                    copyright_holder='Copyright holder name'),
                thumbnail='./content/ricecooker-channel-files/html5_react.jpg',
                files=[
                    HTMLZipFile(
                        path=
                        './content/ricecooker-channel-files/html5_react.zip',
                        language=getlang('en').id)
                ])
            parent.add_child(content13a)

        # This used to compare the builtin ``type`` with 'v', which is never
        # true, so no video node was ever generated.
        elif typ == 'v':

            content14a = VideoNode(
                # The index suffix keeps sibling ids distinct, as in the
                # other branches.
                source_id='9e355995' + istr,
                title='Wave particle duality explained in 2 mins',
                author='First Last (author\'s name)',
                description='Put file description here',
                language=getlang('en').id,
                license=get_license(licenses.CC_BY,
                                    copyright_holder='Copyright holder name'),
                derive_thumbnail=True,  # video-specific flag
                thumbnail=None,
                files=[
                    VideoFile(
                        path=
                        './content/ricecooker-channel-files/Wave_particle_duality.mp4',
                        language=getlang('en').id)
                ])
            parent.add_child(content14a)
Esempio n. 18
0
    def construct_channel(self, *args, **kwargs):
        """
        Create the ChannelNode and a small tree holding two HTML5 app nodes.

        The tree is "Content Nodes" > "HTML5App Nodes" > two apps. The
        channel is checked with ``raise_for_invalid_channel`` before it is
        returned.
        """
        # ChannelNode built from the data in self.channel_info
        channel = self.get_channel(*args, **kwargs)
        english = getlang('en').id

        content_topic = TopicNode(
            source_id='121232ms',
            title='Content Nodes',
            description='Put folder description here',
            author=None,
            language=english,
            thumbnail=None,
        )
        channel.add_child(content_topic)

        # HTML5 APPS
        apps_topic = TopicNode(
            source_id='asasa331',
            title='HTML5App Nodes',
            description='Put folder description here',
            author=None,
            language=english,
            thumbnail=None,
        )
        content_topic.add_child(apps_topic)

        # (source id, title, zip path) for each app, in insertion order.
        app_specs = (
            ('302723b4', 'Shared Zip File app',
             './content/zipfiles/shared.zip'),
            ('302723b5', 'Thin app 1', './content/zipfiles/thinapp1.zip'),
        )
        for app_source_id, app_title, zip_path in app_specs:
            app_node = HTML5AppNode(
                source_id=app_source_id,
                title=app_title,
                author='First Last (author\'s name)',
                description='Put file description here',
                language=english,
                license=get_license(licenses.CC_BY,
                                    copyright_holder='Copyright holder name'),
                thumbnail=None,
                files=[HTMLZipFile(path=zip_path, language=english)],
            )
            apps_topic.add_child(app_node)

        raise_for_invalid_channel(channel)
        return channel
Esempio n. 19
0
    def download_sim(self, topic, sim, keywords, language):
        """
        Download, zip, and add a node for a sim, as well as any associated video.

        Args:
            topic: node that receives the video node (if any) and then the
                sim node.
            sim: sim metadata dict; reads ``localizedSimulations``,
                ``credits``, ``description`` and ``media``.
            keywords: not used by this method (only referenced by the
                disabled ``tags`` argument).
            language: language code used to pick the description and, for
                ``"ar"``, to remap titles through ``ARABIC_NAME_CATEGORY``
                and ``SIM_TYPO``.
        """

        localized_sim = sim["localizedSimulations"][0]

        print("\tProcessing sim:", localized_sim["title"])

        dst = tempfile.mkdtemp()
        download_file(
            localized_sim["downloadUrl"],
            dst,
            filename="index.html",
            request_fn=sess.get,
            middleware_callbacks=[process_sim_html],
        )

        zippath = create_predictable_zip(dst)

        # Raw strings: the backslashes belong to the regex, not to Python
        # string escapes (non-raw "\(" triggers an invalid-escape warning).
        authors = re.sub(r" \(.*?\)", "", sim["credits"]["designTeam"])
        authors = re.sub(r"<br\/?>", ", ", authors)

        title = localized_sim["title"]
        if language == "ar":
            if title in ARABIC_NAME_CATEGORY:
                title = ARABIC_NAME_CATEGORY[title]
            if title in SIM_TYPO:
                title = SIM_TYPO[title]

        # create a node for the sim
        simnode = HTML5AppNode(
            source_id="sim-%d" % localized_sim["id"],
            files=[HTMLZipFile(zippath)],
            title=title,
            description=sim["description"][language][:200],
            license=CC_BYLicense(
                "PhET Interactive Simulations, University of Colorado Boulder"
            ),
            # author=authors,
            # tags=[keywords[topic] for topic in sim["topicIds"]],
            thumbnail=sim["media"]["thumbnailUrl"],
            language=getlang(language),
        )

        # if there's a video, extract it and put it in the topic right before the sim
        videos = sim["media"]["vimeoFiles"]
        if videos:
            # Only the 540-pixel-high rendition is used. When it is missing
            # the video is skipped instead of failing with IndexError.
            video_540 = next(
                (v for v in videos if v.get("height") == 540), None)
            if video_540 is None:
                print("\tNo 540p video for sim:", localized_sim["title"])
            else:
                videonode = VideoNode(
                    source_id="video-%d" % localized_sim["id"],
                    files=[VideoFile(video_540["link"])],
                    title="Video: %s" % localized_sim["title"],
                    license=CC_BYLicense(
                        "PhET Interactive Simulations, University of Colorado Boulder"
                    ),
                    thumbnail=sim["media"]["thumbnailUrl"],
                )

                topic.add_child(videonode)

        # add the sim node into the topic
        topic.add_child(simnode)
Esempio n. 20
0
    def create_content_nodes(self, channel):
        """
        Build the sample tree under ``channel`` with `add_child` and `add_file`.

        A top-level "Content Nodes" folder receives four sub-folders (audio,
        documents, HTML5 apps, videos), each holding the sample content
        nodes defined below. Every content node carries one or more files.
        """
        english = getlang('en').id

        def make_folder(source_id, title):
            # All sample folders share the same placeholder metadata.
            return TopicNode(
                source_id=source_id,
                title=title,
                description='Put folder description here',
                author=None,
                language=english,
                thumbnail=None,
            )

        def cc_by(holder='Copyright holder name'):
            # A fresh CC BY license object for every node.
            return get_license(licenses.CC_BY, copyright_holder=holder)

        content_nodes_folder = make_folder('uniqid001', 'Content Nodes')
        channel.add_child(content_nodes_folder)

        # AUDIO
        audio_nodes_folder = make_folder('uniqid002', 'Audio Files Folder')
        content_nodes_folder.add_child(audio_nodes_folder)

        audio_node = AudioNode(
            source_id='uniqid003',
            title='Whale sounds',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=english,
            license=cc_by(),
            thumbnail=None,
            files=[],
        )
        audio_nodes_folder.add_child(audio_node)
        # note path can also be a URL
        audio_node.add_file(
            AudioFile(
                path='./content/ricecooker-channel-files/Whale_sounds.mp3',
                language=english))

        # DOCUMENTS
        documents_folder = make_folder('uniqid004', 'Document Nodes')
        content_nodes_folder.add_child(documents_folder)

        document_file = DocumentFile(
            path=
            './content/ricecooker-channel-files/brown-vs-board-of-education.pdf',
            language=english)
        document_node = DocumentNode(
            source_id='uniqid005',
            title=
            'The Supreme Court\u2019s Ruling in Brown vs. Board of Education',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=english,
            license=cc_by(),
            thumbnail=None,
            files=[document_file],
        )
        documents_folder.add_child(document_node)

        # HTML5 APPS
        html5apps_folder = make_folder('uniqid006', 'HTML5App Nodes')
        content_nodes_folder.add_child(html5apps_folder)

        tests_zip = HTMLZipFile(
            path='./content/ricecooker-channel-files/html5_tests.zip',
            language=english)
        html5_node = HTML5AppNode(
            source_id='uniqid007',
            title='HTMLWeb capabilities test',
            author='First Last (author\'s name)',
            description=
            'Tests different HTML/JS capabilities. What capabilities are allowed and disallowed by the sandboxed iframe used to render HTML5App nodes on Kolibri.',
            language=english,
            license=cc_by(),
            thumbnail='./content/ricecooker-channel-files/html5_tests.jpg',
            files=[tests_zip],
        )
        html5apps_folder.add_child(html5_node)

        vuejs_zip = HTMLZipFile(
            path='./content/ricecooker-channel-files/html5_vuejs.zip',
            language=english)
        html5_node2 = HTML5AppNode(
            source_id='uniqid008',
            title='Sample Vue.js app',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=english,
            license=cc_by(),
            thumbnail='./content/ricecooker-channel-files/html5_vuejs.jpg',
            files=[vuejs_zip],
        )
        html5apps_folder.add_child(html5_node2)

        # VIDEOS
        videos_folder = make_folder('uniqid009', 'Video Nodes')
        content_nodes_folder.add_child(videos_folder)

        local_video = VideoFile(
            path=
            './content/ricecooker-channel-files/Wave_particle_duality.mp4',
            language=english)
        video_node = VideoNode(
            source_id='uniqid010',
            title='Wave particle duality explained in 2 mins',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=english,
            license=cc_by(),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[local_video],
        )
        videos_folder.add_child(video_node)

        youtube_id = 'VJyk81HmcZQ'
        youtube_files = [
            YouTubeVideoFile(youtube_id=youtube_id,
                             high_resolution=False,
                             language='en'),
            YouTubeSubtitleFile(youtube_id=youtube_id, language='ko'),
        ]
        video_node2 = VideoNode(
            source_id=youtube_id,  # usually set source_id to youtube_id
            title='Estimating division that results in non whole numbers',
            author='Sal Khan',
            description='Video description would go here',
            language=english,
            license=cc_by('Khan Academy'),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=youtube_files,
        )
        videos_folder.add_child(video_node2)
Esempio n. 21
0
    resource_urls = [
        urljoin(top_url, x)
        for x in root.xpath("//h2[@class='resource-listing__title']/a/@href")
    ]
    resource_titles = [
        x.text_content().strip()
        for x in root.xpath("//h2[@class='resource-listing__title']/a")
    ]
    resources.append([resource_title, zip(resource_titles, resource_urls)])

# get html apps
import localise

apps = []
for url, title in urls.items():
    # Download and parse the page behind this entry.
    page_url = urljoin(top_url, url)
    page = requests.get(page_url)
    root = lxml.html.fromstring(page.content)

    # One-element unpacking: raises ValueError unless exactly one node matches.
    (container,) = root.xpath("//section[@class='page__content']")
    (drop,) = container.xpath(".//div[@id='resource-listing-container']")
    # Detach the resource listing from the content before serialising it.
    drop.getparent().remove(drop)

    html = lxml.html.tostring(container)
    print(html)
    soup = BeautifulSoup(html, "html5lib")

    # localise.make_local is a project helper; whatever it returns is handed
    # straight to HTMLZipFile (presumably a zip path -- confirm in localise).
    zip_ = localise.make_local(soup, url)
    app_file = HTMLZipFile(zip_)
    app = HTML5AppNode(
        source_id="app_{}".format(url),
        title=title,
        license=LICENCE,
        files=[app_file],
        role=COACH,
    )
    apps.append(app)
Esempio n. 22
0
    def create_content_nodes(self, channel):
        """
        Build the sample hierarchy of topic nodes and content nodes under
        `channel`.

        A 'Content Nodes' topic is attached to `channel` with `add_child`, and
        one sub-topic per content kind (audio, documents, HTML5 apps, videos)
        is attached beneath it. Each content node is linked to its file either
        through the `files` argument or, for the audio sample, through
        `add_file` after the node has been added to its topic.
        """
        sample_author = "First Last (author's name)"
        sample_description = 'Put file description here'

        def attach_topic(parent, source_id, title):
            # Create a folder with the shared placeholder metadata and hang it
            # under `parent` immediately.
            topic = TopicNode(
                source_id=source_id,
                title=title,
                description='Put folder description here',
                author=None,
                language=getlang('en').id,
                thumbnail=None,
            )
            parent.add_child(topic)
            return topic

        def sample_license():
            # Every sample content node uses the same CC BY placeholder license.
            return get_license(licenses.CC_BY,
                               copyright_holder='Copyright holder name')

        root_topic = attach_topic(channel, '121232ms', 'Content Nodes')

        # AUDIO
        audio_topic = attach_topic(root_topic, '138iuh23iu', 'Audio Files')
        whale_sounds = AudioNode(
            source_id='940ac8ff',
            title='Whale sounds',
            author=sample_author,
            description=sample_description,
            language=getlang('en').id,
            license=sample_license(),
            thumbnail=None,
            files=[],
        )
        audio_topic.add_child(whale_sounds)
        # The audio file is attached after the node is already in the tree.
        whale_sounds_mp3 = AudioFile(
            path='./content/ricecooker-channel-files/Whale_sounds.mp3',
            language=getlang('en').id,
        )
        whale_sounds.add_file(whale_sounds_mp3)

        # DOCUMENTS
        documents_topic = attach_topic(root_topic, 'asanlksnaklsn',
                                       'Document Nodes')
        brown_ruling = DocumentNode(
            source_id='80b7136f',
            title='The Supreme Court\u2019s Ruling in Brown vs. Board of Education',
            author=sample_author,
            description=sample_description,
            language=getlang('en').id,
            license=sample_license(),
            thumbnail=None,
            files=[
                DocumentFile(
                    path='./content/ricecooker-channel-files/commonlit_the-supreme-court-s-ruling-in-brown-vs-board-of-education_student.pdf',
                    language=getlang('en').id,
                ),
            ],
        )
        documents_topic.add_child(brown_ruling)

        # HTML5 APPS
        html5_topic = attach_topic(root_topic, 'asasa331', 'HTML5App Nodes')
        # (source_id, title, thumbnail path, zip path) for each sample app.
        html5_samples = (
            ('302723b4',
             'Sample React app',
             './content/ricecooker-channel-files/html5_react.jpg',
             './content/ricecooker-channel-files/html5_react.zip'),
            ('3f91184e',
             'Sample Vue.js app',
             './content/ricecooker-channel-files/html5_vuejs.jpg',
             './content/ricecooker-channel-files/html5_vuejs.zip'),
            ('0aec4296',
             'Sample wget-scraped web content',
             './content/ricecooker-channel-files/html5_wget_scraped.jpg',
             './content/ricecooker-channel-files/html5_wget_scraped.zip'),
        )
        for app_source_id, app_title, app_thumbnail, app_zip in html5_samples:
            html5_node = HTML5AppNode(
                source_id=app_source_id,
                title=app_title,
                author=sample_author,
                description=sample_description,
                language=getlang('en').id,
                license=sample_license(),
                thumbnail=app_thumbnail,
                files=[
                    HTMLZipFile(path=app_zip, language=getlang('en').id),
                ],
            )
            html5_topic.add_child(html5_node)

        # VIDEOS
        videos_topic = attach_topic(root_topic, '121213m3m3', 'Video Nodes')
        wave_particle_video = VideoNode(
            source_id='9e355995',
            title='Wave particle duality explained in 2 mins',
            author=sample_author,
            description=sample_description,
            language=getlang('en').id,
            license=sample_license(),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[
                VideoFile(
                    path='./content/ricecooker-channel-files/Wave_particle_duality.mp4',
                    language=getlang('en').id,
                ),
            ],
        )
        videos_topic.add_child(wave_particle_video)
Esempio n. 23
0
# Build one HTML5 app per (link, title) pair: fetch the page, keep only its
# main body, append the list of related careers, and wrap the result in an
# HTML5AppNode that is collected in `apps`.
for link, title in link_pairs:
    # Fetch the page once and parse that response. The page used to be
    # requested twice, with the first response thrown away.
    app_response = requests.get(urljoin(top_url, link))
    root = lxml.html.fromstring(app_response.content)
    # One-element unpacking: raises ValueError unless exactly one node matches.
    container, = root.xpath("//div[@class='college-major__body wysiwyg']")
    roles = [
        x.text_content().strip()
        for x in root.xpath("//a[@class='career-wrapper']")
    ]

    # Append a "Related Careers" heading followed by one list item per role.
    h4 = lxml.html.Element("h4")
    h4.text = "Related Careers"
    container.append(h4)
    ul = lxml.html.Element("ul")
    for role in roles:
        li = lxml.html.Element("li")
        li.text = role
        ul.append(li)
    container.append(ul)

    # Serialise the augmented container and re-parse it with BeautifulSoup,
    # which is the form localise.make_local is called with.
    html = lxml.html.tostring(container)
    soup = BeautifulSoup(html, "html5lib")

    # localise.make_local is a project helper; its result is passed straight
    # to HTMLZipFile (presumably a zip path -- confirm in localise).
    zip_ = localise.make_local(soup, link)
    app_file = HTMLZipFile(zip_)
    app = HTML5AppNode(source_id="app_{}".format(link),
                       title=title,
                       license=LICENCE,
                       files=[app_file])
    apps.append(app)