Exemple #1
0
def download_wikipedia_page(url, thumbnail, title):
    """Package a single Wikipedia page as an HTML5 app node.

    The page at ``url`` is saved as ``index.html`` inside a fresh temporary
    directory (``process_wikipedia_page`` is handed to ``download_file`` as
    the middleware callback), the directory is zipped, and the zip becomes
    the only file of the returned node.

    Args:
        url: Address of the page. Its last ``/``-separated segment is used as
            the node's ``source_id``.
        thumbnail: Passed through to the node unchanged.
        title: Passed through to the node unchanged.

    Returns:
        The ``HTML5AppNode`` wrapping the zipped page, licensed as public
        domain.
    """
    # Scratch directory that receives everything fetched for this page.
    workdir = tempfile.mkdtemp()

    # Fetch the main page into the scratch directory as index.html.
    _local_ref, _ = download_file(
        url,
        workdir,
        filename="index.html",
        middleware_callbacks=process_wikipedia_page,
        request_fn=make_request,
    )

    # Zip up the scratch directory and wrap the archive in a file object.
    archive_path = create_predictable_zip(workdir)
    page_zip = HTMLZipFile(archive_path)

    # The trailing path segment of the URL identifies the node.
    page_slug = url.rsplit("/", 1)[-1]

    return HTML5AppNode(
        files=[page_zip],
        title=title,
        thumbnail=thumbnail,
        source_id=page_slug,
        license=licenses.PublicDomainLicense(),
    )
Exemple #2
0
    def construct_channel(self, *args, **kwargs):
        """Build the channel: one topic holding a Wikipedia page and a video.

        The topic "Articles containing video clips" is attached to the channel
        and receives two children, in this order: an HTML5 app produced by
        ``download_wikipedia_page`` and a ``VideoNode`` carrying the video
        file plus one SRT subtitle file per language in ``('en', 'es')``.

        Args:
            *args: Accepted but not used.
            **kwargs: Forwarded to ``self.get_channel``.

        Returns:
            The channel node returned by ``self.get_channel``, with the topic
            attached.
        """
        channel = self.get_channel(**kwargs)

        # The same category path serves as the topic id and as the page URL.
        category_path = "/wiki/Category:Articles_containing_video_clips"
        clips_topic = TopicNode(
            source_id=category_path,
            title="Articles containing video clips",
        )
        channel.add_child(clips_topic)

        # First child: the category page packaged as an HTML5 app.
        clips_topic.add_child(
            download_wikipedia_page(
                category_path,
                'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ee/A_Is_for_Atom_1953.webm/220px--A_Is_for_Atom_1953.webm.jpg',
                'A Is for Atom',
            )
        )

        # Second child: the video itself.
        clip_file = VideoFile(
            path='https://upload.wikimedia.org/wikipedia/commons/e/ee/A_Is_for_Atom_1953.webm'
        )
        clip_node = VideoNode(
            title='A Is for Atom 1953',
            source_id='A_Is_for_Atom_1953.webm',
            files=[clip_file],
            license=licenses.PublicDomainLicense(),
        )

        # Attach one subtitle track per language; {} receives the language code.
        subtitle_template = 'https://commons.wikimedia.org/w/api.php?action=timedtext&title=File%3AA_Is_for_Atom_1953.webm&lang={}&trackformat=srt'
        for code in ('en', 'es'):
            clip_node.add_file(
                SubtitleFile(
                    path=subtitle_template.format(code),
                    language=code,
                    subtitlesformat='srt',
                )
            )

        clips_topic.add_child(clip_node)

        return channel
def download_puzzle(puzzle_url, title, description, thumbnail,
                    le_language_code, blockly_language_code):
    """Download a single puzzle and return an HTML5 app node.

    The puzzle page is rendered through ``WebDriver``, its static assets are
    downloaded into a fresh temporary directory, a few page elements are
    removed, local ``static`` files are copied in and referenced from the
    page, and the resulting directory is zipped.

    Args:
        puzzle_url: Path (optionally with a query string) appended to
            ``https://blockly-games.appspot.com/``; also used as the node's
            ``source_id``. The part before ``?`` is passed to
            ``download_additional_assets`` as the puzzle name.
        title: Node title; passed through ``truncate_metadata``.
        description: Node description.
        thumbnail: Node thumbnail.
        le_language_code: Value for the node's ``language``.
        blockly_language_code: Value injected into the page as
            ``window["BlocklyGamesLang"]``.

    Returns:
        A ``nodes.HTML5AppNode`` whose single file is the zipped puzzle.
    """
    with WebDriver("https://blockly-games.appspot.com/%s" % puzzle_url,
                   delay=1000) as driver:
        doc = BeautifulSoup(driver.page_source, "html.parser")

    # Create a temporary folder to download all the files for a puzzle.
    destination = tempfile.mkdtemp()

    # Download all the JS/CSS/images/audio/etc we can get from scraping the
    # page source.
    doc = download_static_assets(doc,
                                 destination,
                                 'https://blockly-games.appspot.com',
                                 request_fn=make_request,
                                 url_blacklist=['analytics.js'])

    # Download other files not picked up by the above generic assets fetching,
    # e.g. from GitHub.
    puzzle_name = puzzle_url.split('?')[0]
    download_additional_assets(destination, puzzle_name)

    # Make some modifications to the HTML source -- drop the language menu
    # and the title element.
    remove_node(doc, '#languageMenu')
    remove_node(doc, '#title')

    # Copy over some of our own JS/CSS files (from "static", resolved against
    # the current working directory) and then add links to them in the page
    # source.
    copy_tree("static", os.path.join(destination, "static"))

    chef_body_script = doc.new_tag("script", src="static/chef_end_of_body.js")
    doc.select_one('body').append(chef_body_script)

    # Inserted first in <head> so the language is set before other scripts run.
    chef_head_script = doc.new_tag("script")
    chef_head_script.string = 'window["BlocklyGamesLang"] = "%s";' % blockly_language_code
    doc.select_one('head').insert(0, chef_head_script)

    # Write out the HTML source. The encoding is pinned to UTF-8 so that
    # localized pages with non-ASCII text are written the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(os.path.join(destination, "index.html"), "w",
              encoding="utf-8") as f:
        f.write(str(doc))

    print(
        "    Downloaded puzzle %s titled \"%s\" (thumbnail %s) to destination %s"
        % (puzzle_url, title, thumbnail, destination))

    zip_path = create_predictable_zip(destination)
    return nodes.HTML5AppNode(
        source_id=puzzle_url,
        title=truncate_metadata(title),
        description=description,
        license=licenses.PublicDomainLicense(copyright_holder='Google'),
        thumbnail=thumbnail,
        files=[files.HTMLZipFile(zip_path)],
        language=le_language_code,
    )
Exemple #4
0
    def construct_channel(self, *args, **kwargs):
        """Build the channel with one topic per language, each holding one app.

        For every language name in ``self.data`` (processed in sorted order)
        the page at ``lang_data['url']`` is fetched, its ``#translations``
        block is removed if present, the page is written as ``index.html``
        into the zip directory prepared by ``self.client``, and the directory
        is zipped and moved to ``self.ZIP_DIR``. The zip is wrapped in an
        ``HTML5AppNode`` placed under a ``TopicNode`` named after the language.

        Args:
            *args: Forwarded to ``self.get_channel``.
            **kwargs: Forwarded to ``self.get_channel``.

        Returns:
            The channel node with one topic per language attached.

        Raises:
            AttributeError: If a page has no ``<span id="share_title">``.
        """
        # Function-scope import: only needed for the final move of the zip.
        import shutil

        channel = self.get_channel(
            *args,
            **kwargs)  # Create ChannelNode from data in self.channel_info

        for lang_name in sorted(self.data):
            lang_data = self.data[lang_name]
            LOGGER.info("Creating app for language: {}".format(lang_name))
            lang = languages.getlang_by_native_name(lang_name)

            zip_dir = self.client.create_zip_dir_for_page(lang_data['url'])

            soup = self.client.get_page_soup(lang_data['url'])

            # Remove the translation list if found
            translations = soup.find('div', {'id': 'translations'})
            if translations:
                translations.extract()

            # Grab the localized title
            title = soup.find('span', {'id': 'share_title'}).text

            # Use the first resource whose path mentions dp3t.png as the
            # thumbnail; stays None when no resource matches.
            thumbnail = None
            for resource in lang_data['resources']:
                if 'dp3t.png' in resource:
                    thumbnail = os.path.join(zip_dir, resource)
                    break

            # Save the modified index.html page
            with open(os.path.join(zip_dir, 'index.html'), 'wb') as f:
                f.write(soup.prettify(encoding='utf-8'))

            # create_predictable_zip ensures that the ZIP file does not change each time it's created. This
            # ensures that the zip doesn't get re-uploaded just because zip metadata changed.
            zip_file = zip.create_predictable_zip(zip_dir)
            # Name the zip after the language code when the language was
            # recognized, otherwise fall back to the native name.
            zip_name = lang.primary_code if lang else lang_name
            zip_filename = os.path.join(self.ZIP_DIR,
                                        "{}.zip".format(zip_name))
            os.makedirs(os.path.dirname(zip_filename), exist_ok=True)
            # shutil.move falls back to copy-and-delete when the generated zip
            # and ZIP_DIR live on different filesystems, where a plain
            # os.rename would fail.
            shutil.move(zip_file, zip_filename)

            topic = nodes.TopicNode(source_id=lang_name, title=lang_name)
            zip_node = nodes.HTML5AppNode(
                source_id="covid19-sim-{}".format(lang_name),
                title=title,
                files=[files.HTMLZipFile(zip_filename)],
                license=licenses.PublicDomainLicense(
                    "Marcel Salathé & Nicky Case"),
                language=lang,
                thumbnail=thumbnail)
            topic.add_child(zip_node)
            channel.add_child(topic)

        return channel
    def construct_channel(self, *args, **kwargs):
        """
        Creates ChannelNode and build topic tree
        Args:
          - args: arguments passed in during upload_channel (currently None)
          - kwargs: extra arguments and options not handled by `uploadchannel`.
            For example, add the command line option   lang="fr"  and the string
            "fr" will be passed along to `construct_channel` as kwargs['lang'].
        Returns: ChannelNode

        Healing Classrooms is organized with the following hierarchy:
            Playlist (TopicNode)
            |   Youtube Video (VideoNode)
            |   Youtube Video (VideoNode)

        Each video uses its first listed thumbnail; when a video lists no
        thumbnails, no thumbnail is set and `derive_thumbnail` is turned on.
        """
        channel = self.get_channel(*args, **kwargs)  # Create ChannelNode from data in self.channel_info

        # Download the playlist/video information
        with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
            info_dict = ydl.extract_info(PLAYLISTS_URL, download=False)

            # Generate topics based off playlist entries in dict
            for playlist in info_dict['entries']:

                # Get language of playlist (hack): guessed from the playlist
                # title, defaulting to French.
                language = "fr"
                if "English" in playlist['title']:
                    language = "en"
                elif "Arabic" in playlist['title']:
                    language = "ar"

                playlist_topic = nodes.TopicNode(title=playlist['title'], source_id=playlist['id'], language=language)
                channel.add_child(playlist_topic)

                # Generate videos based off video entries in dict
                for video in playlist['entries']:
                    # Use None (not the integer 0 that `len(...) and ...`
                    # produced) when there is no thumbnail, and tolerate a
                    # missing 'thumbnails' key.
                    thumbnails = video.get('thumbnails') or []
                    thumbnail_url = thumbnails[0]['url'] if thumbnails else None

                    playlist_topic.add_child(nodes.VideoNode(
                        title=video['title'],
                        source_id=video['id'],
                        license=licenses.PublicDomainLicense(),
                        description=video['description'],
                        derive_thumbnail=not thumbnail_url,
                        files=[files.WebVideoFile(video['webpage_url'])],
                        thumbnail=thumbnail_url,
                        author=AUTHOR,
                        # tags = video['categories'] + video['tags'], # TODO: uncomment this when added
                    ))

        raise_for_invalid_channel(channel)  # Check for errors in channel construction

        return channel
Exemple #6
0
    def construct_channel(self, *args, **kwargs):
        """
        Creates ChannelNode and build topic tree
        Args:
          - args: arguments passed in during upload_channel (currently None)
          - kwargs: extra arguments and options not handled by `uploadchannel`.
            For example, add the command line option   lang="fr"  and the string
            "fr" will be passed along to `construct_channel` as kwargs['lang'].
        Returns: ChannelNode

        The channel is flat: every entry found at PLAYLISTS_URL becomes a
        VideoNode attached directly to the channel.
            Channel (ChannelNode)
            |   Youtube Video (VideoNode)
            |   Youtube Video (VideoNode)

        Videos are ordered by the first number appearing in their title; a
        title without any digits raises AttributeError. Any exception raised
        while building the tree has its traceback printed to stdout and is
        then re-raised.
        """
        channel = self.get_channel(
            *args,
            **kwargs)  # Create ChannelNode from data in self.channel_info

        # Download the playlist/video information
        try:
            with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
                info_dict = ydl.extract_info(PLAYLISTS_URL, download=False)
                print(info_dict.keys())

                # Order the videos by the first number found in each title.
                # The pattern is a raw string so that \d is not treated as an
                # (invalid) string escape sequence.
                videos = sorted(
                    info_dict['entries'],
                    key=lambda x: int(re.search(r"\d+", x['title']).group()))
                print([v['title'] for v in videos])

                # Generate videos based off video entries in dict
                for video in videos:
                    # Use None (not the integer 0 that `len(...) and ...`
                    # produced) when there is no thumbnail, and tolerate a
                    # missing 'thumbnails' key.
                    thumbnails = video.get('thumbnails') or []
                    thumbnail_url = thumbnails[0]['url'] if thumbnails else None

                    channel.add_child(
                        nodes.VideoNode(
                            title=video['title'],
                            source_id=video['id'],
                            license=licenses.PublicDomainLicense(),
                            description=video['description'],
                            derive_thumbnail=not thumbnail_url,
                            files=[files.WebVideoFile(video['webpage_url'])],
                            thumbnail=thumbnail_url,
                            author=AUTHOR,
                            # tags = video['categories'] + video['tags'], # TODO: uncomment this when added
                        ))
        except Exception:
            # Print the traceback to stdout before propagating the error.
            import sys
            import traceback
            traceback.print_exc(file=sys.stdout)
            raise

        raise_for_invalid_channel(
            channel)  # Check for errors in channel construction

        return channel