def download_wikipedia_page(url, thumbnail, title):
    """Download a Wikipedia page and wrap it in an HTML5 app node.

    The page at ``url`` is saved as ``index.html`` inside a fresh temporary
    directory, with ``process_wikipedia_page`` passed as the download
    middleware. The directory is then zipped and the zip becomes the single
    file of the returned node.

    Args:
        url: address of the page to download; the text after its last "/"
            is used as the node's ``source_id``.
        thumbnail: thumbnail handed through to the node unchanged.
        title: title handed through to the node unchanged.

    Returns:
        HTML5AppNode: node with a public-domain license whose only file is
        the zipped page.
    """
    # Scratch directory that receives the downloaded files.
    workdir = tempfile.mkdtemp()

    # Fetch the main page as index.html, running it through the middleware
    # processor. The returned pair is not used afterwards.
    _local_ref, _ = download_file(
        url,
        workdir,
        filename="index.html",
        middleware_callbacks=process_wikipedia_page,
        request_fn=make_request,
    )

    # Pack the scratch directory into a zip and expose it as an HTML5 app.
    archive_path = create_predictable_zip(workdir)
    return HTML5AppNode(
        files=[HTMLZipFile(archive_path)],
        title=title,
        thumbnail=thumbnail,
        source_id=url.rsplit("/", 1)[-1],
        license=licenses.PublicDomainLicense(),
    )
def construct_channel(self, *args, **kwargs):
    """Build the channel: one topic holding an article app and a video.

    The topic "Articles containing video clips" is attached to the channel.
    It receives the HTML5 app produced by ``download_wikipedia_page`` and a
    video node for "A Is for Atom 1953" carrying English and Spanish SRT
    subtitle files.

    Args:
        *args: accepted but not used by this method.
        **kwargs: forwarded to ``self.get_channel``.

    Returns:
        The channel returned by ``self.get_channel`` with the topic attached.
    """
    channel = self.get_channel(**kwargs)

    # The same category path identifies the topic and is the page to fetch.
    category_path = '/wiki/Category:Articles_containing_video_clips'
    topic = TopicNode(
        source_id=category_path,
        title="Articles containing video clips",
    )
    channel.add_child(topic)

    # Article page, packaged as an HTML5 app.
    article_thumbnail = 'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ee/A_Is_for_Atom_1953.webm/220px--A_Is_for_Atom_1953.webm.jpg'
    topic.add_child(
        download_wikipedia_page(category_path, article_thumbnail, 'A Is for Atom')
    )

    # The video itself.
    source_video = VideoFile(
        path='https://upload.wikimedia.org/wikipedia/commons/e/ee/A_Is_for_Atom_1953.webm'
    )
    video_node = VideoNode(
        title='A Is for Atom 1953',
        source_id='A_Is_for_Atom_1953.webm',
        files=[source_video],
        license=licenses.PublicDomainLicense(),
    )

    # One subtitle track per language; the language code fills the "{}" slot.
    subtitle_template = 'https://commons.wikimedia.org/w/api.php?action=timedtext&title=File%3AA_Is_for_Atom_1953.webm&lang={}&trackformat=srt'
    for code in ('en', 'es'):
        video_node.add_file(
            SubtitleFile(
                path=subtitle_template.format(code),
                language=code,
                subtitlesformat='srt',
            )
        )

    topic.add_child(video_node)
    return channel
def download_puzzle(puzzle_url, title, description, thumbnail,
                    le_language_code, blockly_language_code):
    """Download a single puzzle and return an HTML5 app node.

    The rendered page is scraped into a temporary folder together with its
    static assets, lightly modified, written out as ``index.html`` and zipped.

    Args:
        puzzle_url: path (optionally with a query string) appended to
            ``https://blockly-games.appspot.com/``. It is also used as the
            node's ``source_id``, and the part before ``?`` is passed to
            ``download_additional_assets`` as the puzzle name.
        title: node title; passed through ``truncate_metadata``.
        description: node description.
        thumbnail: node thumbnail.
        le_language_code: value used for the node's ``language``.
        blockly_language_code: value injected into the page as
            ``window["BlocklyGamesLang"]``.

    Returns:
        nodes.HTML5AppNode: node whose only file is the zipped puzzle.
    """
    # Only the page source is needed from the browser session.
    with WebDriver("https://blockly-games.appspot.com/%s" % puzzle_url, delay=1000) as driver:
        doc = BeautifulSoup(driver.page_source, "html.parser")

    # Create a temporary folder to download all the files for a puzzle.
    destination = tempfile.mkdtemp()

    # Download all the JS/CSS/images/audio/etc we can get from scraping the
    # page source.
    doc = download_static_assets(
        doc,
        destination,
        'https://blockly-games.appspot.com',
        request_fn=make_request,
        url_blacklist=['analytics.js'],
    )

    # Download other files not picked up by the above generic assets fetching,
    # e.g. from GitHub.
    puzzle_name = puzzle_url.split('?')[0]
    download_additional_assets(destination, puzzle_name)

    # Make some modifications to the HTML source -- hide some elements.
    remove_node(doc, '#languageMenu')
    remove_node(doc, '#title')

    # Copy over some of our own JS/CSS files and then add links to them in the
    # page source.
    copy_tree("static", os.path.join(destination, "static"))

    chef_body_script = doc.new_tag("script", src="static/chef_end_of_body.js")
    doc.select_one('body').append(chef_body_script)

    # Inserted first in <head> so the language is set before other scripts run.
    chef_head_script = doc.new_tag("script")
    chef_head_script.string = 'window["BlocklyGamesLang"] = "%s";' % blockly_language_code
    doc.select_one('head').insert(0, chef_head_script)

    # Write out the HTML source. The encoding is pinned to UTF-8 because
    # localized pages contain non-ASCII text and the platform default
    # encoding is not guaranteed to be able to represent it.
    with open(os.path.join(destination, "index.html"), "w", encoding="utf-8") as f:
        f.write(str(doc))

    print(
        " Downloaded puzzle %s titled \"%s\" (thumbnail %s) to destination %s"
        % (puzzle_url, title, thumbnail, destination))
    # preview_in_browser(destination)

    zip_path = create_predictable_zip(destination)
    return nodes.HTML5AppNode(
        source_id=puzzle_url,
        title=truncate_metadata(title),
        description=description,
        license=licenses.PublicDomainLicense(copyright_holder='Google'),
        thumbnail=thumbnail,
        files=[files.HTMLZipFile(zip_path)],
        language=le_language_code,
    )
def construct_channel(self, *args, **kwargs):
    """Build the channel tree: one topic with one HTML5 app per language.

    For every language name in ``self.data`` (processed in sorted order) the
    page at ``lang_data['url']`` is fetched, its translation list is removed,
    the result is written as ``index.html`` into the page's zip directory and
    zipped, and the zip is moved into ``self.ZIP_DIR``.

    Args:
        *args: forwarded unchanged to ``self.get_channel``.
        **kwargs: forwarded unchanged to ``self.get_channel``.

    Returns:
        The channel returned by ``self.get_channel`` with one ``TopicNode``
        (holding a single ``HTML5AppNode``) added per language.
    """
    # Function-scope import: only needed for relocating the generated zip.
    import shutil

    # Create ChannelNode from data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    for lang_name in sorted(self.data):
        lang_data = self.data[lang_name]
        LOGGER.info("Creating app for language: {}".format(lang_name))
        lang = languages.getlang_by_native_name(lang_name)

        zip_dir = self.client.create_zip_dir_for_page(lang_data['url'])
        soup = self.client.get_page_soup(lang_data['url'])

        # Remove the translation list if found
        translations = soup.find('div', {'id': 'translations'})
        if translations:
            translations.extract()

        # Grab the localized title
        title = soup.find('span', {'id': 'share_title'}).text

        # Use the first resource whose name contains 'dp3t.png' as thumbnail.
        thumbnail = None
        for resource in lang_data['resources']:
            if 'dp3t.png' in resource:
                thumbnail = os.path.join(zip_dir, resource)
                break

        # Save the modified index.html page
        with open(os.path.join(zip_dir, 'index.html'), 'wb') as f:
            f.write(soup.prettify(encoding='utf-8'))

        # create_predictable_zip ensures that the ZIP file does not change
        # each time it's created. This ensures that the zip doesn't get
        # re-uploaded just because zip metadata changed.
        zip_file = zip.create_predictable_zip(zip_dir)

        # Fall back to the native language name when no language was matched.
        zip_name = lang.primary_code if lang else lang_name
        zip_filename = os.path.join(self.ZIP_DIR, "{}.zip".format(zip_name))
        os.makedirs(os.path.dirname(zip_filename), exist_ok=True)
        # shutil.move instead of os.rename: os.rename raises OSError when the
        # generated zip and ZIP_DIR live on different filesystems.
        shutil.move(zip_file, zip_filename)

        topic = nodes.TopicNode(source_id=lang_name, title=lang_name)
        zip_node = nodes.HTML5AppNode(
            source_id="covid19-sim-{}".format(lang_name),
            title=title,
            files=[files.HTMLZipFile(zip_filename)],
            license=licenses.PublicDomainLicense(
                "Marcel Salathé & Nicky Case"),
            language=lang,
            thumbnail=thumbnail)
        topic.add_child(zip_node)
        channel.add_child(topic)

    return channel
def construct_channel(self, *args, **kwargs):
    """Create the ChannelNode and build its topic tree from the playlists.

    Args:
        *args: arguments passed in during upload_channel (currently None).
        **kwargs: extra arguments and options not handled by `uploadchannel`.
            For example, add the command line option lang="fr" and the
            string "fr" will be passed along to `construct_channel` as
            kwargs['lang'].

    Returns:
        ChannelNode

    Healing Classrooms is organized with the following hierarchy:
        Playlist (TopicNode)
        |   Youtube Video (VideoNode)
        |   Youtube Video (VideoNode)
    """
    # Create ChannelNode from data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    # Download the playlist/video information (metadata only, no media).
    with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
        info_dict = ydl.extract_info(PLAYLISTS_URL, download=False)

    # Generate topics based off playlist entries in dict
    for playlist in info_dict['entries']:

        # Get language of playlist (hack): guessed from the playlist title,
        # defaulting to French when neither keyword is present.
        language = "fr"
        if "English" in playlist['title']:
            language = "en"
        elif "Arabic" in playlist['title']:
            language = "ar"

        playlist_topic = nodes.TopicNode(
            title=playlist['title'],
            source_id=playlist['id'],
            language=language,
        )
        channel.add_child(playlist_topic)

        # Generate videos based off video entries in dict
        for video in playlist['entries']:
            # Use the first listed thumbnail, or None when the entry has no
            # thumbnails (missing, None or empty), so the node never receives
            # the integer 0 as its thumbnail.
            thumbnails = video.get('thumbnails') or []
            thumbnail_url = thumbnails[0]['url'] if thumbnails else None

            playlist_topic.add_child(nodes.VideoNode(
                title=video['title'],
                source_id=video['id'],
                license=licenses.PublicDomainLicense(),
                description=video['description'],
                # Derive a thumbnail only when none was provided.
                derive_thumbnail=not thumbnail_url,
                files=[files.WebVideoFile(video['webpage_url'])],
                thumbnail=thumbnail_url,
                author=AUTHOR,
                # tags = video['categories'] + video['tags'], # TODO: uncomment this when added
            ))

    raise_for_invalid_channel(channel)  # Check for errors in channel construction

    return channel
def construct_channel(self, *args, **kwargs):
    """Create the ChannelNode and attach every video directly to it.

    The entries found at ``PLAYLISTS_URL`` are ordered by the first number
    appearing in their title and added to the channel as a flat list:

        Channel
        |   Youtube Video (VideoNode)
        |   Youtube Video (VideoNode)

    Args:
        *args: arguments passed in during upload_channel (currently None).
        **kwargs: extra arguments and options not handled by `uploadchannel`.
            For example, add the command line option lang="fr" and the
            string "fr" will be passed along to `construct_channel` as
            kwargs['lang'].

    Returns:
        ChannelNode

    Raises:
        Exception: any error raised while fetching or building the videos is
            re-raised after its traceback has been printed to stdout.
    """
    # Create ChannelNode from data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    def numeric_order(video):
        # Sort key: the first run of digits in the title. Titles without any
        # digits are placed after all numbered ones instead of crashing the
        # sort; sorted() is stable, so they keep their relative order.
        found = re.search(r"\d+", video['title'])
        if found is None:
            return (1, 0)
        return (0, int(found.group()))

    try:
        # Download the playlist/video information (metadata only, no media).
        with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
            info_dict = ydl.extract_info(PLAYLISTS_URL, download=False)
        print(info_dict.keys())

        # Generate videos based off video entries in dict
        videos = sorted(info_dict['entries'], key=numeric_order)
        print([v['title'] for v in videos])

        for video in videos:
            # Use the first listed thumbnail, or None when the entry has no
            # thumbnails (missing, None or empty), so the node never receives
            # the integer 0 as its thumbnail.
            thumbnails = video.get('thumbnails') or []
            thumbnail_url = thumbnails[0]['url'] if thumbnails else None

            channel.add_child(
                nodes.VideoNode(
                    title=video['title'],
                    source_id=video['id'],
                    license=licenses.PublicDomainLicense(),
                    description=video['description'],
                    # Derive a thumbnail only when none was provided.
                    derive_thumbnail=not thumbnail_url,
                    files=[files.WebVideoFile(video['webpage_url'])],
                    thumbnail=thumbnail_url,
                    author=AUTHOR,
                    # tags = video['categories'] + video['tags'], # TODO: uncomment this when added
                ))
    except Exception:
        # Show the full traceback on stdout, then let the error propagate.
        import sys
        import traceback
        traceback.print_exc(file=sys.stdout)
        raise

    raise_for_invalid_channel(channel)  # Check for errors in channel construction

    return channel