def construct_channel(self, *args, **kwargs):
    """
    Build the ChannelNode and its topic tree from the Proyecto Descartes index page.

    Args:
      - args: positional arguments, forwarded unchanged to `self.get_channel`
      - kwargs: extra arguments and options, forwarded unchanged to
        `self.get_channel` (e.g. the command line option lang="fr" arrives
        here as kwargs['lang'])

    Returns: ChannelNode
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    # Download the index page and collect the <a> tags carrying the "item" class
    index_response = downloader.make_request(
        "http://proyectodescartes.org/descartescms/")
    index_page = BeautifulSoup(index_response.content, "html.parser")
    topic_links = index_page.find_all("a", "item")

    # parse_topics yields indexable entries; the first three fields of each
    # one are handed to download_subject in order
    for subject in self.parse_topics(topic_links, channel):
        self.download_subject(subject[0], subject[1], subject[2])

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Build the ChannelNode from the scraped activities menu and video menu.

    Args:
      - args: positional arguments, forwarded unchanged to `self.get_channel`
      - kwargs: extra arguments and options, forwarded unchanged to
        `self.get_channel` (e.g. the command line option lang="fr" arrives
        here as kwargs['lang'])

    Returns: ChannelNode

    Channel structure:
        Activities
            Subject
                Subdirectory (if any)
                    Activity.zip
        Videos
            Subject
                Collection
                    Video.mp4
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    # Each scraper returns one top-level node; attach them in menu order
    activities_node = scrape_snack_menu(SNACK_URL)
    channel.add_child(activities_node)
    videos_node = scrape_video_menu(VIDEO_URL)
    channel.add_child(videos_node)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    construct_channel: Create the ChannelNode and add one folder per Wikipedia list page.

    The channel is organized with the following hierarchy:
        Citrus (Folder)
        |   Citrus Page HTML Zip (File)
        Potatoes (Folder)
        |   Potatoes Page HTML Zip (File)

    Returns: ChannelNode
    """
    LOGGER.info("Constructing channel from {}...".format(BASE_URL))

    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    # (folder title, Wikipedia page name) pairs, added in this order
    topic_pages = (
        ("Citrus!", "List_of_citrus_fruits"),
        ("Potatoes!", "List_of_potato_cultivars"),
    )
    for folder_title, page_name in topic_pages:
        create_topic(channel, folder_title, page_name)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(**kwargs):
    """
    Create a channel, populate it from SAMPLE_TREE and validate it.

    Args:
      - kwargs: options forwarded unchanged to `create_channel`

    Returns: the channel object produced by `create_channel`
    """
    sample_channel = create_channel(**kwargs)
    _build_tree(sample_channel, SAMPLE_TREE)

    # Check for errors in channel construction before handing it back
    raise_for_invalid_channel(sample_channel)
    return sample_channel
def construct_channel(self, *args, **kwargs):
    """
    construct_channel: Create the ChannelNode and scrape the content tree into it.

    Solar Spell is organized with the following hierarchy (sample):
        Creative Arts (source_id = dir-creative-arts)
        |--- Culinary Arts (source_id = dir-culinary-arts)
        |--- |--- Real Pasifik 2 introducing Chef Alexis Tahiapuhe of Tahiti
                  (source_id = file-real pasifik 2 introducing chef lela bolobolo of fiji.mp4)
        |--- Pacific Islands Arts and Culture (source_id = dir_pacific_islands_arts_and_culture)
        |--- |--- Cook Islands National Cultural Policy 10 July 2017_final english (File)
        |--- Teaching Resources and Classroom Activities
        Environment (source_id = dir-environment)
        |--- Adapting to Climate Change
        |--- |--- Action Against Climate Change Tuvalu Water and climate change
        |--- Climate Change Info
        |--- |--- Animated Pacific Island Climate Change Videos
        ...

    Returns: ChannelNode
    """
    LOGGER.info("Constructing channel from {}...".format(BASE_URL))

    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    LOGGER.info(' Writing {} Folder...'.format(CHANNEL_NAME))

    # Scraping starts from the "content/" endpoint below BASE_URL
    scrape_content(BASE_URL + "content/", channel)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Create the ChannelNode and fill it from SAMPLE_TREE.

    Args:
      - args, kwargs: forwarded unchanged to `self.get_channel`

    Returns: ChannelNode
    """
    # ChannelNode built from the data in self.channel_info
    sample_channel = self.get_channel(*args, **kwargs)
    _build_tree(sample_channel, SAMPLE_TREE)

    # Check for errors in channel construction
    raise_for_invalid_channel(sample_channel)
    return sample_channel
def construct_channel(self, *args, **kwargs):
    """
    Build the ChannelNode: one topic per book, one PDF document per chapter.

    Books are read from JSON_FILE. Each entry provides 'book_title',
    'path_or_url' and 'chapters'; the PDF is split into chapters and every
    chapter becomes a DocumentNode under the book's TopicNode.

    Args:
      - args: positional arguments, forwarded unchanged to `self.get_channel`
      - kwargs: extra arguments and options, forwarded unchanged to
        `self.get_channel` (e.g. the command line option lang="fr" arrives
        here as kwargs['lang'])

    Returns: ChannelNode
    """
    # Tags applied to every book topic and every chapter document; each node
    # receives its own list copy
    common_tags = (
        "Teacher facing",
        "Professional development",
        "Life skills",
        "Intercultural skills",
        "Mentorship",
        "Formal contexts",
    )

    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    for book in load_json_from_file(JSON_FILE):
        book_title = book['book_title']
        book_source_id = book_title.replace(" ", "_")
        pdf_location = book['path_or_url']

        book_node = nodes.TopicNode(
            source_id=book_source_id,
            title=book_title,
            tags=list(common_tags),
        )
        channel.add_child(book_node)

        # NOTE(review): the parser is opened explicitly and never closed in
        # this method — confirm whether PDFParser needs a matching close()
        splitter = pdf.PDFParser(pdf_location, toc=book['chapters'])
        splitter.open()

        for chapter in splitter.split_chapters():
            chapter_title = chapter['title']
            chapter_path = chapter['path']
            chapter_file = files.DocumentFile(chapter_path)
            book_node.add_child(
                nodes.DocumentNode(
                    source_id="{} {}".format(book_title, chapter_title),
                    title=chapter_title,
                    author="INTO",
                    tags=list(common_tags),
                    files=[chapter_file],
                    license=licenses.get_license(
                        CHANNEL_LICENSE, "INTO", LICENSE_DESCRIPTION),
                    copyright_holder="INTO",
                ))

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Create the ChannelNode and populate it via `self.create_video_subfolders`.

    Args:
      - args, kwargs: forwarded unchanged to `self.get_channel`

    Returns: ChannelNode
    """
    # ChannelNode built from the data in self.channel_info
    video_channel = self.get_channel(*args, **kwargs)
    self.create_video_subfolders(video_channel)

    # Check for errors in channel construction
    raise_for_invalid_channel(video_channel)
    return video_channel
def construct_channel(self, *args, **kwargs):
    """
    Build the ChannelNode with one topic per playlist and one video node per entry.

    Args:
      - args: positional arguments, forwarded unchanged to `self.get_channel`
      - kwargs: extra arguments and options, forwarded unchanged to
        `self.get_channel` (e.g. the command line option lang="fr" arrives
        here as kwargs['lang'])

    Returns: ChannelNode

    Healing Classrooms is organized with the following hierarchy:
        Playlist (TopicNode)
        |   Youtube Video (VideoNode)
        |   Youtube Video (VideoNode)
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    # Fetch the playlist/video metadata only; nothing is downloaded here
    with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
        info_dict = ydl.extract_info(PLAYLISTS_URL, download=False)

    # One topic per playlist entry
    for playlist in info_dict['entries']:
        playlist_title = playlist['title']

        # Language is guessed from the playlist title (hack); anything that is
        # not marked English or Arabic is treated as French
        if "English" in playlist_title:
            language = "en"
        elif "Arabic" in playlist_title:
            language = "ar"
        else:
            language = "fr"

        playlist_topic = nodes.TopicNode(title=playlist_title,
                                         source_id=playlist['id'],
                                         language=language)
        channel.add_child(playlist_topic)

        # One video node per playlist entry
        for video in playlist['entries']:
            # Use the first listed thumbnail when there is one. A missing or
            # empty list yields None (not the integer 0 that
            # `len(x) and x[0]` would produce), so the node gets a proper
            # "no thumbnail" value and derives one instead.
            thumbnails = video.get('thumbnails') or []
            thumbnail_url = thumbnails[0]['url'] if thumbnails else None

            playlist_topic.add_child(nodes.VideoNode(
                title=video['title'],
                source_id=video['id'],
                license=licenses.PublicDomainLicense(),
                description=video['description'],
                derive_thumbnail=not thumbnail_url,
                files=[files.WebVideoFile(video['webpage_url'])],
                thumbnail=thumbnail_url,
                author=AUTHOR,
                # tags = video['categories'] + video['tags'], # TODO: uncomment this when added
            ))

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Create the ChannelNode, then add content nodes followed by exercise nodes.

    Args:
      - args, kwargs: forwarded unchanged to `self.get_channel`

    Returns: ChannelNode
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    # Content nodes are added before exercise nodes
    for populate in (self.create_content_nodes, self.create_exercise_nodes):
        populate(channel)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(result=None):
    """
    Create the Magogenie ChannelNode and build its tree from the fetched data.

    Args:
      - result: accepted for interface compatibility; not read by this function

    Returns: ChannelNode
    """
    # The source data is fetched before the channel object is created
    magogenie_data = get_magogenie_info_url()

    # NOTE(review): the thumbnail is an absolute path on one developer's
    # machine — confirm it exists wherever this chef runs
    channel_settings = {
        "source_domain": "magogenie.com",
        "source_id": "Magogenie BalBharati Final Import",
        "title": "Magogenie BalBharati",
        "thumbnail": "/Users/Admin/Documents/mago.png",
    }
    magogenie_channel = nodes.ChannelNode(**channel_settings)

    _build_tree(magogenie_channel, magogenie_data)

    # Check for errors in channel construction
    raise_for_invalid_channel(magogenie_channel)
    return magogenie_channel
def construct_channel(self, *args, **kwargs):
    """
    construct_channel: Template entry point that creates the ChannelNode and
    is meant to build the topic tree.

    The scraping step is still a placeholder, so the method currently raises
    right after the channel object has been created.

    Returns: ChannelNode (once the placeholder is replaced)
    Raises: NotImplementedError while the placeholder is in place
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    # TODO: Replace line with scraping code
    raise NotImplementedError("Scraping method not implemented")

    # Not reached until the placeholder above is replaced
    raise_for_invalid_channel(channel)  # Check for errors in channel construction
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Create the ChannelNode and build its tree from the Magogenie data.

    Args:
      - args, kwargs: forwarded unchanged to `self.get_channel`

    Returns: ChannelNode
    """
    # The source data is fetched first, before the channel object exists
    magogenie_data = get_magogenie_info_url()

    # ChannelNode built from the data in self.channel_info
    magogenie_channel = self.get_channel(*args, **kwargs)

    _build_tree(magogenie_channel, magogenie_data)

    # Check for errors in channel construction
    raise_for_invalid_channel(magogenie_channel)
    return magogenie_channel
def construct_channel(args):
    """
    Create the hard-coded sample Channel and populate it from SAMPLE_TREE.

    Args:
      - args: accepted for interface compatibility; not read by this function

    Returns: Channel
    """
    thumbnail_url = (
        "https://s.graphiq.com/sites/default/files/stories/t4/"
        "15_Tiniest_Dog_Breeds_1718_3083.jpg"
    )
    sample_channel = Channel(
        domain="learningequality.org",
        channel_id="sample-channel",
        title="Sample channel",
        thumbnail=thumbnail_url,
    )

    _build_tree(sample_channel, SAMPLE_TREE)

    # Check for errors in channel construction
    raise_for_invalid_channel(sample_channel)
    return sample_channel
def construct_channel(self, *args, **kwargs):
    """
    construct_channel: Create the ChannelNode and fill it via `scrape_page`.

    Args:
      - args, kwargs: forwarded unchanged to `self.get_channel`

    Returns: ChannelNode
    """
    # ChannelNode built from the data in self.channel_info
    scraped_channel = self.get_channel(*args, **kwargs)
    scrape_page(scraped_channel)

    # Check for errors in channel construction
    raise_for_invalid_channel(scraped_channel)
    return scraped_channel
def construct_channel(self, *args, **kwargs):
    """
    Create the ChannelNode and fill it via `scrape_channel`.

    Args:
      - args: positional arguments, forwarded unchanged to `self.get_channel`
      - kwargs: extra arguments and options, forwarded unchanged to
        `self.get_channel` (e.g. the command line option lang="fr" arrives
        here as kwargs['lang'])

    Returns: ChannelNode
    """
    # ChannelNode built from the data in self.channel_info
    scraped_channel = self.get_channel(*args, **kwargs)
    scrape_channel(scraped_channel)

    # Check for errors in channel construction
    raise_for_invalid_channel(scraped_channel)
    return scraped_channel
def construct_channel(self, *args, **kwargs):
    """
    Template entry point that creates the ChannelNode and is meant to build
    the topic tree.

    The chef code is still a placeholder, so the method currently raises
    right after the channel object has been created.

    Args:
      - args: arguments passed in on the command line
      - kwargs: extra options passed in as key="value" pairs on the command line.
        For example, add the command line option lang="fr" and the value
        "fr" will be passed along to `construct_channel` as kwargs['lang'].

    Returns: ChannelNode (once the placeholder is replaced)
    Raises: NotImplementedError while the placeholder is in place
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    # TODO: Replace next line with chef code
    # The message now names the method correctly (it used to read
    # "constuct_channel"), so searching for the method name finds it
    raise NotImplementedError("construct_channel method not implemented yet...")

    # Not reached until the placeholder above is replaced
    raise_for_invalid_channel(channel)  # Check for errors in channel construction
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Build the ChannelNode by parsing the courses page.

    The page is read from a file named "coursepage" in the current working
    directory, not fetched over the network.

    Args:
      - args: positional arguments, forwarded unchanged to `self.get_channel`
      - kwargs: extra arguments and options, forwarded unchanged to
        `self.get_channel` (e.g. the command line option lang="fr" arrives
        here as kwargs['lang'])

    Returns: ChannelNode
    Raises: OSError if the "coursepage" file cannot be opened
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    LOGGER.info("Starting to scrape the channel...")

    # Parse the index page to get the topics.
    # NOTE(review): the live request is disabled and a locally saved copy is
    # parsed instead — confirm this is intended outside of development.
    # resp = downloader.make_request("{}courses".format(BASE_URL), cookies=self.cookies).content.decode("utf-8")

    # Decode explicitly as UTF-8 (the encoding the disabled request used)
    # rather than relying on the platform's locale default
    with open("coursepage", "r", encoding="utf-8") as course_file:
        page = BeautifulSoup(course_file, "html.parser")

    self.parse_page(channel, page)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Build the ChannelNode from the Iversity site and the email courses.

    Migration-Matters is organized with the following hierarchy:
        Iversity Site
            Understanding Diversity (Topic)
            |--- Welcome (Video - VideoNode)
            |--- Who is 'Us' and Who is 'Them'? (Video - VideoNode)
            ...
        Email Course
            A MIGRANT'S VIEW (Topic)
            |--- Nassim's Takeaway: Can Europe Welcome Them All? (Video - VideoNode)
            |--- Do Deportations Cut Migration? (Video - VideoNode)
            ...

    Args:
      - args, kwargs: forwarded unchanged to `self.get_channel`

    Returns: ChannelNode
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    sign_in_url = "{}/en/users/sign_in".format(BASE_URL)
    CLIENT.login(sign_in_url)

    scrape_iversity(channel)
    # The return value is not used here; the loop below reads EPISODE_DICT
    # (presumably filled by this call — verify against scrape_email_courses)
    scrape_email_courses(EMAIL_COURSE_URL)

    # One topic per episode, holding every video recorded for that episode
    for episode_title, episode_videos in EPISODE_DICT.items():
        episode_topic = nodes.TopicNode(
            source_id=episode_title.strip().replace(" ", "_"),
            title=episode_title,
        )
        for video_node in episode_videos:
            episode_topic.add_child(video_node)
        channel.add_child(episode_topic)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    construct_channel: Create the ChannelNode and build the OpenStax topic tree.

    OpenStax is organized with the following hierarchy:
        Subject (Topic)
        |   Book (Topic)
        |   |   Main High Resolution PDF (DocumentNode)
        |   |   Main Low Resolution PDF (DocumentNode)
        |   |   Instructor Resources (Topic)
        |   |   |   Resource PDF (DocumentNode)
        |   |   Student Resources (Topic)
        |   |   |   Resource PDF (DocumentNode)

    Args:
      - args, kwargs: forwarded unchanged to `self.get_channel`

    Returns: ChannelNode
    """
    LOGGER.info("Constructing channel from {}...".format(BASE_URL))

    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    # JSON data of the listing page
    catalog = read_source()

    for book in catalog.get('books'):
        subject = book.get('subject')

        # Reuse the subject topic if the channel already has it, else add it
        subject_node = None
        for existing_child in channel.children:
            if existing_child.source_id == subject:
                subject_node = existing_child
                break
        if not subject_node:
            subject_node = nodes.TopicNode(source_id=subject, title=subject)
            channel.add_child(subject_node)

        # Detailed page for this book; nothing more to do if it is empty
        content = read_source(endpoint=book.get('slug'))
        if not content:
            continue

        # Licensing metadata shared by every document of the book
        auth_info = {
            "license": LICENSE_MAPPING[content.get('license_name')],
            "license_description": content.get('license_text'),
            "copyright_holder": COPYRIGHT_HOLDER,
        }

        # Author line: at most five names, with a suffix when there are more
        author_entries = content['authors']
        author_names = [entry['value']['name'] for entry in author_entries[:5]]
        authors = ", ".join(author_names)
        if len(author_entries) > 5:
            authors = authors + " et. al."

        # Descriptive metadata shared by the book topic and its documents
        details = {
            "description": parse_description(content.get('description')),
            "thumbnail": get_thumbnail(content.get('cover_url')),
            "author": authors,
        }

        # Book topic
        book_node = nodes.TopicNode(
            source_id=str(content.get('cnx_id')),
            title=content.get('title'),
            author=details.get('author'),
            description=details.get('description'),
            thumbnail=details.get('thumbnail'),
        )
        subject_node.add_child(book_node)

        # The two key sets do not overlap, so one merged mapping carries the
        # same keyword arguments as passing both dicts separately
        document_metadata = {**auth_info, **details}

        LOGGER.info(" Writing {} documents...".format(book.get('title')))

        # High resolution document first, then low resolution
        for resolution, url_key in (("High", "high_resolution_pdf_url"),
                                    ("Low", "low_resolution_pdf_url")):
            document_title = "{} ({} Resolution)".format(
                content['title'], resolution)
            add_file_node(book_node, content.get(url_key), document_title,
                          **document_metadata)

        # Student handbook document
        add_file_node(book_node, content.get("student_handbook_url"),
                      "Student Handbook", **document_metadata)

        # Resource materials only receive the licensing metadata
        LOGGER.info(" Writing {} resources...".format(book.get('title')))
        for resource_title, resource_key in (
                ("Instructor Resources", 'book_faculty_resources'),
                ("Student Resources", 'book_student_resources')):
            parse_resources(resource_title, content.get(resource_key),
                            book_node, **auth_info)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Create the ChannelNode with a small hard-coded sample tree.

    Resulting hierarchy:
        Content Nodes (TopicNode)
        |   HTML5App Nodes (TopicNode)
        |   |   Shared Zip File app (HTML5AppNode)
        |   |   Thin app 1 (HTML5AppNode)

    Args:
      - args, kwargs: forwarded unchanged to `self.get_channel`

    Returns: ChannelNode
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    # Every node and file in this sample uses the same language id
    english = getlang('en').id

    content_topic = TopicNode(
        source_id='121232ms',
        title='Content Nodes',
        description='Put folder description here',
        author=None,
        language=english,
        thumbnail=None,
    )
    channel.add_child(content_topic)

    # HTML5 APPS
    html5_topic = TopicNode(
        source_id='asasa331',
        title='HTML5App Nodes',
        description='Put folder description here',
        author=None,
        language=english,
        thumbnail=None,
    )
    content_topic.add_child(html5_topic)

    # (source_id, title, zip path) of each sample app, added in this order
    sample_apps = (
        ('302723b4', 'Shared Zip File app', './content/zipfiles/shared.zip'),
        ('302723b5', 'Thin app 1', './content/zipfiles/thinapp1.zip'),
    )
    for app_source_id, app_title, zip_path in sample_apps:
        app_node = HTML5AppNode(
            source_id=app_source_id,
            title=app_title,
            author="First Last (author's name)",
            description='Put file description here',
            language=english,
            license=get_license(licenses.CC_BY,
                                copyright_holder='Copyright holder name'),
            thumbnail=None,
            files=[HTMLZipFile(path=zip_path, language=english)],
        )
        html5_topic.add_child(app_node)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Build the ChannelNode with one topic per grade, filled from YouTube playlists.

    The channel thumbnail is taken from the YouTube channel's own snippet and
    stored in self.channel_info before the ChannelNode is created.

    Args:
      - args: positional arguments, forwarded unchanged to `self.get_channel`
      - kwargs: extra arguments and options; kwargs['--youtube-api-token']
        is required and is used as the YouTube Data API developer key

    Returns: ChannelNode
    Raises: KeyError if '--youtube-api-token' is not present in kwargs
    """
    from apiclient.discovery import build

    # YouTube Data API v3 client
    youtube = build('youtube', 'v3',
                    developerKey=kwargs['--youtube-api-token'])

    # Look up the YouTube channel and use its largest thumbnail for ours
    channel_request = youtube.channels().list(id=YOUTUBE_CHANNEL_ID,
                                              part='snippet')
    youtube_channel_info = channel_request.execute()['items'][0]
    largest_thumbnail = get_largest_thumbnail(
        youtube_channel_info['snippet']['thumbnails'])
    self.channel_info['CHANNEL_THUMBNAIL'] = largest_thumbnail.get('url')

    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    grade_topics = []

    # Grades 1 and 2: a single playlist each, used as both source and id
    single_playlist_grades = (
        ("الرابع العلمي", "PL7PgvYjSilJD6uFfdqbQBUAZzbE48c8ns"),
        ("السادس الأدبي", "PL7PgvYjSilJAx5ib4t4z9X1j7foWrPp6j"),
    )
    for grade_title, grade_playlist_id in single_playlist_grades:
        grade_topic = YoutubePlaylistTopicNode(title=grade_title,
                                               source_id=grade_playlist_id)
        grade_topic.add_video_nodes_from_playlist(youtube, grade_playlist_id)
        grade_topics.append(grade_topic)

    # Grade 3: a plain topic holding one playlist subtopic per subject
    grade3 = nodes.TopicNode(title="السادس الإحيائي والتطبيقي",
                             source_id="al-riyadiyat-grade-3-playlists")
    grade3_subtopics = {
        "المعادلات التفاضلية": "PL7PgvYjSilJCCvAhZkHocn0XixWQzhcMJ",
        "المجاميع العليا والسفلى والتكامل": "PL7PgvYjSilJAsUyCzGdDFw5X5q9CiUQCN",
        "التفاضل": "PL7PgvYjSilJCRcrTWwyARbyZ8zwN6v8PD",
        "القطوع المكافئة": "PL7PgvYjSilJD-2MhwtwAMkdF7LhInCk5p",
        "الأعداد المركبة": "PL7PgvYjSilJBMIX26GJ31YVOt8LEE_vC_",
    }
    for subtopic_title, subtopic_playlist_id in grade3_subtopics.items():
        subtopic = YoutubePlaylistTopicNode(title=subtopic_title,
                                            source_id=subtopic_playlist_id)
        subtopic.add_video_nodes_from_playlist(youtube, subtopic_playlist_id)
        grade3.add_child(subtopic)
    grade_topics.append(grade3)

    # Attach the grades only after all of them have been built
    for grade_topic in grade_topics:
        channel.add_child(grade_topic)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Build the ChannelNode with one topic per YouTube playlist, containing
    only the videos whose license status is 'creativeCommon'.

    Args:
      - args: positional arguments, forwarded unchanged to `self.get_channel`
      - kwargs: extra arguments and options; kwargs['--youtube-api-token']
        is required and is used as the YouTube Data API developer key

    Returns: ChannelNode
    Raises: KeyError if '--youtube-api-token' is not present in kwargs
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    from apiclient.discovery import build

    # YouTube Data API v3 client
    youtube = build('youtube', 'v3',
                    developerKey=kwargs['--youtube-api-token'])

    # List the YouTube channel's playlists.
    # NOTE(review): only a single page (maxResults=50) is requested, so any
    # playlists beyond the first 50 are not visited — confirm this is enough.
    playlists = youtube.playlists().list(
        part='snippet',
        channelId=YOUTUBE_CHANNEL_ID,
        maxResults=50,
    ).execute()['items']

    # For getting the thumbnail automatically
    # youtube_channel = youtube.channels().list(
    #     id=YOUTUBE_CHANNEL_ID,
    #     part='snippet'
    # ).execute()['items'][0]
    # channel.thumbnail = get_largest_thumbnail(youtube_channel['snippet']['thumbnails']).get('url')

    for playlist in playlists:
        topic = nodes.TopicNode(title=playlist['snippet']['title'],
                                source_id=playlist['id'])

        # A fresh request dict per playlist; 'pageToken' is only added once a
        # following page is known to exist
        playlist_request_kwargs = {
            'part': 'contentDetails',
            'maxResults': 50,
            'playlistId': playlist['id'],
        }

        # Walk every page of the playlist; the loop ends when the response
        # carries no 'nextPageToken'
        while True:
            playlist_info = youtube.playlistItems().list(
                **playlist_request_kwargs).execute()
            video_ids = [item['contentDetails']['videoId']
                         for item in playlist_info['items']]

            # Skip the videos lookup for an empty page so the API is never
            # called with an empty `id` filter
            if video_ids:
                videos = youtube.videos().list(
                    part='status,snippet',
                    id=','.join(video_ids),
                ).execute()['items']
            else:
                videos = []

            for video in videos:
                # Only Creative Commons licensed videos are imported
                if video['status']['license'] != 'creativeCommon':
                    continue

                video_node = nodes.VideoNode(
                    source_id=video['id'],
                    title=video['snippet']['title'],
                    language=CHANNEL_LANGUAGE,
                    license=get_license(licenses.CC_BY,
                                        copyright_holder='Espresso English'),
                    thumbnail=get_largest_thumbnail(
                        video['snippet']['thumbnails']).get('url'),
                    files=[
                        files.YouTubeVideoFile(video['id']),
                    ],
                )
                topic.add_child(video_node)

                # Get subtitles for languages designated in SUBTITLE_LANGUAGES
                for lang_code in SUBTITLE_LANGUAGES:
                    if files.is_youtube_subtitle_file_supported_language(lang_code):
                        video_node.add_file(
                            files.YouTubeSubtitleFile(
                                youtube_id=video['id'],
                                language=lang_code,
                            )
                        )
                    else:
                        print('Unsupported subtitle language code:', lang_code)

            # Set up the next page, if there is one
            next_page_token = playlist_info.get('nextPageToken')
            if not next_page_token:
                break
            playlist_request_kwargs['pageToken'] = next_page_token

        channel.add_child(topic)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Build the ChannelNode from the book list, saving each book in every
    language it is available in.

    Args:
      - args: arguments passed in on the command line
      - kwargs: extra options passed in as key="value" pairs on the command line.
        For example, add the command line option lang="fr" and the value
        "fr" will be passed along to `construct_channel` as kwargs['lang'].

    Returns: ChannelNode with the following hierarchy (empty topics are not included):
        -> Language -> Level -> Tag -> Book

        None is returned (after logging an error) when the book list itself
        cannot be fetched.
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    saved = []
    not_saved = []

    def report_failed_fetch(failed_book):
        # Log the failed detail request and remember the listing entry
        LOGGER.error(
            "Could not fetch a book detail for \n {}".format(failed_book))
        not_saved.append(failed_book)

    try:
        books = fetch_books_list()
    except HTTPError:
        LOGGER.error("Could not fetch all books list")
        return

    # Book details keyed by their own id, so a detail fetched twice is kept once
    details_by_id = {}
    for book in books:
        master_book_id = book["masterBookId"]
        listed_language_id = book["languageId"]

        try:
            listed_detail = fetch_book_detail(master_book_id,
                                              listed_language_id)
        except HTTPError:
            report_failed_fetch(book)
            continue
        details_by_id[listed_detail["id"]] = listed_detail

        for language in listed_detail["availableLanguages"]:
            # we already have the book detail for this language
            if language["id"] == listed_language_id:
                continue
            try:
                translated_detail = fetch_book_detail(master_book_id,
                                                      language["id"])
            except HTTPError:
                report_failed_fetch(book)
                continue
            details_by_id[translated_detail["id"]] = translated_detail

    # make sure that languages and levels will be displayed in a correct order
    ordered_details = sorted(
        details_by_id.values(),
        key=lambda detail: (detail["language"]["name"],
                            detail["readingLevel"]),
    )

    for detail in ordered_details:
        try:
            save_book(detail, channel)
        except NoFileAvailableError:
            not_saved.append(detail)
        else:
            saved.append(detail)

    write_stats(saved, not_saved)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Apply this chef's command line options, then build the ChannelNode with
    one topic per language in PLAYLIST_MAP.

    Args:
      - args: arguments passed in on the command line
      - kwargs: extra options passed in as key="value" pairs on the command line.
        For example, add the command line option lang="fr" and the value
        "fr" will be passed along to `construct_channel` as kwargs['lang'].
      - Handled command line options:
        --nocache: Do not use cached YouTube playlist or video info;
        --tosheet: Only upload YouTube video information to Google sheet,
          will not generate channel; please provide the Google sheet ID in
          the form '--tosheet=[sheet_id]';
        --playlist, --video: These two options must be used together. They
          are used to save YouTube video cache info to a specified YouTube
          playlist cache file. This is useful when one or more videos from a
          playlist keep failing during extraction; a single video extraction
          usually works better than a playlist extraction. Multiple video
          IDs should be separated by commas.

    Returns: ChannelNode

    Raises:
      - SystemExit: after the --tosheet or --playlist/--video work is done
        (status 0), or when the --playlist/--video input is invalid (status 1)
      - RefugeeResponseLangInputError: a PLAYLIST_MAP language is not recognised
      - RefugeeResponseConfigError: a PLAYLIST_MAP language has no playlists
    """
    # sys.exit is used rather than the interactive `exit` helper, which the
    # `site` module only provides for convenience. Both raise SystemExit.
    import sys

    # Record the options handled by this chef on the instance
    for key, value in kwargs.items():
        if key == NO_CACHE_KEYNAME:
            self.use_cache = False
            LOGGER.info("use_cache = '%d'", self.use_cache)
        if key == DOWNLOAD_TO_GOOGLE_SHEET_KEYNAME:
            self.to_sheet = True
            self.sheet_id = value
            LOGGER.info("to_sheet = '%d'", self.to_sheet)
        if key == EXTRACT_VIDEO_INFO:
            self.insert_video_info = True
            self.video_list = value.split(",")
        if key == EXTRACT_VIDEO_PLAYLIST_INFO:
            self.insert_video_info = True
            self.to_playlist = value
            LOGGER.info("playlist = '%s'", self.to_playlist)

    # --tosheet: upload the descriptions and stop; no channel is generated
    if self.to_sheet:
        upload_description_to_google_sheet(self.sheet_id, self.use_cache)
        sys.exit(0)

    # --playlist/--video: store the video cache info and stop
    if self.insert_video_info:
        has_videos = self.video_list is not None and len(self.video_list) > 0
        # NOTE(review): this membership test looks `to_playlist` up among
        # PLAYLIST_MAP's keys, while the loop further down iterates
        # PLAYLIST_MAP as language -> playlist id list — confirm the keys
        # are what a playlist ID should be matched against.
        if has_videos and self.to_playlist in PLAYLIST_MAP:
            insert_video_info(self.video_list, self.to_playlist,
                              self.use_cache)
            sys.exit(0)
        elif not has_videos:
            LOGGER.error("Invalid video value!")
            sys.exit(1)
        else:
            LOGGER.error(
                "Option '--video' and '--playlist' must be used together. " +
                "And please make sure input YouTube playlist ID is inside 'PLAYLIST_MAP'"
            )
            sys.exit(1)

    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    # One topic per language, filled from that language's playlists
    for lang, id_list in PLAYLIST_MAP.items():
        rr_lang_obj = RefugeeResponseLanguage(name=lang, code=lang)
        if not rr_lang_obj.get_lang_obj():
            raise RefugeeResponseLangInputError("Invalid Language: " + lang)

        if id_list is None or len(id_list) == 0:
            raise RefugeeResponseConfigError(
                "Empty playlist info for language: " + lang)

        topic_source_id = 'refugeeresponse-child-topic-{0}'.format(
            rr_lang_obj.name)
        topic_node = nodes.TopicNode(title=rr_lang_obj.native_name,
                                     source_id=topic_source_id,
                                     author=REFUGEE_RESPONSE,
                                     provider=REFUGEE_RESPONSE,
                                     description=CHANNEL_DESCRIPTION,
                                     language=rr_lang_obj.code)
        download_video_topics(topic_node, (lang, id_list), rr_lang_obj,
                              self.use_cache)
        channel.add_child(topic_node)
        LOGGER.info("Added TopicNode: '%s'", topic_source_id)

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel
def construct_channel(self, *args, **kwargs):
    """
    Build the ChannelNode with every video of PLAYLISTS_URL added directly
    under the channel, ordered by the first number found in each title.

    Args:
      - args: positional arguments, forwarded unchanged to `self.get_channel`
      - kwargs: extra arguments and options, forwarded unchanged to
        `self.get_channel` (e.g. the command line option lang="fr" arrives
        here as kwargs['lang'])

    Returns: ChannelNode

    Resulting hierarchy (no per-playlist topics are created):
        Channel
        |   Youtube Video (VideoNode)
        |   Youtube Video (VideoNode)

    Raises: any exception from the extraction or node creation is printed
        to stdout with its traceback and then re-raised unchanged.
    """
    # ChannelNode built from the data in self.channel_info
    channel = self.get_channel(*args, **kwargs)

    try:
        # Fetch the playlist/video metadata only; nothing is downloaded here
        with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
            info_dict = ydl.extract_info(PLAYLISTS_URL, download=False)
        print(info_dict.keys())

        # Order the videos by the first run of digits in their title.
        # NOTE(review): a title without any digit makes re.search return
        # None and this raises AttributeError — confirm every title is numbered.
        videos = sorted(
            info_dict['entries'],
            key=lambda entry: int(re.search(r"\d+", entry['title']).group()))
        print([v['title'] for v in videos])

        # NOTE(review): this 15 second pause after printing the titles looks
        # like a manual inspection aid — confirm it is still wanted.
        import time
        time.sleep(15)

        for video in videos:
            # Use the first listed thumbnail when there is one. A missing or
            # empty list yields None (not the integer 0 that
            # `len(x) and x[0]` would produce), so the node gets a proper
            # "no thumbnail" value and derives one instead.
            thumbnails = video.get('thumbnails') or []
            thumbnail_url = thumbnails[0]['url'] if thumbnails else None

            channel.add_child(
                nodes.VideoNode(
                    title=video['title'],
                    source_id=video['id'],
                    license=licenses.PublicDomainLicense(),
                    description=video['description'],
                    derive_thumbnail=not thumbnail_url,
                    files=[files.WebVideoFile(video['webpage_url'])],
                    thumbnail=thumbnail_url,
                    author=AUTHOR,
                    # tags = video['categories'] + video['tags'], # TODO: uncomment this when added
                ))
    except Exception:
        # Show the traceback on stdout, then let the original error propagate
        import sys
        import traceback
        traceback.print_exc(file=sys.stdout)
        raise

    # Check for errors in channel construction
    raise_for_invalid_channel(channel)
    return channel