def construct_channel(self, *args, **kwargs):
        """
        Build the ChannelNode and fill it from the site's index page.

        Args:
          - args: positional arguments, forwarded unchanged to `self.get_channel`
          - kwargs: keyword arguments, forwarded unchanged to `self.get_channel`
            (for example a command line option lang="fr" arrives as
            kwargs['lang']).
        Returns: ChannelNode, after `raise_for_invalid_channel` has checked it
        """
        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        # Download the index page and collect the elements matched by
        # find_all("a", "item"); these are the raw topic links.
        index_response = downloader.make_request(
            "http://proyectodescartes.org/descartescms/")
        index_page = BeautifulSoup(index_response.content, "html.parser")
        anchors = index_page.find_all("a", "item")

        # Each parsed entry is indexable; its first three items are handed
        # to download_subject in that order.
        for entry in self.parse_topics(anchors, root):
            self.download_subject(entry[0], entry[1], entry[2])

        # Check for errors in channel construction
        raise_for_invalid_channel(root)

        return root
    def construct_channel(self, *args, **kwargs):
        """
        Build the ChannelNode and attach the two scraped menus to it.

        Args:
          - args: positional arguments, forwarded unchanged to `self.get_channel`
          - kwargs: keyword arguments, forwarded unchanged to `self.get_channel`
            (for example a command line option lang="fr" arrives as
            kwargs['lang']).
        Returns: ChannelNode

        Channel structure:
            Activities
                Subject
                    Subdirectory (if any)
                        Activity.zip
            Videos
                Subject
                    Collection
                        Video.mp4
        """
        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        # The snack menu is scraped and attached first, then the video menu.
        snack_tree = scrape_snack_menu(SNACK_URL)
        root.add_child(snack_tree)
        video_tree = scrape_video_menu(VIDEO_URL)
        root.add_child(video_tree)

        # Check for errors in channel construction
        raise_for_invalid_channel(root)

        return root
    def construct_channel(self, *args, **kwargs):
        """ construct_channel: Build the ChannelNode and its two topic folders

            Wikipedia is organized with the following hierarchy:
                Citrus (Folder)
                |   Citrus Page HTML Zip (File)
                Potatoes (Folder)
                |   Potatoes Page HTML Zip (File)

            Returns: ChannelNode
        """
        LOGGER.info("Constructing channel from {}...".format(BASE_URL))

        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        # (folder title, second argument given to create_topic), in creation order
        folders = (
            ("Citrus!", "List_of_citrus_fruits"),
            ("Potatoes!", "List_of_potato_cultivars"),
        )
        for folder_title, article in folders:
            create_topic(root, folder_title, article)

        # Check for errors in channel construction
        raise_for_invalid_channel(root)

        return root
예제 #4
0
def construct_channel(**kwargs):
    """
    Create a channel from `kwargs` and populate it from SAMPLE_TREE.

    All keyword arguments are forwarded unchanged to `create_channel`.
    Returns the channel after `raise_for_invalid_channel` has checked it.
    """
    root = create_channel(**kwargs)

    # Fill the tree, then validate it before handing it back.
    _build_tree(root, SAMPLE_TREE)
    raise_for_invalid_channel(root)
    return root
예제 #5
0
    def construct_channel(self, *args, **kwargs):
        """ construct_channel: Build the ChannelNode and scrape content into it

            Solar Spell is organized with the following hierarchy(Sample):
                Creative Arts (source_id = dir-creative-arts)
                |--- Culinary Arts (source_id = dir-culinary-arts)
                |--- |--- Real Pasifik 2 introducing Chef Alexis Tahiapuhe of Tahiti (source_id = file-real pasifik 2 introducing chef lela bolobolo of fiji.mp4)
                |--- Pacific Islands Arts and Culture(source_id = dir_pacific_islands_arts_and_culture)
                |--- |--- Cook Islands National Cultural Policy 10 July 2017_final english (File)
                |--- Teaching Resources and Classroom Activities
                Environment (source_id = dir-environment)
                |--- Adapting to Climate Change
                |--- |--- Action Against Climate Change Tuvalu Water and climate change
                |--- Climate Change Info
                |--- |--- Animated Pacific Island Climate Change Videos
                ...
            Returns: ChannelNode
        """
        LOGGER.info("Constructing channel from {}...".format(BASE_URL))

        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        LOGGER.info('   Writing {} Folder...'.format(CHANNEL_NAME))

        # Scraping starts at the "content/" path under BASE_URL.
        scrape_content(BASE_URL + "content/", root)

        # Check for errors in channel construction
        raise_for_invalid_channel(root)
        return root
예제 #6
0
    def construct_channel(self, *args, **kwargs):
        """
        Create the ChannelNode and populate it from SAMPLE_TREE.

        All arguments are forwarded unchanged to `self.get_channel`.
        Returns the ChannelNode after `raise_for_invalid_channel` has checked it.
        """
        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        # Fill the tree, then validate it before handing it back.
        _build_tree(root, SAMPLE_TREE)
        raise_for_invalid_channel(root)
        return root
예제 #7
0
    def construct_channel(self, *args, **kwargs):
        """
        Build the ChannelNode with one topic per book and one document per chapter.

        Books are read from JSON_FILE; each entry supplies 'book_title',
        'path_or_url' and 'chapters'. The book's PDF is split into chapters
        and every chapter becomes a DocumentNode under the book's TopicNode.

        Args:
          - args: positional arguments, forwarded unchanged to `self.get_channel`
          - kwargs: keyword arguments, forwarded unchanged to `self.get_channel`
            (for example a command line option lang="fr" arrives as
            kwargs['lang']).
        Returns: ChannelNode
        """
        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        # The same tag set is applied to every topic and every document;
        # each node still receives its own list object.
        shared_tags = (
            "Teacher facing",
            "Professional development",
            "Life skills",
            "Intercultural skills",
            "Mentorship",
            "Formal contexts",
        )

        for book in load_json_from_file(JSON_FILE):
            book_title = book['book_title']
            book_source_id = book_title.replace(" ", "_")
            pdf_location = book['path_or_url']

            book_topic = nodes.TopicNode(
                source_id=book_source_id,
                title=book_title,
                tags=list(shared_tags),
            )
            root.add_child(book_topic)

            # NOTE(review): the parser is opened but never closed in this
            # method - confirm whether PDFParser needs an explicit close.
            splitter = pdf.PDFParser(pdf_location, toc=book['chapters'])
            splitter.open()

            for part in splitter.split_chapters():
                chapter_title = part['title']
                chapter_file = files.DocumentFile(part['path'])
                chapter_node = nodes.DocumentNode(
                    # Chapter source ids are prefixed with the book title
                    source_id="{} {}".format(book_title, chapter_title),
                    title=chapter_title,
                    author="INTO",
                    tags=list(shared_tags),
                    files=[chapter_file],
                    license=licenses.get_license(
                        CHANNEL_LICENSE, "INTO", LICENSE_DESCRIPTION),
                    copyright_holder="INTO",
                )
                book_topic.add_child(chapter_node)

        # Check for errors in channel construction
        raise_for_invalid_channel(root)

        return root
예제 #8
0
 def construct_channel(self, *args, **kwargs):
     """
     Create the ChannelNode and let `create_video_subfolders` populate it.

     All arguments are forwarded unchanged to `self.get_channel`.
     Returns the ChannelNode after `raise_for_invalid_channel` has checked it.
     """
     # ChannelNode is created from the data in self.channel_info
     root = self.get_channel(*args, **kwargs)

     self.create_video_subfolders(root)

     raise_for_invalid_channel(root)
     return root
    def construct_channel(self, *args, **kwargs):
        """
        Build the ChannelNode with one topic per playlist and one video per entry.

        Args:
          - args: positional arguments, forwarded unchanged to `self.get_channel`
          - kwargs: keyword arguments, forwarded unchanged to `self.get_channel`
            (for example a command line option lang="fr" arrives as
            kwargs['lang']).
        Returns: ChannelNode

        Healing Classrooms is organized with the following hierarchy:
            Playlist (TopicNode)
            |   Youtube Video (VideoNode)
            |   Youtube Video (VideoNode)

        """
        channel = self.get_channel(*args, **kwargs)  # Create ChannelNode from data in self.channel_info

        # Download the playlist/video information (metadata only, no media)
        with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
            info_dict = ydl.extract_info(PLAYLISTS_URL, download=False)

            # Generate topics based off playlist entries in dict
            for playlist in info_dict['entries']:

                # Get language of playlist (hack): guessed from the title,
                # anything that is neither English nor Arabic is French.
                if "English" in playlist['title']:
                    language = "en"
                elif "Arabic" in playlist['title']:
                    language = "ar"
                else:
                    language = "fr"

                playlist_topic = nodes.TopicNode(title=playlist['title'], source_id=playlist['id'], language=language)
                channel.add_child(playlist_topic)

                # Generate videos based off video entries in dict
                for video in playlist['entries']:
                    # Use the first listed thumbnail when there is one. A
                    # missing or empty list yields None (not the int 0 that
                    # `len(...) and ...` produced), so the node receives
                    # either a URL or None.
                    thumbnails = video.get('thumbnails') or []
                    thumbnail_url = (thumbnails[0]['url'] or None) if thumbnails else None

                    playlist_topic.add_child(nodes.VideoNode(
                        title=video['title'],
                        source_id=video['id'],
                        license=licenses.PublicDomainLicense(),
                        description=video['description'],
                        # Only ask for a derived thumbnail when none was found
                        derive_thumbnail=thumbnail_url is None,
                        files=[files.WebVideoFile(video['webpage_url'])],
                        thumbnail=thumbnail_url,
                        author=AUTHOR,
                        # tags = video['categories'] + video['tags'], # TODO: uncomment this when added
                    ))

        raise_for_invalid_channel(channel)  # Check for errors in channel construction

        return channel
예제 #10
0
 def construct_channel(self, *args, **kwargs):
     """
     Create the ChannelNode, then add content nodes followed by exercise nodes.

     All arguments are forwarded unchanged to `self.get_channel`.
     Returns the ChannelNode after `raise_for_invalid_channel` has checked it.
     """
     # ChannelNode is created from the data in self.channel_info
     root = self.get_channel(*args, **kwargs)

     # Content nodes are created before exercise nodes.
     self.create_content_nodes(root)
     self.create_exercise_nodes(root)

     raise_for_invalid_channel(root)
     return root
예제 #11
0
def construct_channel(result=None):
    """
    Build the Magogenie BalBharati channel from freshly fetched data.

    `result` is accepted but not read anywhere in this function.
    Returns the ChannelNode after `raise_for_invalid_channel` has checked it.
    """
    # The tree data is fetched before the channel node is created.
    tree_data = get_magogenie_info_url()

    # NOTE(review): the thumbnail is an absolute path on one machine -
    # confirm it exists wherever this runs.
    channel_settings = {
        "source_domain": "magogenie.com",
        "source_id": "Magogenie BalBharati Final Import",
        "title": "Magogenie BalBharati",
        "thumbnail": "/Users/Admin/Documents/mago.png",
    }
    root = nodes.ChannelNode(**channel_settings)

    _build_tree(root, tree_data)
    raise_for_invalid_channel(root)
    return root
예제 #12
0
    def construct_channel(self, *args, **kwargs):
        """ construct_channel: Template for building the ChannelNode

            The ChannelNode is created, then NotImplementedError is raised
            where the scraping code is meant to go; the statements after the
            raise are not reached until it is replaced.

            Returns: ChannelNode (once the placeholder raise is replaced)
        """
        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        # TODO: Replace line with scraping code
        raise NotImplementedError("Scraping method not implemented")

        # Check for errors in channel construction
        raise_for_invalid_channel(root)

        return root
예제 #13
0
 def construct_channel(self, *args, **kwargs):
     """
     Create the ChannelNode and build its tree from freshly fetched data.

     All arguments are forwarded unchanged to `self.get_channel`.
     Returns the ChannelNode after `raise_for_invalid_channel` has checked it.
     """
     # The tree data is fetched before the channel node is created.
     tree_data = get_magogenie_info_url()

     # ChannelNode is created from the data in self.channel_info
     root = self.get_channel(*args, **kwargs)

     _build_tree(root, tree_data)
     raise_for_invalid_channel(root)
     return root
예제 #14
0
def construct_channel(args):
    """
    Create the fixed "Sample channel" and populate it from SAMPLE_TREE.

    `args` is accepted but not read anywhere in this function.
    Returns the channel after `raise_for_invalid_channel` has checked it.
    """
    thumbnail_url = "https://s.graphiq.com/sites/default/files/stories/t4/15_Tiniest_Dog_Breeds_1718_3083.jpg"
    root = Channel(
        domain="learningequality.org",
        channel_id="sample-channel",
        title="Sample channel",
        thumbnail=thumbnail_url,
    )

    # Fill the tree, then validate it before handing it back.
    _build_tree(root, SAMPLE_TREE)
    raise_for_invalid_channel(root)
    return root
    def construct_channel(self, *args, **kwargs):
        """ construct_channel: Build the ChannelNode and let scrape_page fill it

            All arguments are forwarded unchanged to `self.get_channel`.

            Returns: ChannelNode
        """
        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        scrape_page(root)

        # Check for errors in channel construction
        raise_for_invalid_channel(root)

        return root
    def construct_channel(self, *args, **kwargs):
        """
        Build the ChannelNode and let `scrape_channel` fill it.

        Args:
          - args: positional arguments, forwarded unchanged to `self.get_channel`
          - kwargs: keyword arguments, forwarded unchanged to `self.get_channel`
            (for example a command line option lang="fr" arrives as
            kwargs['lang']).
        Returns: ChannelNode
        """
        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        scrape_channel(root)

        # Check for errors in channel construction
        raise_for_invalid_channel(root)

        return root
예제 #17
0
    def construct_channel(self, *args, **kwargs):
        """
        Template for building the ChannelNode and its topic tree.

        The ChannelNode is created, then NotImplementedError is raised where
        the chef code is meant to go; the statements after the raise are not
        reached until it is replaced.

        Args:
          - args: arguments passed in on the command line
          - kwargs: extra options passed in as key="value" pairs on the command line
            For example, add the command line option   lang="fr"  and the value
            "fr" will be passed along to `construct_channel` as kwargs['lang'].
        Returns: ChannelNode (once the placeholder raise is replaced)
        Raises: NotImplementedError, always, in this template form
        """
        channel = self.get_channel(*args, **kwargs)  # Create ChannelNode from data in self.channel_info

        # TODO: Replace next line with chef code
        raise NotImplementedError("construct_channel method not implemented yet...")

        raise_for_invalid_channel(channel)  # Check for errors in channel construction

        return channel
    def construct_channel(self, *args, **kwargs):
        """
        Build the ChannelNode from a locally saved copy of the courses page.

        The page is read from a file named "coursepage" in the current
        working directory, parsed with BeautifulSoup and handed to
        `self.parse_page` together with the channel.

        Args:
          - args: positional arguments, forwarded unchanged to `self.get_channel`
          - kwargs: keyword arguments, forwarded unchanged to `self.get_channel`
            (for example a command line option lang="fr" arrives as
            kwargs['lang']).
        Returns: ChannelNode
        """
        channel = self.get_channel(
            *args,
            **kwargs)  # Create ChannelNode from data in self.channel_info

        LOGGER.info("Starting to scrape the channel...")
        # Parse the index page to get the topics. The live request is
        # disabled and the saved copy is read instead:
        # resp = downloader.make_request("{}courses".format(BASE_URL), cookies=self.cookies).content.decode("utf-8")
        # The file is decoded as UTF-8 explicitly, matching the disabled
        # request above, instead of relying on the locale default.
        with open("coursepage", "r", encoding="utf-8") as course_file:
            page = BeautifulSoup(course_file, "html.parser")

        # The markup is fully parsed by now, so the file no longer needs to
        # stay open while the tree is built and validated.
        self.parse_page(channel, page)

        raise_for_invalid_channel(
            channel)  # Check for errors in channel construction
        return channel
    def construct_channel(self, *args, **kwargs):
        """
        Build the ChannelNode from the Iversity site and the email courses.

        Migration-Matters is organized with the following hierarchy:
        Iversity Site
        Understanding Diversity (Topic)
        |--- Welcome (Video - VideoNode)
        |--- Who is 'Us' and Who is 'Them'? (Video - VideoNode)
        ...
        Email Course
        A MIGRANT'S VIEW (Topic)
        |--- Nassim's Takeaway: Can Europe Welcome Them All? (Video - VideoNode)
        |--- Do Deportations Cut Migration? (Video - VideoNode)
        ...
        """

        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        # Log in first, then run both scrapers; only the Iversity scraper
        # receives the channel.
        CLIENT.login("{}/en/users/sign_in".format(BASE_URL))
        scrape_iversity(root)
        scrape_email_courses(EMAIL_COURSE_URL)

        # One topic per episode in EPISODE_DICT, holding that episode's videos
        for episode_title, episode_videos in EPISODE_DICT.items():
            episode_topic = nodes.TopicNode(
                source_id=episode_title.strip().replace(" ", "_"),
                title=episode_title,
            )
            for video_node in episode_videos:
                episode_topic.add_child(video_node)
            root.add_child(episode_topic)

        # Check for errors in channel construction
        raise_for_invalid_channel(root)
        return root
    def construct_channel(self, *args, **kwargs):
        """ construct_channel: Build the ChannelNode from the books listing

            OpenStax is organized with the following hierarchy:
                Subject (Topic)
                |   Book (Topic)
                |   |   Main High Resolution PDF (DocumentNode)
                |   |   Main Low Resolution PDF (DocumentNode)
                |   |   Instructor Resources (Topic)
                |   |   |   Resource PDF (DocumentNode)
                |   |   Student Resources (Topic)
                |   |   |   Resource PDF (DocumentNode)

            Returns: ChannelNode
        """
        LOGGER.info("Constructing channel from {}...".format(BASE_URL))

        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)
        listing = read_source()  # Get json data from page

        for book in listing.get('books'):
            subject = book.get('subject')

            # Reuse the subject topic already attached to the channel,
            # otherwise create and attach it.
            subject_node = None
            for existing in root.children:
                if existing.source_id == subject:
                    subject_node = existing
                    break
            if not subject_node:
                subject_node = nodes.TopicNode(source_id=subject, title=subject)
                root.add_child(subject_node)

            # Read detailed page for content; skip the book if nothing is found
            content = read_source(endpoint=book.get('slug'))
            if not content:
                continue

            # Licensing metadata shared by every file of this book
            auth_info = dict(
                license=LICENSE_MAPPING[content.get('license_name')],
                license_description=content.get('license_text'),
                copyright_holder=COPYRIGHT_HOLDER,
            )

            # At most five author names are listed; a suffix marks the rest
            author_entries = content['authors']
            author_names = ", ".join(
                entry['value']['name'] for entry in author_entries[:5])
            if len(author_entries) > 5:
                author_names += " et. al."

            # Content metadata shared by the book topic and its main documents
            details = {
                "description": parse_description(content.get('description')),
                "thumbnail": get_thumbnail(content.get('cover_url')),
                "author": author_names,
            }

            # Add book topic
            book_node = nodes.TopicNode(
                source_id=str(content.get('cnx_id')),
                title=content.get('title'),
                author=details["author"],
                description=details["description"],
                thumbnail=details["thumbnail"],
            )
            subject_node.add_child(book_node)

            # Main documents: (key holding the file URL, node title)
            LOGGER.info("   Writing {} documents...".format(book.get('title')))
            main_documents = (
                ("high_resolution_pdf_url",
                 "{} ({} Resolution)".format(content['title'], "High")),
                ("low_resolution_pdf_url",
                 "{} ({} Resolution)".format(content['title'], "Low")),
                ("student_handbook_url", "Student Handbook"),
            )
            for url_key, document_title in main_documents:
                add_file_node(book_node, content.get(url_key), document_title,
                              **auth_info, **details)

            # Resource materials: (topic title, key holding the resources)
            LOGGER.info("   Writing {} resources...".format(book.get('title')))
            resource_groups = (
                ("Instructor Resources", 'book_faculty_resources'),
                ("Student Resources", 'book_student_resources'),
            )
            for group_title, resources_key in resource_groups:
                parse_resources(group_title, content.get(resources_key),
                                book_node, **auth_info)

        # Check for errors in channel construction
        raise_for_invalid_channel(root)

        return root
예제 #21
0
    def construct_channel(self, *args, **kwargs):
        """
        Create the ChannelNode and a small sample tree of HTML5 app nodes.

        Resulting tree:
            Content Nodes (TopicNode)
                HTML5App Nodes (TopicNode)
                    Shared Zip File app (HTML5AppNode)
                    Thin app 1 (HTML5AppNode)

        All arguments are forwarded unchanged to `self.get_channel`.
        Returns the ChannelNode after `raise_for_invalid_channel` has checked it.
        """
        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        content_topic = TopicNode(
            source_id='121232ms',
            title='Content Nodes',
            description='Put folder description here',
            author=None,
            language=getlang('en').id,
            thumbnail=None,
        )
        root.add_child(content_topic)

        # HTML5 APPS
        apps_topic = TopicNode(
            source_id='asasa331',
            title='HTML5App Nodes',
            description='Put folder description here',
            author=None,
            language=getlang('en').id,
            thumbnail=None,
        )
        content_topic.add_child(apps_topic)

        # The two apps differ only in source id, title and zip path
        sample_apps = (
            ('302723b4', 'Shared Zip File app', './content/zipfiles/shared.zip'),
            ('302723b5', 'Thin app 1', './content/zipfiles/thinapp1.zip'),
        )
        for app_source_id, app_title, zip_path in sample_apps:
            app_node = HTML5AppNode(
                source_id=app_source_id,
                title=app_title,
                author='First Last (author\'s name)',
                description='Put file description here',
                language=getlang('en').id,
                license=get_license(licenses.CC_BY,
                                    copyright_holder='Copyright holder name'),
                thumbnail=None,
                files=[
                    HTMLZipFile(path=zip_path, language=getlang('en').id),
                ],
            )
            apps_topic.add_child(app_node)

        raise_for_invalid_channel(root)
        return root
    def construct_channel(self, *args, **kwargs):
        """
        Build the ChannelNode with three grade topics filled from YouTube playlists.

        The channel thumbnail is taken from the YouTube channel's snippet and
        stored in self.channel_info before the ChannelNode is created.

        Args:
          - args: positional arguments, forwarded unchanged to `self.get_channel`
          - kwargs: keyword arguments, forwarded unchanged to `self.get_channel`;
            kwargs['--youtube-api-token'] is required and used as the API key.
        Returns: ChannelNode
        """
        from apiclient.discovery import build

        # instantiate a YouTube Data API v3 client
        youtube = build(
            'youtube', 'v3', developerKey=kwargs['--youtube-api-token'])

        channel_listing = youtube.channels().list(
            id=YOUTUBE_CHANNEL_ID, part='snippet').execute()
        channel_snippet = channel_listing['items'][0]['snippet']
        largest_thumbnail = get_largest_thumbnail(channel_snippet['thumbnails'])
        self.channel_info['CHANNEL_THUMBNAIL'] = largest_thumbnail.get('url')

        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        grade_topics = []

        # Grades 1 and 2 are each backed by a single playlist
        single_playlist_grades = (
            ("الرابع العلمي", "PL7PgvYjSilJD6uFfdqbQBUAZzbE48c8ns"),
            ("السادس الأدبي", "PL7PgvYjSilJAx5ib4t4z9X1j7foWrPp6j"),
        )
        for grade_title, grade_playlist_id in single_playlist_grades:
            grade_topic = YoutubePlaylistTopicNode(
                title=grade_title, source_id=grade_playlist_id)
            grade_topic.add_video_nodes_from_playlist(
                youtube, grade_playlist_id)
            grade_topics.append(grade_topic)

        # Grade 3 groups several playlists, one subtopic per playlist
        grade3 = nodes.TopicNode(title="السادس الإحيائي والتطبيقي",
                                 source_id="al-riyadiyat-grade-3-playlists")
        grade3_subtopics = {
            "المعادلات التفاضلية": "PL7PgvYjSilJCCvAhZkHocn0XixWQzhcMJ",
            "المجاميع العليا والسفلى والتكامل":
            "PL7PgvYjSilJAsUyCzGdDFw5X5q9CiUQCN",
            "التفاضل": "PL7PgvYjSilJCRcrTWwyARbyZ8zwN6v8PD",
            "القطوع المكافئة": "PL7PgvYjSilJD-2MhwtwAMkdF7LhInCk5p",
            "الأعداد المركبة": "PL7PgvYjSilJBMIX26GJ31YVOt8LEE_vC_",
        }
        for subtopic_title, subtopic_playlist_id in grade3_subtopics.items():
            playlist_topic = YoutubePlaylistTopicNode(
                title=subtopic_title, source_id=subtopic_playlist_id)
            playlist_topic.add_video_nodes_from_playlist(
                youtube, subtopic_playlist_id)
            grade3.add_child(playlist_topic)
        grade_topics.append(grade3)

        # Grades are attached only after all of them have been populated
        for grade_topic in grade_topics:
            root.add_child(grade_topic)

        # Check for errors in channel construction
        raise_for_invalid_channel(root)

        return root
예제 #23
0
    def construct_channel(self, *args, **kwargs):
        """
        Build the ChannelNode with one topic per YouTube playlist.

        Every playlist of YOUTUBE_CHANNEL_ID returned by a single
        `playlists().list` call (maxResults=50) becomes a TopicNode. Its
        items are read page by page; only videos whose status license is
        'creativeCommon' are added, each with subtitle files for the
        supported languages in SUBTITLE_LANGUAGES.

        Args:
          - args: positional arguments, forwarded unchanged to `self.get_channel`
          - kwargs: keyword arguments, forwarded unchanged to `self.get_channel`;
            kwargs['--youtube-api-token'] is required and used as the API key.
        Returns: ChannelNode
        """
        channel = self.get_channel(*args, **kwargs)  # Create ChannelNode from data in self.channel_info

        from apiclient.discovery import build
        # instantiate a YouTube Data API v3 client
        youtube = build('youtube', 'v3', developerKey=kwargs['--youtube-api-token'])
        playlists = youtube.playlists().list(  # list the YouTube channel's playlists
            part='snippet',
            channelId=YOUTUBE_CHANNEL_ID,
            maxResults=50
        ).execute()['items']

        # NOTE: the channel thumbnail is not derived here. It could be taken
        # from youtube.channels().list(id=YOUTUBE_CHANNEL_ID, part='snippet')
        # via get_largest_thumbnail(...).get('url') if that is ever wanted.

        for playlist in playlists:
            topic = nodes.TopicNode(title=playlist['snippet']['title'], source_id=playlist['id'])
            playlist_request_kwargs = {
                'part': 'contentDetails',
                'maxResults': 50,
                'playlistId': playlist['id'],
            }

            # Walk the playlist one page at a time until no nextPageToken
            # is returned.
            while True:
                playlist_info = youtube.playlistItems().list(**playlist_request_kwargs).execute()
                video_ids = [vid['contentDetails']['videoId'] for vid in playlist_info['items']]

                # Skip the details request when the page holds no videos,
                # rather than sending an empty id filter.
                if video_ids:
                    videos = youtube.videos().list(
                        part='status,snippet',
                        id=','.join(video_ids)
                    ).execute()['items']
                else:
                    videos = []

                for video in videos:
                    # Only Creative Commons licensed videos are imported
                    if video['status']['license'] != 'creativeCommon':
                        continue

                    video_node = nodes.VideoNode(
                        source_id=video['id'],
                        title=video['snippet']['title'],
                        language=CHANNEL_LANGUAGE,
                        license=get_license(licenses.CC_BY, copyright_holder='Espresso English'),
                        thumbnail=get_largest_thumbnail(video['snippet']['thumbnails']).get('url'),
                        files=[
                            files.YouTubeVideoFile(video['id']),
                        ]
                    )
                    topic.add_child(video_node)

                    # Get subtitles for languages designated in SUBTITLE_LANGUAGES
                    for lang_code in SUBTITLE_LANGUAGES:
                        if files.is_youtube_subtitle_file_supported_language(lang_code):
                            video_node.add_file(
                                files.YouTubeSubtitleFile(
                                    youtube_id=video['id'],
                                    language=lang_code
                                )
                            )
                        else:
                            print('Unsupported subtitle language code:', lang_code)

                # set up the next page, if there is one
                next_page_token = playlist_info.get('nextPageToken')
                if not next_page_token:
                    break
                playlist_request_kwargs['pageToken'] = next_page_token

            channel.add_child(topic)

        raise_for_invalid_channel(channel)  # Check for errors in channel construction

        return channel
    def construct_channel(self, *args, **kwargs):
        """
        Build the ChannelNode from every available language version of every book.

        Args:
          - args: arguments passed in on the command line
          - kwargs: extra options passed in as key="value" pairs on the command line
            For example, add the command line option   lang="fr"  and the value
            "fr" will be passed along to `construct_channel` as kwargs['lang'].
        Returns: ChannelNode with the following hierarchy (empty topics are not included):
                  -> Language
                    -> Level
                      -> Tag
                        -> Book
                 None is returned instead when the books list cannot be
                 fetched (HTTPError).
        """
        # ChannelNode is created from the data in self.channel_info
        root = self.get_channel(*args, **kwargs)

        saved = []
        not_saved = []

        try:
            listed_books = fetch_books_list()
        except HTTPError:
            LOGGER.error("Could not fetch all books list")
            return

        # Book details keyed by detail id, so each version is kept once
        details_by_id = {}
        for book in listed_books:
            master_id = book["masterBookId"]
            listed_language_id = book["languageId"]

            try:
                listed_detail = fetch_book_detail(master_id, listed_language_id)
            except HTTPError:
                LOGGER.error(
                    "Could not fetch a book detail for \n {}".format(book))
                not_saved.append(book)
                continue

            details_by_id[listed_detail["id"]] = listed_detail

            # Fetch the remaining language versions of the same book
            for language in listed_detail["availableLanguages"]:
                # we already have the book detail for this language
                if language["id"] == listed_language_id:
                    continue

                try:
                    other_detail = fetch_book_detail(master_id, language["id"])
                except HTTPError:
                    LOGGER.error(
                        "Could not fetch a book detail for \n {}".format(book))
                    not_saved.append(book)
                else:
                    details_by_id[other_detail["id"]] = other_detail

        # make sure that languages and levels will be displayed in a correct order
        ordered_details = sorted(
            details_by_id.values(),
            key=lambda detail: (detail["language"]["name"],
                                detail["readingLevel"]))

        for detail in ordered_details:
            try:
                save_book(detail, root)
            except NoFileAvailableError:
                not_saved.append(detail)
            else:
                saved.append(detail)

        write_stats(saved, not_saved)

        # Check for errors in channel construction
        raise_for_invalid_channel(root)

        return root
    def construct_channel(self, *args, **kwargs):
        """
        Creates ChannelNode and build topic tree, or runs one of the utility
        modes selected through command line options and exits.

        Args:
          - args: arguments passed in on the command line
          - kwargs: extra options passed in as key="value" pairs on the command line
            For example, add the command line option   lang="fr"  and the value
            "fr" will be passed along to `construct_channel` as kwargs['lang'].
          - Handled command line options:
            --nocache:  Do not use cached YouTube playlist or video info;
            --tosheet:  Only upload YouTube video information to Google sheet, will not generate channel;
                        Please provide Google sheet ID in form '--tosheet=[sheet_id]';
            --playlist, --video:
                        These two options must be used together. They are used to save YouTube video cache info
                        to a specified YouTube playlist cache file. This feature is useful when one or more videos
                        from a playlist keep failing during extraction. A single video extraction usually works better
                        than a playlist extraction. Multiple video IDs should be separated by commas.
        Returns: ChannelNode with one TopicNode per entry of PLAYLIST_MAP
        Raises:
          - SystemExit: after the sheet-upload or video-info utility mode has
            run (code 0), or when the --video/--playlist input is invalid (code 1).
          - RefugeeResponseLangInputError: a PLAYLIST_MAP key is not a valid language.
          - RefugeeResponseConfigError: a PLAYLIST_MAP entry has no playlist IDs.
        """
        # Local import: `sys.exit` is used instead of the `exit()` helper that
        # the `site` module injects for interactive sessions.
        import sys

        # Translate the recognised command line options into instance state.
        for key, value in kwargs.items():
            if key == NO_CACHE_KEYNAME:
                self.use_cache = False
                LOGGER.info("use_cache = '%d'", self.use_cache)
            if key == DOWNLOAD_TO_GOOGLE_SHEET_KEYNAME:
                self.to_sheet = True
                self.sheet_id = value
                LOGGER.info("to_sheet = '%d'", self.to_sheet)
            if key == EXTRACT_VIDEO_INFO:
                self.insert_video_info = True
                self.video_list = value.split(",")
            if key == EXTRACT_VIDEO_PLAYLIST_INFO:
                self.insert_video_info = True
                self.to_playlist = value
                LOGGER.info("playlist = '%s'", self.to_playlist)

        # Utility mode: only push video descriptions to the Google sheet.
        if self.to_sheet:
            upload_description_to_google_sheet(self.sheet_id, self.use_cache)
            sys.exit(0)

        # Utility mode: only store info for the given videos in a playlist cache.
        if self.insert_video_info:
            if not self.video_list:
                LOGGER.error("Invalid video value!")
                sys.exit(1)
            if self.to_playlist not in PLAYLIST_MAP:
                LOGGER.error(
                    "Option '--video' and '--playlist' must be used together. "
                    "And please make sure input YouTube playlist ID is inside 'PLAYLIST_MAP'"
                )
                sys.exit(1)
            insert_video_info(self.video_list, self.to_playlist,
                              self.use_cache)
            sys.exit(0)

        # Create ChannelNode from data in self.channel_info
        channel = self.get_channel(*args, **kwargs)

        # Build one topic per language from its YouTube playlist IDs
        for lang, id_list in PLAYLIST_MAP.items():
            rr_lang_obj = RefugeeResponseLanguage(name=lang, code=lang)
            if not rr_lang_obj.get_lang_obj():
                raise RefugeeResponseLangInputError("Invalid Language: " +
                                                    lang)

            if not id_list:
                raise RefugeeResponseConfigError(
                    "Empty playlist info for language: " + lang)

            topic_source_id = 'refugeeresponse-child-topic-{0}'.format(
                rr_lang_obj.name)
            topic_node = nodes.TopicNode(title=rr_lang_obj.native_name,
                                         source_id=topic_source_id,
                                         author=REFUGEE_RESPONSE,
                                         provider=REFUGEE_RESPONSE,
                                         description=CHANNEL_DESCRIPTION,
                                         language=rr_lang_obj.code)
            download_video_topics(topic_node, (lang, id_list), rr_lang_obj,
                                  self.use_cache)
            channel.add_child(topic_node)
            LOGGER.info("Added TopicNode: '%s'", topic_source_id)

        # Check for errors in channel construction
        raise_for_invalid_channel(channel)
        return channel
예제 #26
0
    def construct_channel(self, *args, **kwargs):
        """
        Creates ChannelNode and attaches one VideoNode per playlist entry
        Args:
          - args: arguments passed in during upload_channel (currently None)
          - kwargs: extra arguments and options not handled by `uploadchannel`.
            For example, add the command line option   lang="fr"  and the string
            "fr" will be passed along to `construct_channel` as kwargs['lang'].
        Returns: ChannelNode
        Raises: any exception hit while extracting or adding videos is
            re-raised after its traceback has been printed to stdout.

        The videos found at PLAYLISTS_URL are added directly under the channel
        (no intermediate playlist TopicNode is created), ordered by the first
        number that appears in each video title:
            Channel
            |   Youtube Video (VideoNode)
            |   Youtube Video (VideoNode)

        """
        import sys
        import time
        import traceback

        channel = self.get_channel(
            *args,
            **kwargs)  # Create ChannelNode from data in self.channel_info

        # Download the playlist/video information
        try:
            with youtube_dl.YoutubeDL({'skip_download': True}) as ydl:
                info_dict = ydl.extract_info(PLAYLISTS_URL, download=False)
                print(info_dict.keys())

                # Order videos by the first integer in the title; a title
                # without any digits makes the sort key raise.
                videos = sorted(
                    info_dict['entries'],
                    key=lambda x: int(re.search(r"\d+", x['title']).group()))
                print([v['title'] for v in videos])
                # NOTE(review): presumably a pause so the operator can inspect
                # the printed order before processing starts -- confirm
                # whether it is still needed.
                time.sleep(15)

                for video in videos:
                    # Use the first listed thumbnail when there is one;
                    # otherwise pass None and ask for a derived thumbnail.
                    thumbnails = video['thumbnails']
                    thumbnail_url = thumbnails[0]['url'] if thumbnails else None

                    channel.add_child(
                        nodes.VideoNode(
                            title=video['title'],
                            source_id=video['id'],
                            license=licenses.PublicDomainLicense(),
                            description=video['description'],
                            derive_thumbnail=not thumbnail_url,
                            files=[files.WebVideoFile(video['webpage_url'])],
                            thumbnail=thumbnail_url,
                            author=AUTHOR,
                            # tags = video['categories'] + video['tags'], # TODO: uncomment this when added
                        ))
        except Exception:
            # Show the failure on stdout before letting it propagate.
            traceback.print_exc(file=sys.stdout)
            raise

        raise_for_invalid_channel(
            channel)  # Check for errors in channel construction

        return channel