Ejemplo n.º 1
0
def license_objects():
    """Build license objects for every license id and argument combination.

    For each license id listed below, ``get_license`` is called four times:
    with a copyright holder and a description, with a description only, with
    a copyright holder only, and with no extra arguments. Every result is
    asserted to be truthy before it is collected.

    Returns:
        list: the created license objects, grouped by license id and, within
        each id, in the order described above.
    """
    regular_ids = [
        CC_BY, CC_BY_SA, CC_BY_ND, CC_BY_NC, CC_BY_NC_SA, CC_BY_NC_ND,
        ALL_RIGHTS_RESERVED, PUBLIC_DOMAIN
    ]
    # Keyword-argument variants, in the order they are tried for each id.
    variants = (
        # with description and copyright_holder
        {'copyright_holder': 'Some name', 'description': 'Le description'},
        # with description only
        {'description': 'Le description solo2'},
        # with copyright_holder only
        {'copyright_holder': 'Some name3'},
        # bare
        {},
    )

    created = []
    for regular_id in regular_ids:
        for extra_kwargs in variants:
            licence_obj = get_license(regular_id, **extra_kwargs)
            assert licence_obj, 'licence_obj should exist'
            created.append(licence_obj)
    return created
def pdfNode(infoDict):
    """Download a converted PDF and wrap it in a ``DocumentNode``.

    Fetches ``infoDict['pdfPath']``, saves the response body to a local file
    named by ``infoDict['pdfTitle']`` (switching to a numbered name when that
    path is already taken), records the file in ``filesCreated`` and returns
    a node pointing at it.

    Args:
        infoDict: mapping with the keys ``'pdfPath'``, ``'pdfTitle'`` and
            ``'id'``. ``'pdfTitle'`` is updated in place when the file has to
            be renamed.

    Returns:
        The ``DocumentNode`` built for the saved file.

    Raises:
        requests.HTTPError: if the download answers with an error status.
    """
    global pdfCopy

    # Get response from converted-to-pdf path
    response = requests.get(infoDict['pdfPath'], auth=auth)
    # Fail loudly instead of saving an HTTP error page under a ".pdf" name.
    response.raise_for_status()

    # Insert the counter before the extension only (a ".pdf" elsewhere in the
    # name is left alone) and keep counting until the name is actually free.
    stem, extension = os.path.splitext(infoDict['pdfTitle'])
    while os.path.exists(infoDict['pdfTitle']):
        infoDict['pdfTitle'] = stem + str(pdfCopy) + extension
        pdfCopy += 1

    # Write pdf to local file
    with open(infoDict['pdfTitle'], 'wb') as f:
        f.write(response.content)

    filesCreated.append(infoDict['pdfTitle'])

    # Create Document Node
    node = DocumentNode(
        source_id=str(infoDict['id']),
        title=infoDict['pdfTitle'],
        language="en",
        description="",
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        files=[DocumentFile(
            path=infoDict['pdfTitle'],
            language="en",
        )],
    )
    return node
Ejemplo n.º 3
0
def make_video_node(title,
                    video_url,
                    video_language='en',
                    ffmpeg_settings=None):
    """
    Build a VideoNode whose single VideoFile points at `video_url`.

    The title doubles as the node's source_id, so it is assumed to be unique
    within the containing topic.
    """
    lang_id = getlang(video_language).id
    video_file = VideoFile(
        path=video_url,
        language=lang_id,
        ffmpeg_settings=ffmpeg_settings,
    )
    sikana_license = get_license(licenses.CC_BY_NC_ND,
                                 copyright_holder='Sikana Education')
    return VideoNode(
        source_id=title,
        title=title,
        author='Sikana',
        description='',
        language=lang_id,
        license=sikana_license,
        thumbnail=None,
        derive_thumbnail=True,
        files=[video_file],
    )
Ejemplo n.º 4
0
    def add_content_to_tree(self, channel):
        """Attach the English class/subject/video hierarchy to `channel`.

        Walks ``self.channel_tree['English']``: every class becomes a
        TopicNode, every subject inside it a nested TopicNode, and every
        entry of the subject's ``'items'`` list a VideoNode.
        """
        lang = 'English'
        classes = self.channel_tree[lang]
        lang_obj = getlang("en")

        for class_name, class_obj in classes.items():
            # The class topic is keyed by its bare name; class_id is only the
            # prefix for the subject and item ids below.
            class_id = "{}-{}".format(lang, class_name)
            class_node = nodes.TopicNode(source_id=class_name, title=class_name)

            for subject_name, subject_obj in class_obj.items():
                subject_id = "{}-{}".format(class_id, subject_name)
                subject_node = nodes.TopicNode(source_id=subject_id, title=subject_name)

                for item in subject_obj['items']:
                    item_id = "{}-{}".format(subject_id, get_column(item, 'id'))
                    subject_node.add_child(nodes.VideoNode(
                        source_id=item_id,
                        title=get_column(item, 'name'),
                        description=get_column(item, 'description'),
                        files=[
                            files.VideoFile(path=get_column(item, 'file'))
                        ],
                        language=lang_obj,
                        # FIXME: Use the column's license field instead of hardcoding.
                        license=licenses.get_license(le_licenses.CC_BY, copyright_holder=get_column(item, "copyright")),
                    ))

                class_node.add_child(subject_node)

            channel.add_child(class_node)
def add_node_document(booklist, level_topic, as_booklist):
    """Add an HTML5AppNode to `level_topic` for every book that has a zip.

    Each book defaults to the StoryWeaver domain namespace and its own
    ``source_id``. Books published by the African Storybook Initiative that
    ``check_if_story_in_AS`` reports as found use the African Storybook
    namespace and the id returned by that lookup instead. Books for which
    ``get_html5_app_zip_path`` returns nothing are skipped.
    """
    for item in booklist:
        # Default source domain and content id.
        domain = uuid.uuid5(uuid.NAMESPACE_DNS, 'storyweaver.org.in')
        book_id = str(item['source_id'])

        # If the publisher is AS and the book is found, switch to the AS
        # source domain and content id.
        if item['publisher'] == 'African Storybook Initiative':
            check = check_if_story_in_AS(as_booklist, item['title'])
            # Equality with True (not plain truthiness) is kept on purpose so
            # the outcome matches the previous behaviour for non-bool flags.
            if check[0] == True:
                domain = uuid.uuid5(uuid.NAMESPACE_DNS,
                                    'www.africanstorybook.org')
                book_id = check[1]

        link = get_html5_app_zip_path(item['slug'])
        if not link:
            continue

        html5_file = HTMLZipFile(path=link)
        level_topic.add_child(HTML5AppNode(
            title=item['title'],
            source_id=book_id,
            author=item['author'],
            files=[html5_file],
            license=get_license(licenses.CC_BY,
                                copyright_holder='Pratham Books'),
            thumbnail=item.get('thumbnail'),
            description=item['description'],
            domain_ns=domain,
        ))
def vimeoNode(url):
    """Create a ``VideoNode`` for a video page, titled after the page.

    Downloads the page at `url`, reads the text of its ``<title>`` element
    and uses it as the node title. The URL itself serves as the
    ``source_id`` and as the target of the node's single ``WebVideoFile``.

    Args:
        url: address of the video page.

    Returns:
        The ``VideoNode``; its title is an empty string when the page has no
        ``<title>`` element.
    """
    page_html = requests.get(url).text  # grabs request of the URL

    # Read the title text straight from the tag. The earlier approach went
    # through str() of a find_all() list and then stripped markup and the
    # list brackets by hand, which also deleted any "[" or "]" that belonged
    # to the title itself.
    soup = bs4.BeautifulSoup(page_html, "html.parser")
    title_tag = soup.find('title')
    video_title = title_tag.get_text() if title_tag is not None else ''

    # Create Video Node
    video_node = VideoNode(
        source_id=url,  # set to url
        title=video_title,
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        language=getlang('en').id,
        derive_thumbnail=True,  # video-specific flag
        thumbnail=None,
        files=[
            WebVideoFile(web_url=url, language='en'),
        ])

    # Return Video Node
    return video_node
def googleNode(url):
    """Download a Google document as a file and wrap it in a ``DocumentNode``.

    The kind of document is chosen by looking for ``'document'``,
    ``'presentation'`` or ``'spreadsheets'`` in `url`, in that order, and the
    matching download helper is called with the id extracted from the URL.
    The file name returned by the helper is used both as the node title and
    as the path of its ``DocumentFile``.

    Args:
        url: link to the Google document.

    Returns:
        The ``DocumentNode`` for the downloaded file.

    Raises:
        ValueError: if `url` contains none of the supported markers.
    """
    # Get doc id
    doc_id = getIdFromUrl(url)

    # Call proper google download function
    if 'document' in url:
        print("Found Document")
        # Uses download function to get pdf of document, returns filename
        fileName = downloadDocument(doc_id)
    elif 'presentation' in url:
        print("Found Presentation")
        # Uses download function to get pdf of powerpoint, returns filename
        fileName = downloadPowerpoint(doc_id)
    elif 'spreadsheets' in url:
        print("Found Spreadsheet")
        fileName = downloadSpreadsheet(doc_id)
    else:
        # Without this branch the code below hit an unbound `fileName`.
        raise ValueError(
            "Unsupported Google URL (expected a document, presentation or "
            "spreadsheets link): %s" % url)

    # Use download and filename to create node
    node = DocumentNode(
        source_id=doc_id,
        title=fileName,
        language="en",
        description="",
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        files=[DocumentFile(
            path=fileName,
            language="en",
        )],
    )
    return node
def linkAssignment(linkData):
    """Mirror a linked web page into a zip and return it as an HTML5AppNode.

    Uses the first link attachment in `linkData`: the page is fetched, its
    static assets are downloaded into a folder named ``myzipper<N>`` (``N``
    comes from the global ``zipId`` counter, which is then incremented), the
    processed document is written there as ``index.html`` and the folder is
    archived to ``myzipper<N>.zip``. Both the folder and the archive are
    recorded in ``filesCreated``.

    Returns:
        The ``HTML5AppNode`` whose only file is the created zip.
    """
    global zipId

    # Get URL and Title from JSON info
    first_link = linkData['attachments']['links']['link'][0]
    url = first_link['url']
    title = first_link['title']

    # Make session and request to get HTML
    session = requests.Session()
    session.headers["User-Agent"] = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
    )
    page_bytes = session.get(url).content

    # HTML parser
    soup = bs4.BeautifulSoup(page_bytes, "html.parser")

    # Path for folder to hold content
    folder = 'myzipper' + str(zipId)
    print("\n\n\n" + folder + "\n\n\n")
    zipId += 1

    # Start from a clean folder if one is left over
    if os.path.exists(folder):
        shutil.rmtree(folder)

    # Download all assets (html, css, js, ...) from url
    doc = download_static_assets(soup,
                                 folder,
                                 url,
                                 request_fn=make_request,
                                 url_blacklist=url_blacklist)

    # Write out the HTML source.
    index_path = os.path.join(folder, "index.html")
    with open(index_path, "w", encoding="utf-8") as index_file:
        index_file.write(str(doc))

    # Report where the files were downloaded
    print("        ... downloaded to %s" % folder)
    filesCreated.append(folder)

    # Make zip file from folder contents
    archive_path = folder + '.zip'
    shutil.make_archive(folder, 'zip', folder)
    filesCreated.append(archive_path)

    # Creation of file and node
    link_file = HTMLZipFile(path=archive_path)
    return HTML5AppNode(
        source_id=url,
        title=title,
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        language=getlang('en').id,
        derive_thumbnail=False,
        thumbnail=None,
        files=[link_file],
    )
 def __init__(self, source_id=None, lang="en", name=None):
     """Store the identifying fields and prepare the menu and license.

     ``filepath`` starts out as ``None``. The menu is created for the same
     language and name, and the license is kept in its dictionary form.
     """
     self.source_id, self.filepath = source_id, None
     self.name, self.lang = name, lang
     self.menu = Menu(lang=self.lang, name=name)
     cc_license = get_license(licenses.CC_BY_NC_SA,
                              copyright_holder=COPYRIGHT_HOLDER)
     self.license = cc_license.as_dict()
 def __init__(self, source_id, lang="en", lincese="", name=None):
     """Derive the file name, absolute source id, display name and license.

     A `source_id` starting with "/" is resolved against ``BASE_URL``; any
     other value is stored unchanged. The `lincese` argument is accepted but
     not used in this method.
     """
     self.filename = get_name_from_url(source_id)
     if source_id.startswith("/"):
         self.source_id = urljoin(BASE_URL, source_id)
     else:
         self.source_id = source_id
     self.filepath = None
     self.lang = lang
     self.name = "{}_{}".format(name, self.filename)
     cc_license = get_license(licenses.CC_BY_NC_SA,
                              copyright_holder=COPYRIGHT_HOLDER)
     self.license = cc_license.as_dict()
Ejemplo n.º 11
0
    def construct_channel(self, *args, **kwargs):
        """
        Create the ChannelNode and build its topic tree.

        Every entry loaded from JSON_FILE becomes a TopicNode; the PDF it
        points to is split into chapters and each chapter is added below the
        topic as a DocumentNode.

        Args:
          - args: arguments passed in during upload_channel (currently None)
          - kwargs: extra arguments and options not handled by `uploadchannel`.
            For example, add the command line option   lang="fr"  and the string
            "fr" will be passed along to `construct_channel` as kwargs['lang'].
        Returns: ChannelNode
        """
        # The same tags go on every topic and document; each node receives
        # its own list copy.
        shared_tags = (
            "Teacher facing",
            "Professional development",
            "Life skills",
            "Intercultural skills",
            "Mentorship",
            "Formal contexts",
        )

        # Create ChannelNode from data in self.channel_info
        channel = self.get_channel(*args, **kwargs)

        for topic in load_json_from_file(JSON_FILE):
            book_title = topic['book_title']
            source_id = book_title.replace(" ", "_")
            url = topic['path_or_url']
            topic_node = nodes.TopicNode(source_id=source_id,
                                         title=book_title,
                                         tags=list(shared_tags))
            channel.add_child(topic_node)

            parser = pdf.PDFParser(url, toc=topic['chapters'])
            parser.open()
            for chapter in parser.split_chapters():
                chapter_title = chapter['title']
                chapter_file = files.DocumentFile(chapter['path'])
                chapter_license = licenses.get_license(CHANNEL_LICENSE, "INTO",
                                                       LICENSE_DESCRIPTION)
                topic_node.add_child(nodes.DocumentNode(
                    source_id="{} {}".format(book_title, chapter_title),
                    title=chapter_title,
                    author="INTO",
                    tags=list(shared_tags),
                    files=[chapter_file],
                    license=chapter_license,
                    copyright_holder="INTO"))

        # Check for errors in channel construction
        raise_for_invalid_channel(channel)

        return channel
Ejemplo n.º 12
0
def linkAssignment(material):
    """Package a linked web page as an HTML5 zip and return its node.

    Downloads ``material["link"]["url"]``, writes the prettified page as the
    index of ``./myzipper.zip`` and copies every ``<script src>`` and
    ``<link href>`` target it references into the archive. The stylesheet
    URLs are also listed, one per line, in a local ``css_files.css`` file.

    Args:
        material: mapping whose ``"link"`` entry carries ``"url"`` and
            ``"title"``.

    Returns:
        The ``HTML5AppNode`` whose only file is ``./myzipper.zip``.
    """
    url = material["link"]["url"]
    session = requests.Session()
    session.headers["User-Agent"] = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36"
    )
    html = session.get(url).content
    soup = bs(html, "html.parser")

    # Absolute URLs of every script / link tag that actually carries a target.
    script_files = [
        urljoin(url, tag.attrs.get("src"))
        for tag in soup.find_all("script") if tag.attrs.get("src")
    ]
    css_files = [
        urljoin(url, tag.attrs.get("href"))
        for tag in soup.find_all("link") if tag.attrs.get("href")
    ]

    with HTMLWriter('./myzipper.zip') as zipper:
        # The index goes straight into the zip. The previous code also opened
        # a local "index.html" for writing (truncating it without writing
        # anything) and "javascript_files.js" for reading (which raised
        # FileNotFoundError when that file did not exist); neither file was
        # used, so both opens are gone.
        zipper.write_index_contents(soup.prettify())

        # NOTE(review): every script is stored under the same "scripts.js"
        # name (and every stylesheet under "style.css"), so later entries
        # presumably replace earlier ones - confirm against HTMLWriter.
        for js_file in script_files:
            zipper.write_url(js_file, "scripts.js", directory="src")

        with open("css_files.css", "w") as f:
            for css_file in css_files:
                print(css_file, file=f)
                zipper.write_url(css_file, "style.css", directory="styles")

    link_file = HTMLZipFile(path='./myzipper.zip')
    link_node = HTML5AppNode(source_id=material["link"]["url"],
                             title=material["link"]["title"],
                             license=get_license(
                                 licenses.CC_BY,
                                 copyright_holder='Copyright holder name'),
                             language=getlang('en').id,
                             derive_thumbnail=False,
                             thumbnail=None,
                             files=[link_file])
    return link_node
Ejemplo n.º 13
0
def download_videos(topic, language):
    """Download the scraped videos for `language` and add them to `topic`.

    Reads ``scraped_video_urls.json`` from ``DOWNLOADS_FOLDER``, downloads
    each URL listed under ``[language]['urls']`` with youtube_dl and adds one
    ``VideoNode`` per successfully downloaded video, using the description at
    the same position in ``[language]['descriptions']``.

    A video whose download or extraction fails is reported and skipped.

    Args:
        topic: node that receives the video nodes through ``add_child``.
        language: key into the scraped JSON; also used in the output file
            names and as the language of each ``VideoFile``.

    Returns:
        The `topic` that was passed in.
    """
    scraped_video_urls_path = os.path.join(DOWNLOADS_FOLDER,
                                           'scraped_video_urls.json')
    with open(scraped_video_urls_path) as f:
        scraped_video_urls = json.load(f)

    video_urls_list = scraped_video_urls[language]['urls']
    video_descriptions_list = scraped_video_urls[language]['descriptions']

    for video_num, video_url in enumerate(video_urls_list):
        ydl_options = {
            'outtmpl':
            f'downloads/videos/{video_num}_{language}.%(ext)s',  # uses output templates, see documentation
            'writethumbnail':
            False,
            'no_warnings':
            True,
            'continuedl':
            False,
            'restrictfilenames':
            True,
            'quiet':
            False,
            'format':
            "bestvideo[height<=480][ext=mp4]+bestaudio[ext=m4a]/best[height<=480][ext=mp4]",
            # Note the format specification is important so we get mp4 and not taller than 480
        }

        with youtube_dl.YoutubeDL(ydl_options) as ydl:
            try:
                ydl.add_default_info_extractors()
                vinfo = ydl.extract_info(video_url, download=True)
            except (youtube_dl.utils.DownloadError,
                    youtube_dl.utils.ContentTooShortError,
                    youtube_dl.utils.ExtractorError) as e:
                print('error_occured', video_url, e)
                # Skip this video: without this, the code below ran with
                # `vinfo` unbound (first video) or left over from the
                # previous video.
                continue

        ext_p1 = vinfo['requested_formats'][0]['format_id']
        ext_p2 = vinfo['requested_formats'][0]['ext']
        video_path = f'downloads/videos/{video_num}_{language}.f{ext_p1}.{ext_p2}'
        video_node = VideoNode(
            source_id=vinfo['webpage_url'],
            title=vinfo['title'],
            description=video_descriptions_list[video_num],
            # aggregator=LE,
            thumbnail=vinfo['thumbnail'],
            # NOTE(review): 'NC-SA 4.0' as copyright holder looks like part of
            # a license name rather than a holder - confirm the intended
            # license id and holder.
            license=get_license('CC BY', copyright_holder='NC-SA 4.0'),
            # role=roles.COACH,
            files=[VideoFile(path=video_path, language=language)])

        topic.add_child(video_node)
    return topic
Ejemplo n.º 14
0
def test_license_serilizibility(license_objects, special_license):
    """Check that licenses survive an as_dict -> JSON -> get_license round trip.

    Every license in `license_objects`, plus `special_license`, is turned
    into a dict, dumped to JSON, loaded back and rebuilt with
    ``get_license``; the rebuilt object must compare equal to the original
    according to ``_compare_licence_objects``.
    """
    # Build a new list rather than appending to `license_objects`, so the
    # list handed in by the caller is left unmodified.
    orig_licenses = list(license_objects) + [special_license]
    for licence_orig in orig_licenses:
        # serialize
        license_dict = licence_orig.as_dict()
        license_json = json.dumps(license_dict)
        # deserialize
        license_copy_dict = json.loads(license_json)
        license_copy = get_license(**license_copy_dict)

        same_attributes = _compare_licence_objects(licence_orig, license_copy)
        assert same_attributes, 'License attributes not the same after serizlize'
Ejemplo n.º 15
0
    def construct_channel(self, *args, **kwargs):
        """
        Create the `ChannelNode` and populate it with topics and documents.

        Two top-level topics (teen and adult) are added to the channel. One
        topic per level is prepared, filled with the subtopics and documents
        returned by `parse_website`, and finally attached to the teen or
        adult topic depending on which level list it appears in.
        """
        # Create channel
        channel = self.get_channel(*args, **kwargs)  # uses self.channel_info

        # Top-level topics
        teen_topic = TopicNode(source_id="topic-teen", title="K-12 Resources")
        adult_topic = TopicNode(source_id="topic-adult",
                                title="Adult Continue Education Resources")
        for top_level_topic in (teen_topic, adult_topic):
            channel.add_child(top_level_topic)

        # One topic per level, looked up by level name
        level_map = {
            teen_level: TopicNode(source_id="topic-teen-" + teen_level,
                                  title=teen_level)
            for teen_level in teen_levels
        }
        level_map.update({
            adult_level: TopicNode(source_id="topic-adult-" + adult_level,
                                   title=adult_level)
            for adult_level in adult_levels
        })

        # Fill every level with its subtopics and their documents
        for level, subtopics in parse_website().items():
            for subtopic, resources in subtopics.items():
                subtopic_node = TopicNode(source_id=subtopic, title=subtopic)
                for resource in resources:
                    resource_file = DocumentFile(path=resource['link'])
                    subtopic_node.add_child(
                        DocumentNode(title=resource['title'],
                                     source_id=resource['title'],
                                     files=[resource_file],
                                     license=get_license(
                                         licenses.PUBLIC_DOMAIN)))
                level_map[level].add_child(subtopic_node)

        # Hang every level under its audience topic
        for level_name, level_node in level_map.items():
            if level_name in teen_levels:
                teen_topic.add_child(level_node)
            elif level_name in adult_levels:
                adult_topic.add_child(level_node)

        # the `construct_channel` method returns a ChannelNode that will be
        # processed by the ricecooker framework
        return channel
Ejemplo n.º 16
0
def _build_tree(node, sourcetree):
    """
    Convert the entries of `sourcetree` into child nodes of `node`.

    Topic entries are added and then filled recursively from their
    "children"; video entries are added together with their files. Entries
    of any other kind, or whose kind cannot be determined, are skipped.
    Returns `node`.
    """
    for source in sourcetree:
        try:
            if 'files' in source:
                main_file = source['files'][0]
            else:
                main_file = {}
            web_video_data = (main_file.get('youtube_id')
                              or main_file.get('web_url'))
            kind = guess_content_kind(path=main_file.get('path'),
                                      web_video_data=web_video_data,
                                      questions=source.get("questions"))
        except UnknownContentKindError:
            continue

        if kind == content_kinds.TOPIC:
            topic = nodes.TopicNode(
                source_id=source["id"],
                title=source["title"],
                author=source.get("author"),
                description=source.get("description"),
                thumbnail=source.get("thumbnail"),
            )
            node.add_child(topic)
            # Descend into the topic's own children, if it has any.
            _build_tree(topic, source.get("children", []))

        elif kind == content_kinds.VIDEO:
            video = nodes.VideoNode(
                source_id=source["id"],
                title=source["title"],
                license=get_license(
                    source.get("license"),
                    description="Description of license",
                    copyright_holder=source.get('copyright_holder')),
                author=source.get("author"),
                description=source.get("description"),
                derive_thumbnail=True,  # video-specific data
                thumbnail=source.get('thumbnail'),
            )
            add_files(video, source.get("files") or [])
            node.add_child(video)

        # Any other content kind is ignored.

    return node
Ejemplo n.º 17
0
def build_pdf_topics(main_topic, sections, lang_code):
    """
    Add the documents described by `sections` below `main_topic`.

     - A section without 'children' is added directly as a DocumentNode.
     - A section with 'children' is added as a TopicNode, and each of its
       children becomes a DocumentNode inside that topic.

    Returns `main_topic`.
    """
    LICENSE = get_license("CC BY-NC-SA", copyright_holder=POINTB)

    def make_doc_node(entry, description):
        # Shared DocumentNode construction for sections and subsections.
        title = entry['title']
        abspath = entry['path']
        filename = os.path.basename(abspath)
        return DocumentNode(
            title=title,
            description=description,
            source_id='%s-%s' % (filename, lang_code),
            license=LICENSE,
            aggregator=LE,
            language=lang_code,
            role=roles.COACH,
            files=[DocumentFile(path=abspath, language=lang_code)])

    for i, section in enumerate(sections):
        if 'children' in section:
            # Section with subsections: a topic holding one document each.
            section_topic = TopicNode(title=section['title'],
                                      source_id="pointb_section_" + str(i))
            main_topic.add_child(section_topic)
            for subsection in section['children']:
                section_topic.add_child(make_doc_node(subsection, ''))
        else:
            # Plain section: a single document directly under the main topic.
            main_topic.add_child(make_doc_node(
                section,
                'Chapter from A GUIDE TO BECOMING A 21ST CENTURY TEACHER'))

    return main_topic
Ejemplo n.º 18
0
def add_documents(topic, chapters, language):
    """Add one document (or a topic of documents) per chapter to `topic`.

    A chapter with a 'children' entry becomes a TopicNode containing one
    DocumentNode per child; any other chapter becomes a single DocumentNode.
    All nodes use the same thumbnail file from DOWNLOADS_FOLDER.
    """
    for idx, chapter in enumerate(chapters):
        has_children = 'children' in chapter.keys()
        doc_title = chapter['title']
        thumbnail_path = DOWNLOADS_FOLDER + '/thumbnail.png'

        if not has_children:
            topic.add_child(DocumentNode(
                title=doc_title,
                description=f'Chapter {idx} from 21ST CENTURY GUIDE',
                source_id=language + doc_title,
                # NOTE(review): 'NC-SA 4.0' as copyright holder looks like
                # part of a license name - confirm the intended values.
                license=get_license('CC BY', copyright_holder='NC-SA 4.0'),
                language=language,
                thumbnail=thumbnail_path,
                files=[DocumentFile(path=chapter['path'], language=language)],
            ))
            continue

        child_topic_node = TopicNode(title=doc_title,
                                     source_id=language + doc_title,
                                     thumbnail=thumbnail_path)
        for child in chapter['children']:
            child_doc_title = child['title']
            child_topic_node.add_child(DocumentNode(
                title=child_doc_title,
                description=f'Chapter {idx} from {doc_title}',
                source_id=language + child_doc_title,
                license=get_license('CC BY', copyright_holder='NC-SA 4.0'),
                language=language,
                thumbnail=thumbnail_path,
                files=[
                    DocumentFile(path=child['path'], language=language)
                ],
            ))
        topic.add_child(child_topic_node)
    def process_file(self, download=False, filepath=None):
        """Download the video and, if a file path resulted, build `self.node`.

        `self.node` is a plain dict describing a video node whose files are
        the video itself followed by the entries from `subtitles_dict()`.
        Nothing is assigned when `self.filepath` is empty after the download.
        """
        self.download(download=download, base_path=filepath)
        if not self.filepath:
            return

        video_files = [{'file_type': content_kinds.VIDEO,
                        'path': self.filepath}]
        video_files.extend(self.subtitles_dict())

        node_license = get_license(licenses.CC_BY,
                                   copyright_holder=COPYRIGHT_HOLDER)
        self.node = {
            'kind': content_kinds.VIDEO,
            'source_id': self.resource_url,
            'title': self.filename,
            'description': '',
            'files': video_files,
            'language': self.lang,
            'license': node_license.as_dict(),
        }
Ejemplo n.º 20
0
def documentAssignment(material):
    """Build a DocumentNode for a Drive file attached to an assignment.

    The PDF is expected at ``documents/<slugified title>.pdf``. The node
    uses the Drive file's id as source_id and its title as title. The
    "Assignments" entry of the global ``courseDataCopy`` is printed before
    returning.
    """
    drive_file = material["driveFile"]["driveFile"]
    docPath = "documents/" + slugify(drive_file["title"]) + ".pdf"

    english = getlang('en').id
    document_node = DocumentNode(
        source_id=drive_file["id"],
        title=drive_file["title"],
        language=english,
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        derive_thumbnail=True,
        thumbnail=None,
        files=[DocumentFile(path=docPath, language=english)])
    print(courseDataCopy["Assignments"])
    return document_node
 def construct_channel(self, **kwargs):
     """Return the channel with one topic holding one PDF document."""
     channel = self.get_channel(**kwargs)

     potato_topic = TopicNode(title="Potatoes!", source_id="<potatos_id>")
     channel.add_child(potato_topic)

     pdf_file = DocumentFile(
         path='https://www.gov.mb.ca/inr/pdf/pubs/mafri-potatoe.pdf',
         language='en')
     potato_topic.add_child(DocumentNode(
         title='Growing potatoes',
         description='An article about growing potatoes on your rooftop.',
         source_id='pubs/mafri-potatoe',
         license=get_license('CC BY', copyright_holder='University of Alberta'),
         language='en',
         files=[pdf_file],
     ))
     return channel
Ejemplo n.º 22
0
def videoAssignment(material):
    """Build a VideoNode for the YouTube video attached to `material`.

    The YouTube id is used as the node's source_id and for both attached
    files: the video itself (not high resolution) and its English subtitles.
    """
    youtube_video = material["youtubeVideo"]
    youtube_id = youtube_video["id"]  # usually set source_id to youtube_id

    attached_files = [
        YouTubeVideoFile(youtube_id=youtube_id,
                         high_resolution=False,
                         language='en'),
        YouTubeSubtitleFile(youtube_id=youtube_id, language='en'),
    ]
    return VideoNode(
        source_id=youtube_id,
        title=youtube_video["title"],
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        language=getlang('en').id,
        derive_thumbnail=True,  # video-specific flag
        thumbnail=None,
        files=attached_files)
Ejemplo n.º 23
0
def include_video_topic(topic_node, video_data, lang_obj):
    """Add a VideoNode built from `video_data` to `topic_node`.

    Args:
        topic_node: parent node that receives the video through ``add_child``.
        video_data: object providing ``uid``, ``title``, ``description``,
            ``thumbnail`` and ``language``.
        lang_obj: passed to ``clean_video_title`` together with the title.
    """
    # For YouTube imports, set source_id to the youtube_id.
    # (A random id that used to be generated here was never used and has
    # been removed.)
    video_source_id = str(video_data.uid)
    video_node = VideoNode(source_id=video_source_id,
                           title=clean_video_title(video_data.title, lang_obj),
                           description=video_data.description,
                           author=ARVIND,
                           thumbnail=video_data.thumbnail,
                           license=get_license("CC BY-NC",
                                               copyright_holder=ARVIND),
                           files=[
                               YouTubeVideoFile(youtube_id=video_data.uid,
                                                language=video_data.language)
                           ])
    topic_node.add_child(video_node)
Ejemplo n.º 24
0
def include_video_topic(topic_node, video_data, lang_obj):
    """Add a VideoNode for `video_data` below `topic_node`.

    The node's source_id is the video uid prefixed with "arvind-video-", and
    its single file is the YouTube video requested without high resolution.
    """
    video_id = video_data.uid
    youtube_file = YouTubeVideoFile(
        youtube_id=video_id,
        language=video_data.language,
        high_resolution=False,
    )
    topic_node.add_child(
        VideoNode(source_id='arvind-video-{0}'.format(video_id),
                  title=clean_video_title(video_data.title, lang_obj),
                  description=video_data.description,
                  author=ARVIND,
                  thumbnail=video_data.thumbnail,
                  license=get_license("CC BY-NC", copyright_holder=ARVIND),
                  files=[youtube_file]))
Ejemplo n.º 25
0
 def construct_channel(self, **kwargs):
     """Return the channel with one topic holding one PDF document."""
     channel = self.get_channel(**kwargs)

     potato_topic = TopicNode(title="Potatoes!", source_id="<potatoes_id>")
     channel.add_child(potato_topic)

     pdf_file = DocumentFile(
         path="https://www.gov.mb.ca/inr/pdf/pubs/mafri-potatoe.pdf",
         language="en",
     )
     cc_by = get_license("CC BY", copyright_holder="University of Alberta")
     potato_topic.add_child(
         DocumentNode(
             title="Growing potatoes",
             description="An article about growing potatoes on your rooftop.",
             source_id="pubs/mafri-potatoe",
             license=cc_by,
             language="en",
             files=[pdf_file],
         )
     )
     return channel
Ejemplo n.º 26
0
def build_burmese_video_topics(topic):
    """Add one video node per downloaded ``LANG_CODE_MY`` video to ``topic``.

    Calls ``download_videos(LANG_CODE_MY)`` and, for every returned entry,
    attaches a coach-role ``VideoNode`` whose single file is the entry's
    local ``filepath``.

    Args:
        topic: Parent node; receives each video node via ``add_child``.

    Returns:
        ``topic`` once all nodes are attached, or ``False`` when
        ``download_videos`` returns a falsy result.
    """
    downloaded = download_videos(LANG_CODE_MY)
    if not downloaded:
        print('==> Download of Videos FAILED!')
        return False

    for entry in downloaded:
        local_path = entry.filepath
        topic.add_child(
            VideoNode(
                source_id=entry.uid,
                title=entry.title,
                description=entry.description,
                aggregator=LE,
                thumbnail=entry.thumbnail,
                license=get_license("CC BY-NC-SA", copyright_holder=POINTB),
                role=roles.COACH,
                files=[VideoFile(path=local_path, language=LANG_CODE_MY)],
            )
        )
    return topic
Ejemplo n.º 27
0
    def construct_channel(self, **kwargs):
        """Scrape the goalkicker landing page into a topic-per-book channel.

        Every element with class ``bookContainer`` on the landing page
        contributes one book page URL. Each book page is fetched and parsed
        with ``parse_book_info``; the result yields a ``TopicNode`` (keyed
        by the book's subject) containing a single English ``DocumentNode``
        whose file path is the book page URL plus the parsed
        ``relative_url``.

        Returns:
            The channel object from ``self.get_channel(**kwargs)`` with all
            book topics attached.
        """
        channel = self.get_channel(**kwargs)

        # Landing page: its URL doubles as the prefix for every book link.
        base_url = 'https://' + self.channel_info['CHANNEL_SOURCE_DOMAIN'] + '/'
        landing_soup = get_soup(base_url)

        # Resolve all book page URLs up front, before fetching any of them.
        book_page_urls = []
        for container in landing_soup.find_all(class_='bookContainer'):
            book_page_urls.append(base_url + container.find('a')['href'])

        for book_page_url in book_page_urls:
            # Fetch the book page and pull out its metadata.
            book = parse_book_info(get_soup(book_page_url))
            book['absolute_url'] = book_page_url + book['relative_url']

            # One topic per book, named after the book's subject.
            subject = book['subject']
            subject_topic = TopicNode(title=subject,
                                      source_id='topic/' + subject)
            channel.add_child(subject_topic)

            subject_topic.add_child(
                DocumentNode(
                    title=book['title'],
                    description=book['description'],
                    source_id=book['source_id'],
                    license=get_license('CC BY-SA',
                                        copyright_holder='Creative Commons'),
                    language='en',
                    files=[
                        DocumentFile(path=book['absolute_url'],
                                     language='en')
                    ],
                )
            )

        return channel
def download_video_topics(topic_node,
                          playlist_item,
                          lang_obj,
                          use_cache=True,
                          to_sheet=False):
    """
    Add a YouTube video node to ``topic_node`` for each described playlist video.

    The playlist is loaded through ``RefugeeResponsePlaylist``. Only videos
    whose id is a key of ``VIDEO_DESCRIPTION_MAP`` are included; all others
    are skipped. A failure while building or attaching one node is logged
    and does not stop the remaining videos from being processed.

    Args:
        topic_node: Parent node; receives each video node via ``add_child``.
        playlist_item: Passed to ``RefugeeResponsePlaylist`` to identify the
            playlist.
        lang_obj: Object exposing ``name`` (used in the node's source id)
            and ``code`` (used as the node and file language).
        use_cache: Forwarded to ``RefugeeResponsePlaylist``.
        to_sheet: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        None.
    """
    playlist_obj = RefugeeResponsePlaylist(playlist_item, use_cache)
    playlist_info = playlist_obj.get_playlist_info()

    # Treat a missing/None/empty 'children' entry as an empty playlist
    # instead of failing with a TypeError when iterating over None.
    children = playlist_info.get('children')
    if not children:
        LOGGER.warning("Playlist has no 'children' entries; nothing to add")
        return

    for video in children:
        video_id = video['id']
        if video_id not in VIDEO_DESCRIPTION_MAP:
            # Exclude videos
            continue
        video_description = VIDEO_DESCRIPTION_MAP[video_id]
        LOGGER.info("Video Description: '%s'", video_description)

        # Built only for videos that are actually included.
        video_source_id = 'refugee-response-{0}-{1}'.format(
            lang_obj.name, video_id)
        try:
            video_node = nodes.VideoNode(
                source_id=video_source_id,
                title=video['title'],
                description=video_description,
                author=REFUGEE_RESPONSE,
                language=lang_obj.code,
                provider=REFUGEE_RESPONSE,
                thumbnail=video['thumbnail'],
                license=licenses.get_license(
                    "CC BY-NC-ND", copyright_holder=REFUGEE_RESPONSE),
                files=[
                    files.YouTubeVideoFile(youtube_id=video_id,
                                           language=lang_obj.code)
                ])
            topic_node.add_child(video_node)
        except Exception as e:
            # Deliberate best-effort: one bad video must not abort the rest.
            LOGGER.error('Error downloading this video: %s', e)
def youtubeNode(url):
    """Build a ``VideoNode`` for the YouTube video that ``url`` points to.

    The video id is taken from the ``v`` query parameter of ``url``. The
    page at ``url`` is fetched and the text of its ``<title>`` element is
    used as the node title (empty string when the page has no title).

    Args:
        url: YouTube watch URL carrying a ``v`` query parameter.

    Returns:
        A ``VideoNode`` whose files are a low-resolution ``YouTubeVideoFile``
        and an English ``YouTubeSubtitleFile`` for the video.

    Raises:
        KeyError: If ``url`` has no ``v`` query parameter.
    """
    # Imported locally so both names are unambiguously the urllib.parse
    # functions, however the surrounding module happens to import
    # ``urlparse`` (as a module or as a function).
    from urllib.parse import parse_qs, urlparse

    #Picking out youtube video ID from URL
    query = parse_qs(urlparse(url).query)
    videoID = query["v"][0]

    r = requests.get(url).text  # grabs request of the URL

    #Get video title
    bs = bs4.BeautifulSoup(r, "html.parser")
    title_tag = bs.find('title')

    # Read the element's text directly: this keeps any brackets that are
    # part of the title and decodes HTML entities, unlike stringifying the
    # tag and stripping markup by hand.
    newTitle = title_tag.get_text() if title_tag is not None else ''

    #Create Video Node
    video_node = VideoNode(
        source_id=videoID,  # usually set source_id to youtube_id
        title=newTitle,
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        language=getlang('en').id,
        derive_thumbnail=True,  # video-specific flag
        thumbnail=None,
        files=[
            YouTubeVideoFile(youtube_id=videoID,
                             high_resolution=False,
                             language='en'),
            YouTubeSubtitleFile(youtube_id=videoID, language='en')
        ])

    #Return Video Node
    return video_node
Ejemplo n.º 30
0
def build_english_video_topics(topic):
    """Add one video node per downloaded ``LANG_CODE_EN`` video to ``topic``.

    Calls ``download_videos(LANG_CODE_EN)`` and, for every returned entry,
    attaches a coach-role ``VideoNode`` whose single file is the entry's
    local ``filepath``. The ``'(English Language)'`` marker is removed from
    each title and the result is stripped of surrounding whitespace.

    Args:
        topic: Parent node; receives each video node via ``add_child``.

    Returns:
        ``topic`` once all nodes are attached, or ``False`` when
        ``download_videos`` returns a falsy result.
    """
    downloaded = download_videos(LANG_CODE_EN)
    if not downloaded:
        print('==> Download of Videos FAILED!')
        return False

    # NOTE(cpauya): VideoNode constructor has no argument for language code?
    for entry in downloaded:
        local_path = entry.filepath
        display_title = entry.title.replace('(English Language)', '').strip()
        topic.add_child(
            VideoNode(
                source_id=entry.uid,
                title=display_title,
                description=entry.description,
                aggregator=LE,
                thumbnail=entry.thumbnail,
                license=get_license("CC BY-NC-SA", copyright_holder=POINTB),
                role=roles.COACH,
                files=[VideoFile(path=local_path, language=LANG_CODE_EN)],
            )
        )

    return topic