Ejemplo n.º 1
0
 def build_video_nodes(self, base_path, content):
     """Yield a ``YouTubeResourceNode`` for every video URL found in *content*.

     This is a generator: URLs are taken from ``self.get_videos_urls(content)``
     and only those that ``YouTubeResource`` reports as YouTube, non-channel
     URLs produce a node. ``download`` is called on each node before it is
     yielded.

     Args:
         base_path: Currently ignored -- see the review note below.
         content: Passed straight through to ``self.get_videos_urls``.

     Yields:
         ``YouTubeResourceNode`` instances created with ``lang=self.lang``.
     """
     videos_url = self.get_videos_urls(content)
     # NOTE(review): the ``base_path`` argument is discarded and replaced by
     # a path built from DATA_DIR. Kept as-is because callers may rely on
     # it; confirm whether the parameter was meant to be honoured.
     base_path = build_path([DATA_DIR])
     for video_url in videos_url:
         # Channel URLs are YouTube URLs too, but they are not single videos.
         if not YouTubeResource.is_youtube(video_url):
             continue
         if YouTubeResource.is_channel(video_url):
             continue
         video = YouTubeResourceNode(video_url, lang=self.lang)
         video.download(download=DOWNLOAD_VIDEOS, base_path=base_path)
         yield video
Ejemplo n.º 2
0
    def get_videos_urls(self, content):
        """Collect the YouTube URLs referenced by links and iframes in *content*.

        Args:
            content: Parsed markup exposing ``find_all`` (presumably a
                BeautifulSoup tag -- confirm against callers), or ``None``.

        Returns:
            A set of URL strings: the non-empty ``href`` of every matching
            ``<a>`` tag, plus ``YouTubeResource.transform_embed(src)`` for
            every iframe whose ``src`` is a YouTube, non-channel URL.
            The set is empty when *content* is ``None``.
        """
        urls = set()
        if content is None:
            return urls

        def is_youtube_anchor(tag):
            # The anchor check must gate all three conditions. Without the
            # explicit grouping, ``and`` binds tighter than ``or`` and any
            # tag whose text is "youtube" would match regardless of its name.
            if tag.name != "a":
                return False
            href = tag.attrs.get("href", "")
            return ("youtube" in href
                    or "youtu.be" in href
                    or tag.text.lower() == "youtube")

        for anchor in content.find_all(is_youtube_anchor):
            href = anchor.get("href", "")
            # An anchor can match on its text alone and carry no href;
            # an empty string is not a usable URL.
            if href:
                urls.add(href)

        for iframe in content.find_all("iframe"):
            # Not every iframe has a ``src`` attribute; indexing would raise
            # KeyError for those, so skip them instead.
            src = iframe.get("src")
            if not src:
                continue
            if YouTubeResource.is_youtube(
                    src) and not YouTubeResource.is_channel(src):
                urls.add(YouTubeResource.transform_embed(src))
        return urls