예제 #1
0
    def getVideoDetails(self, url, video_quality, subs_language=None):
        """Fetch a talk page and return its playback details.

        Args:
            url: Address of the talk page; handed to ``self.getHTML``.
            video_quality: Forwarded unchanged to ``talk_scraper.get``.
            subs_language: When truthy, subtitles are requested from
                ``subtitles_scraper`` for this value; otherwise none are
                fetched.

        Returns:
            A tuple ``(title, video_url, subs, info)``. ``subs`` is ``None``
            when no subtitles were requested, and ``info`` is a dict with
            the keys ``Director``, ``Genre``, ``Plot`` and ``PlotOutline``.
        """
        page = self.getHTML(url)
        scraped = talk_scraper.get(page, video_quality)
        video_url, title, speaker, plot, talk_json = scraped

        if subs_language:
            subs = subtitles_scraper.get_subtitles_for_talk(talk_json, subs_language, self.logger)
        else:
            subs = None

        # The plot text is used for both the full and the outline field.
        info = {'Director': speaker, 'Genre': 'TED', 'Plot': plot, 'PlotOutline': plot}
        return title, video_url, subs, info
예제 #2
0
class TedTalks:
    """Looks up playback details for a talk page."""

    def __init__(self, getHTML, logger):
        """Store the collaborators used by the lookup methods.

        Args:
            getHTML: Callable taking a URL and returning the page HTML.
            logger: Passed through to the subtitles scraper.
        """
        self.getHTML = getHTML
        self.logger = logger

    def getVideoDetails(self, url, video_quality, subs_language=None):
        """Fetch a talk page and return its playback details.

        Args:
            url: Address of the talk page; handed to ``self.getHTML``.
            video_quality: Forwarded unchanged to ``talk_scraper.get``.
            subs_language: When truthy, subtitles are requested from
                ``subtitles_scraper`` for this value.

        Returns:
            A tuple ``(title, video_url, subs, info)``. ``subs`` is ``None``
            when no subtitles were requested, and ``info`` is a dict with
            the keys ``Director``, ``Genre``, ``Plot`` and ``PlotOutline``.

        Raises:
            Exception: Any error from ``talk_scraper.get`` is re-raised as
                the same exception type with the offending URL appended to
                its message.
        """
        talk_html = self.getHTML(url)
        try:
            video_url, title, speaker, plot, talk_json = talk_scraper.get(
                talk_html, video_quality)
        except Exception as e:
            # `e.message` only exists on Python 2 (and may be empty there);
            # fall back to str(e) so building the message never fails and
            # hides the original error.
            message = getattr(e, 'message', None) or str(e)
            raise type(e)("%s\nfor url '%s'" % (message, url))

        subs = None
        if subs_language:
            subs = subtitles_scraper.get_subtitles_for_talk(
                talk_json, subs_language, self.logger)

        return title, video_url, subs, {
            'Director': speaker,
            'Genre': 'TED',
            'Plot': plot,
            'PlotOutline': plot,
        }
    def getVideoDetails(self, url, subs_language=None):
        """Scrape a talk page for its title, video link and description.

        Args:
            url: Address of the talk page; handed to ``self.getHTML``.
            subs_language: When truthy, subtitles are requested from
                ``subtitles_scraper`` for this value.

        Returns:
            A tuple ``(title, video_url, subs, info)``. ``video_url`` is
            ``""`` when neither a download link nor a YouTube embed was
            found, ``subs`` is ``None`` when no subtitles were requested,
            and ``info`` is a dict with the keys ``Director``, ``Genre``,
            ``Plot`` and ``PlotOutline``.
        """
        # TODO: get 'related tags' and list them under genre
        html = self.getHTML(url)
        soup = BeautifulSoup(html)

        title = soup.find('span', attrs={'id': 'altHeadline'}).string
        # The text before the first ':' of the headline is taken as speaker.
        speaker = title.split(':', 1)[0]
        plot = soup.find('p', attrs={'id': 'tagline'}).string

        # Compiled once per call (not once per link); raw strings with the
        # literal dots escaped so '.' cannot match arbitrary characters.
        high_res_label = re.compile(r'High-res video \(MP4\)')
        download_href = re.compile(r'http://download\.ted\.com/talks/.+\.mp4')
        # Detectors for the link to the video, in order of preference.
        link_detectors = [
            lambda a: high_res_label.match(str(a.string)),
            lambda a: download_href.match(str(a['href'])),
        ]

        video_url = ""
        for anchor in soup.findAll('a', href=True):
            for rank, detector in enumerate(link_detectors):
                if detector(anchor):
                    video_url = anchor['href']
                    # Only look for better matches than what we have.
                    link_detectors = link_detectors[:rank]
                    break
            if not link_detectors:
                # The most preferred detector matched; nothing can beat it.
                break

        if not video_url:
            # Fall back to an embedded YouTube player, if any.
            youtube_ids = re.findall(
                r'http://(?:www\.)?youtube\.com/v/([^&]*)&', html)
            if youtube_ids:
                # As before, the last embed found on the page wins.
                video_url = ('plugin://plugin.video.youtube/'
                             '?action=play_video&videoid=%s' % youtube_ids[-1])

        subs = None
        if subs_language:
            subs = subtitles_scraper.get_subtitles_for_talk(soup, subs_language, self.logger)

        return title, video_url, subs, {'Director': speaker, 'Genre': 'TED', 'Plot': plot, 'PlotOutline': plot}