def getVideoDetails(self, url, video_quality, subs_language=None):
    """Return ``(title, video_url, subs, info)`` for the talk page at *url*.

    The page is downloaded with ``self.getHTML`` and handed, together with
    *video_quality*, to ``talk_scraper.get``. Subtitles are only requested
    when *subs_language* is truthy; otherwise ``subs`` is ``None``.
    ``info`` is a dict with the keys ``Director``, ``Genre``, ``Plot`` and
    ``PlotOutline``.
    """
    page = self.getHTML(url)
    scraped = talk_scraper.get(page, video_quality)
    video_url, title, speaker, plot, talk_json = scraped

    if subs_language:
        subs = subtitles_scraper.get_subtitles_for_talk(
            talk_json, subs_language, self.logger)
    else:
        subs = None

    info = {
        'Director': speaker,
        'Genre': 'TED',
        'Plot': plot,
        'PlotOutline': plot,
    }
    return title, video_url, subs, info
class TedTalks:
    """Looks up playback details for a single TED talk page.

    ``getHTML`` is a callable that takes a URL and returns the page HTML;
    ``logger`` is passed through to the subtitles scraper.
    """

    def __init__(self, getHTML, logger):
        self.getHTML = getHTML
        self.logger = logger

    def getVideoDetails(self, url, video_quality, subs_language=None):
        """Return ``(title, video_url, subs, info)`` for the talk at *url*.

        The page HTML is fetched with ``self.getHTML`` and parsed by
        ``talk_scraper.get`` using *video_quality*. Subtitles are fetched
        only when *subs_language* is truthy; otherwise ``subs`` is ``None``.
        ``info`` is a dict with the keys ``Director``, ``Genre``, ``Plot``
        and ``PlotOutline``.

        Raises:
            Whatever ``talk_scraper.get`` raised, as the same exception type,
            with the offending URL appended to the message where the type
            can be rebuilt from a single message argument.
        """
        talk_html = self.getHTML(url)
        try:
            video_url, title, speaker, plot, talk_json = talk_scraper.get(
                talk_html, video_quality)
        except Exception as e:
            # `e.message` only exists on Python 2 (and is empty for
            # multi-argument exceptions), so fall back to str(e).
            detail = getattr(e, 'message', None) or str(e)
            annotated = "%s\nfor url '%s'" % (detail, url)
            try:
                wrapped = type(e)(annotated)
            except Exception:
                # Some exception types cannot be built from a lone message;
                # surface the original error rather than a constructor error.
                wrapped = e
            raise wrapped

        subs = None
        if subs_language:
            subs = subtitles_scraper.get_subtitles_for_talk(
                talk_json, subs_language, self.logger)

        return title, video_url, subs, {
            'Director': speaker,
            'Genre': 'TED',
            'Plot': plot,
            'PlotOutline': plot,
        }
def getVideoDetails(self, url, subs_language=None):
    """Scrape the talk page at *url* and return ``(title, video_url, subs, info)``.

    ``title`` is the text of the ``span#altHeadline`` element and the speaker
    is the part of it before the first ``:``. ``video_url`` is the href of the
    best download link found on the page, or a YouTube plugin URL when no
    download link exists, or ``""`` when neither is found. ``subs`` is ``None``
    unless *subs_language* is truthy. ``info`` is a dict with the keys
    ``Director``, ``Genre``, ``Plot`` and ``PlotOutline``.
    """
    # TODO: get 'related tags' and list them under genre
    html = self.getHTML(url)
    soup = BeautifulSoup(html)

    title = soup.find('span', attrs={'id': 'altHeadline'}).string
    # The headline is assumed to read "Speaker: Talk title".
    speaker = title.split(':', 1)[0]
    plot = soup.find('p', attrs={'id': 'tagline'}).string

    # Compile once instead of once per anchor; dots are escaped so that only
    # the literal host names and the ".mp4" extension match.
    high_res_label = re.compile(r'High-res video \(MP4\)')
    download_href = re.compile(r'http://download\.ted\.com/talks/.+\.mp4')

    # Detectors for the video link, in order of preference.
    detectors = [
        lambda l: high_res_label.match(str(l.string)),
        lambda l: download_href.match(str(l['href'])),
    ]

    video_url = ""
    for link in soup.findAll('a', href=True):
        if not detectors:
            # The most preferred detector already matched; nothing can beat it.
            break
        for rank, detector in enumerate(detectors):
            if detector(link):
                video_url = link['href']
                # Only look for better matches than what we have.
                detectors = detectors[:rank]
                break

    if video_url == "":
        # No direct download link: fall back to an embedded YouTube video.
        # When several are embedded, the last one found is used.
        youtube_ids = re.compile(
            r'http://(?:www\.)?youtube\.com/v/([^&]*)&').findall(html)
        for video_id in youtube_ids:
            video_url = 'plugin://plugin.video.youtube/?action=play_video&videoid=%s' % (video_id)

    subs = None
    if subs_language:
        subs = subtitles_scraper.get_subtitles_for_talk(
            soup, subs_language, self.logger)

    return title, video_url, subs, {
        'Director': speaker,
        'Genre': 'TED',
        'Plot': plot,
        'PlotOutline': plot,
    }