def getTalks(self):
    """
    Yield one attributes dict per talk belonging to this theme.

    The talks are not embedded in the theme page itself: the page HTML
    (self.html) references a JSON endpoint inside a DataSource("...")
    call, and that endpoint returns an HTML fragment for every talk.

    Each yielded dict has the keys 'url', 'Title' and 'Thumb'.

    Raises IndexError when self.html contains no DataSource("...")
    reference.
    """
    from simplejson import loads

    # Search the HTML for the link to the JSON endpoint. A regex is easier
    # here than BeautifulSoup; raw strings keep the backslash escapes valid.
    jsonUrl = URLTED + re.findall(r'DataSource\("(.+?)"', self.html)[0]
    # Decode the JSON text served at that URL.
    talksMarkup = loads(self.get_HTML(jsonUrl))
    # The thumbnail pattern is identical for every talk, so compile it once.
    jpgSrc = re.compile(r'.+?\.jpg')
    for markup in talksMarkup['resultSet']['result']:
        # Each result entry carries the HTML fragment for a single talk.
        talk = BeautifulSoup(markup['markup'])
        link = URLTED + talk.dt.a['href']
        title = cleanHTML(talk.dt.a['title'])
        # First <img> whose src matches the .jpg pattern.
        pic = resizeImage(talk.find('img', attrs={'src': jpgSrc})['src'])
        yield {'url': link, 'Title': title, 'Thumb': pic}
def getTalks(self):
    """
    Yield one attributes dict per talk belonging to this theme.

    The talks are not embedded in the theme page itself: the page HTML
    (self.html) references a JSON endpoint inside a DataSource("...")
    call, and that endpoint returns an HTML fragment for every talk.

    Each yielded dict has the keys 'url', 'Title' and 'Thumb'.

    Raises IndexError when self.html contains no DataSource("...")
    reference.
    """
    from simplejson import loads

    # Search the HTML for the link to the JSON endpoint. A regex is easier
    # here than BeautifulSoup; raw strings keep the backslash escapes valid.
    jsonUrl = URLTED + re.findall(r'DataSource\("(.+?)"', self.html)[0]
    # Decode the JSON text served at that URL.
    talksMarkup = loads(self.get_HTML(jsonUrl))
    # The thumbnail pattern is identical for every talk, so compile it once.
    jpgSrc = re.compile(r'.+?\.jpg')
    for markup in talksMarkup['resultSet']['result']:
        # Each result entry carries the HTML fragment for a single talk.
        talk = BeautifulSoup(markup['markup'])
        link = URLTED + talk.dt.a['href']
        title = cleanHTML(talk.dt.a['title'])
        # First <img> whose src matches the .jpg pattern.
        pic = resizeImage(talk.find('img', attrs={'src': jpgSrc})['src'])
        yield {'url': link, 'Title': title, 'Thumb': pic}
def getNewTalks(self, url=None):
    """
    Yield the entries scraped from one page of the talk listing.

    Returns 2-tuples, first value is whether this is a folder, second is
    attributes dict.

    url -- page to scrape; when None, the default listing page
           'http://www.ted.com/talks/list/page/' is used.

    Navigation entries (folder flag True, mode 'newTalks') are yielded
    first, for whichever of the next/previous links getNavItems reports.
    They are followed by one entry per talk (folder flag False, mode
    'playVideo') with the keys 'url', 'Title' and 'Thumb'.
    """
    # Identity test: only a genuinely omitted url selects the default page.
    if url is None:
        url = 'http://www.ted.com/talks/list/page/'
    html = self.getHTML(url)

    # Forward/backwards
    navItems = getNavItems(html)
    if navItems['next']:
        yield True, {
            'mode': 'newTalks',
            'Title': self.getLS(30020),
            'url': navItems['next'],
        }
    if navItems['previous']:
        yield True, {
            'mode': 'newTalks',
            'Title': self.getLS(30021),
            'url': navItems['previous'],
        }

    # Restrict parsing to elements whose class matches 'talkMedallion'.
    talkContainers = SoupStrainer(attrs={'class': re.compile('talkMedallion')})
    # The thumbnail pattern is identical for every talk, so compile it once.
    jpgSrc = re.compile(r'.+?\.jpg')
    for talk in BeautifulSoup(html, parseOnlyThese=talkContainers):
        link = URLTED + talk.dt.a['href']
        title = cleanHTML(talk.dt.a['title'])
        # First <img> whose src matches the .jpg pattern.
        pic = resizeImage(talk.find('img', attrs={'src': jpgSrc})['src'])
        yield False, {
            'mode': 'playVideo',
            'url': link,
            'Title': title,
            'Thumb': pic,
        }
def getNewTalks(self, url=None):
    """
    Yield the entries scraped from one page of the talk listing.

    Returns 2-tuples, first value is whether this is a folder, second is
    attributes dict.

    url -- page to scrape; when None, the default listing page
           'http://www.ted.com/talks/list/page/' is used.

    Navigation entries (folder flag True, mode 'newTalks') are yielded
    first, for whichever of the next/previous links getNavItems reports.
    They are followed by one entry per talk (folder flag False, mode
    'playVideo') with the keys 'url', 'Title' and 'Thumb'.
    """
    # Identity test: only a genuinely omitted url selects the default page.
    if url is None:
        url = 'http://www.ted.com/talks/list/page/'
    html = self.getHTML(url)

    # Forward/backwards
    navItems = getNavItems(html)
    if navItems['next']:
        yield True, {
            'mode': 'newTalks',
            'Title': self.getLS(30020),
            'url': navItems['next'],
        }
    if navItems['previous']:
        yield True, {
            'mode': 'newTalks',
            'Title': self.getLS(30021),
            'url': navItems['previous'],
        }

    # Restrict parsing to elements whose class matches 'talkMedallion'.
    talkContainers = SoupStrainer(attrs={'class': re.compile('talkMedallion')})
    # The thumbnail pattern is identical for every talk, so compile it once.
    jpgSrc = re.compile(r'.+?\.jpg')
    for talk in BeautifulSoup(html, parseOnlyThese=talkContainers):
        link = URLTED + talk.dt.a['href']
        title = cleanHTML(talk.dt.a['title'])
        # First <img> whose src matches the .jpg pattern.
        pic = resizeImage(talk.find('img', attrs={'src': jpgSrc})['src'])
        yield False, {
            'mode': 'playVideo',
            'url': link,
            'Title': title,
            'Thumb': pic,
        }