def display_lectures(self, url):
    """Display the lectures for a given course url.

    Downloads the course page, parses each lecture's name, url,
    description and thumbnail, resolves each lecture's direct video
    url via an async batch download, and adds the playable items.
    """
    html = urlread(url)
    # get the div which contains all of the <li> lecture tags
    div_tag = BS(html, parseOnlyThese=SS('div', {'class': 'results-list'}))
    # parse the name, url, desc, tn for each lecture
    dirs = [{'name': li.h4.a.text,
             'htmlurl': self._urljoin(li.h4.a['href']),
             'info': {'plot': li.p.text, 'title': li.h4.a.text},
             'tn': self._urljoin(li.find('img',
                                         {'class': 'thumb-144'})['src'])}
            for li in div_tag('li')]
    # for each dir, download the lecture's html page and parse the video url
    self.dp = DialogProgress(self.getString(30000),
                             line1=self.getString(30101),
                             num_steps=len(dirs))
    urls = [d['htmlurl'] for d in dirs]
    responses = async_urlread(urls, self.dp)
    # plain loop for the side effect (a list comprehension here would
    # build a throwaway list of Nones)
    for d, response in zip(dirs, responses):
        d['url'] = self._get_video_url(response)
    # filter out lectures that don't have urls, currently a fix for a chem
    # course which contains a bad link to a lecture
    dirs = [d for d in dirs if d['url'] is not None]
    self.dp.update(100)
    self.dp.close()
    self.add_videos(dirs)
def _get_courses_lectures(self, htmls):
    """Return a tuple of lists: (courses_list, lectures_list).

    Takes the html source(s) of a topic page and parses all results by
    visiting each page of results.

    :param htmls: a single html string or a list of html strings.
    :returns: (courses, lectures) — each a list of result dicts;
        courses carry a ``mode`` key, lectures a resolved video ``url``.
    """
    # accept a single html string as well as a list of them
    if isinstance(htmls, str):
        htmls = [htmls]
    # Each topic page displays only 12 results to a page. So to get all
    # results for a topic, parse all page results urls from the topic page,
    # then download each of the extra pages of results, then parse the video
    # results.
    pagination_urls = [url for html in htmls
                       for url in self._get_pagination_urls(html)]
    # Download every pagination page. If a dialog progress box exists,
    # update the step for each increment. Allocate 50% of the bar for
    # downloading the pagination urls. The other 50% is allocated to
    # downloading all of the topic pages when choosing 'View All' for a
    # subject.
    if self.dp and len(pagination_urls) != 0:
        self.dp.step = int(50 / len(pagination_urls))
        page_htmls = async_urlread(pagination_urls, self.dp)
    else:
        page_htmls = async_urlread(pagination_urls)
    # extend the list of pagination htmls with the given htmls
    page_htmls.extend(htmls)
    # get a complete list of video results by parsing results from all pages
    results = self._get_video_results(page_htmls)
    # filter courses and lectures so they can be displayed in groups.
    # List comprehensions (not filter()) so each list can be iterated
    # more than once — on Python 3 a filter object is a one-shot
    # iterator and would be exhausted by the update loops below.
    courses = [r for r in results if '/courses/' in r['url']]
    lectures = [r for r in results if '/lectures/' in r['url']]
    # add mode argument to courses, lectures don't need it since they will
    # contain a direct url to the video
    for c in courses:
        c['mode'] = 3
    # get the actual URL for the video for each lecture, this ensures that
    # the display link plays a video, and doesn't go to another level of
    # directory listings
    for l in lectures:
        l['url'] = self._get_video_url(l['url'])
        l['name'] = self.getString(30103) + l['name']
    # filter out lectures with no video url. This is a result of bad regex
    # parsing, crappy fix...
    lectures = [l for l in lectures if l['url'] is not None]
    return courses, lectures
def display_allresults(self, url):
    """Display all results for a given url.

    Used on a subject page to list all video results without having to
    drill down into each category.
    """
    html = urlread(url)
    # get the div which contains all of the topic <a> tags
    div_topics = BS(html, parseOnlyThese=SS('div',
                                            {'class': 'results-side'}))
    # create a list of urls for all topics, skipping the aggregate
    # 'Online...', '...Credit...' and 'All...' links
    topic_urls = [self._urljoin(a['href']) for a in div_topics('a')
                  if not a.text.startswith('Online')
                  and 'Credit' not in a.text
                  and not a.text.startswith('All')]
    self.dp = DialogProgress(self.getString(30000),
                             line1=self.getString(30102),
                             num_steps=2 * len(topic_urls))
    topic_htmls = async_urlread(topic_urls, self.dp)
    courses, lectures = self._get_courses_lectures(topic_htmls)
    self.dp.update(100)
    self.dp.close()
    # display in sorted groups, courses first then lectures
    courses = sorted(courses, key=lambda c: c['name'])
    lectures = sorted(lectures, key=lambda l: l['name'])
    self.add_dirs(courses, end=False)
    self.add_videos(lectures)