def __retrieveTVShows__(tvShowsUrl):
    """Scrape a forum listing page and return its TV shows.

    Args:
        tvShowsUrl: site-relative URL of the listing page, or None.

    Returns:
        list of dicts with keys 'name' (HTML-unescaped show title) and
        'url' (absolute show URL). Empty list when tvShowsUrl is None.
    """
    tvShows = []
    if tvShowsUrl is None:
        return tvShows
    tvShowsUrl = BASE_WSITE_URL + tvShowsUrl
    # Parse only the forum listing container to keep the soup small.
    contentDiv = BeautifulSoup.SoupStrainer('div', {'id': 'forumbits', 'class': 'forumbits'})
    soup = HttpClient().getBeautifulSoup(url=tvShowsUrl, parseOnlyThese=contentDiv)
    for tvShowTitleTag in soup.findAll('h2', {'class': 'forumtitle'}):
        aTag = tvShowTitleTag.find('a')
        tvshowUrl = str(aTag['href'])
        # Relative links need the site prefix.
        if not tvshowUrl.startswith('http'):
            tvshowUrl = BASE_WSITE_URL + '/' + tvshowUrl
        tvshowName = aTag.getText()
        # Skip the archived "Past Shows" forum entry.
        if not re.search('Past Shows', tvshowName, re.IGNORECASE):
            tvShows.append({"name": HttpUtils.unescape(tvshowName), "url": tvshowUrl})
    return tvShows
def __retrieveChannels__(tvChannels, dtUrl, channelType):
    """Scrape the channel directory page ('hentry' layout) into tvChannels.

    Mutates tvChannels in place: one entry per channel, keyed by the
    upper-cased channel name, with 'iconimage', 'channelType' and
    optional 'running_tvshows' / 'finished_tvshows' lists of
    {'name', 'url'} dicts.

    Args:
        tvChannels: dict to populate (modified in place).
        dtUrl: absolute URL of the directory page.
        channelType: opaque tag stored on each channel entry.
    """
    contentDiv = BeautifulSoup.SoupStrainer('div', {'class': re.compile(r'\bhentry\b')})
    soup = HttpClient().getBeautifulSoup(url=dtUrl, parseOnlyThese=contentDiv)
    # Hoisted out of the loop: same pattern for every channel block.
    channelNamePattern = re.compile(BASE_WSITE_URL + '/category/(tv-serials|pakistan-tvs)/(.+?)/')
    for tvChannelTag in soup.div.findAll('div', recursive=False):
        try:
            tvChannel = {}
            running_tvshows = []
            finished_tvshows = []
            tmp_tvshows_list = None
            firstRow = False
            for tag in tvChannelTag.findAll(re.compile('div|a'), recursive=False):
                if tag.name == 'div' and tag.get('class') == 'nav_up':
                    continue
                if not firstRow:
                    # First row carries the channel logo and name link.
                    imgTag = tag.find('img')
                    # Some thumbnails store the image URL in 'file' instead of 'src'.
                    channelImgAttr = imgTag.get('src')
                    if channelImgAttr is None:
                        channelImgAttr = imgTag['file']
                    channelImg = str(channelImgAttr)
                    channelName = channelNamePattern.findall(str(tag.find('a')['href']))[0][1]
                    channelName = channelName.replace('-', ' ').upper()
                    Logger.logDebug(channelName)
                    tvChannels[channelName] = tvChannel
                    tvChannel['iconimage'] = channelImg
                    tvChannel['channelType'] = channelType
                    firstRow = True
                else:
                    if tag.name == 'div' and tag.get('class') == 'dtLink':
                        # Section header: decides which list subsequent links join.
                        txt = tag.getText()
                        Logger.logDebug(txt)
                        if re.search('running', txt, flags=re.IGNORECASE):
                            tmp_tvshows_list = running_tvshows
                            tvChannel['running_tvshows'] = running_tvshows
                        elif re.search('finished', txt, flags=re.IGNORECASE):
                            tmp_tvshows_list = finished_tvshows
                            tvChannel['finished_tvshows'] = finished_tvshows
                        else:
                            Logger.logWarning('UNKNOWN TV SHOW CATEGORY')
                    elif tag.name == 'a':
                        tvshowUrl = str(tag['href'])
                        tvshowName = tag.getText().encode('utf-8')
                        Logger.logDebug(tvshowName)
                        tmp_tvshows_list.append({'name': HttpUtils.unescape(tvshowName), 'url': tvshowUrl})
        except Exception as e:
            # Best-effort: one malformed channel block must not abort the rest.
            Logger.logFatal(e)
            Logger.logDebug(tvChannelTag)
def __retrieveTVShowEpisodes__(threads, response_obj):
    """Add one ListItem per episode link found in the threads soup.

    Args:
        threads: BeautifulSoup tag containing thread title links, or None.
        response_obj: collector whose addListItem() receives each episode.
    """
    if threads is None:
        return
    for aTag in threads.findAll('a', {'class': re.compile(r'\btitle\b')}):
        episodeName = aTag.getText()
        # Only keep links that look like actual episode/video posts.
        if not re.search(r'\b(Watch|Episode|Video|Promo)\b', episodeName, re.IGNORECASE):
            continue
        item = ListItem()
        item.add_request_data('episodeName', HttpUtils.unescape(episodeName))
        episodeUrl = str(aTag['href'])
        # NOTE(review): compares lower-cased URL against BASE_WSITE_URL as-is —
        # assumes the constant is already lower case; confirm at declaration.
        if not episodeUrl.lower().startswith(BASE_WSITE_URL):
            if not episodeUrl.startswith('/'):
                episodeUrl = '/' + episodeUrl
            episodeUrl = BASE_WSITE_URL + episodeUrl
        item.add_request_data('episodeUrl', episodeUrl)
        item.set_next_action_name('Episode_VLinks')
        xbmcListItem = xbmcgui.ListItem(label=episodeName)
        item.set_xbmc_list_item_obj(xbmcListItem)
        response_obj.addListItem(item)
def __retrieveChannels__(tvChannels, dtUrl, channelType):
    """Scrape the channel directory page (table layout) into tvChannels.

    Mutates tvChannels in place: one entry per <tbody> channel block,
    keyed by upper-cased channel name, with 'iconimage', 'channelType'
    and optional 'running_tvshows' / 'finished_tvshows' lists of
    {'name', 'url'} dicts.

    NOTE(review): duplicates the name of another __retrieveChannels__ in
    this file — the later definition wins at import time; confirm which
    layout the site currently uses.

    Args:
        tvChannels: dict to populate (modified in place).
        dtUrl: absolute URL of the directory page.
        channelType: opaque tag stored on each channel entry.
    """
    contentDiv = BeautifulSoup.SoupStrainer('div', {'class': 'copy fix'})
    soup = HttpClient().getBeautifulSoup(url=dtUrl, parseOnlyThese=contentDiv)
    # Hoisted out of the loop: same pattern for every channel block.
    channelNamePattern = re.compile(BASE_WSITE_URL + '/category/(tv-serials|pakistan-tvs)/(.+?)/')
    for tvChannelTag in soup.findAll('tbody'):
        try:
            tvChannel = {}
            running_tvshows = []
            finished_tvshows = []
            tmp_tvshows_list = None
            firstRow = False
            for trTag in tvChannelTag.findAll('tr', recursive=False):
                if not firstRow:
                    # First row carries the channel logo and name link.
                    channelImg = str(trTag.find('img')['src'])
                    channelName = channelNamePattern.findall(str(trTag.find('a')['href']))[0][1]
                    channelName = channelName.replace('-', ' ').upper()
                    tvChannels[channelName] = tvChannel
                    tvChannel['iconimage'] = channelImg
                    tvChannel['channelType'] = channelType
                    firstRow = True
                else:
                    divTag = trTag.find('div')
                    if divTag is not None:
                        # Section header row: decides which list later links join.
                        txt = divTag.getText()
                        if re.search('running', txt, flags=re.IGNORECASE):
                            tmp_tvshows_list = running_tvshows
                            tvChannel['running_tvshows'] = running_tvshows
                        elif re.search('finished', txt, flags=re.IGNORECASE):
                            tmp_tvshows_list = finished_tvshows
                            tvChannel['finished_tvshows'] = finished_tvshows
                        else:
                            print('UNKNOWN TV SHOW CATEGORY')
                    else:
                        for aTag in trTag.findAll('a'):
                            tvshowUrl = str(aTag['href'])
                            tvshowName = aTag.getText()
                            tmp_tvshows_list.append({'name': HttpUtils.unescape(tvshowName), 'url': tvshowUrl})
        except Exception:
            # Best-effort: one malformed channel block must not abort the rest.
            print('Failed to load a tv channel links.')
def __retrieveTVShowEpisodes__(threads, response_obj):
    """Add one ListItem per episode link, with a keyword filter and fallback.

    Links whose text contains Watch/Episode/Video/Promo are preferred;
    when none match, every title link is used instead. Titles shaped
    like "Name (YYYY)" also get movieTitle/movieYear moving-data.

    NOTE(review): duplicates the name of another __retrieveTVShowEpisodes__
    in this file — the later definition wins at import time.

    Args:
        threads: BeautifulSoup tag containing thread title links, or None.
        response_obj: collector whose addListItem() receives each episode.
    """
    if threads is None:
        return
    aTags = threads.findAll('a', {'class': re.compile(r'\btitle\b')})
    videoEpisodes = [aTag for aTag in aTags
                     if re.search(r'\b(Watch|Episode|Video|Promo)\b', aTag.getText(), re.IGNORECASE)]
    # Fallback: if nothing matched the keyword filter, take all title links.
    if not videoEpisodes:
        videoEpisodes = aTags
    # Hoisted: "Title (Year)" extractor, compiled once for the loop.
    movieInfoPattern = re.compile(r'(.+?)\((\d+)\)')
    for aTag in videoEpisodes:
        episodeName = aTag.getText()
        item = ListItem()
        titleInfo = HttpUtils.unescape(episodeName)
        movieInfo = movieInfoPattern.findall(titleInfo)
        if len(movieInfo) >= 1 and len(movieInfo[0]) >= 2:
            title = unicode(movieInfo[0][0].rstrip()).encode('utf-8')
            year = unicode(movieInfo[0][1]).encode('utf-8')
            item.add_moving_data('movieTitle', title)
            item.add_moving_data('movieYear', year)
        item.add_request_data('episodeName', titleInfo)
        episodeUrl = str(aTag['href'])
        # NOTE(review): compares lower-cased URL against BASE_WSITE_URL as-is —
        # assumes the constant is already lower case; confirm at declaration.
        if not episodeUrl.lower().startswith(BASE_WSITE_URL):
            if not episodeUrl.startswith('/'):
                episodeUrl = '/' + episodeUrl
            episodeUrl = BASE_WSITE_URL + episodeUrl
        item.add_request_data('episodeUrl', episodeUrl)
        item.set_next_action_name('Episode_VLinks')
        xbmcListItem = xbmcgui.ListItem(label=episodeName)
        item.set_xbmc_list_item_obj(xbmcListItem)
        response_obj.addListItem(item)