def get_data_cached(self, url, useCache, timeout, page=True):
    markizalog.logDebug("getdata(%s,%s,%s,%s)..." % (url, useCache, timeout, page))
    if useCache:
        # Dispatch to the cache helper matching the requested timeout;
        # any other value falls through to cache_request_30.
        if timeout == -1:
            return self.cache_request_special(url)
        if timeout == 1:
            return self.cache_request_1(url, page)
        if timeout == 3:
            return self.cache_request_3(url, page)
        if timeout == 8:
            return self.cache_request_8(url, page)
        return self.cache_request_30(url, page)
    else:
        return read_page(url) if page else util.request(url)
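# Illustrative only (not part of the original source): how the timeout values
# above map onto the cache helpers. The "provider" instance name is hypothetical.
#
#   provider.get_data_cached(url, True, -1)        -> cache_request_special(url)
#   provider.get_data_cached(url, True, 3)         -> cache_request_3(url, True)
#   provider.get_data_cached(url, True, 30)        -> cache_request_30(url, True)  (fallback bucket)
#   provider.get_data_cached(url, False, 0, False) -> util.request(url)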
def videoLink(self, url):
    result = []
    doc = read_page(url)
    main = doc.find('main')
    url = main.find('iframe')['src']
    # Fetch the embedded player page.
    req = urllib2.Request(url)
    req.add_header('User-Agent', _UserAgent_)
    response = urllib2.urlopen(req)
    httpdata = response.read()
    response.close()
    httpdata = httpdata.replace("\r", "").replace("\n", "").replace("\t", "")
    # Pull metadata out of the player markup.
    thumb = re.compile('<meta property="og:image" content="(.+?)">').findall(httpdata)
    thumb = thumb[0] if len(thumb) > 0 else ''
    desc = re.compile('<meta name="description" content="(.+?)">').findall(httpdata)
    desc = desc[0] if len(desc) > 0 else ''
    name = re.compile('<meta property="og:title" content="(.+?)">').findall(httpdata)
    name = name[0] if len(name) > 0 else '?'
    # Quality labels ("renditions") and the stream URLs they belong to.
    renditions = re.compile(r'renditions: \[(.+?)\]').findall(httpdata)
    if len(renditions) > 0:
        renditions = re.compile(r'[\'\:](.+?)[\'\:]').findall(renditions[0])
    bitrates = re.compile('src = {(.+?):(.+?)}').findall(httpdata)
    if len(bitrates) > 0:
        urls = re.compile(r'[\'\"](.+?)[\'\"]').findall(bitrates[0][1])
        markizalog.logDebug('URLS=%s' % urls)
        for num, url in enumerate(urls):
            markizalog.logDebug('URL=%s\nNUM=%s' % (url, num))
            # Label the link with its rendition when one is available,
            # otherwise fall back to the video title.
            if num < len(renditions):
                result.append(self.addLink(renditions[num], url))
            else:
                result.append(self.addLink(name, url))
    else:
        markizalog.logError('Chyba - Video nejde prehrat')  # "Error - video cannot be played"
    return result
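# Illustrative only: the rough shape of the embed-page JavaScript that
# videoLink() scrapes with the regexes above (hypothetical values, not
# copied from a real page):
#
#   renditions: ['480p','720p'],
#   ...
#   src = {'mp4': ['http://example/archiv_480.mp4','http://example/archiv_720.mp4']}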
def episodes(self, url):
    result = []
    doc = read_page(url)
    # Episodes listed directly on the show page.
    for article in doc.findAll('article', 'b-article b-article-text b-article-inline'):
        url = article.a['href'].encode('utf-8')
        title = article.a['title'].encode('utf-8')
        thumb = article.a.div.img['data-original'].encode('utf-8')
        result.append(self.addDir(title, url, 3, thumb))
    # Additional sections grouped under their Slovak headings.
    for section in doc.findAll('section', 'b-main-section b-section-articles my-5'):
        heading = section.div.h3.getText(" ").encode('utf-8')
        if heading == 'Celé epizódy':  # "Full episodes"
            for article in section.findAll('article'):
                url = article.a['href'].encode('utf-8')
                title = 'Celé epizódy - ' + article.a['title'].encode('utf-8')
                thumb = article.a.div.img['data-original'].encode('utf-8')
                result.append(self.addDir(title, url, 3, thumb, 1))
        if heading == 'Mohlo by sa vám páčiť':  # "You might also like"
            for article in section.findAll('article'):
                url = article.a['href'].encode('utf-8')
                title = 'Mohlo by sa vám páčiť - ' + article.a['title'].encode('utf-8')
                thumb = article.a.div.img['data-original'].encode('utf-8')
                result.append(self.addDir(title, url, 3, thumb))
        if heading == 'Zo zákulisia':  # "Behind the scenes"
            for article in section.findAll('article'):
                url = article.a['href'].encode('utf-8')
                title = 'Zo zákulisia - ' + article.a['title'].encode('utf-8')
                thumb = article.a.div.img['data-original'].encode('utf-8')
                result.append(self.addDir(title, url, 3, thumb))
    return result
def list(self, url):
    result = []
    url, mode = self.getMode(url)
    markizalog.logDebug('list hit mode=%s, url=%s' % (mode, url))
    if mode == 5:
        # A-Z show listing
        doc = read_page(url)
        for article in doc.findAll('article'):
            url = article.a['href'].encode('utf-8')
            title = article.a['title'].encode('utf-8')
            thumb = article.a.div.img['data-original'].encode('utf-8')
            result.append(self.addDir(title, url, 2, thumb))
    elif mode == 2:
        # episodes of a show
        result = self.episodes(url)
    elif mode == 9:
        # @TODO top
        pass
    elif mode == 8:
        # @TODO new episodes
        pass
    elif mode == 6:
        # @TODO most watched
        pass
    elif mode == 7:
        # @TODO recommended
        pass
    elif mode == 3:
        # video link
        result = self.videoLink(url)
    return result
def cache_request_30(self, url, page):
    # Note: despite the name, no caching is applied here; the request
    # always goes out to the site.
    markizalog.logDebug("NOT CACHED REQUEST")
    return read_page(url) if page else util.request(url)
def cache_request_special(self, url):
    markizalog.logDebug("NOT CACHED REQUEST")
    return read_page(url)