def scrape_movie(self, title, year, imdb, debrid=False):
    """Search the site's RSS feed for a movie and delegate to sources().

    Matches on the cleaned title being a substring of the cleaned feed
    title. Returns the result of self.sources(), or [] when the movie is
    not found or any error occurs.
    """
    try:
        query = '%s %s' % (title, year)
        query = self.search_link % (urllib.quote_plus(query))
        query = urlparse.urljoin(self.base_link, query)
        result = client.request(query, error=True)
        items = client.parseDOM(result, "item")
        cleanedTitle = cleantitle.get(title)
        url = None
        for item in items:
            linkTitle = client.parseDOM(item, "title")[0]
            # Substring match: feed titles usually carry extra tags.
            if cleanedTitle in cleantitle.get(linkTitle):
                url = client.parseDOM(item, "a", attrs={"rel": "nofollow"},
                                      ret="href")[0]
                break
        # Fix: `url` was previously unbound (NameError) when no item
        # matched, which surfaced as a spurious logged exception.
        if url is None:
            return []
        return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
def scrape_movie(self, title, year, imdb, debrid=False):
    """Search the RSS feed for an exact cleaned-title match and delegate
    to sources(). Returns [] when nothing matches or on error."""
    try:
        query = '%s %s' % (title, year)
        query = self.search_link % (urllib.quote_plus(query))
        query = urlparse.urljoin(self.base_link, query)
        result = client.request(query)
        result = result.decode('iso-8859-1').encode('utf-8')
        items = client.parseDOM(result, "item")
        cleanedTitle = cleantitle.get(title)
        url = None
        for item in items:
            linkTitle = client.parseDOM(item, "title")[0]
            if cleanedTitle == cleantitle.get(linkTitle):
                url = client.parseDOM(item, "link")[0]
                break
        # Fix: `url` was previously unbound (NameError) when no feed item
        # matched, which surfaced as a spurious logged exception.
        if url is None:
            return []
        return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    """Search the feed for '<title> <episode>' and delegate the matching
    item's link to sources(). Returns self.srcs unchanged on failure."""
    try:
        query = '%s %s' % (title, episode)
        query = self.search_link % (urllib.quote_plus(query))
        result = client.request(self.base_link + query)
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '').replace('\t', '')
        items = client.parseDOM(result, 'item')
        cleanedTitle = cleantitle.get('%s %s' % (title, episode))
        url = None
        for item in items:
            linkTitle = client.parseDOM(item, 'title')[0]
            # Feed titles carry a fixed suffix; strip it before comparing.
            linkTitle = cleantitle.get(linkTitle).replace(
                'watchonlineepisodehd', '')
            if cleanedTitle == linkTitle:
                url = client.parseDOM(item, "link")[0]
                break
        # Fix: `url` was previously unbound (NameError) when no item
        # matched; the bare except then masked the real cause.
        if url is None:
            return self.srcs
        return self.sources(client.replaceHTMLCodes(url))
    except:
        return self.srcs
def scrape_movie(self, title, year, imdb, debrid=False):
    """Resolve the movie's primary language from its IMDb page, then
    search the language-specific listing for an exact title+year match.

    Returns self.sources(listing_id) on success, [] on any failure
    (including unsupported languages, which KeyError out of langMap).
    """
    try:
        langMap = {'hi': 'hindi', 'ta': 'tamil', 'te': 'telugu',
                   'ml': 'malayalam', 'kn': 'kannada', 'bn': 'bengali',
                   'mr': 'marathi', 'pa': 'punjabi'}
        # Scrape the primary_language facet link off the IMDb title page.
        lang = 'http://www.imdb.com/title/%s/' % imdb
        lang = client.request(lang)
        # Fix: raw strings for regex patterns so '\s'/'\d' are passed to
        # the regex engine rather than interpreted as string escapes.
        lang = re.findall(r'href\s*=\s*[\'|\"](.+?)[\'|\"]', lang)
        lang = [i for i in lang if 'primary_language' in i]
        lang = [urlparse.parse_qs(urlparse.urlparse(i).query) for i in lang]
        lang = [i['primary_language'] for i in lang if 'primary_language' in i]
        lang = langMap[lang[0][0]]
        q = self.search_link % (lang, urllib.quote_plus(title))
        q = urlparse.urljoin(self.base_link, q)
        t = cleantitle.get(title)
        r = self.request(q)
        r = client.parseDOM(r, 'li')
        r = [(client.parseDOM(i, 'a', ret='href'),
              client.parseDOM(i, 'h3'),
              client.parseDOM(i, 'div', attrs={'class': 'info'})) for i in r]
        r = [(i[0][0], i[1][0], i[2][0]) for i in r if i[0] and i[1] and i[2]]
        r = [(re.findall(r'(\d+)', i[0]), i[1], re.findall(r'(\d{4})', i[2]))
             for i in r]
        r = [(i[0][0], i[1], i[2][0]) for i in r if i[0] and i[2]]
        # Exact cleaned-title and year match; IndexError when nothing
        # matches is caught and reported below.
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]
        url = str(r)
        return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
def scrape_movie(self, title, year, imdb, debrid=False):
    """Search the feed by title, match 'Title (YYYY)' entries, and
    delegate the first match to sources(). Returns [] when not found."""
    try:
        query = '%s' % (title)
        query = self.search_link % (urllib.quote_plus(query))
        query = urlparse.urljoin(self.base_link, query)
        cleanedTitle = cleantitle.get(title)
        result = client.request(query)
        result = result.decode('iso-8859-1').encode('utf-8')
        items = client.parseDOM(result, "item")
        for item in items:
            linkTitle = client.parseDOM(item, 'title')[0]
            try:
                parsed = re.compile('(.+?) \((\d{4})\) ').findall(
                    linkTitle)[0]
                parsedTitle = parsed[0]
                parsedYears = parsed[1]
            except:
                # Unparseable feed title — force a non-match below.
                parsedTitle = ''
            if cleanedTitle == cleantitle.get(parsedTitle):
                url = client.parseDOM(item, "link")[0]
                return self.sources(client.replaceHTMLCodes(url))
        # Fix: previously fell through returning None when no item
        # matched; return [] to agree with the error path and callers.
        return []
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
def scrape_movie(self, title, year, imdb, debrid=False):
    """Find a matching movie page and hand it to sources().

    First tries an exact cleaned-title + year match; failing that, probes
    up to five candidate pages and accepts one whose HTML mentions the
    requested IMDb id. Returns [] on error (or None when no candidates
    at all were found — preserved historic behavior).
    """
    try:
        query = self.moviesearch_link % urllib.quote_plus(
            cleantitle.query(title))
        query = urlparse.urljoin(self.base_link, query)
        result = str(proxy.request(query, 'item'))
        # Pull the second results page too when pagination is present.
        if 'page=2' in result or 'page%3D2' in result:
            result += str(proxy.request(query + '&page=2', 'item'))
        result = client.parseDOM(result, 'div', attrs={'class': 'item'})
        title = 'watchputlocker' + cleantitle.get(title)
        # Accept the requested year plus/minus one (site listings drift).
        years = [
            '(%s)' % str(year), '(%s)' % str(int(year) + 1),
            '(%s)' % str(int(year) - 1)
        ]
        result = [(client.parseDOM(i, 'a', ret='href'),
                   client.parseDOM(i, 'a', ret='title')) for i in result]
        result = [(i[0][0], i[1][0]) for i in result
                  if len(i[0]) > 0 and len(i[1]) > 0]
        result = [i for i in result if any(x in i[1] for x in years)]
        r = [(proxy.parse(i[0]), i[1]) for i in result]
        # Strict matches: exact cleaned title AND exact year in the label.
        match = [
            i[0] for i in r
            if title == cleantitle.get(i[1]) and '(%s)' % str(year) in i[1]
        ]
        match2 = [i[0] for i in r]
        # De-duplicate while preserving order.
        match2 = [x for y, x in enumerate(match2) if x not in match2[:y]]
        if match2 == []:
            return
        for i in match2[:5]:
            try:
                if len(match) > 0:
                    url = match[0]
                    break
                # Fallback: fetch the candidate page and look for the
                # IMDb id anywhere in its markup.
                r = proxy.request(urlparse.urljoin(self.base_link, i),
                                  'link_ite')
                r = re.findall('(tt\d+)', r)
                if imdb in r:
                    url = i
                    break
            except:
                pass
        # Strip scheme/host, keeping only the path portion.
        url = re.findall('(?://.+?|)(/.+)', url)[0]
        url = client.replaceHTMLCodes(url)
        url = url.encode('utf-8')
        return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        pass
        return []
def sources(self, url):
    """Resolve a query-string style url (title/year or show/season/episode)
    into feed items, filter them by cleaned title and year/episode tag,
    and feed each surviving item to self.source(). Returns self.srcs.
    """
    try:
        logger.debug('SOURCES URL %s' % url, __name__)
        if url == None:
            return []
        # `url` is an urlencoded query string of metadata, not a web URL.
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        cleanedTitle = cleantitle.get(title)
        # hdlr is the tag a candidate must carry: SxxExx for shows, year
        # for movies.
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        # Strip characters the site search chokes on.
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        r = client.request(url)
        posts = client.parseDOM(r, 'item')
        items = []
        for post in posts:
            try:
                t = client.parseDOM(post, 'title')[0]
                post = post.replace('\n','').replace('\t','')
                # Only the "Single Link" section of the post is scanned.
                post = re.compile('<span style="color: #ff0000">Single Link</b></span><br />(.+?)<span style="color: #ff0000">').findall(post)[0]
                u = re.findall('<a href="(http(?:s|)://.+?)">', post)
                items += [(t, i) for i in u]
            except:
                pass
        for item in items:
            try:
                name = client.replaceHTMLCodes(item[0])
                # Strip year/episode/format suffixes to isolate the title.
                linkTitle = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name)
                if not cleanedTitle == cleantitle.get(linkTitle):
                    raise Exception()
                year = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                if not year == hdlr:
                    raise Exception()
                self.source(item)
            except:
                pass
        logger.debug('SOURCES [%s]' % self.srcs, __name__)
        return self.srcs
    except:
        return self.srcs
def sources(self, url):
    """Extract hoster links hidden behind the proxy's base64 'gtfo'
    redirect parameter, grading each CAM/TS entry down accordingly."""
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        quality = ''
        srcs = []
        if url == None:
            return srcs
        page = urlparse.urljoin(self.base_link, url)
        html = proxy.request(page, 'link_ite')
        tables = client.parseDOM(html, 'table',
                                 attrs={'class': 'link_ite.+?'})
        for table in tables:
            try:
                hrefs = client.parseDOM(table, 'a', ret='href')
                link = [h for h in hrefs if 'gtfo' in h][-1]
                link = proxy.parse(link)
                # The real target is base64-encoded in the gtfo query arg.
                link = urlparse.parse_qs(
                    urlparse.urlparse(link).query)['gtfo'][0]
                link = base64.b64decode(link)
                link = client.replaceHTMLCodes(link)
                link = link.encode('utf-8')
                host = re.findall(
                    '([\w]+[.][\w]+)$',
                    urlparse.urlparse(link.strip().lower()).netloc)[0]
                host = host.encode('utf-8')
                labels = client.parseDOM(table, 'div',
                                         attrs={'class': 'quality'})
                if any(x in ['[CAM]', '[TS]'] for x in labels):
                    quality = 'CAM'
                else:
                    quality = 'SD'
                quality = quality.encode('utf-8')
                srcs.append({
                    'source': host,
                    'parts': '1',
                    'quality': quality,
                    'scraper': self.name,
                    'url': link,
                    'direct': False
                })
            except:
                pass
        logger.debug('SOURCES [%s]' % srcs, __name__)
        return srcs
    except:
        return srcs
def source(self, item):
    """Resolve every part link inside `item` to its embedded player URL
    and append one multi-part entry to self.srcs (HD when the HTML
    mentions 720p, else SD)."""
    try:
        try:
            quality = 'HD' if '720p' in item else 'SD'
            partUrls = client.parseDOM(item, "a", ret="href")
            for idx, partUrl in enumerate(partUrls):
                videoID = self.getVideoID(partUrl)
                page = client.request(self.info_link % videoID)
                page = page.decode('iso-8859-1').encode('utf-8')
                block = client.parseDOM(
                    page, name="div",
                    attrs={
                        "style": "float:none;height:700px;margin-left:200px"
                    })[0]
                resolved = re.compile(
                    '(SRC|src|data-config)=[\'|\"](.+?)[\'|\"]').findall(
                        block)[0][1]
                # Scheme-relative embeds need an explicit http: prefix.
                if not resolved.startswith('http:') and not resolved.startswith('https:'):
                    resolved = '%s%s' % ('http:', resolved)
                partUrls[idx] = resolved
            host = client.host(partUrls[0])
            self.srcs.append({
                'source': host,
                'parts': str(len(partUrls)),
                'quality': quality,
                'scraper': self.name,
                'url': "##".join(partUrls),
                'direct': False
            })
        except Exception as e:
            logger.error(e)
    except:
        return self.srcs
def sources(self, url):
    """Collect hoster links from a post page; digibolly.se links are
    first resolved to their embedded iframe target. Quality is always
    reported as HD."""
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        found = []
        if url == None:
            return found
        #url = urlparse.urljoin(self.base_link, url)
        try:
            html = client.request(url, referer=self.base_link)
        except:
            html = ''
        html = html.decode('iso-8859-1').encode('utf-8')
        html = html.replace('\n', '').replace('\t', '')
        html = client.parseDOM(
            html, "div",
            attrs={"class": "entry-content clearfix single-post-content"})
        html = client.parseDOM(html, "p",
                               attrs={"style": "text-align: center;"})
        for link in client.parseDOM(html, "a", ret="href"):
            try:
                if 'digibolly.se' in link:
                    inner = client.request(link)
                    link = re.findall('<iframe src="(.+?)"', inner,
                                      re.IGNORECASE)[0]
                found.append({
                    'source': client.host(link),
                    'parts': '1',
                    'quality': 'HD',
                    'scraper': self.name,
                    'url': link,
                    'direct': False
                })
            except:
                pass
        logger.debug('SOURCES [%s]' % found, __name__)
        return found
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return found
def sources(self, url):
    """Append every <source src=...> video link on the page to self.srcs
    as an HD entry, then return self.srcs."""
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        if url == None:
            return self.srcs
        try:
            html = client.request(url, referer=self.base_link)
        except:
            html = ''
        for src in client.parseDOM(html, "source", ret="src"):
            try:
                self.srcs.append({
                    'source': client.host(src),
                    'parts': '1',
                    'quality': 'HD',
                    'scraper': self.name,
                    'url': src,
                    'direct': False
                })
            except:
                pass
        logger.debug('SOURCES [%s]' % self.srcs, __name__)
        return self.srcs
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return self.srcs
def sources(self, url):
    """Scrape the first embedded player URL (SRC/src/data-config) from
    each entry-content div and report it as an HD source."""
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        found = []
        if url == None:
            return found
        url = urlparse.urljoin(self.base_link, url)
        try:
            html = client.request(url, referer=self.base_link)
        except:
            html = ''
        html = html.decode('iso-8859-1').encode('utf-8')
        html = html.replace('\n', '').replace('\t', '')
        pattern = re.compile('(SRC|src|data-config)=[\'|\"](.+?)[\'|\"]')
        for block in client.parseDOM(html, "div",
                                     attrs={"class": "entry-content"}):
            try:
                link = pattern.findall(block)[0][1]
                found.append({
                    'source': client.host(link),
                    'parts': '1',
                    'quality': 'HD',
                    'scraper': self.name,
                    'url': link,
                    'direct': False
                })
            except:
                pass
        logger.debug('SOURCES [%s]' % found, __name__)
        return found
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return found
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    """Search for '<title> <episode>' and feed every single-heading block
    found in the post body to self.source(). Returns self.srcs."""
    try:
        term = '%s %s' % (title, episode)
        term = self.search_link % (urllib.quote_plus(term))
        try:
            html = client.request(self.base_link + term)
        except:
            html = ''
        html = html.decode('iso-8859-1').encode('utf-8')
        html = html.replace('\n', '').replace('\t', '')
        body = client.parseDOM(html, 'content:encoded')[0]
        headings = re.compile('class=\"single-heading\">(.+?)<span').findall(body)
        for heading in headings:
            self.source(heading)
        return self.srcs
    except:
        return self.srcs
def sources(self, url):
    """Scrape embedded player URLs from the single-post-video div and
    report each as an HD source. Returns the collected list, or [] on
    error."""
    try:
        logger.debug('SOURCES URL %s' % url, __name__)
        quality = 'HD'
        srcs = []
        try:
            result = client.request(url)
        except:
            result = ''
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '').replace('\t', '')
        result = client.parseDOM(result, "div",
                                 attrs={"class": "single-post-video"})[0]
        items = re.compile(
            '(SRC|src|data-config)=[\'|\"](.+?)[\'|\"]').findall(result)
        for item in items:
            # Skip poster/thumbnail images embedded next to the players.
            if item[1].endswith('png'):
                continue
            host = client.host(item[1])
            url = item[1]
            parts = [url]
            # Fix: 'parts' is a string count in every other scraper in
            # this project; it was emitted as an int here.
            srcs.append({
                'source': host,
                'parts': str(len(parts)),
                'quality': quality,
                'scraper': self.name,
                'url': "##".join(parts),
                'direct': False
            })
        logger.debug('SOURCES [%s]' % srcs, __name__)
        return srcs
    except:
        return srcs
def resolve(url):
    """Resolve a hoster page to a playable media URL.

    Tries EnkDekoder first (obfuscated pages); otherwise scans the raw
    HTML for a file:"http..." reference. Returns the URL, or False on
    any error.
    """
    try:
        result = client.request(url)
        dek = EnkDekoder.dekode(result)
        if not dek == None:
            # Decoded page: the stream URL lives in the flashvars param.
            url = client.parseDOM(dek, "param",
                                  attrs={"name": "flashvars"},
                                  ret="value")[0]
        else:
            # Page was not obfuscated; scan the raw HTML instead.
            # NOTE(review): assumes the file:"..." scan applies only to
            # the non-decoded branch — confirm against upstream history.
            dek = result
            url = re.compile('file*:*"(http.+?)"').findall(dek)[0]
        if re.search(';video_url', url):
            url = re.findall(';video_url=(.+?)&', url)[0]
        elif re.search('iframe src=', url):
            url = re.findall('<iframe src="(.+?)"', url)[0]
        # Prefer the flash/edit variants of the stream.
        url = url.replace('_ipod.mp4', '.flv')
        url = url.replace('preview', 'edit')
        logger.debug('URL [%s]' % url, __name__)
        return url
    except:
        return False
def source(self, url):
    """Follow a detail page to its player page, parse the JWPlayer-style
    sources: [...] JSON array, and return one entry per labelled stream.

    Quality is mapped from the label (1080p/720p/360p, else SCR).
    Returns [] (possibly partial) on error.
    """
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        srcs = []
        if url == None:
            return srcs
        # Extended output: (body, status, resp-headers, req-headers, cookie).
        result, response_code, response_headers, headers, cookie = client.request(url, output='extended')
        result = result.replace('\n','').replace('\t','').replace('\r','')
        # NOTE(review): reads Referer from the *request* headers — assumes
        # client.request populated it; confirm against client module.
        referer = headers.get('Referer')
        result = client.parseDOM(result, 'div',
                                 attrs={"class": "detail ls_item"})[0]
        link = client.parseDOM(result, 'div',
                               attrs={"class": "loaer_detai"})[0]
        link = client.parseDOM(link, 'a', ret='href')[0]
        link = urlparse.urljoin(referer, link)
        result = client.request(link)
        # Lift the JSON array out of the player config.
        result = re.compile('sources:\s\[(.+?)\]').findall(result)[0]
        result = '[%s]' % result
        result = json.loads(result)
        for item in result:
            url = item.get('file')
            label = item.get('label')
            if '1080p' in label:
                quality = '1080p'
            elif '720p' in label:
                quality = 'HD'
            elif '360p' in label:
                quality = 'SD'
            else:
                quality = 'SCR'
            host = client.host(url)
            srcs.append({'source': host, 'parts' : '1', 'quality': quality,
                         'scraper': self.name, 'url': url, 'direct': False})
        logger.debug('SOURCES [%s]' % srcs, __name__)
        return srcs
    except :
        return srcs
def source(self, item):
    """Build one multi-part source entry from a (title, links-html) pair.

    Each part link is fetched and resolved to its embedded player URL;
    parts that fail to resolve are logged and skipped. Returns a list
    with at most one entry.
    """
    title = item[0]
    links = item[1]
    urls = []
    if '720p' in title:
        quality = 'HD'
    else:
        quality = 'SD'
    parts = client.parseDOM(links, "a", ret="href")
    srcs = []
    host = None
    for part in parts:
        try:
            part = client.request(part)
            part = part.decode('iso-8859-1').encode('utf-8')
            part = client.parseDOM(
                part, "td",
                attrs={"style": "vertical-align:middle;text-align:center;"
                       })[0]
            tUrl = re.compile(
                '(SRC|src|data-config)=[\'|\"](.+?)[\'|\"]').findall(
                    part)[0][1]
            host = client.host(tUrl)
            urls.append(tUrl)
        except Exception as e:
            logger.error(e)
    # Fix: `host` was previously unbound (NameError) when every part
    # failed to resolve; return an empty list instead.
    if not urls:
        return srcs
    url = "##".join(urls)
    srcs.append({
        'source': host,
        # Fix: string count for 'parts', consistent with sibling scrapers.
        'parts': str(len(urls)),
        'quality': quality,
        'scraper': self.name,
        'url': url,
        'direct': False
    })
    return srcs
def sources(self, url):
    """Pair each red host label with its links paragraph and delegate
    every pair to self.source(); results accumulate on self.srcs."""
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        if url == None:
            return []
        html = client.request(url)
        html = html.decode('iso-8859-1').encode('utf-8')
        html = html.replace('\n', '')
        html = client.parseDOM(html, "div",
                               attrs={"class": "post-content bottom"})[0]
        paragraphs = client.parseDOM(html, "p")
        hosts = client.parseDOM(html, "span",
                                attrs={"style": "color: red;"})
        linkBlocks = [p for p in paragraphs if 'a href' in p]
        for pair in zip(hosts, linkBlocks):
            self.srcs.extend(self.source(pair))
        logger.debug('SOURCES [%s]' % self.srcs, __name__)
        return self.srcs
    except Exception as e:
        logger.error(e)
        return self.srcs
def sources(self, url):
    """List the download links found under the page's #list-dl section,
    reporting each as an HD source."""
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        found = []
        if url == None:
            return found
        target = urlparse.urljoin(self.base_link, url)
        try:
            html = client.request(target, referer=self.base_link)
        except:
            html = ''
        html = html.decode('iso-8859-1').encode('utf-8')
        html = html.replace('\n', '').replace('\t', '')
        section = client.parseDOM(html, "div", attrs={"id": "list-dl"})
        for link in client.parseDOM(section, "a", ret="href"):
            try:
                found.append({
                    'source': client.host(link),
                    'parts': '1',
                    'quality': 'HD',
                    'scraper': self.name,
                    'url': link,
                    'direct': False
                })
            except:
                pass
        logger.debug('SOURCES [%s]' % found, __name__)
        return found
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return found
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    """Search for '<title> <episode>' and delegate the first nofollow
    link in the post body to sources(). Returns [] when not found or on
    error."""
    try:
        query = '%s %s' % (title, episode)
        query = self.search_link % (urllib.quote_plus(query))
        try:
            result = client.request(self.base_link + query)
        except:
            result = ''
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '').replace('\t', '')
        result = client.parseDOM(result, 'content:encoded')[0]
        url = client.parseDOM(result, "a", attrs={"rel": "nofollow"},
                              ret="href")[0]
        # Fix: the old `if url == None: pass` branch fell off the end of
        # the function returning None; return [] like the error path.
        if url == None:
            return []
        return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
def desiRulezCache(self):
    """Scrape the 'Latest Exclusive Movie HQ' forum (trying each mirror
    in turn) and return a list of {'url', 'title', 'year'} dicts."""
    try:
        mirrors = [self.base_link_1, self.base_link_2, self.base_link_3]
        for mirror in mirrors:
            try:
                mirror = '%s/forums/20-Latest-Exclusive-Movie-HQ' % mirror
                result = client.request(mirror, timeout=2)
                if result == None:
                    continue
                break
            except:
                pass
        result = result.decode('iso-8859-1').encode('utf-8')
        threads = client.parseDOM(result, "li",
                                  attrs={"class": "threadbit hot"})
        movies = []
        for thread in threads:
            heading = client.parseDOM(thread, "h3",
                                      attrs={"class": "threadtitle"})[0]
            link = client.parseDOM(heading, "a", ret="href")[0]
            label = client.parseDOM(heading, "a")[0]
            # Thread titles look like "Some Movie (2016) ...".
            parsed = re.compile('(.+) [\(](\d{4})[/)] ').findall(label)[0]
            movies.append({
                'url': link,
                'title': parsed[0].encode('ascii', 'ignore'),
                'year': parsed[1]
            })
        return movies
    except:
        import traceback
        traceback.print_exc()
def resolve(url):
    """Turn a mediaplaybox video link into a playable stream URL via the
    site's mobile endpoint. Returns False on error."""
    try:
        url = url + '#'
        url = re.compile(
            'http://www.mediaplaybox.com/video/(.+?)#').findall(url)[0]
        url = 'http://www.mediaplaybox.com/mobile?vinf=%s' % url
        result = client.request(url, debug=True)
        # Preferred: the anchor inside the divider block.
        try:
            block = client.parseDOM(result, "div",
                                    attrs={"class": "divider"})[0]
            links = client.parseDOM(block, "a", ret="href")
            url = links[0]
            url = url.replace('_ipod.mp4', '.flv')
            return url
        except:
            pass
        # Fallback: the contentURL meta tag.
        try:
            url = client.parseDOM(result, "meta",
                                  attrs={"itemprop": "contentURL"},
                                  ret="content")[0]
        except:
            pass
        logger.debug('URL [%s]' % url, __name__)
        return url
    except:
        return False
def resolve(url):
    """Resolve an apnasave embed to its stream URL, preferring the HD
    variant from the player config. Returns False on error."""
    try:
        hdUrl = None
        try:
            page = client.request(url)
            embedSrc = client.parseDOM(page, name="source", ret="src")[0]
            videoId = getVideoID(embedSrc)
            configUrl = 'http://www.apnasave.in/media/player/config_embed.php?vkey=%s' % videoId
            config = client.request(configUrl)
            try:
                hdUrl = client.parseDOM(config, name="hd")[0]
                url = hdUrl
            except:
                pass
            # Fall back to the standard-definition src element.
            if hdUrl == None:
                url = client.parseDOM(config, name="src")[0]
        except:
            pass
        logger.debug('URL [%s]' % url, __name__)
        return url
    except:
        return False
def resolve(url):
    """Pick the highest available resolution <source> element from the
    page (720 down to 240); returns the input url untouched when none is
    found, or False on request failure."""
    preferred = ['720', '480', '360', '240']
    try:
        page = client.request(url)
        for resolution in preferred:
            try:
                url = client.parseDOM(page, name="source",
                                      attrs={"res": resolution},
                                      ret="src")[0]
                break
            except:
                pass
        logger.debug('URL [%s]' % url, __name__)
        return url
    except Exception as e:
        return False
def sources(self, url):
    """Walk alternating server-name / links paragraphs in the post body,
    resolve each link via the mobile page, and append one multi-part
    entry per server to the local list. Returns that list (possibly
    empty) on error too.
    """
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        srcs = []
        if url == None:
            return srcs
        result = client.request(url)
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '')
        result = client.parseDOM(
            result, name="div",
            attrs={"class": "entry-content rich-content"})[0]
        result = client.parseDOM(result, name="p")
        try:
            # NOTE(review): quality is initialised to '' and never
            # reassigned, so every entry ships an empty quality — looks
            # unintentional; confirm before changing.
            quality = host = ''
            urls = []
            # Paragraphs alternate: server name, then its links.
            result = result[1::]
            serversList = result[::2]
            linksList = result[1::2]
            for i in range(0, len(serversList)):
                try:
                    links = linksList[i]
                    urls = client.parseDOM(links, name="a", ret="href")
                    for j in range(0, len(urls)):
                        try:
                            # Resolve each part to its embedded player URL.
                            item = client.request(urls[j], mobile=True)
                            item = client.parseDOM(item, "td")[0]
                            item = re.compile(
                                '(SRC|src|data-config)=\"(.+?)\"').findall(
                                    item)[0][1]
                            urls[j] = item
                        except:
                            pass
                    if len(urls) > 1:
                        url = "##".join(urls)
                    else:
                        url = urls[0]
                    host = client.host(urls[0])
                    srcs.append({
                        'source': host,
                        'parts': str(len(urls)),
                        'quality': quality,
                        'scraper': self.name,
                        'url': url,
                        'direct': False
                    })
                except:
                    pass
        except:
            pass
        logger.debug('SOURCES [%s]' % srcs, __name__)
        return srcs
    except:
        return srcs
def scrape_movie(self, title, year, imdb, debrid=False):
    """Search the feed, filter items by cleaned title + year, derive
    quality/size/codec info from the release name, then scrape hoster
    links off each matching item's page into self.srcs.

    Returns self.srcs on success, [] on error.
    """
    try:
        query = '%s %s' % (title, year)
        query = self.search_link % (urllib.quote_plus(query))
        query = urlparse.urljoin(self.base_link, query)
        result = client.request(query)
        result = result.decode('iso-8859-1').encode('utf-8')
        posts = client.parseDOM(result, "item")
        items = []
        for post in posts:
            try:
                t = client.parseDOM(post, 'title')[0]
                # Skip trailer posts outright.
                if 'trailer' in cleantitle.get(t):
                    raise Exception()
                try:
                    s = re.findall(
                        '((?:\d+\.\d+|\d+\,\d+|\d+)(?:GB|GiB|MB|MiB|mb|gb))',
                        t)[0]
                except:
                    s = '0'
                i = client.parseDOM(post, 'link')[0]
                items += [{'name': t, 'url': i, 'size': s}]
            except:
                pass
        title = cleantitle.get(title)
        for item in items:
            try:
                name = item.get('name')
                # Strip year/episode/format suffix to isolate the title.
                t = re.sub(
                    '(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)',
                    '', name)
                if cleantitle.get(title) == cleantitle.get(t):
                    y = re.findall(
                        '[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]',
                        name)[-1].upper()
                    if not y == year:
                        raise Exception()
                    # Everything after the year is the format/tag blob.
                    fmt = re.sub(
                        '(.+)(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*)(\.|\)|\]|\s)',
                        '', name.upper())
                    fmt = re.split('\.|\(|\)|\[|\]|\s|\-', fmt)
                    fmt = [i.lower() for i in fmt]
                    # Reject dubbed/subbed releases and extras packs.
                    if any(
                            i.endswith(('subs', 'sub', 'dubbed', 'dub'))
                            for i in fmt):
                        raise Exception()
                    if any(i in ['extras'] for i in fmt):
                        raise Exception()
                    if '1080p' in fmt:
                        quality = '1080p'
                    elif '720p' in fmt:
                        quality = 'HD'
                    else:
                        quality = 'SD'
                    # Screener / cam tags override the resolution grade.
                    if any(i in ['dvdscr', 'r5', 'r6'] for i in fmt):
                        quality = 'SCR'
                    elif any(i in [
                            'camrip', 'tsrip', 'hdcam', 'hdts', 'dvdcam',
                            'dvdts', 'cam', 'telesync', 'ts'
                    ] for i in fmt):
                        quality = 'CAM'
                    info = []
                    if '3d' in fmt:
                        info.append('3D')
                    try:
                        # Normalise the advertised size to GB.
                        size = re.findall(
                            '((?:\d+\.\d+|\d+\,\d+|\d+)(?:GB|GiB|MB|MiB|mb|gb))',
                            item.get('size'))[-1]
                        div = 1 if size.endswith(('GB', 'GiB')) else 1024
                        size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                        size = '%.2f GB' % size
                        info.append(size)
                    except:
                        pass
                    if any(i in ['hevc', 'h265', 'x265'] for i in fmt):
                        info.append('HEVC')
                    info = ' | '.join(info)
                    movieurl = item.get('url')
                    result = client.request(movieurl)
                    result = result.decode('iso-8859-1').encode('utf-8')
                    result = result.replace('\n', '').replace('\t', '')
                    result = client.parseDOM(result, 'div',
                                             attrs={'class': 'entry'})[0]
                    links = client.parseDOM(result, 'a',
                                            attrs={'target': '_blank'},
                                            ret='href')
                    for link in links:
                        if 'http' in link:
                            host = client.host(link)
                            self.srcs.append({
                                'source': host,
                                'parts': '1',
                                'quality': quality,
                                'scraper': self.name,
                                'url': link,
                                'direct': False,
                                'info': info
                            })
            except:
                pass
        logger.debug('SOURCES [%s]' % self.srcs, __name__)
        return self.srcs
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        pass
        return []
def episodes(self, title, url):
    """List episode threads for a forum show, trying each site mirror
    until one answers with thread markup.

    The pagination link (rel="next") is stashed on the first episode
    dict under 'next'. Returns the collected episode list (possibly
    empty on error).
    """
    try:
        episodes = []
        links = [self.base_link_1, self.base_link_2, self.base_link_3]
        tvshowurl = url
        for base_link in links:
            try:
                # `url` may carry a mirror prefix; strip it before reuse.
                url = url.replace(base_link, '')
                result = client.request(base_link + '/' + url)
                if result == None:
                    raise Exception()
            except:
                result = ''
            if 'threadtitle' in result:
                break
        rawResult = result.decode('windows-1252').encode('utf-8')
        result = client.parseDOM(
            rawResult, "h3",
            attrs={"class": "title threadtitle_unread"})
        result += client.parseDOM(rawResult, "h3",
                                  attrs={"class": "threadtitle"})
        for item in result:
            # Read thread markup shows use a different anchor class.
            name = client.parseDOM(item, "a", attrs={"class": "title"})
            name += client.parseDOM(
                item, "a", attrs={"class": "title threadtitle_unread"})
            if type(name) is list:
                name = name[0]
            url = client.parseDOM(item, "a", ret="href")
            if type(url) is list:
                url = url[0]
            # Only "... Online" threads are actual episodes.
            if "Online" not in name:
                continue
            name = name.replace(title, '')
            if not title == 'awards':
                try:
                    # Reduce the label to its date-like portion.
                    name = re.compile('([\d{1}|\d{2}]\w.+\d{4})').findall(
                        name)[0]
                except:
                    pass
            name = name.strip()
            try:
                season = title.lower()
                season = re.compile('[0-9]+').findall(season)[0]
            except:
                season = '0'
            episodes.append({
                'season': season,
                'tvshowtitle': title,
                'title': name,
                'name': name,
                'url': url,
                'provider': 'desirulez',
                'tvshowurl': tvshowurl
            })
        next = client.parseDOM(rawResult, "span",
                               attrs={"class": "prev_next"})
        next = client.parseDOM(next, "a", attrs={"rel": "next"},
                               ret="href")[0]
        episodes[0].update({'next': next})
    except Exception as e:
        logger.error(e)
    logger.debug(episodes, __name__)
    return episodes
def tvshows(self, name, url):
    """List show forums under `url`, trying each mirror until one
    answers with forum markup. Returns a list of show dicts, or None on
    error.
    """
    try:
        result = ''
        shows = []
        links = [self.base_link_1, self.base_link_2, self.base_link_3]
        for base_link in links:
            try:
                # Extended output: (body, status, resp-headers,
                # req-headers, cookie).
                result, response_code, response_headers, headers, cookie = client.request(
                    '%s/%s' % (base_link, url), output='extended')
                if result == None:
                    raise Exception()
            except:
                result = ''
            if 'forumtitle' in result:
                break
        #result = result.decode('ISO-8859-1').encode('utf-8')
        result = result.decode('windows-1252').encode('utf-8')
        result = client.parseDOM(result, "h2",
                                 attrs={"class": "forumtitle"})
        for item in result:
            title = ''
            url = ''
            # Unread/read forums use different anchor classes; fall back
            # to any anchor.
            try:
                title = client.parseDOM(
                    item, "a",
                    attrs={"class": "title threadtitle_unread"})[0]
            except:
                title = client.parseDOM(item, "a",
                                        attrs={"class": "title"})
                title = title[0] if title else client.parseDOM(item, "a")[0]
            #title = cleantitle.unicodetoascii(title)
            title = client.replaceHTMLCodes(title)
            # Site-specific title correction.
            if title == 'Naamkarann':
                title = 'Naamkaran'
            url = client.parseDOM(item, "a", ret="href")
            if not url:
                url = client.parseDOM(item, "a",
                                      attrs={"class": "title"},
                                      ret="href")
            if type(url) is list and len(url) > 0:
                url = str(url[0])
            if not 'Past Shows' in title:
                # name , title, poster, imdb, tvdb, year, poster, banner, fanart, duration
                shows.append({
                    'name': title,
                    'title': title,
                    'url': url,
                    'poster': '0',
                    'banner': '0',
                    'fanart': '0',
                    'next': '0',
                    'year': '0',
                    'duration': '0',
                    'provider': 'desirulez'
                })
        return shows
    except Exception as e:
        logger.error(e)
        return
def imdb_list(self, url):
    """Scrape one page of an IMDb list/search result into self.list.

    Supports both the advanced-search layout and the older list layout.
    Each entry is a metadata dict; '0' stands in for unknown fields.
    Field extraction is best-effort — a failure in any single item skips
    that item only. Returns self.list (None when the page fetch fails).
    """
    try:
        # Expand date[N] placeholders to concrete YYYY-MM-DD dates,
        # N days before self.datetime.
        for i in re.findall('date\[(\d+)\]', url):
            url = url.replace(
                'date[%s]' % i,
                (self.datetime -
                 datetime.timedelta(days=int(i))).strftime('%Y-%m-%d'))
        result = client.request(url)
        result = result.replace('\n', '')
        result = result.decode('iso-8859-1').encode('utf-8')
        items = client.parseDOM(
            result, 'div', attrs={'class': 'lister-item mode-advanced'})
        items += client.parseDOM(result, 'div',
                                 attrs={'class': 'list_item.+?'})
    except:
        return
    try:
        # Locate the next-page link (two layouts) and rebuild it against
        # the current url's base.
        next = client.parseDOM(result, 'a', ret='href',
                               attrs={'class': 'lister-page-next.+?'})
        if len(next) == 0:
            next = client.parseDOM(result, 'div',
                                   attrs={'class': 'pagination'})[0]
            next = zip(client.parseDOM(next, 'a', ret='href'),
                       client.parseDOM(next, 'a'))
            next = [i[0] for i in next if 'Next' in i[1]]
        next = url.replace(
            urlparse.urlparse(url).query,
            urlparse.urlparse(next[0]).query)
        next = client.replaceHTMLCodes(next)
        next = next.encode('utf-8')
    except:
        next = ''
    for item in items:
        try:
            title = client.parseDOM(item, 'a')[1]
            title = client.replaceHTMLCodes(title)
            title = title.encode('utf-8')
            year = client.parseDOM(item, 'span',
                                   attrs={'class': 'lister-item-year.+?'})
            year += client.parseDOM(item, 'span',
                                    attrs={'class': 'year_type'})
            year = re.findall('(\d{4})', year[0])[0]
            year = year.encode('utf-8')
            # Skip titles dated in the future.
            if int(year) > int((self.datetime).strftime('%Y')):
                raise Exception()
            imdb = client.parseDOM(item, 'a', ret='href')[0]
            imdb = re.findall('(tt\d*)', imdb)[0]
            imdb = imdb.encode('utf-8')
            tmdb = '0'
            try:
                poster = client.parseDOM(item, 'img', ret='loadlate')[0]
            except:
                poster = '0'
            # Upgrade the thumbnail crop to a 500px-wide variant.
            poster = re.sub(
                '(?:_SX\d+?|)(?:_SY\d+?|)(?:_UX\d+?|)_CR\d+?,\d+?,\d+?,\d*',
                '_SX500', poster)
            poster = client.replaceHTMLCodes(poster)
            poster = poster.encode('utf-8')
            try:
                genre = client.parseDOM(item, 'span',
                                        attrs={'class': 'genre'})[0]
            except:
                genre = '0'
            genre = ' / '.join([i.strip() for i in genre.split(',')])
            if genre == '':
                genre = '0'
            genre = client.replaceHTMLCodes(genre)
            genre = genre.encode('utf-8')
            try:
                duration = re.findall('(\d+?) min(?:s|)', item)[-1]
            except:
                duration = '0'
            duration = duration.encode('utf-8')
            rating = '0'
            # Three rating markups, tried from oldest to newest.
            try:
                rating = client.parseDOM(
                    item, 'span', attrs={'class': 'rating-rating'})[0]
            except:
                pass
            try:
                rating = client.parseDOM(rating, 'span',
                                         attrs={'class': 'value'})[0]
            except:
                rating = '0'
            try:
                rating = client.parseDOM(
                    item, 'div', ret='data-value',
                    attrs={'class': '.*?imdb-rating'})[0]
            except:
                pass
            if rating == '' or rating == '-':
                rating = '0'
            rating = client.replaceHTMLCodes(rating)
            rating = rating.encode('utf-8')
            try:
                votes = client.parseDOM(
                    item, 'div', ret='title',
                    attrs={'class': '.*?rating-list'})[0]
            except:
                votes = '0'
            try:
                votes = re.findall('\((.+?) vote(?:s|)\)', votes)[0]
            except:
                votes = '0'
            if votes == '':
                votes = '0'
            votes = client.replaceHTMLCodes(votes)
            votes = votes.encode('utf-8')
            try:
                mpaa = client.parseDOM(item, 'span',
                                       attrs={'class': 'certificate'})[0]
            except:
                mpaa = '0'
            if mpaa == '' or mpaa == 'NOT_RATED':
                mpaa = '0'
            mpaa = mpaa.replace('_', '-')
            mpaa = client.replaceHTMLCodes(mpaa)
            mpaa = mpaa.encode('utf-8')
            try:
                director = re.findall('Director(?:s|):(.+?)(?:\||</div>)',
                                      item)[0]
            except:
                director = '0'
            director = client.parseDOM(director, 'a')
            director = ' / '.join(director)
            if director == '':
                director = '0'
            director = client.replaceHTMLCodes(director)
            director = director.encode('utf-8')
            try:
                cast = re.findall('Stars(?:s|):(.+?)(?:\||</div>)',
                                  item)[0]
            except:
                cast = '0'
            cast = client.replaceHTMLCodes(cast)
            cast = cast.encode('utf-8')
            cast = client.parseDOM(cast, 'a')
            if cast == []:
                cast = '0'
            plot = '0'
            try:
                plot = client.parseDOM(item, 'p',
                                       attrs={'class': 'text-muted'})[0]
            except:
                pass
            try:
                plot = client.parseDOM(
                    item, 'div', attrs={'class': 'item_description'})[0]
            except:
                pass
            # Drop the trailing "<span>...See full summary" fragment.
            plot = plot.rsplit('<span>', 1)[0].strip()
            if plot == '':
                plot = '0'
            plot = client.replaceHTMLCodes(plot)
            plot = plot.encode('utf-8')
            self.list.append({
                'title': title,
                'originaltitle': title,
                'year': year,
                'premiered': '0',
                'studio': '0',
                'genre': genre,
                'duration': duration,
                'rating': rating,
                'votes': votes,
                'mpaa': mpaa,
                'director': director,
                'writer': '0',
                'cast': cast,
                'plot': plot,
                'code': imdb,
                'imdb': imdb,
                'tvdb': '0',
                'tmdb': tmdb,
                'poster': poster,
                'banner': '0',
                'fanart': '0',
                'next': next
            })
        except:
            pass
    return self.list
def sources(self, url):
    """Resolve a movie/episode page, perform the site's token/elid AJAX
    handshake, and return the embedded gvideo/openload streams.

    `url` may be either a metadata query string (rebuilt into a site
    path) or a site-relative link. Returns [] on error.
    """
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        srcs = []
        if url == None:
            return srcs
        # NOTE(review): `search_id` and `year` are not defined in this
        # scope — this line looks like dead leftover code and would raise
        # NameError if reached; confirm and remove upstream.
        start_url = '%s/watch?v=%s_%s' %(self.base_link,search_id.replace(' ','_'),year)
        if not str(url).startswith('http'):
            # Metadata form: rebuild the canonical site path.
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            if 'tvshowtitle' in data:
                url = '%s/tv-show/%s/season/%01d/episode/%01d' % (self.base_link, cleantitle.geturl(title), int(data['season']), int(data['episode']))
            else:
                url = '%s/movie/%s' % (self.base_link, cleantitle.geturl(title))
            # Probe the page title: '%TITLE%' means the page is a stub.
            result = client.request(url, limit='5')
            result = client.parseDOM(result, 'title')[0]
            if '%TITLE%' in result:
                raise Exception()
            r = client.request(url, output='extended')
        else:
            url = urlparse.urljoin(self.base_link, url)
            r = client.request(url, output='extended')
        # Extended output tuple: (body, status, resp-headers,
        # req-headers, cookie).
        cookie = r[4] ; headers = r[3] ; result = r[0]
        # The bearer token travels in the __utmx cookie.
        try:
            auth = re.findall('__utmx=(.+)', cookie)[0].split(';')[0]
        except:
            auth = 'false'
        auth = 'Bearer %s' % urllib.unquote_plus(auth)
        headers['Authorization'] = auth
        headers['X-Requested-With'] = 'XMLHttpRequest'
        headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
        headers['Accept'] = 'application/json, text/javascript, */*; q=0.01'
        headers['Cookie'] = cookie
        headers['Referer'] = url
        u = '/ajax/tnembeds.php'
        # Follow any redirect so the AJAX endpoint uses the final host.
        self.base_link = client.request(self.base_link, output='geturl')
        u = urlparse.urljoin(self.base_link, u)
        action = 'getEpisodeEmb' if '/episode/' in url else 'getMovieEmb'
        # elid is the current unix time, base64'd then urlencoded.
        elid = urllib.quote(base64.encodestring(str(int(time.time()))).strip())
        token = re.findall("var\s+tok\s*=\s*'([^']+)", result)[0]
        idEl = re.findall('elid\s*=\s*"([^"]+)', result)[0]
        post = {'action': action, 'idEl': idEl, 'token': token, 'elid': elid}
        post = urllib.urlencode(post)
        r = client.request(u, post=post, headers=headers)
        r = str(json.loads(r))
        r = client.parseDOM(r, 'iframe', ret='.+?') + client.parseDOM(r, 'IFRAME', ret='.+?')
        links = []
        for i in r:
            try:
                links += [{'source': 'gvideo', 'quality': directstream.googletag(i)[0]['quality'], 'url': i, 'direct': True}]
            except:
                pass
        links += [{'source': 'openload.co', 'quality': 'SD', 'url': i, 'direct': False} for i in r if 'openload.co' in i]
        for i in links:
            srcs.append({'source': i['source'], 'quality': i['quality'], 'scraper': self.name, 'url': i['url'], 'direct': i['direct']})
        logger.debug('SOURCES [%s]' % srcs, __name__)
        return srcs
    except:
        return srcs