def sources_packs(self, data, hostDict, search_series=False, total_seasons=None, bypass_filter=False):
    self.sources = []
    if not data: return self.sources
    try:
        self.search_series = search_series
        self.total_seasons = total_seasons
        self.bypass_filter = bypass_filter

        self.title = data['tvshowtitle'].replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.aliases = data['aliases']
        self.imdb = data['imdb']
        self.year = data['year']
        self.season_x = data['season']
        self.season_xx = self.season_x.zfill(2)

        self.headers = cache.get(self._get_token_and_cookies, 1)

        query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', self.title)
        queries = [
            quote_plus(query + ' S%s' % self.season_xx),
            quote_plus(query + ' Season %s' % self.season_x)]
        if search_series:
            queries = [
                quote_plus(query + ' Season'),
                quote_plus(query + ' Complete')]

        threads = []
        for url in queries:
            link = ('%s%s' % (self.base_link, self.search_link % url)).replace('+', '-')
            threads.append(workers.Thread(self.get_sources_packs, link, url.replace('+', '-')))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('BITLORD')
        return self.sources
def tvshow(self, imdb, tvdb, tvshowtitle, aliases, year):
    try:
        if (self.user == '' or self.password == ''): return
        url = cache.get(self.ororo_tvcache, 120, self.user)
        if not url: return
        url = [i[0] for i in url if imdb == i[1]]
        if not url: return
        url = self.show_link % url[0]
        return url
    except:
        source_utils.scraper_error('ORORO')
        return
def sources(self, data, hostDict):
    sources = []
    if not data: return sources
    try:
        if (self.user == '' or self.password == ''): return sources
        url = cache.get(self.ororo_tvcache, 120, self.user)
        if not url: return sources
        url = [i[0] for i in url if data['imdb'] == i[1]]
        if not url: return sources
        url = self.show_link % url[0]
        url = urljoin(self.base_link, url)

        r = client.request(url, headers=self.headers)
        r = jsloads(r)['episodes']
        r = [(str(i['id']), str(i['season']), str(i['number']), str(i['airdate'])) for i in r]
        url = [i for i in r if data['season'] == i[1] and data['episode'] == i[2]]
        url += [i for i in r if data['premiered'] == i[3]]
        if not url: return sources

        url = self.episode_link % url[0][0]
        url = urljoin(self.base_link, url)
        url = client.request(url, headers=self.headers)
        if not url: return sources
        url = jsloads(url)['url']
        # log_utils.log('url = %s' % url, __name__)

        name = re.sub(r'(.*?)\/video/file/(.*?)/', '', url).split('.smil')[0].split('-')[0]
        quality, info = source_utils.get_release_quality(name)
        info = ' | '.join(info)

        sources.append({'provider': 'ororo', 'source': 'direct', 'name': name, 'quality': quality, 'language': 'en',
                                    'url': url, 'info': info, 'direct': True, 'debridonly': False, 'size': 0}) # Ororo does not return a file size
        return sources
    except:
        source_utils.scraper_error('ORORO')
        return sources
def movie(self, imdb, title, aliases, year): # seems Ororo does not provide Movies
    try:
        if (self.user == '' or self.password == ''): return
        url = cache.get(self.ororo_moviecache, 60, self.user)
        if not url: return
        url = [i[0] for i in url if imdb == i[1]]
        if not url: return
        url = self.movie_link % url[0]
        return url
    except:
        source_utils.scraper_error('ORORO')
        return
def request(url, close=True, redirect=True, error=False, proxy=None, post=None, headers=None, mobile=False, XHR=False,
            limit=None, referer=None, cookie=None, compression=True, output='', timeout='30', verifySsl=True,
            flare=True, ignoreErrors=None, as_bytes=False):
    try:
        if not url: return None
        if url.startswith('//'): url = 'http:' + url
        try: url = py_tools.ensure_text(url, errors='ignore')
        except: pass
        if isinstance(post, dict):
            post = bytes(urlencode(post), encoding='utf-8')
        elif isinstance(post, str) and py_tools.isPY3:
            post = bytes(post, encoding='utf-8')

        handlers = []
        if proxy is not None:
            handlers += [urllib2.ProxyHandler({'http': '%s' % (proxy)}), urllib2.HTTPHandler]
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)

        if output == 'cookie' or output == 'extended' or close is not True:
            cookies = cookielib.LWPCookieJar()
            handlers += [urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(cookies)]
            opener = urllib2.build_opener(*handlers)
            urllib2.install_opener(opener)

        if not verifySsl and version_info >= (2, 7, 12):
            try:
                import ssl
                ssl_context = ssl._create_unverified_context()
                handlers += [urllib2.HTTPSHandler(context=ssl_context)]
                opener = urllib2.build_opener(*handlers)
                urllib2.install_opener(opener)
            except:
                from fenomscrapers.modules import log_utils
                log_utils.error()

        if verifySsl and ((2, 7, 8) < version_info < (2, 7, 12)):
            # try:
            #     import ssl
            #     ssl_context = ssl.create_default_context()
            #     ssl_context.check_hostname = False
            #     ssl_context.verify_mode = ssl.CERT_NONE
            #     handlers += [urllib2.HTTPSHandler(context=ssl_context)]
            #     opener = urllib2.build_opener(*handlers)
            #     urllib2.install_opener(opener)
            # except:
            #     from fenomscrapers.modules import log_utils
            #     log_utils.error()
            try:
                import ssl
                try:
                    import _ssl
                    CERT_NONE = _ssl.CERT_NONE
                except Exception:
                    CERT_NONE = ssl.CERT_NONE
                ssl_context = ssl.create_default_context()
                ssl_context.check_hostname = False
                ssl_context.verify_mode = CERT_NONE
                handlers += [urllib2.HTTPSHandler(context=ssl_context)]
                opener = urllib2.build_opener(*handlers)
                urllib2.install_opener(opener)
            except:
                from fenomscrapers.modules import log_utils
                log_utils.error()

        try: headers.update(headers)
        except: headers = {}
        if 'User-Agent' in headers: pass
        elif mobile is not True: headers['User-Agent'] = cache.get(randomagent, 12)
        else: headers['User-Agent'] = 'Apple-iPhone/701.341'
        if 'Referer' in headers: pass
        elif referer is not None: headers['Referer'] = referer
        if 'Accept-Language' not in headers: headers['Accept-Language'] = 'en-US'
        if 'X-Requested-With' in headers: pass
        elif XHR: headers['X-Requested-With'] = 'XMLHttpRequest'
        if 'Cookie' in headers: pass
        elif cookie: headers['Cookie'] = cookie
        if 'Accept-Encoding' in headers: pass
        elif compression and limit is None: headers['Accept-Encoding'] = 'gzip'

        # if redirect is False:
        #     class NoRedirection(urllib2.HTTPErrorProcessor):
        #         def http_response(self, request, response):
        #             return response
        #     opener = urllib2.build_opener(NoRedirection)
        #     urllib2.install_opener(opener)
        #     try: del headers['Referer']
        #     except: pass
        if redirect is False:
            class NoRedirectHandler(urllib2.HTTPRedirectHandler):
                def http_error_302(self, reqst, fp, code, msg, head):
                    infourl = addinfourl(fp, head, reqst.get_full_url())
                    infourl.status = code
                    infourl.code = code
                    return infourl
                http_error_300 = http_error_302
                http_error_301 = http_error_302
                http_error_303 = http_error_302
                http_error_307 = http_error_302
            opener = urllib2.build_opener(NoRedirectHandler())
            urllib2.install_opener(opener)
            try: del headers['Referer']
            except: pass

        req = urllib2.Request(url, data=post)
        _add_request_header(req, headers)
        try:
            response = urllib2.urlopen(req, timeout=int(timeout))
        except HTTPError as error_response:
            # if HTTPError, using "as response" will be reset after entire Exception code runs and throws error around
            # line 247 as "local variable 'response' referenced before assignment", re-assign it
            response = error_response

        try: ignore = ignoreErrors and (int(response.code) == ignoreErrors or int(response.code) in ignoreErrors)
        except: ignore = False

        if not ignore:
            if response.code in [301, 307, 308, 503, 403]: # 403:Forbidden added 3/3/21 for cloudflare, fails on bad User-Agent
                cf_result = response.read(5242880)
                try: encoding = response.headers["Content-Encoding"]
                except: encoding = None
                if encoding == 'gzip': cf_result = gzip.GzipFile(fileobj=StringIO(cf_result)).read()

                if flare and 'cloudflare' in str(response.info()).lower():
                    from fenomscrapers.modules import log_utils
                    log_utils.log('client module calling cfscrape: url=%s' % url, level=log_utils.LOGDEBUG)
                    try:
                        from fenomscrapers.modules import cfscrape
                        if isinstance(post, dict): data = post
                        else:
                            try: data = parse_qs(post)
                            except: data = None
                        scraper = cfscrape.CloudScraper()
                        if response.code == 403: # possible bad User-Agent in headers, let cfscrape assign
                            response = scraper.request(method='GET' if post is None else 'POST', url=url, data=data, timeout=int(timeout))
                        else:
                            response = scraper.request(method='GET' if post is None else 'POST', url=url, headers=headers, data=data, timeout=int(timeout))
                        result = response.content
                        flare = 'cloudflare' # Used below
                        try: cookies = response.request._cookies
                        except: log_utils.error()
                        if response.status_code == 403: # if cfscrape server still responds with 403
                            log_utils.log('cfscrape-Error url=(%s): %s' % (url, 'HTTP Error 403: Forbidden'), __name__, level=log_utils.LOGDEBUG)
                            return None
                    except:
                        log_utils.error()
                elif 'cf-browser-verification' in cf_result:
                    netloc = '%s://%s' % (urlparse(url).scheme, urlparse(url).netloc)
                    ua = headers['User-Agent']
                    cf = cache.get(cfcookie().get, 168, netloc, ua, timeout)
                    headers['Cookie'] = cf
                    req = urllib2.Request(url, data=post)
                    _add_request_header(req, headers)
                    response = urllib2.urlopen(req, timeout=int(timeout))
                else:
                    if error is False:
                        from fenomscrapers.modules import log_utils
                        log_utils.error('Request-Error url=(%s)' % url)
                        return None
            else:
                if error is False:
                    from fenomscrapers.modules import log_utils
                    log_utils.error('Request-Error url=(%s)' % url)
                    return None
                elif error is True and response.code in [401, 404, 405]: # no point in continuing after this exception runs with these response.code's
                    try:
                        # behaves differently 18 to 19. 18 I had 3 "Set-Cookie:" it combined all 3 values into 1 key. In 19 only the last keys value was present.
                        response_headers = dict([(item[0].title(), item[1]) for item in list(response.info().items())])
                    except:
                        from fenomscrapers.modules import log_utils
                        log_utils.error()
                        response_headers = response.headers
                    return (str(response), str(response.code), response_headers)

        if output == 'cookie':
            try: result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except: pass
            try: result = cf
            except: pass
            if close is True: response.close()
            return result
        elif output == 'geturl':
            result = response.geturl()
            if close is True: response.close()
            return result
        elif output == 'headers':
            result = response.headers
            if close is True: response.close()
            return result
        elif output == 'chunk':
            try: content = int(response.headers['Content-Length'])
            except: content = (2049 * 1024)
            if content < (2048 * 1024): return
            try: result = response.read(16 * 1024)
            except: result = response # testing
            if close is True: response.close()
            return result
        elif output == 'file_size':
            try: content = int(response.headers['Content-Length'])
            except: content = '0'
            if close is True: response.close()
            return content

        if flare != 'cloudflare':
            if limit == '0': result = response.read(224 * 1024)
            elif limit is not None: result = response.read(int(limit) * 1024)
            else: result = response.read(5242880)
            try: encoding = response.headers["Content-Encoding"]
            except: encoding = None
            if encoding == 'gzip': result = gzip.GzipFile(fileobj=StringIO(result)).read()

        if not as_bytes:
            result = py_tools.ensure_text(result, errors='ignore')

        if 'sucuri_cloudproxy_js' in result:
            su = sucuri().get(result)
            headers['Cookie'] = su
            req = urllib2.Request(url, data=post)
            _add_request_header(req, headers)
            response = urllib2.urlopen(req, timeout=int(timeout))
            if limit == '0': result = response.read(224 * 1024)
            elif limit is not None: result = response.read(int(limit) * 1024)
            else: result = response.read(5242880)
            try: encoding = response.headers["Content-Encoding"]
            except: encoding = None
            if encoding == 'gzip': result = gzip.GzipFile(fileobj=StringIO(result)).read()

        if 'Blazingfast.io' in result and 'xhr.open' in result:
            netloc = '%s://%s' % (urlparse(url).scheme, urlparse(url).netloc)
            ua = headers['User-Agent']
            headers['Cookie'] = cache.get(bfcookie().get, 168, netloc, ua, timeout)
            result = _basic_request(url, headers=headers, post=post, timeout=timeout, limit=limit)

        if output == 'extended':
            try:
                # behaves differently 18 to 19. 18 I had 3 "Set-Cookie:" it combined all 3 values into 1 key. In 19 only the last keys value was present.
                response_headers = dict([(item[0].title(), item[1]) for item in list(response.info().items())])
            except:
                from fenomscrapers.modules import log_utils
                log_utils.error()
                response_headers = response.headers
            try: response_code = str(response.code)
            except: response_code = str(response.status_code) # object from CFScrape Requests object.
            try: cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
            except: pass
            try: cookie = cf
            except: pass
            if close is True: response.close()
            return (result, response_code, response_headers, headers, cookie)
        else:
            if close is True: response.close()
            return result
    except:
        from fenomscrapers.modules import log_utils
        log_utils.error('Request-Error url=(%s)' % url)
        return None
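# Usage sketch (illustrative only, not part of the original module): the URL below is a placeholder,
# but parameter names and return shapes follow the request() signature above. With the default
# output='', request() returns the decoded page body (or None on failure); with output='extended'
# it returns a 5-tuple of (body, response code, response headers, request headers sent, cookie string).
#
#   html = request('https://example.com/search?q=test', timeout='15')
#   if html:
#       body, code, resp_headers, req_headers, cookie_str = request(
#           'https://example.com/search?q=test', output='extended', timeout='15')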
def sources(self, data, hostDict):
    sources = []
    if not data: return sources
    try:
        self.scraper = cfscrape.create_scraper()
        self.key = cache.get(self._get_token, 0.2) # 800 secs token is valid for

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        aliases = data['aliases']
        episode_title = data['title'] if 'tvshowtitle' in data else None
        year = data['year']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year

        query = '%s %s' % (title, hdlr)
        query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
        if 'tvshowtitle' in data:
            search_link = self.tvshowsearch.format(self.key, data['imdb'], hdlr)
        else:
            search_link = self.msearch.format(self.key, data['imdb'])
        sleep(2.1)
        rjson = self.scraper.get(search_link).content
        if not rjson or 'torrent_results' not in str(rjson): return sources
        files = jsloads(rjson)['torrent_results']
    except:
        source_utils.scraper_error('TORRENTAPI')
        return sources

    for file in files:
        try:
            url = file["download"].split('&tr')[0]
            hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
            name = source_utils.clean_name(unquote_plus(file["title"]))

            if not source_utils.check_title(title, aliases, name, hdlr, year): continue
            name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
            if source_utils.remove_lang(name_info): continue

            if not episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
                ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
                if any(re.search(item, name.lower()) for item in ep_strings): continue

            try:
                seeders = int(file["seeders"])
                if self.min_seeders > seeders: continue
            except: seeders = 0

            quality, info = source_utils.get_release_quality(name_info, url)
            try:
                dsize, isize = source_utils.convert_size(file["size"], to='GB')
                info.insert(0, isize)
            except: dsize = 0
            info = ' | '.join(info)

            sources.append({'provider': 'torrentapi', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name,
                                        'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info,
                                        'direct': False, 'debridonly': True, 'size': dsize})
        except:
            source_utils.scraper_error('TORRENTAPI')
    return sources
def sources_packs(self, data, hostDict, search_series=False, total_seasons=None, bypass_filter=False):
    sources = []
    if not data: return sources
    if search_series: # torrentapi does not have showPacks
        return sources
    try:
        self.scraper = cfscrape.create_scraper()
        self.key = cache.get(self._get_token, 0.2) # 800 secs token is valid for
        self.bypass_filter = bypass_filter

        self.title = data['tvshowtitle'].replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.aliases = data['aliases']
        self.year = data['year']
        self.season_x = data['season']
        self.season_xx = self.season_x.zfill(2)

        search_link = self.tvshowsearch.format(self.key, data['imdb'], 'S%s' % self.season_xx)
        # log_utils.log('search_link = %s' % str(search_link))
        sleep(2.1)
        rjson = self.scraper.get(search_link).content
        if not rjson or 'torrent_results' not in str(rjson): return sources
        files = jsloads(rjson)['torrent_results']
    except:
        source_utils.scraper_error('TORRENTAPI')
        return sources

    for file in files:
        try:
            url = file["download"].split('&tr')[0]
            hash = re.search(r'btih:(.*?)&', url, re.I).group(1)
            name = source_utils.clean_name(unquote_plus(file["title"]))

            if not self.bypass_filter:
                if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name):
                    continue
            package = 'season'

            name_info = source_utils.info_from_name(name, self.title, self.year, season=self.season_x, pack=package)
            if source_utils.remove_lang(name_info): continue

            try:
                seeders = int(file["seeders"])
                if self.min_seeders > seeders: continue
            except: seeders = 0

            quality, info = source_utils.get_release_quality(name_info, url)
            try:
                dsize, isize = source_utils.convert_size(file["size"], to='GB')
                info.insert(0, isize)
            except: dsize = 0
            info = ' | '.join(info)

            sources.append({'provider': 'torrentapi', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name,
                                        'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info,
                                        'direct': False, 'debridonly': True, 'size': dsize, 'package': package})
        except:
            source_utils.scraper_error('TORRENTAPI')
    return sources
def base_link(self):
    if not self._base_link:
        self._base_link = cache.get(self.__get_base_url, 120, 'https://%s' % self.domains[0])
    return self._base_link
def sources(self, data, hostDict):
    sources = []
    if not data: return sources
    try:
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        aliases = data['aliases']
        episode_title = data['title'] if 'tvshowtitle' in data else None
        year = data['year']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year

        query = '%s %s' % (title, hdlr)
        query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
        url = '%s%s' % (self.base_link, self.search_link % quote_plus(query))
        # log_utils.log('url = %s' % url, __name__, log_utils.LOGDEBUG)
        api_url = '%s%s' % (self.base_link, self.api_search_link)

        headers = cache.get(self._get_token_and_cookies, 1)
        if not headers: return sources
        headers.update({'Referer': url})

        query_data = {
            'query': query,
            'offset': 0,
            'limit': 99,
            'filters[field]': 'seeds',
            'filters[sort]': 'desc',
            'filters[time]': 4,
            'filters[category]': 3 if 'tvshowtitle' not in data else 4,
            'filters[adult]': False,
            'filters[risky]': False}
        rjson = client.request(api_url, post=query_data, headers=headers, timeout='5')
        if not rjson: return sources
        files = jsloads(rjson)
        error = files.get('error')
        if error: return sources
    except:
        source_utils.scraper_error('BITLORD')
        return sources

    for file in files.get('content'):
        try:
            name = source_utils.clean_name(file.get('name'))

            if not source_utils.check_title(title, aliases, name, hdlr, year): continue
            name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
            if source_utils.remove_lang(name_info): continue

            url = unquote_plus(file.get('magnet')).replace('&amp;', '&').replace(' ', '.')
            url = re.sub(r'(&tr=.+)&dn=', '&dn=', url) # some links on bitlord &tr= before &dn=
            url = url.split('&tr=')[0].split('&xl=')[0]
            url = source_utils.strip_non_ascii_and_unprintable(url)
            hash = re.search(r'btih:(.*?)&', url, re.I).group(1)

            if not episode_title: # filter for eps returned in movie query (rare but movie and show exists for Run in 2020)
                ep_strings = [r'[.-]s\d{2}e\d{2}([.-]?)', r'[.-]s\d{2}([.-]?)', r'[.-]season[.-]?\d{1,2}[.-]?']
                if any(re.search(item, name.lower()) for item in ep_strings): continue

            try:
                seeders = file.get('seeds')
                if self.min_seeders > seeders: continue
            except: seeders = 0

            quality, info = source_utils.get_release_quality(name_info, url)
            try:
                size = file.get('size')
                size = str(size) + ' GB' if len(str(size)) <= 2 else str(size) + ' MB' # bitlord size is all over the place between MB and GB
                dsize, isize = source_utils._size(size)
                info.insert(0, isize)
            except: dsize = 0
            info = ' | '.join(info)

            sources.append({'provider': 'bitlord', 'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name,
                                        'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info,
                                        'direct': False, 'debridonly': True, 'size': dsize})
        except:
            source_utils.scraper_error('BITLORD')
    return sources