def normalize(title):
	try:
		return ''.join(c for c in unicodedata.normalize('NFKD', py_tools.ensure_text(py_tools.ensure_str(title))) if unicodedata.category(c) != 'Mn')
	except:
		error()
		return title
def normalize(title):
	try:
		title = ''.join(c for c in unicodedata.normalize('NFKD', py_tools.ensure_text(py_tools.ensure_str(title))) if unicodedata.category(c) != 'Mn')
		return str(title)
	except:
		from fenomscrapers.modules import log_utils
		log_utils.error()
		return title
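# Usage sketch (illustrative, not part of the module): NFKD decomposition splits an
# accented character into its base character plus a combining mark (category 'Mn'),
# and the join above drops those marks. Stdlib-only example:
#
#   import unicodedata
#   s = unicodedata.normalize('NFKD', u'Amélie')
#   ''.join(c for c in s if unicodedata.category(c) != 'Mn')  # -> 'Amelie'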
def log(msg, caller=None, level=LOGNOTICE):
	debug_enabled = getSetting('debug.enabled') == 'true'
	if not debug_enabled: return
	debug_location = getSetting('debug.location')
	if isinstance(msg, int): msg = lang(msg)  # for strings.po translations
	try:
		if py_tools.isPY3:
			if not msg.isprintable():  # e.g. "\n" is not a printable character, so isprintable() returns False in such cases
				msg = '%s (NORMALIZED by log_utils.log())' % normalize(msg)
			if isinstance(msg, py_tools.binary_type):
				msg = '%s (ENCODED by log_utils.log())' % (py_tools.ensure_str(msg, errors='replace'))
		else:
			if not is_printable(msg):  # str.isprintable() is not available in py2
				msg = normalize(msg)
			if isinstance(msg, py_tools.binary_type):
				msg = '%s (ENCODED by log_utils.log())' % (py_tools.ensure_text(msg))
		if caller == 'scraper_error': pass
		elif caller is not None and level != LOGERROR:
			func = inspect.currentframe().f_back.f_code
			line_number = inspect.currentframe().f_back.f_lineno
			caller = "%s.%s()" % (caller, func.co_name)
			msg = 'From func name: %s Line # :%s\n msg : %s' % (caller, line_number, msg)
		elif caller is not None and level == LOGERROR:
			msg = 'From func name: %s.%s() Line # :%s\n msg : %s' % (caller[0], caller[1], caller[2], msg)
		if debug_location == '1':
			log_file = joinPath(LOGPATH, 'fenomscrapers.log')
			if not existsPath(log_file):
				f = open(log_file, 'w')
				f.close()
			with open(log_file, 'a', encoding='utf-8') as f:  # "with" auto cleans up and closes
				line = '[%s %s] %s: %s' % (datetime.now().date(), str(datetime.now().time())[:8], DEBUGPREFIX % debug_list[level], msg)
				f.write(line.rstrip('\r\n') + '\n')
				# f.writelines([line1, line2])  # maybe an option for writing the 2 lines without using "\n"
		else:
			xbmc.log('%s: %s' % (DEBUGPREFIX % debug_list[level], msg), level)
	except Exception as e:
		import traceback
		traceback.print_exc()
		xbmc.log('[ script.module.fenomscrapers ] log_utils.log() Logging Failure: %s' % (e), LOGERROR)
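# Typical call site (sketch, matching the commented-out examples elsewhere in this
# package); callers pass their own __name__ as caller:
#
#   log_utils.log('url = %s' % url, __name__, log_utils.LOGDEBUG)
#
# Note that on the LOGERROR path above, caller is expected to be a
# (name, function, lineno) sequence rather than a plain string.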
def sources(self, data, hostDict):
	sources = []
	if not data: return sources
	try:
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
		url = '%s%s' % (self.base_link, self.search_link % quote_plus(query))
		# log_utils.log('url = %s' % url, __name__, log_utils.LOGDEBUG)
		r = client.request(url, timeout='5')
		if not r or 'Error 404' in r: return sources
		r = client.parseDOM(r, 'div', attrs={'id': 'content'})
		r1 = client.parseDOM(r, 'h2')
		posts = zip(client.parseDOM(r1, 'a', ret='href'), client.parseDOM(r1, 'a'))
	except:
		source_utils.scraper_error('MYVIDEOLINK')
		return sources
	items = []
	for post in posts:
		try:
			name = source_utils.strip_non_ascii_and_unprintable(post[1])
			if '<' in name: name = re.sub(r'<.*?>', '', name)
			name = client.replaceHTMLCodes(name)
			name = source_utils.clean_name(name)
			if 'tvshowtitle' in data:
				if not source_utils.check_title(title, aliases, name, hdlr, year):
					if not source_utils.check_title(title, aliases, name, 'S%02d' % int(data['season']), year):
						if not source_utils.check_title(title, aliases, name, 'Season.%d' % int(data['season']), year):
							if not source_utils.check_title(title, aliases, name, 'S%d' % int(data['season']), year):
								continue
			else:
				if not source_utils.check_title(title, aliases, name, hdlr, year): continue
			name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
			link = post[0]
			results = client.request(link, timeout='5')
			results = client.parseDOM(results, 'div', attrs={'class': 'entry-content cf'})[0]
			if 'tvshowtitle' in data:
				isSeasonList = False
				if 'Season' in name or 'S%02d' % int(data['season']) in name:
					isSeasonList = True
				results = re.sub(r'\n', '', results)
				results = re.sub(r'\t', '', results).replace('> <', '><')
				test = re.findall(r'<p><b>(.*?)</ul>', results, re.DOTALL)  # parsing this site for episodes is a bitch; this is as close as it gets
				for x in test:
					test2 = re.search(r'(.*?)</b>', x).group(1)
					if hdlr in test2:
						if isSeasonList:
							name = re.sub(r'\.Season\.\d+', '.%s.' % test2.replace(' ', '.'), name)
							name = re.sub(r'\.S\d+', '.%s' % test2.replace(' ', '.'), name)
						else:
							name = test2
						links = client.parseDOM(x, 'a', ret='href')
						break
					else:
						try:
							test3 = re.search(r'<p><b>(.*?)</b></p>', x).group(1)
						except:
							continue
						if hdlr in test3:
							if isSeasonList:
								name = re.sub(r'\.Season\.\d+', '.%s.' % test3.replace(' ', '.'), name)
								name = re.sub(r'\.S\d+', '.%s' % test3.replace(' ', '.'), name)
							else:
								name = test3
							links = client.parseDOM(x, 'a', ret='href')
							break
			else:
				links = client.parseDOM(results, 'a', attrs={'class': 'autohyperlink'}, ret='href')
			for link in links:
				try:
					url = py_tools.ensure_text(client.replaceHTMLCodes(str(link)), errors='replace')
					if url.endswith(('.rar', '.zip', '.iso', '.part', '.png', '.jpg', '.bmp', '.gif')): continue
					if url in str(sources): continue
					valid, host = source_utils.is_host_valid(url, hostDict)
					if not valid: continue
					quality, info = source_utils.get_release_quality(name_info, url)
					try:
						size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', results).group(0)
						dsize, isize = source_utils._size(size)
						info.insert(0, isize)
					except:
						dsize = 0
					info = ' | '.join(info)
					sources.append({'provider': 'myvideolink', 'source': host, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
				except:
					source_utils.scraper_error('MYVIDEOLINK')
		except:
			source_utils.scraper_error('MYVIDEOLINK')
	return sources
def request(url, close=True, redirect=True, error=False, proxy=None, post=None, headers=None, mobile=False,
			XHR=False, limit=None, referer=None, cookie=None, compression=True, output='', timeout='30',
			verifySsl=True, flare=True, ignoreErrors=None, as_bytes=False):
	try:
		if not url: return None
		if url.startswith('//'): url = 'http:' + url
		try: url = py_tools.ensure_text(url, errors='ignore')
		except: pass
		if isinstance(post, dict):
			post = bytes(urlencode(post), encoding='utf-8')
		elif isinstance(post, str) and py_tools.isPY3:
			post = bytes(post, encoding='utf-8')
		handlers = []
		if proxy is not None:
			handlers += [urllib2.ProxyHandler({'http': '%s' % (proxy)}), urllib2.HTTPHandler]
			opener = urllib2.build_opener(*handlers)
			urllib2.install_opener(opener)
		if output == 'cookie' or output == 'extended' or close is not True:
			cookies = cookielib.LWPCookieJar()
			handlers += [urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(cookies)]
			opener = urllib2.build_opener(*handlers)
			urllib2.install_opener(opener)
		if not verifySsl and version_info >= (2, 7, 12):
			try:
				import ssl
				ssl_context = ssl._create_unverified_context()
				handlers += [urllib2.HTTPSHandler(context=ssl_context)]
				opener = urllib2.build_opener(*handlers)
				urllib2.install_opener(opener)
			except:
				from fenomscrapers.modules import log_utils
				log_utils.error()
		if verifySsl and ((2, 7, 8) < version_info < (2, 7, 12)):
			try:
				import ssl
				try:
					import _ssl
					CERT_NONE = _ssl.CERT_NONE
				except Exception:
					CERT_NONE = ssl.CERT_NONE
				ssl_context = ssl.create_default_context()
				ssl_context.check_hostname = False
				ssl_context.verify_mode = CERT_NONE
				handlers += [urllib2.HTTPSHandler(context=ssl_context)]
				opener = urllib2.build_opener(*handlers)
				urllib2.install_opener(opener)
			except:
				from fenomscrapers.modules import log_utils
				log_utils.error()
		try: headers.update(headers)
		except: headers = {}
		if 'User-Agent' in headers: pass
		elif mobile is not True: headers['User-Agent'] = cache.get(randomagent, 12)
		else: headers['User-Agent'] = 'Apple-iPhone/701.341'
		if 'Referer' in headers: pass
		elif referer is not None: headers['Referer'] = referer
		if 'Accept-Language' not in headers: headers['Accept-Language'] = 'en-US'
		if 'X-Requested-With' in headers: pass
		elif XHR: headers['X-Requested-With'] = 'XMLHttpRequest'
		if 'Cookie' in headers: pass
		elif cookie: headers['Cookie'] = cookie
		if 'Accept-Encoding' in headers: pass
		elif compression and limit is None: headers['Accept-Encoding'] = 'gzip'
		if redirect is False:
			class NoRedirectHandler(urllib2.HTTPRedirectHandler):
				def http_error_302(self, reqst, fp, code, msg, head):
					infourl = addinfourl(fp, head, reqst.get_full_url())
					infourl.status = code
					infourl.code = code
					return infourl
				http_error_300 = http_error_302
				http_error_301 = http_error_302
				http_error_303 = http_error_302
				http_error_307 = http_error_302
			opener = urllib2.build_opener(NoRedirectHandler())
			urllib2.install_opener(opener)
			try: del headers['Referer']
			except: pass
		req = urllib2.Request(url, data=post)
		_add_request_header(req, headers)
		try:
			response = urllib2.urlopen(req, timeout=int(timeout))
		except HTTPError as error_response:
			# PY3 deletes the "as" target when the except block exits, so binding the error
			# directly to "response" would later raise "local variable 'response' referenced
			# before assignment"; re-assign it instead
			response = error_response
			try:
				ignore = ignoreErrors and (int(response.code) == ignoreErrors or int(response.code) in ignoreErrors)
			except:
				ignore = False
			if not ignore:
				if response.code in [301, 307, 308, 503, 403]:  # 403:Forbidden added 3/3/21 for cloudflare, fails on bad User-Agent
					cf_result = response.read(5242880)
					try: encoding = response.headers["Content-Encoding"]
					except: encoding = None
					if encoding == 'gzip': cf_result = gzip.GzipFile(fileobj=StringIO(cf_result)).read()
					if flare and 'cloudflare' in str(response.info()).lower():
						from fenomscrapers.modules import log_utils
						log_utils.log('client module calling cfscrape: url=%s' % url, level=log_utils.LOGDEBUG)
						try:
							from fenomscrapers.modules import cfscrape
							if isinstance(post, dict): data = post
							else:
								try: data = parse_qs(post)
								except: data = None
							scraper = cfscrape.CloudScraper()
							if response.code == 403:  # possible bad User-Agent in headers, let cfscrape assign one
								response = scraper.request(method='GET' if post is None else 'POST', url=url, data=data, timeout=int(timeout))
							else:
								response = scraper.request(method='GET' if post is None else 'POST', url=url, headers=headers, data=data, timeout=int(timeout))
							result = response.content
							flare = 'cloudflare'  # used below to skip the normal read
							try: cookies = response.request._cookies
							except: log_utils.error()
							if response.status_code == 403:  # cfscrape server still responded with 403
								log_utils.log('cfscrape-Error url=(%s): %s' % (url, 'HTTP Error 403: Forbidden'), __name__, level=log_utils.LOGDEBUG)
								return None
						except:
							log_utils.error()
					elif 'cf-browser-verification' in cf_result:
						netloc = '%s://%s' % (urlparse(url).scheme, urlparse(url).netloc)
						ua = headers['User-Agent']
						cf = cache.get(cfcookie().get, 168, netloc, ua, timeout)
						headers['Cookie'] = cf
						req = urllib2.Request(url, data=post)
						_add_request_header(req, headers)
						response = urllib2.urlopen(req, timeout=int(timeout))
					else:
						if error is False:
							from fenomscrapers.modules import log_utils
							log_utils.error('Request-Error url=(%s)' % url)
							return None
				else:
					if error is False:
						from fenomscrapers.modules import log_utils
						log_utils.error('Request-Error url=(%s)' % url)
						return None
			elif error is True and response.code in [401, 404, 405]:  # no point in continuing after these response codes
				try:
					response_headers = dict([(item[0].title(), item[1]) for item in list(response.info().items())])
					# Kodi 18 and 19 behave differently here: in 18 three "Set-Cookie:" headers were
					# combined under one key; in 19 only the last key's value was present
				except:
					from fenomscrapers.modules import log_utils
					log_utils.error()
					response_headers = response.headers
				return (str(response), str(response.code), response_headers)
		if output == 'cookie':
			try: result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
			except: pass
			try: result = cf
			except: pass
			if close is True: response.close()
			return result
		elif output == 'geturl':
			result = response.geturl()
			if close is True: response.close()
			return result
		elif output == 'headers':
			result = response.headers
			if close is True: response.close()
			return result
		elif output == 'chunk':
			try: content = int(response.headers['Content-Length'])
			except: content = (2049 * 1024)
			if content < (2048 * 1024): return
			try: result = response.read(16 * 1024)
			except: result = response  # testing
			if close is True: response.close()
			return result
		elif output == 'file_size':
			try: content = int(response.headers['Content-Length'])
			except: content = '0'
			if close is True: response.close()
			return content
		if flare != 'cloudflare':  # cfscrape already read the body above
			if limit == '0': result = response.read(224 * 1024)
			elif limit is not None: result = response.read(int(limit) * 1024)
			else: result = response.read(5242880)
			try: encoding = response.headers["Content-Encoding"]
			except: encoding = None
			if encoding == 'gzip': result = gzip.GzipFile(fileobj=StringIO(result)).read()
		if not as_bytes:
			result = py_tools.ensure_text(result, errors='ignore')
		if 'sucuri_cloudproxy_js' in result:
			su = sucuri().get(result)
			headers['Cookie'] = su
			req = urllib2.Request(url, data=post)
			_add_request_header(req, headers)
			response = urllib2.urlopen(req, timeout=int(timeout))
			if limit == '0': result = response.read(224 * 1024)
			elif limit is not None: result = response.read(int(limit) * 1024)
			else: result = response.read(5242880)
			try: encoding = response.headers["Content-Encoding"]
			except: encoding = None
			if encoding == 'gzip': result = gzip.GzipFile(fileobj=StringIO(result)).read()
		if 'Blazingfast.io' in result and 'xhr.open' in result:
			netloc = '%s://%s' % (urlparse(url).scheme, urlparse(url).netloc)
			ua = headers['User-Agent']
			headers['Cookie'] = cache.get(bfcookie().get, 168, netloc, ua, timeout)
			result = _basic_request(url, headers=headers, post=post, timeout=timeout, limit=limit)
		if output == 'extended':
			try:
				response_headers = dict([(item[0].title(), item[1]) for item in list(response.info().items())])
			except:
				from fenomscrapers.modules import log_utils
				log_utils.error()
				response_headers = response.headers
			try: response_code = str(response.code)
			except: response_code = str(response.status_code)  # cfscrape returns a Requests response object
			try: cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies])
			except: pass
			try: cookie = cf
			except: pass
			if close is True: response.close()
			return (result, response_code, response_headers, headers, cookie)
		else:
			if close is True: response.close()
			return result
	except:
		from fenomscrapers.modules import log_utils
		log_utils.error('Request-Error url=(%s)' % url)
		return None
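# Usage sketch (illustrative, not part of the module): the output argument selects
# the return shape; these mirror calls made by the scrapers in this package:
#
#   html = client.request(url, timeout='5')                    # body text (default)
#   resp_headers = client.request(url, output='headers')       # headers only
#   body, code, resp_headers, req_headers, cookie = client.request(url, output='extended')
#
# Failure paths generally return None, so callers guard with "if not r: return".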
def get_sources(self, name, url):
	try:
		# r = self.scraper.get(url, headers=self.headers).content
		r = py_tools.ensure_str(self.scraper.get(url, headers=self.headers).content, errors='replace')
		name = client.replaceHTMLCodes(name)
		if name.startswith('['): name = name.split(']')[1]
		name = name.strip().replace(' ', '.')
		name_info = source_utils.info_from_name(name, self.title, self.year, self.hdlr, self.episode_title)
		if source_utils.remove_lang(name_info): return self.sources
		l = dom_parser.parse_dom(r, 'pre', {'class': 'links'})
		if l == []: return
		s = ''
		for i in l: s += i.content
		# scan the concatenated link blocks
		urls = re.findall(r'''((?:http|ftp|https)://[\w_-]+(?:(?:\.[\w_-]+)+)[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])''', s, flags=re.M | re.S)
		urls = [i for i in urls if not i.endswith(('.rar', '.zip', '.iso', '.idx', '.sub', '.srt'))]
		for link in urls:
			url = py_tools.ensure_text(client.replaceHTMLCodes(str(link)), errors='replace')
			if url in str(self.sources): continue
			valid, host = source_utils.is_host_valid(url, self.hostDict)
			if not valid: continue
			quality, info = source_utils.get_release_quality(name, url)
			try:
				size = re.search(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', name).group(0)
				dsize, isize = source_utils._size(size)
				info.insert(0, isize)
			except:
				dsize = 0
			info = ' | '.join(info)
			self.sources.append({'provider': 'rapidmoviez', 'source': host, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
	except:
		source_utils.scraper_error('RAPIDMOVIEZ')
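# Sketch of what the URL pattern above matches (standalone, stdlib only; the
# hostnames are made up for illustration):
#
#   import re
#   text = 'mirror: https://example.host/f/abc123 and ftp://files.example.org/x'
#   pat = r'((?:http|ftp|https)://[\w_-]+(?:(?:\.[\w_-]+)+)[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])'
#   re.findall(pat, text)  # -> ['https://example.host/f/abc123', 'ftp://files.example.org/x']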
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
		url = self.search_link % quote_plus(query)
		url = urljoin(self.base_link, url)
		# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
		r = client.request(url)
		if not r: return sources
		posts = client.parseDOM(r, 'h2')
		urls = []
		for item in posts:
			if not item.startswith('<a href'): continue
			try:
				name = client.parseDOM(item, "a")[0]
				if not source_utils.check_title(title, aliases, name, hdlr, year): continue
				name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
				if source_utils.remove_lang(name_info): continue
				quality, info = source_utils.get_release_quality(name_info, item[0])
				try:
					size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', item)[0]
					dsize, isize = source_utils._size(size)
					info.insert(0, isize)
				except:
					dsize = 0
				info = ' | '.join(info)
				item = client.parseDOM(item, 'a', ret='href')
				url = item
				links = self.links(url)
				if links is None: continue
				urls += [(i, name, name_info, quality, info, dsize) for i in links]
			except:
				source_utils.scraper_error('300MBFILMS')
		for item in urls:
			if 'earn-money' in item[0]: continue
			url = py_tools.ensure_text(client.replaceHTMLCodes(item[0]), errors='replace')
			valid, host = source_utils.is_host_valid(url, hostDict)
			if not valid: continue
			sources.append({'provider': '300mbfilms', 'source': host, 'name': item[1], 'name_info': item[2], 'quality': item[3], 'language': 'en', 'url': url, 'info': item[4], 'direct': False, 'debridonly': True, 'size': item[5]})
		return sources
	except:
		source_utils.scraper_error('300MBFILMS')
		return sources
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		scraper = cfscrape.create_scraper()
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
		url = self.search_link % quote_plus(query)
		url = urljoin(self.base_link, url)
		# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
		# r = scraper.get(url).content
		r = py_tools.ensure_str(scraper.get(url).content, errors='replace')
		posts = client.parseDOM(r, 'div', attrs={'class': 'post'})
		if not posts: return sources
	except:
		source_utils.scraper_error('SCENERLS')
		return sources
	items = []
	for post in posts:
		try:
			content = client.parseDOM(post, "div", attrs={"class": "postContent"})
			size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', content[0])[0]
			u = client.parseDOM(content, "h2")
			u = client.parseDOM(u, 'a', ret='href')
			u = [(i.strip('/').split('/')[-1], i, size) for i in u]
			items += u
		except:
			source_utils.scraper_error('SCENERLS')
			return sources
	for item in items:
		try:
			name = item[0]
			name = client.replaceHTMLCodes(name)
			if not source_utils.check_title(title, aliases, name, hdlr, year): continue
			name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
			if source_utils.remove_lang(name_info): continue
			# check year for reboot/remake show issues if year is available - crap shoot
			# if 'tvshowtitle' in data:
			# 	if re.search(r'([1-3][0-9]{3})', name):
			# 		if not any(value in name for value in [year, str(int(year)+1), str(int(year)-1)]):
			# 			continue
			url = py_tools.ensure_text(client.replaceHTMLCodes(str(item[1])), errors='replace')
			if url in str(sources): continue
			valid, host = source_utils.is_host_valid(url, hostDict)
			if not valid: continue
			quality, info = source_utils.get_release_quality(name_info, url)
			try:
				dsize, isize = source_utils._size(item[2])
				info.insert(0, isize)
			except:
				dsize = 0
			info = ' | '.join(info)
			sources.append({'provider': 'scenerls', 'source': host, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
		except:
			source_utils.scraper_error('SCENERLS')
	return sources
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		scraper = cfscrape.create_scraper(delay=5)
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
		title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
		aliases = data['aliases']
		episode_title = data['title'] if 'tvshowtitle' in data else None
		year = data['year']
		hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else year
		isSeasonQuery = False
		query = '%s %s' % (title, hdlr)
		query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
		# query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', query)
		query = re.sub(r'\s', '-', query)
		if int(year) >= 2021: self.base_link = self.base_new
		else: self.base_link = self.base_old
		url = urljoin(self.base_link, query)
		# log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
		# r = scraper.get(url).content
		r = py_tools.ensure_str(scraper.get(url).content, errors='replace')
		if not r or 'nothing was found' in r:
			if 'tvshowtitle' in data:
				season = re.search(r'S(.*?)E', hdlr).group(1)
				query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', title)
				# query = re.sub(r'[^A-Za-z0-9\s\.-]+', '', title)
				query = re.sub(r'\s', '-', query)
				query = query + "-S" + season
				url = urljoin(self.base_link, query)
				# r = scraper.get(url).content
				r = py_tools.ensure_str(scraper.get(url).content, errors='replace')
				isSeasonQuery = True
			else:
				return sources
		if not r or 'nothing was found' in r: return sources
		# may need to add fallback to use self.search_link if nothing found
		posts = client.parseDOM(r, "div", attrs={"class": "content"})
		if not posts: return sources
	except:
		source_utils.scraper_error('RLSBB')
		return sources
	release_title = re.sub(r'[^A-Za-z0-9\s\.-]+', '', title).replace(' ', '.')
	items = []
	count = 0
	for post in posts:
		if count >= 300: break  # to limit large link list and slow scrape time
		try:
			post_titles = re.findall(r'(?:.*>|>\sRelease Name.*|\s)(%s.*?)<' % release_title, post, re.I)  # parse all matching release_titles in each post(content) group
			items = []
			if len(post_titles) > 1:
				index = 0
				for name in post_titles:
					start = post_titles[index].replace('[', '\\[').replace('(', '\\(').replace(')', '\\)').replace('+', '\\+').replace(' \\ ', ' \\\\ ')
					end = (post_titles[index + 1].replace('[', '\\[').replace('(', '\\(').replace(')', '\\)').replace('+', '\\+')).replace(' \\ ', ' \\\\ ') if index + 1 < len(post_titles) else ''
					try:
						container = re.findall(r'(?:%s)([\S\s]+)(?:%s)' % (start, end), post, re.I)[0]  # parse all data between release_titles in a multi post(content) group
					except:
						source_utils.scraper_error('RLSBB')
						continue
					try:
						size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', container)[0].replace(',', '.')
					except:
						size = '0'
					container = client.parseDOM(container, 'a', ret='href')
					items.append((name, size, container))
					index += 1
			elif len(post_titles) == 1:
				name = post_titles[0]
				container = client.parseDOM(post, 'a', ret='href')  # parse all links in a single post(content) group
				try:
					size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', post)[0].replace(',', '.')
				except:
					size = '0'
				items.append((name, size, container))
			else:
				continue
			for group_name, size, links in items:
				for i in links:
					name = group_name
					# if isSeasonQuery and hdlr not in name.upper():
					# 	name = i.rsplit("/", 1)[-1]
					# 	if hdlr not in name.upper(): continue
					if hdlr not in name.upper():
						name = i.rsplit("/", 1)[-1]
						if hdlr not in name.upper(): continue
					name = client.replaceHTMLCodes(name)
					name = source_utils.strip_non_ascii_and_unprintable(name)
					name_info = source_utils.info_from_name(name, title, year, hdlr, episode_title)
					url = py_tools.ensure_text(client.replaceHTMLCodes(str(i)), errors='replace')
					if url in str(sources): continue
					if url.endswith(('.rar', '.zip', '.iso', '.part', '.png', '.jpg', '.bmp', '.gif')): continue
					valid, host = source_utils.is_host_valid(url, hostDict)
					if not valid: continue
					quality, info = source_utils.get_release_quality(name, url)
					try:
						if size == '0':
							try:
								size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GB|GiB|Gb|MB|MiB|Mb))', name)[0].replace(',', '.')
							except:
								raise Exception()
						dsize, isize = source_utils._size(size)
						info.insert(0, isize)
					except:
						dsize = 0
					info = ' | '.join(info)
					sources.append({'provider': 'rlsbb', 'source': host, 'name': name, 'name_info': name_info, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True, 'size': dsize})
					count += 1
		except:
			source_utils.scraper_error('RLSBB')
	return sources
def sources(self, url, hostDict):
	sources = []
	if not url: return sources
	try:
		data = parse_qs(url)
		data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
		content_type = 'episode' if 'tvshowtitle' in data else 'movie'
		years = (data['year'], str(int(data['year']) + 1), str(int(data['year']) - 1))
		if content_type == 'movie':
			title = cleantitle.get_simple(data['title']).lower()
			ids = [data['imdb']]
			r = control.jsonrpc('{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovies", "params": {"filter":{"or": [{"field": "year", "operator": "is", "value": "%s"}, {"field": "year", "operator": "is", "value": "%s"}, {"field": "year", "operator": "is", "value": "%s"}]}, "properties": ["imdbnumber", "title", "originaltitle", "file"]}, "id": 1}' % years)
			r = py_tools.ensure_text(r, errors='replace')
			if 'movies' not in r: return sources
			r = jsloads(r)['result']['movies']
			r = [i for i in r if str(i['imdbnumber']) in ids or title in [cleantitle.get_simple(i['title']), cleantitle.get_simple(i['originaltitle'])]]
			try: r = [i for i in r if not i['file'].encode('utf-8').endswith('.strm')]
			except: r = [i for i in r if not i['file'].endswith('.strm')]
			if not r: return sources
			r = r[0]
			r = control.jsonrpc('{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovieDetails", "params": {"properties": ["streamdetails", "file"], "movieid": %s }, "id": 1}' % str(r['movieid']))
			r = py_tools.ensure_text(r, errors='replace')
			r = jsloads(r)['result']['moviedetails']
		elif content_type == 'episode':
			title = cleantitle.get_simple(data['tvshowtitle']).lower()
			season, episode = data['season'], data['episode']
			r = control.jsonrpc('{"jsonrpc": "2.0", "method": "VideoLibrary.GetTVShows", "params": {"filter":{"or": [{"field": "year", "operator": "is", "value": "%s"}, {"field": "year", "operator": "is", "value": "%s"}, {"field": "year", "operator": "is", "value": "%s"}]}, "properties": ["imdbnumber", "title"]}, "id": 1}' % years)
			r = py_tools.ensure_text(r, errors='replace')
			if 'tvshows' not in r: return sources
			r = jsloads(r)['result']['tvshows']
			r = [i for i in r if title in (cleantitle.get_simple(i['title']).lower() if not ' (' in i['title'] else cleantitle.get_simple(i['title']).split(' (')[0])]
			if not r: return sources
			else: r = r[0]
			r = control.jsonrpc('{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodes", "params": {"filter":{"and": [{"field": "season", "operator": "is", "value": "%s"}, {"field": "episode", "operator": "is", "value": "%s"}]}, "properties": ["file"], "tvshowid": %s }, "id": 1}' % (str(season), str(episode), str(r['tvshowid'])))
			r = py_tools.ensure_text(r, errors='replace')
			r = jsloads(r)['result']['episodes']
			if not r: return sources
			try: r = [i for i in r if not i['file'].encode('utf-8').endswith('.strm')]
			except: r = [i for i in r if not i['file'].endswith('.strm')]
			if not r: return sources
			r = r[0]
			r = control.jsonrpc('{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodeDetails", "params": {"properties": ["streamdetails", "file"], "episodeid": %s }, "id": 1}' % str(r['episodeid']))
			r = py_tools.ensure_text(r, errors='replace')
			r = jsloads(r)['result']['episodedetails']
		url = py_tools.ensure_text(r['file'], errors='replace')
		try:
			quality = int(r['streamdetails']['video'][0]['width'])
		except:
			source_utils.scraper_error('LIBRARY')
			quality = -1
		# map the video stream's pixel width to a quality label; a single if/elif chain is
		# required here, since once quality becomes a string the remaining numeric
		# comparisons would misfire (a TypeError in py3)
		if quality > 1920: quality = '4K'
		elif quality >= 1920: quality = '1080p'
		elif quality >= 1280: quality = '720p'
		else: quality = 'SD'
		info = []
		try:
			f = control.openFile(url)
			s = f.size()
			f.close()
			dsize = float(s) / 1073741824
			isize = '%.2f GB' % dsize
			info.insert(0, isize)
		except:
			source_utils.scraper_error('LIBRARY')
			dsize = 0
		try:
			c = r['streamdetails']['video'][0]['codec']
			if c == 'avc1': c = 'h264'
			info.append(c)
		except:
			source_utils.scraper_error('LIBRARY')
		try:
			ac = r['streamdetails']['audio'][0]['codec']
			if ac == 'dca': ac = 'dts'
			if ac == 'dtshd_ma': ac = 'dts-hd ma'
			info.append(ac)
		except:
			source_utils.scraper_error('LIBRARY')
		try:
			ach = r['streamdetails']['audio'][0]['channels']
			if ach == 1: ach = 'mono'
			if ach == 2: ach = '2.0'
			if ach == 6: ach = '5.1'
			if ach == 8: ach = '7.1'
			info.append(ach)
		except:
			source_utils.scraper_error('LIBRARY')
		info = ' | '.join(info)
		info = py_tools.ensure_text(info, errors='replace')
		sources.append({'provider': 'library', 'source': 'local', 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'local': True, 'direct': True, 'debridonly': False, 'size': dsize})
		return sources
	except:
		source_utils.scraper_error('LIBRARY')
		return sources
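# Standalone sketch of the width-to-label mapping above (label_from_width is a
# hypothetical name, not part of the module):
#
#   def label_from_width(width):
#       if width > 1920: return '4K'
#       elif width >= 1920: return '1080p'
#       elif width >= 1280: return '720p'
#       else: return 'SD'
#
#   label_from_width(3840)  # -> '4K'
#   label_from_width(1920)  # -> '1080p'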