def __init__(self, info=None, request_charset='utf-8', response_charset=None):
    """
    Set up cookie storage, a retrying ``requests`` session and proxy routing.

    Args:
        info (dict): Optional payload; the keys ``internal_proxy_url`` and
            ``proxy_url`` are read from it when present
        request_charset (str): Stored as ``self.request_charset``
        response_charset (str): Stored as ``self.response_charset``
    """
    self._counter = 0
    self._cookies_filename = ''
    self._cookies = LWPCookieJar()

    self.url = None
    self.user_agent = USER_AGENT
    self.content = None
    self.status = None
    self.token = None
    self.passkey = None
    self.info = info
    self.proxy_url = None
    self.request_charset = request_charset
    self.response_charset = response_charset

    self.needs_proxylock = False

    self.headers = dict()
    self.request_headers = None

    self.session = requests.session()
    # Certificate verification is switched off for every request of this session
    self.session.verify = False

    # Enabling retrying on failed requests
    retries = Retry(total=2,
                    read=2,
                    connect=2,
                    redirect=3,
                    backoff_factor=0.1,
                    status_forcelist=[429, 500, 502, 503, 504])
    adapter_schemes = ('http://', 'https://')
    for scheme in adapter_schemes:
        self.session.mount(scheme, HTTPAdapter(max_retries=retries))

    # Module-level DNS server lists are refreshed from settings on every construction
    global dns_public_list
    global dns_opennic_list
    dns_public_list = get_setting("public_dns_list", unicode).replace(" ", "").split(",")
    dns_opennic_list = get_setting("opennic_dns_list", unicode).replace(" ", "").split(",")

    # Parsing proxy information
    proxy = {
        'enabled': get_setting("proxy_enabled", bool),
        'use_type': get_setting("proxy_use_type", int),
        'type': proxy_types[0],
        'host': get_setting("proxy_host", unicode),
        'port': get_setting("proxy_port", int),
        'login': get_setting("proxy_login", unicode),
        'password': get_setting("proxy_password", unicode),
    }

    # Best effort: keep the default proxy type when the setting is missing or out of range
    try:
        proxy['type'] = proxy_types[get_setting("proxy_type", int)]
    except Exception:
        pass

    if get_setting("use_public_dns", bool):
        connection.create_connection = patched_create_connection

    if get_setting("use_elementum_proxy", bool):
        elementum_addon = xbmcaddon.Addon(id='plugin.video.elementum')
        if elementum_addon and elementum_addon.getSetting('internal_proxy_enabled') == "true":
            self.proxy_url = "{0}://{1}:{2}".format("http", "127.0.0.1", "65222")
            if info and "internal_proxy_url" in info:
                self.proxy_url = info["internal_proxy_url"]

        self.session.proxies = {
            'http': self.proxy_url,
            'https': self.proxy_url,
        }
    elif proxy['enabled']:
        if proxy['use_type'] == 0 and info and "proxy_url" in info:
            log.debug("Setting proxy from Elementum: %s" % (info["proxy_url"]))
            # The URL was previously only logged and never applied to the session
            self.proxy_url = info["proxy_url"]
        elif proxy['use_type'] == 1:
            log.debug("Setting proxy with custom settings: %s" % (repr(proxy)))

            if proxy['login'] or proxy['password']:
                self.proxy_url = "{0}://{1}:{2}@{3}:{4}".format(
                    proxy['type'], proxy['login'], proxy['password'],
                    proxy['host'], proxy['port'])
            else:
                self.proxy_url = "{0}://{1}:{2}".format(
                    proxy['type'], proxy['host'], proxy['port'])

        if self.proxy_url:
            self.session.proxies = {
                'http': self.proxy_url,
                'https': self.proxy_url,
            }
def process(provider, generator, filtering, has_special, verify_name=True, verify_size=True):
    """ Method for processing provider results using its generator and Filtering class instance

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special    (bool): Whether title contains special chars
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes

    Returns:
        ``filtering.results``, extended with whatever each query produced. Returned early
        when a query is empty or when token retrieval / login fails.
    """
    log.debug("execute_process for %s with %s" % (provider, repr(generator)))
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    client = Client()
    token = None
    logged_in = False
    token_auth = False

    if get_setting("use_cloudhole", bool):
        client.clearance = get_setting('clearance')
        client.user_agent = get_setting('user_agent')

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
        language_exceptions = get_setting('language_exceptions')
        if language_exceptions.strip().lower():
            filtering.language_exceptions = re.split(r',\s?', language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras: %s" % (provider, filtering.extras))

    for query, extra in zip(filtering.queries, filtering.extras):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if has_special:
            # Removing quotes, surrounding {title*} keywords, when title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)

        query = filtering.process_keywords(provider, query)
        extra = filtering.process_keywords(provider, extra)
        if 'charset' in definition and 'utf' not in definition['charset'].lower():
            # Best effort: keep the unquoted values when they cannot be encoded
            try:
                query = urllib.quote(query.encode(definition['charset']))
                extra = urllib.quote(extra.encode(definition['charset']))
            except Exception as e:
                log.debug("[%s] Charset encoding of query failed: %s" % (provider, repr(e)))

        log.debug("[%s] After keywords - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        url_search = filtering.url.replace('QUERY', query)
        if extra:
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')
        url_search = url_search.replace(' ', definition['separator'])

        # MagnetDL fix...
        url_search = url_search.replace('FIRSTLETTER', query[:1])

        # Creating the payload for POST method
        payload = dict()
        for key, value in filtering.post_data.iteritems():
            if 'QUERY' in value:
                payload[key] = value.replace('QUERY', query)
            else:
                payload[key] = value

        # Creating the payload for GET method
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in filtering.get_data.iteritems():
                if 'QUERY' in value:
                    data[key] = value.replace('QUERY', query)
                else:
                    data[key] = value

        log.debug("- %s query: %s" % (provider, repr(query)))
        log.debug("-- %s url_search before token: %s" % (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" % (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" % (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        if token:
            log.info('[%s] Reusing existing token' % provider)
            url_search = url_search.replace('TOKEN', token)
        elif 'token' in definition:
            token_url = definition['base_url'] + definition['token']
            log.debug("Getting token for %s at %s" % (provider, repr(token_url)))
            client.open(token_url.encode('utf-8'))
            try:
                token_data = json.loads(client.content)
            except Exception:
                log.error('%s: Failed to get token for %s' % (provider, repr(url_search)))
                return filtering.results
            log.debug("Token response for %s: %s" % (provider, repr(token_data)))
            if 'token' in token_data:
                token = token_data['token']
                log.debug("Got token for %s: %s" % (provider, repr(token)))
                url_search = url_search.replace('TOKEN', token)
            else:
                log.warning('%s: Unable to get token for %s' % (provider, repr(url_search)))

        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif token_auth:
            log.info("[%s] Reusing previous token authorization" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider)
            password = get_setting('%s_password' % provider)
            passkey = get_setting('%s_passkey' % provider)
            if not username and not password and not passkey:
                # Try to import credentials stored by the script.magnetic.* add-ons
                for addon_name in ('script.magnetic.%s' % provider, 'script.magnetic.%s-mc' % provider):
                    for setting in ('username', 'password'):
                        try:
                            value = xbmcaddon.Addon(addon_name).getSetting(setting)
                            set_setting('%s_%s' % (provider, setting), value)
                            if setting == 'username':
                                username = value
                            if setting == 'password':
                                password = value
                        except Exception:
                            # Best effort: credentials simply stay empty
                            pass

            if passkey:
                logged_in = True
                client.passkey = passkey
                url_search = url_search.replace('PASSKEY', passkey)

            elif 'login_object' in definition and definition['login_object']:
                logged_in = False
                login_object = definition['login_object'].replace(
                    'USERNAME', '"%s"' % username).replace('PASSWORD', '"%s"' % password)

                # TODO generic flags in definitions for those...
                if provider == 'hd-torrents':
                    client.open(definition['root_url'] + definition['login_path'])
                    if client.content:
                        csrf_token = re.search(r'name="csrfToken" value="(.*?)"', client.content)
                        if csrf_token:
                            login_object = login_object.replace('CSRF_TOKEN', '"%s"' % csrf_token.group(1))
                        else:
                            logged_in = True

                if provider == 'lostfilm':
                    client.open(definition['root_url'] + '/v_search.php?c=111&s=1&e=1')
                    # Compare by value: an identity test against a string literal is
                    # effectively always true, which skipped the login step every time
                    if client.content != 'log in first':
                        logged_in = True

                # NOTE(review): login_object is eval()'d below. It is built from the provider
                # definition plus the stored username/password, so a crafted credential or
                # definition can execute code -- confirm these inputs are trusted.
                if 'token_auth' in definition:
                    # log.debug("[%s] logging in with: %s" % (provider, login_object))
                    if client.open(definition['root_url'] + definition['token_auth'], post_data=eval(login_object)):
                        try:
                            token_data = json.loads(client.content)
                        except Exception:
                            log.error('%s: Failed to get token from %s' % (provider, definition['token_auth']))
                            return filtering.results
                        log.debug("Token response for %s: %s" % (provider, repr(token_data)))
                        if 'token' in token_data:
                            client.token = token_data['token']
                            log.debug("Auth token for %s: %s" % (provider, repr(client.token)))
                        else:
                            log.error('%s: Unable to get auth token for %s' % (provider, repr(url_search)))
                            return filtering.results
                        log.info('[%s] Token auth successful' % provider)
                        token_auth = True
                    else:
                        log.error("[%s] Token auth failed with response: %s" % (provider, repr(client.content)))
                        return filtering.results
                elif not logged_in and client.login(definition['root_url'] + definition['login_path'],
                                                    eval(login_object), definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider, client.status)
                    log.debug("[%s] Failed login content: %s", provider, repr(client.content))
                    return filtering.results

                if logged_in:
                    if provider == 'hd-torrents':
                        client.open(definition['root_url'] + '/torrents.php')
                        csrf_token = re.search(r'name="csrfToken" value="(.*?)"', client.content)
                        if csrf_token:
                            url_search = url_search.replace("CSRF_TOKEN", csrf_token.group(1))
                        else:
                            # Previously an AttributeError on None; search proceeds without the token
                            log.warning("[%s] No csrfToken found on torrents page" % provider)

                    if provider == 'lostfilm':
                        log.info('[%s] Need open page before search', provider)
                        client.open(url_search.encode('utf-8'), post_data=payload, get_data=data)
                        search_info = re.search(r'PlayEpisode\((.*?)\)">', client.content)
                        if search_info:
                            series_details = re.search(r"'(\d+)','(\d+)','(\d+)'", search_info.group(1))
                            if series_details is None:
                                # Unparseable episode reference: treat like a missing one
                                log.warning("[%s] Could not parse PlayEpisode arguments" % provider)
                                return filtering.results
                            client.open(definition['root_url'] + '/v_search.php?c=%s&s=%s&e=%s' % (
                                series_details.group(1), series_details.group(2), series_details.group(3)))
                            redirect_url = re.search(r'url=(.*?)">', client.content)
                            if redirect_url is not None:
                                url_search = redirect_url.group(1)
                        else:
                            return filtering.results

        log.info("> %s search URL: %s" % (definition['name'].rjust(longest), url_search))

        client.open(url_search.encode('utf-8'), post_data=payload, get_data=data)
        filtering.results.extend(
            generate_payload(provider,
                             generator(provider, client),
                             filtering,
                             verify_name,
                             verify_size))
    return filtering.results
def get_magnet_from_jackett(original_uri, max_redirects=30):
    """ Follows redirects from a Jackett link until a magnet URI or a torrent file is reached

    Args:
        original_uri  (str): Link to resolve; returned as-is when it already is a magnet URI
        max_redirects (int): Upper bound on followed redirects, so a redirect loop cannot hang

    Returns:
        str: The magnet URI, or ``None`` when the link could not be resolved
    """
    magnet_prefix = 'magnet:'
    uri = original_uri
    redirects = 0

    while True:
        if uri.startswith(magnet_prefix):
            return uri

        if redirects > max_redirects:
            log.warning("Giving up on URI %s after %d redirects", original_uri, max_redirects)
            return None

        # Redirects are followed manually because the target may be a magnet: URI
        response = requests.get(uri, allow_redirects=False)
        if response.is_redirect:
            uri = response.headers['Location']
            redirects += 1
            continue

        # Media types are case-insensitive and may carry parameters ("; charset=...")
        content_type = (response.headers.get('Content-Type') or '').split(';')[0].strip().lower()
        if response.status_code == httplib.OK and content_type == 'application/x-bittorrent':
            torrent = Torrent.from_string(response.content)
            return torrent.get_magnet(True)

        log.warning(
            "Could not get final redirect location for URI %s. Response was: %d %s",
            original_uri, response.status_code, response.reason)
        log.debug("Response for failed redirect %s is", original_uri)
        log.debug("=" * 50)
        for (h, k) in response.headers.iteritems():
            log.debug("%s: %s", h, k)
        log.debug("")
        log.debug("%s", base64.standard_b64encode(response.content))
        log.debug("=" * 50)
        return None
def open(self, url, language='en', post_data=None, get_data=None, headers=None):
    """ Opens a connection to a webpage and saves its HTML content in ``self.content``

    Args:
        url        (str): The URL to open
        language   (str): The language code for the ``Content-Language`` header
        post_data (dict): POST data for the request
        get_data  (dict): GET data for the request
        headers   (dict): Extra request headers; a non-empty value overrides the default
            header of the same name, an empty value removes it

    Returns:
        bool: ``True`` when this request completed with HTTP status 200
    """
    if get_data:
        url += '?' + urlencode(get_data)

    log.debug("Opening URL: %s" % repr(url))
    if self.session.proxies:
        log.debug("Proxies: %s" % (repr(self.session.proxies)))

    self._read_cookies(url)
    self.session.cookies = self._cookies

    log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

    # Default headers for any request. Pretend like we are the usual browser.
    req_headers = {
        'User-Agent': self.user_agent,
        'Content-Language': language,
        'Cache-Control': 'no-cache',
        'Accept-Encoding': 'deflate, compress, gzip',
        'Origin': url,
        'Referer': url
    }

    # If headers passed to open() call - we overwrite headers.
    if headers:
        for key, value in headers.iteritems():
            # Header names are case-insensitive. str.capitalize() lower-cases everything
            # after the first character, so it never matched names like 'Content-Language'.
            for existing in [name for name in req_headers if name.lower() == key.lower()]:
                del req_headers[existing]
            if value:
                req_headers[key] = value

    if self.token:
        req_headers["Authorization"] = self.token

    if post_data:
        req = requests.Request('POST', url, data=post_data, headers=req_headers)
    else:
        req = requests.Request('GET', url, headers=req_headers)

    prepped = self.session.prepare_request(req)
    self.request_headers = prepped.headers

    # Forget the previous request's status so a failure below cannot be reported as success
    self.status = None

    try:
        self._good_spider()
        with self.session.send(prepped) as response:
            self.headers = response.headers
            self.status = response.status_code
            self.url = response.url

            self._save_cookies()

            if self.response_charset:
                self.content = response.content.decode(self.response_charset, 'ignore')
            else:
                self.content = response.text

    except Exception as e:
        import traceback
        log.error("%s failed with %s:" % (repr(url), repr(e)))
        for line in traceback.format_exc().split("\n"):
            log.debug(line)

    log.debug("Status for %s : %s" % (repr(url), str(self.status)))

    return self.status == 200
def cleanup_results(results_list):
    """ Remove duplicate results, hash results without an info_hash, and sort by seeders

    Args:
        results_list (list): Results to clean-up

    Returns:
        list: De-duplicated, hashed and sorted results
    """
    if not results_list:
        return []

    # A set gives constant-time duplicate checks instead of rescanning a list per result
    seen_hashes = set()
    filtered_list = []
    for result in results_list:
        if not result['seeds'] and not use_allow_noseeds:
            log.debug('[%s] Skipping due to no seeds: %s' % (result['provider'][16:-8], repr(result['name'])))
            continue

        if not result['uri']:
            log.debug('[%s] Skipping due to empty uri: %s' % (result['provider'][16:-8], repr(result)))
            continue

        # info_hash may be None as well as empty
        hash_ = (result['info_hash'] or '').upper()

        if not hash_:
            # Best effort: derive a key from the magnet's info hash, or from the MD5 of the
            # URI with anything after the first '|' stripped
            try:
                if result['uri'].startswith('magnet'):
                    hash_ = Magnet(result['uri']).info_hash.upper()
                else:
                    hash_ = py2_encode(result['uri'].split("|")[0])
                    try:
                        hash_ = hash_.encode()
                    except Exception:
                        pass
                    hash_ = hashlib.md5(hash_).hexdigest()
            except Exception as e:
                log.debug('[%s] Could not hash %s: %s' % (result['provider'][16:-8], repr(result['uri']), repr(e)))

        # Make sure all are upper-case and provider-scoped
        hash_ = result['provider'] + hash_.upper()

        if hash_ in seen_hashes:
            log.debug('[%s] Skipping due to repeating hash: %s' % (result['provider'][16:-8], repr(result)))
            continue

        seen_hashes.add(hash_)
        filtered_list.append(result)

    return sorted(filtered_list, key=lambda r: (get_int(r['seeds'])), reverse=True)
def extract_torrents(provider, client):
    """ Main torrent extraction generator for non-API based providers

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result as ``(name, info_hash, torrent, size, seeds, peers)``
    """
    # NOTE: the parser queries from the provider definition are eval()'d in this function's
    # scope, so local names here (``dom``, ``key``, ``item``, ...) may be referenced by the
    # definitions and must not be renamed.
    # NOTE(review): eval() runs whatever the definition contains -- confirm definitions are
    # trusted input.
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("Extracting torrents from %s using definitions: %s" % (provider, repr(definition)))

    # Read once instead of on every row
    debug_parser = get_setting("use_debug_parser", bool)

    if not client.content:
        if debug_parser:
            log.debug("[%s] Parser debug | Page content is empty" % provider)
        # A plain return ends the generator; raising StopIteration inside a generator
        # becomes a RuntimeError under PEP 479
        return

    dom = Html().feed(client.content)

    key_search = get_search_query(definition, "key")
    row_search = get_search_query(definition, "row")
    name_search = get_search_query(definition, "name")
    torrent_search = get_search_query(definition, "torrent")
    info_hash_search = get_search_query(definition, "infohash")
    size_search = get_search_query(definition, "size")
    seeds_search = get_search_query(definition, "seeds")
    peers_search = get_search_query(definition, "peers")
    referer_search = get_search_query(definition, "referer")

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:
        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash, referer):
            try:
                log.debug("[%s] Getting subpage at %s" % (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" % (provider, repr(e)))
                for line in traceback.format_exc().split("\n"):
                    log.debug(line)

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey
            headers = {}

            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''

            if referer:
                headers['Referer'] = referer

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'), headers=headers)

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' % (provider, repr(torrent)))
                # NOTE(review): ``torrent`` still contains the '|...' suffix here, so this
                # appends it a second time -- confirm whether uri[0] was intended.
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error("[%s] Subpage extraction for %s failed with: %s" % (provider, repr(uri[0]), repr(e)))
                    for line in traceback.format_exc().split("\n"):
                        log.debug(line)

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        if debug_parser:
            log.debug("[%s] Parser debug | Could not parse DOM from page content" % provider)
        return

    if debug_parser:
        log.debug("[%s] Parser debug | Page content: %s" % (provider, client.content.replace('\r', '').replace('\n', '')))

    key = eval(key_search) if key_search else ""
    if key_search and debug_parser:
        key_str = key.__str__()
        log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (
            provider, 'key', key_search, key_str.replace('\r', '').replace('\n', '')))

    items = eval(row_search)
    if debug_parser:
        log.debug("[%s] Parser debug | Matched %d items for '%s' query '%s'" % (provider, len(items), 'row', row_search))

    for item in items:
        if debug_parser:
            item_str = item.__str__()
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (
                provider, 'row', row_search, item_str.replace('\r', '').replace('\n', '')))

        if not item:
            continue

        name = eval(name_search) if name_search else ""
        torrent = eval(torrent_search) if torrent_search else ""
        size = eval(size_search) if size_search else ""
        seeds = eval(seeds_search) if seeds_search else ""
        peers = eval(peers_search) if peers_search else ""
        info_hash = eval(info_hash_search) if info_hash_search else ""
        referer = eval(referer_search) if referer_search else ""

        if 'magnet:?' in torrent:
            torrent = torrent[torrent.find('magnet:?'):]

        if debug_parser:
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'name', name_search, name))
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'torrent', torrent_search, torrent))
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'size', size_search, size))
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'seeds', seeds_search, seeds))
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'peers', peers_search, peers))
            if info_hash_search:
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'info_hash', info_hash_search, info_hash))
            if referer_search:
                # Label corrected: this line reports the referer query, not info_hash
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'referer', referer_search, referer))

        # Pass client cookies with torrent if private
        if definition.get('private') and not torrent.startswith('magnet'):
            user_agent = USER_AGENT

            if client.passkey:
                torrent = torrent.replace('PASSKEY', client.passkey)
            elif client.token:
                headers = {'Authorization': client.token, 'User-Agent': user_agent}
                log.debug("[%s] Appending headers: %s" % (provider, repr(headers)))
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))
            else:
                log.debug("[%s] Cookies: %s" % (provider, repr(client.cookies())))
                parsed_url = urlparse(definition['root_url'])
                cookie_domain = '{uri.netloc}'.format(uri=parsed_url)
                cookie_domain = re.sub(r'www\d*\.', '', cookie_domain)
                cookies = [cookie for cookie in client._cookies if cookie_domain in cookie.domain]
                if cookies:
                    headers = {'User-Agent': user_agent}
                    if client.request_headers:
                        headers.update(client.request_headers)
                    if client.url:
                        headers['Referer'] = client.url
                        headers['Origin'] = client.url
                    # Set Cookie last so the domain-filtered cookies are not overwritten by a
                    # Cookie entry coming from client.request_headers
                    headers['Cookie'] = ";".join(["%s=%s" % (c.name, c.value) for c in cookies])
                else:
                    headers = {'User-Agent': user_agent}

                torrent = append_headers(torrent, headers)

        if name and torrent and needs_subpage and not torrent.startswith('magnet'):
            if not torrent.startswith('http'):
                torrent = definition['root_url'] + torrent.encode('utf-8')
            t = Thread(target=extract_subpage, args=(q, name, torrent, size, seeds, peers, info_hash, referer))
            threads.append(t)
        else:
            yield (name, info_hash, torrent, size, seeds, peers)

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        log.debug("[%s] Threads returned: %s" % (provider, repr(threads)))

        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
def open(self, url, language='en', post_data=None, get_data=None):
    """ Opens a connection to a webpage and saves its HTML content in ``self.content``

    Args:
        url        (str): The URL to open
        language   (str): The language code for the ``Content-Language`` header
        post_data (dict): POST data for the request
        get_data  (dict): GET data for the request

    Returns:
        bool: ``True`` when the response was read without an error being raised
    """
    if not post_data:
        post_data = {}
    if get_data:
        url += '?' + urlencode(get_data)

    log.debug("Opening URL: %s" % repr(url))
    result = False

    # A request with a body is sent as POST by urllib2, without one as GET
    data = urlencode(post_data) if len(post_data) > 0 else None
    req = urllib2.Request(url, data)

    self._read_cookies(url)
    log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self._cookies))
    req.add_header('User-Agent', self.user_agent)
    req.add_header('Content-Language', language)
    req.add_header("Accept-Encoding", "gzip")
    req.add_header("Origin", url)
    req.add_header("Referer", url)

    if self.token:
        req.add_header("Authorization", self.token)

    try:
        self._good_spider()
        with closing(opener.open(req)) as response:
            self.headers = response.headers
            self._save_cookies()
            if response.headers.get("Content-Encoding", "") == "gzip":
                import zlib
                # 16 + MAX_WBITS makes zlib expect a gzip header and trailer
                self.content = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(response.read())
            else:
                self.content = response.read()

            # Charset from the Content-Type header first, then from a <meta> tag in the body
            charset = response.headers.getparam('charset')

            if not charset:
                match = re.search(r"""<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>]*)""", self.content)
                if match:
                    charset = match.group(1)

            if charset and charset.lower() == 'utf-8':
                charset = 'utf-8-sig'  # Changing to utf-8-sig to remove BOM if found on decode from utf-8

            if charset:
                log.debug('Decoding charset from %s for %s' % (charset, repr(url)))
                try:
                    self.content = self.content.decode(charset, 'replace')
                except LookupError:
                    # An unknown charset label must not fail a request whose body was
                    # already fetched; keep the undecoded content instead
                    log.warning("Unknown charset %s for %s, leaving content undecoded" % (repr(charset), repr(url)))

            self.status = response.getcode()
        result = True

    except urllib2.HTTPError as e:
        self.status = e.code
        log.warning("Status for %s : %s" % (repr(url), str(self.status)))
        if e.code == 403 or e.code == 503:
            log.warning("CloudFlared at %s, try enabling CloudHole" % url)

    except urllib2.URLError as e:
        self.status = repr(e.reason)
        log.warning("Status for %s : %s" % (repr(url), self.status))

    except Exception as e:
        import traceback
        log.error("%s failed with %s:" % (repr(url), repr(e)))
        for line in traceback.format_exc().split("\n"):
            log.debug(line)

    log.debug("Status for %s : %s" % (repr(url), str(self.status)))

    return result
def process_keywords(self, provider, text):
    """ Processes the query payload from a provider's keyword definitions

    Args:
        provider (str): Provider ID
        text     (str): Keyword placeholders from definitions, ie. {title}

    Returns:
        str: Processed query keywords
    """
    keywords = self.read_keywords(text)
    replacing = get_setting("filter_quotes", bool)

    for keyword in keywords:
        # Replace the placeholder exactly as it is written in ``text``; matching happens on
        # the lower-cased keyword, which would not be found in ``text`` if the definition
        # used upper-case characters.
        placeholder = '{%s}' % keyword
        keyword = keyword.lower()

        if 'title' in keyword:
            title = self.info["title"]
            language = definitions[provider]['language']
            use_language = None
            if ':' in keyword:
                use_language = keyword.split(':')[1].lower()
            if provider not in self.language_exceptions and \
               (use_language or self.kodi_language) and \
               'titles' in self.info and self.info['titles']:
                try:
                    # Preference order: Kodi language, language requested by the keyword,
                    # then the provider's own language
                    if self.kodi_language and self.kodi_language in self.info['titles']:
                        use_language = self.kodi_language
                    if use_language not in self.info['titles']:
                        use_language = language
                        if 'original' in self.info['titles']:
                            title = self.info['titles']['original']
                    if use_language in self.info['titles'] and self.info['titles'][use_language]:
                        title = self.info['titles'][use_language]
                        title = normalize_string(title)
                        log.info("[%s] Using translated '%s' title %s" % (provider, use_language, repr(title)))
                        log.debug("[%s] Translated titles from Elementum: %s" % (provider, repr(self.info['titles'])))
                except Exception as e:
                    import traceback
                    log.error("%s failed with: %s" % (provider, repr(e)))
                    for line in traceback.format_exc().split("\n"):
                        log.debug(line)
            text = text.replace(placeholder, title)

        if 'year' in keyword:
            text = text.replace(placeholder, str(self.info["year"]))

        if 'season' in keyword:
            if '+' in keyword:
                # {season+N}: offset the season number by N
                keys = keyword.split('+')
                season = str(self.info["season"] + get_int(keys[1]))
            elif ':' in keyword:
                # {season:N}: use N as the precision of a %d format
                keys = keyword.split(':')
                season = ('%%.%sd' % keys[1]) % self.info["season"]
            else:
                season = '%s' % self.info["season"]
            text = text.replace(placeholder, season)

        if 'episode' in keyword:
            if '+' in keyword:
                keys = keyword.split('+')
                episode = str(self.info["episode"] + get_int(keys[1]))
            elif ':' in keyword:
                keys = keyword.split(':')
                episode = ('%%.%sd' % keys[1]) % self.info["episode"]
            else:
                episode = '%s' % self.info["episode"]
            text = text.replace(placeholder, episode)

    if replacing:
        text = text.replace(u"'", '')

    return text
import json import urllib2 from time import sleep from urlparse import urlparse from contextlib import closing from elementum.provider import log, get_setting from cookielib import Cookie, LWPCookieJar from urllib import urlencode from utils import encode_dict from xbmc import translatePath try: ssl._create_default_https_context = ssl._create_unverified_context except: log.debug("Skipping SSL workaround due to old Python version") pass USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 " \ "(KHTML, like Gecko) Chrome/53.0.2785.21 Safari/537.36" try: PATH_TEMP = translatePath("special://temp").decode( sys.getfilesystemencoding(), 'ignore') except: PATH_TEMP = translatePath("special://temp").decode('utf-8') if get_setting("use_opennic_dns", bool): import socket prv_getaddrinfo = socket.getaddrinfo dns_cache = { ('nnm-club.lib', 80, 0, 1): [(2, 1, 0, '', ('81.17.30.22', 80))],
def _save_cookies(self): try: self._cookies.save(self._cookies_filename) except Exception as e: log.debug("Saving cookies error: %s" % repr(e))
def __init__(self, proxy_url=None, request_charset='utf-8', response_charset=None):
    """
    Set up cookie storage, a retrying ``requests`` session and proxy routing.

    Args:
        proxy_url (str): Proxy URL supplied by the caller; applied to the session when the
            proxy is enabled with use type 0
        request_charset (str): Stored as ``self.request_charset``
        response_charset (str): Stored as ``self.response_charset``
    """
    self._counter = 0
    self._cookies_filename = ''
    self._cookies = LWPCookieJar()

    self.url = None
    self.user_agent = USER_AGENT
    self.clearance = None
    self.content = None
    self.status = None
    self.token = None
    self.passkey = None
    self.proxy_url = proxy_url
    self.request_charset = request_charset
    self.response_charset = response_charset

    self.use_antizapret = False
    self.needs_proxylock = False

    self.headers = dict()
    self.request_headers = None

    self.session = requests.session()
    # Certificate verification is switched off for every request of this session
    self.session.verify = False

    # Enabling retrying on failed requests
    retries = Retry(total=2,
                    read=2,
                    connect=2,
                    redirect=3,
                    backoff_factor=0.1)
    self.session.mount('http://', HTTPAdapter(max_retries=retries))
    self.session.mount('https://', HTTPAdapter(max_retries=retries))

    # Module-level DNS server lists are refreshed from settings on every construction
    global dns_public_list
    global dns_opennic_list
    dns_public_list = get_setting("public_dns_list", unicode).replace(" ", "").split(",")
    dns_opennic_list = get_setting("opennic_dns_list", unicode).replace(" ", "").split(",")

    # Parsing proxy information
    proxy = {
        'enabled': get_setting("proxy_enabled", bool),
        'use_type': get_setting("proxy_use_type", int),
        'type': proxy_types[0],
        'host': get_setting("proxy_host", unicode),
        'port': get_setting("proxy_port", int),
        'login': get_setting("proxy_login", unicode),
        'password': get_setting("proxy_password", unicode),
    }

    # Best effort: keep the default proxy type when the setting is missing or out of range
    try:
        proxy['type'] = proxy_types[get_setting("proxy_type", int)]
    except Exception:
        pass

    if get_setting("use_public_dns", bool):
        connection.create_connection = patched_create_connection

    if proxy['enabled']:
        if proxy['use_type'] == 0 and proxy_url:
            log.debug("Setting proxy from Elementum: %s" % (proxy_url))
        elif proxy['use_type'] == 1:
            log.debug("Setting proxy with custom settings: %s" % (repr(proxy)))

            # Explicit field indices: auto-numbered "{}" fields are rejected by Python 2.6
            if proxy['login'] or proxy['password']:
                proxy_url = "{0}://{1}:{2}@{3}:{4}".format(
                    proxy['type'], proxy['login'], proxy['password'],
                    proxy['host'], proxy['port'])
            else:
                proxy_url = "{0}://{1}:{2}".format(proxy['type'], proxy['host'], proxy['port'])
        elif proxy['use_type'] == 2:
            log.debug("Setting proxy to Antizapret resolver")
            self.use_antizapret = True
            proxy_url = None

        if proxy_url:
            self.session.proxies = {
                'http': proxy_url,
                'https': proxy_url,
            }
def process(provider, generator, filtering, has_special, verify_name=True, verify_size=True):
    """ Method for processing provider results using its generator and Filtering class instance

    Runs every query/extra pair of ``filtering`` against the provider, logging in
    first for private providers, and accumulates the formatted results.

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special    (bool): Whether title contains special chars
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes

    Returns:
        list: ``filtering.results`` with everything collected so far
    """
    log.debug("execute_process for %s with %s" % (provider, repr(generator)))
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    client = Client(info=filtering.info)
    logged_in = False

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
        language_exceptions = get_setting('language_exceptions')
        if language_exceptions.strip().lower():
            filtering.language_exceptions = re.split(r',\s?', language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras: %s" % (provider, filtering.extras))

    # NOTE: local names are kept as-is on purpose - the eval()'d definition
    # strings below are evaluated in this scope and may refer to them.
    for query, extra in zip(filtering.queries, filtering.extras):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if has_special:
            # Removing quotes, surrounding {title*} keywords, when title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)

        query = filtering.process_keywords(provider, query)
        extra = filtering.process_keywords(provider, extra)

        if 'charset' in definition and 'utf' not in definition['charset'].lower():
            try:
                query = urllib.quote(query.encode(definition['charset']))
                extra = urllib.quote(extra.encode(definition['charset']))
            except Exception as e:
                # Best effort: fall back to the unquoted query, but leave a trace
                log.debug("[%s] Could not quote the query (%s): %s" % (provider, repr(query), repr(e)))

        log.debug("[%s] After keywords - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        url_search = filtering.url.replace('QUERY', query)
        if extra:
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')
        url_search = url_search.replace(' ', definition['separator'])

        # NOTE(review): eval() on definition strings - safe only while provider
        # definitions come from trusted files.
        if 'post_data' in definition and not filtering.post_data:
            filtering.post_data = eval(definition['post_data'])

        # Creating the payload for POST method
        payload = dict()
        for key, value in filtering.post_data.iteritems():
            if 'QUERY' in value:
                payload[key] = filtering.post_data[key].replace('QUERY', query)
            else:
                payload[key] = filtering.post_data[key]

        # Creating the payload for GET method
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in filtering.get_data.iteritems():
                if 'QUERY' in value:
                    data[key] = filtering.get_data[key].replace('QUERY', query)
                else:
                    data[key] = filtering.get_data[key]

        log.debug("- %s query: %s" % (provider, repr(query)))
        log.debug("-- %s url_search before token: %s" % (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" % (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" % (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider)
            password = get_setting('%s_password' % provider)

            if 'login_object' in definition and definition['login_object']:
                logged_in = False
                try:
                    login_object = definition['login_object'].replace(
                        'USERNAME', '"%s"' % username).replace('PASSWORD', '"%s"' % password)
                except Exception as e:
                    log.error("[{0}] Make login_object fail: {1}".format(provider, e))
                    return filtering.results

                # TODO generic flags in definitions for those...
                if provider == 'lostfilm':
                    client.open(definition['root_url'] + '/v_search.php?c=110&s=1&e=1')
                    # The login page title is absent when the session is still valid
                    if u'Вход. – LostFilm.TV.' not in client.content:
                        log.info('[%s] Login successful' % provider)
                        logged_in = True

                if not logged_in and client.login(definition['root_url'] + definition['login_path'],
                                                  eval(login_object), definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider, client.status)
                    log.debug("[%s] Failed login content: %s", provider, repr(client.content))
                    notify(translation(32089).format(provider), image=get_icon_path())
                    return filtering.results

                if logged_in:
                    if provider == 'lostfilm':
                        log.info('[%s] Search lostfilm serial ID...', provider)
                        url_search = fix_lf(url_search)
                        client.open(url_search.encode('utf-8'), post_data=payload, get_data=data)
                        series_details = re.search(
                            r'"mark-rate-pane" rel="(\d+),(\d+),(\d+)">', client.content)
                        if series_details:
                            client.open(definition['root_url'] + '/v_search.php?a=%s%s%s' % (
                                series_details.group(1),
                                series_details.group(2).zfill(3),
                                series_details.group(3).zfill(3)))
                            redirect_url = re.search(u'url=(.*?)">', client.content)
                            if redirect_url is not None:
                                url_search = redirect_url.group(1)
                        else:
                            log.info('[%s] Not found ID in %s' % (provider, url_search))
                            return filtering.results

        log.info("> %s search URL: %s" % (definition['name'].rjust(longest), url_search))

        client.open(url_search.encode('utf-8'), post_data=payload, get_data=data)
        filtering.results.extend(
            generate_payload(provider,
                             generator(provider, client),
                             filtering,
                             verify_name,
                             verify_size))

    # Every early exit returns the results list; the normal exit must as well
    return filtering.results
def open(self, url, language='en', post_data=None, get_data=None):
    """ Fetches a page through urllib2 and keeps its body in ``self.content``

    Args:
        url        (str): The URL to open
        language   (str): The language code for the ``Content-Language`` header
        post_data (dict): POST data for the request
        get_data  (dict): GET data for the request, appended to the URL as a query string

    Returns:
        bool: True when the response was read without raising, False otherwise
    """
    post_data = post_data or {}
    if get_data:
        url += '?' + urlencode(get_data)

    log.debug("Opening URL: %s" % repr(url))
    result = False

    request_body = urlencode(post_data) if len(post_data) > 0 else None
    req = urllib2.Request(url, request_body)

    self._read_cookies(url)
    log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

    handlers = []

    if get_setting("use_elementum_proxy", bool) and self.proxy_url:
        proxy_kind = self.proxy_type
        if proxy_kind == 1:
            # SOCKS5 proxy, host and port are cut out of the proxy URL
            import proxy.socks as socks
            from proxy.sockshandler import SocksiPyHandler
            prx_info = self.proxy_url.split(':')
            handlers.append(SocksiPyHandler(socks.PROXY_TYPE_SOCKS5,
                                            prx_info[1].replace("//", ''),
                                            int(prx_info[2])))
        elif not proxy_kind or proxy_kind == 2:
            # Plain HTTP proxy, also the fallback when no proxy type is set
            handlers.append(urllib2.ProxyHandler({
                'http': self.proxy_url,
                'https': self.proxy_url,
            }))

    handlers.append(urllib2.HTTPCookieProcessor(self._cookies))
    opener = urllib2.build_opener(*handlers)

    request_headers = (
        ('User-Agent', self.user_agent),
        ('Content-Language', language),
        ("Accept-Encoding", "gzip"),
        ("Origin", url),
        ("Referer", url),
    )
    for header_name, header_value in request_headers:
        req.add_header(header_name, header_value)

    try:
        self._good_spider()
        with closing(opener.open(req)) as response:
            self.headers = response.headers
            self._save_cookies()

            gzipped = response.headers.get("Content-Encoding", "") == "gzip"
            if gzipped:
                import zlib
                self.content = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(response.read())
            else:
                self.content = response.read()

            # Charset from the HTTP header first, then from a <meta> tag
            charset = response.headers.getparam('charset')
            if not charset:
                meta = re.search(
                    """<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>]*)""",
                    self.content)
                if meta:
                    charset = meta.group(1)

            # Changing to utf-8-sig to remove BOM if found on decode from utf-8
            if charset and charset.lower() == 'utf-8':
                charset = 'utf-8-sig'

            if charset:
                log.debug('Decoding charset from %s for %s' % (charset, repr(url)))
                self.content = self.content.decode(charset, 'replace')

            self.status = response.getcode()
        result = True

    except urllib2.HTTPError as e:
        self.status = e.code
        log.warning("Status for %s : %s" % (repr(url), str(self.status)))

    except urllib2.URLError as e:
        self.status = repr(e.reason)
        log.warning("Status for %s : %s" % (repr(url), self.status))

    except Exception as e:
        import traceback
        log.error("%s failed with %s:" % (repr(url), repr(e)))
        for trace_line in traceback.format_exc().split("\n"):
            log.debug(trace_line)

    log.debug("Status for %s : %s" % (repr(url), str(self.status)))

    return result
def extract_from_api(provider, client):
    """ Main API parsing generator for API-based providers

    An almost clever API parser, mostly just for YTS, RARBG and T411.
    Field names are taken from the provider's ``api_format`` definition.

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result as (name, info_hash, torrent, size, seeds, peers)
    """
    try:
        data = json.loads(client.content)
    except Exception:
        # Empty or non-JSON response: behave as if the API returned nothing
        data = []

    log.debug("[%s] JSON response from API: %s" % (unquote(provider), repr(data)))

    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    api_format = definition['api_format']

    # If 'results' is empty - then we can try to take all the data as an array of results.
    # Usable when api returns results without any other data.
    if api_format['results']:
        result_keys = api_format['results'].split('.')
        log.debug("[%s] result_keys: %s" % (provider, repr(result_keys)))
        for key in result_keys:
            # Only descend into mappings; a null/number/list payload means no results
            if isinstance(data, dict) and key in data:
                data = data[key]
            else:
                data = []
    results = data
    log.debug("[%s] results: %s" % (provider, repr(results)))

    if 'subresults' in api_format:
        from copy import deepcopy
        for result in results:  # A little too specific to YTS but who cares...
            result['name'] = result[api_format['name']]

        # Flatten: one entry per sub-result, carrying the parent's fields along
        subresults = []
        subresults_keys = api_format['subresults'].split('.')
        for key in subresults_keys:
            for result in results:
                if key in result:
                    for subresult in result[key]:
                        sub = deepcopy(result)
                        sub.update(subresult)
                        subresults.append(sub)
        results = subresults
        log.debug("[%s] with subresults: %s" % (provider, repr(results)))

    def as_int(value):
        # Digit-only strings become ints, anything else is passed through
        if isinstance(value, basestring) and value.isdigit():
            return int(value)
        return value

    for result in results:
        if not result or not isinstance(result, dict):
            continue

        name = ''
        info_hash = ''
        torrent = ''
        size = ''
        seeds = ''
        peers = ''

        if 'name' in api_format:
            name = result[api_format['name']]
        if 'description' in api_format:
            if name:
                name += ' '
            name += result[api_format['description']]
        if 'torrent' in api_format:
            torrent = result[api_format['torrent']]
            if 'download_path' in definition:
                torrent = definition['base_url'] + definition['download_path'] + torrent
            if client.token:
                user_agent = USER_AGENT
                headers = {'Authorization': client.token, 'User-Agent': user_agent}
                log.debug("[%s] Appending headers: %s" % (provider, repr(headers)))
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))
        if 'info_hash' in api_format:
            info_hash = result[api_format['info_hash']]
        if 'quality' in api_format:  # Again quite specific to YTS...
            name = "%s - %s" % (name, result[api_format['quality']])
        if 'size' in api_format:
            size = result[api_format['size']]
            if isinstance(size, (long, int)):
                size = sizeof(size)
            elif isinstance(size, basestring) and size.isdigit():
                size = sizeof(int(size))
        if 'seeds' in api_format:
            seeds = as_int(result[api_format['seeds']])
        if 'peers' in api_format:
            peers = as_int(result[api_format['peers']])

        yield (name, info_hash, torrent, size, seeds, peers)
def extract_torrents(provider, client):
    """ Main torrent extraction generator for non-API based providers

    Parses ``client.content`` with the provider's ``parser`` expressions. When the
    definition sets ``subpage``, every row is resolved in its own thread through
    ``extract_subpage`` and results are yielded after all threads have finished.

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result as (name, info_hash, torrent, size, seeds, peers)
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("Extracting torrents from %s using definitions: %s" % (provider, repr(definition)))

    if not client.content:
        # Plain return: raising StopIteration inside a generator is a RuntimeError (PEP 479)
        return

    # NOTE: `dom` and `item` must keep these names, the eval()'d parser
    # expressions from the definition refer to them.
    dom = Html().feed(client.content)

    row_search = "dom." + definition['parser']['row']
    name_search = definition['parser']['name']
    torrent_search = definition['parser']['torrent']
    info_hash_search = definition['parser']['infohash']
    size_search = definition['parser']['size']
    seeds_search = definition['parser']['seeds']
    peers_search = definition['parser']['peers']

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:
        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash):
            try:
                log.debug("[%s] Getting subpage at %s" % (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" % (provider, repr(e)))
                for trace_line in traceback.format_exc().split("\n"):
                    log.debug(trace_line)

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey

            if get_setting("use_cloudhole", bool):
                subclient.clearance = get_setting('clearance')
                subclient.user_agent = get_setting('user_agent')

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'))

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' % (provider, repr(torrent)))
                # NOTE(review): `torrent` still carries the '|...' part here, so this
                # appends it a second time - confirm whether that is intended.
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error("[%s] Subpage extraction for %s failed with: %s" % (provider, repr(uri[0]), repr(e)))
                    for trace_line in traceback.format_exc().split("\n"):
                        log.debug(trace_line)

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        return

    # NOTE(review): eval() of parser expressions - safe only while provider
    # definitions come from trusted files.
    for item in eval(row_search):
        if not item:
            continue
        name = eval(name_search)
        torrent = eval(torrent_search) if torrent_search else ""
        size = eval(size_search) if size_search else ""
        seeds = eval(seeds_search) if seeds_search else ""
        peers = eval(peers_search) if peers_search else ""
        info_hash = eval(info_hash_search) if info_hash_search else ""

        # Pass client cookies with torrent if private
        if (definition['private'] or get_setting("use_cloudhole", bool)) and not torrent.startswith('magnet'):
            user_agent = USER_AGENT
            if get_setting("use_cloudhole", bool):
                user_agent = get_setting("user_agent")

            if client.passkey:
                torrent = torrent.replace('PASSKEY', client.passkey)
            elif client.token:
                headers = {'Authorization': client.token, 'User-Agent': user_agent}
                log.debug("[%s] Appending headers: %s" % (provider, repr(headers)))
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))
            else:
                log.debug("[%s] Cookies: %s" % (provider, repr(client.cookies())))
                parsed_url = urlparse(definition['root_url'])
                cookie_domain = '{uri.netloc}'.format(uri=parsed_url).replace('www.', '')
                cookies = []
                log.debug("[%s] cookie_domain: %s" % (provider, cookie_domain))
                for cookie in client._cookies:
                    log.debug("[%s] cookie for domain: %s (%s=%s)" % (provider, cookie.domain, cookie.name, cookie.value))
                    if cookie_domain in cookie.domain:
                        cookies.append(cookie)
                if cookies:
                    headers = {
                        'Cookie': ";".join(["%s=%s" % (c.name, c.value) for c in cookies]),
                        'User-Agent': user_agent
                    }
                    log.debug("[%s] Appending headers: %s" % (provider, repr(headers)))
                    torrent = append_headers(torrent, headers)
                    log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))

        if name and torrent and needs_subpage:
            if not torrent.startswith('http'):
                torrent = definition['root_url'] + torrent.encode('utf-8')
            t = Thread(target=extract_subpage, args=(q, name, torrent, size, seeds, peers, info_hash))
            threads.append(t)
        else:
            yield (name, info_hash, torrent, size, seeds, peers)

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        log.debug("[%s] Threads returned: %s" % (provider, repr(threads)))

        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
def extract_from_page(provider, content):
    """ Sub-page extraction method

    Tries a fixed, ordered list of patterns against the page and builds the link
    from the first one that matches.

    Args:
        provider (str): Provider ID
        content  (str): Page content from Client instance

    Returns:
        str: Torrent or magnet link extracted from sub-page, ``None`` when no
             pattern matched or the extraction failed
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    def as_is(match):
        return match

    def as_torrent_url(match):
        return ('http' + match + '.torrent').replace('torcache.net', 'itorrents.org')

    def from_root(match):
        return definition['root_url'] + match

    def as_magnet(match):
        return "magnet:?xt=urn:btih:" + match

    # (pattern, link builder, log message) - order matters, first match wins
    extractors = (
        (r'magnet:\?[^\'"\s<>\[\]]+', as_is,
         '[%s] Matched magnet link: %s'),
        (r'''http(.*?).torrent["']''', as_torrent_url,
         '[%s] Matched torrent link: %s'),
        (r'/download\?token=[A-Za-z0-9%]+', from_root,
         '[%s] Matched download link with token: %s'),
        (r'"(/download/[A-Za-z0-9]+)"', from_root,
         '[%s] Matched download link: %s'),
        (r'/torrents/download/\?id=[a-z0-9-_.]+', from_root,  # t411
         '[%s] Matched download link with an ID: %s'),
        (r'\: ([A-Fa-f0-9]{40})', as_magnet,
         '[%s] Matched magnet info_hash search: %s'),
        (r'/download.php\?id=([A-Za-z0-9]{40})\W', as_magnet,
         '[%s] Matched download link: %s'),
        (r'''(/download.php\?id=[A-Za-z0-9]+[^\s'"]*)''', from_root,
         '[%s] Matched download link: %s'),
    )

    try:
        for pattern, build, message in extractors:
            matches = re.findall(pattern, content)
            if matches:
                result = build(matches[0])
                log.debug(message % (provider, repr(result)))
                return result
    except Exception as e:
        # Still best effort (returns None), but no longer silent
        log.debug("[%s] Sub-page extraction failed with: %s" % (provider, repr(e)))

    return None
def generate_payload(provider, generator, filtering, verify_name=True, verify_size=True):
    """ Formats raw generator results into the dictionaries Elementum expects

    Results rejected by ``filtering.verify`` are dropped (the reason is logged);
    the accepted ones go through ``cleanup_results`` before being returned.

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes

    Returns:
        list: Formatted results
    """
    filtering.information(provider)
    results = []

    definition = get_alias(definitions[provider], get_setting("%s_alias" % provider))

    for result_id, name, info_hash, uri, size, seeds, peers in generator:
        size = clean_size(size)
        # uri, info_hash = clean_magnet(uri, info_hash)

        # With verification switched off, fall back to the query title / no size
        v_name = filtering.title if not verify_name else name
        v_size = None if not verify_size else size

        if not filtering.verify(provider, v_name, v_size):
            log.debug(filtering.reason)
            continue

        sort_seeds = get_int(seeds)
        sort_resolution = filtering.determine_resolution(v_name)[1] + 1
        sort_balance = (sort_seeds + 1) * 3 * sort_resolution

        entry = {
            "id": result_id,
            "name": name,
            "uri": uri,
            "info_hash": info_hash,
            "size": size,
            "seeds": sort_seeds,
            "peers": get_int(peers),
            "language": definition["language"] if 'language' in definition else 'en',
            "provider": '[COLOR %s]%s[/COLOR]' % (definition['color'], definition['name']),
            "icon": os.path.join(ADDON_PATH, 'burst', 'providers', 'icons', '%s.png' % provider),
            "sort_resolution": sort_resolution,
            "sort_balance": sort_balance
        }
        results.append(entry)

    log.debug('[%s] >>>>>> %s would send %d torrents to Elementum <<<<<<<' % (provider, provider, len(results)))
    results = cleanup_results(results)
    log.debug('[%s] >>>>>> %s would send %d torrents to Elementum after cleanup <<<<<<<' % (provider, provider, len(results)))

    return results
def process_keywords(self, provider, text, definition):
    """ Processes the query payload from a provider's keyword definitions

    Args:
        provider    (str): Provider ID
        text        (str): Keyword placeholders from definitions, ie. {title}
        definition (dict): Provider definition; its optional ``remove_special_characters``
                           entry lists characters stripped from translated titles

    Returns:
        str: Processed query keywords, or an empty string when the query has to be
             skipped (missing translated title or missing absolute episode number)
    """
    def format_number(keyword, value):
        # {name+N} adds an offset, {name:N} zero-pads to N digits, {name} is used as is
        if '+' in keyword:
            keys = keyword.split('+')
            return str(value + get_int(keys[1]))
        if ':' in keyword:
            keys = keyword.split(':')
            return ('%%.%sd' % keys[1]) % value
        return '%s' % value

    keywords = self.read_keywords(text)
    replacing = use_filter_quotes

    for keyword in keywords:
        keyword = keyword.lower()
        if 'title' in keyword:
            title = self.info["title"]
            language = definitions[provider]['language']
            use_language = None
            if ':' in keyword:
                use_language = keyword.split(':')[1].lower()
            if provider not in self.language_exceptions and \
                    (use_language or self.kodi_language) and \
                    'titles' in self.info and self.info['titles']:
                try:
                    # Language priority: explicit {title:xx}, then Kodi language, then provider language
                    if not use_language and self.kodi_language and self.kodi_language in self.info['titles']:
                        use_language = self.kodi_language
                    if not use_language and language and language in self.info['titles']:
                        use_language = language

                    if use_language in self.info['titles'] and self.info['titles'][use_language]:
                        title = self.info['titles'][use_language]
                        title = normalize_string(title)
                        # For all non-original titles, that are not base languages of a tracker OR english language, try to remove accents from the title.
                        if use_language != 'original' and (
                                self.convert_language(use_language) not in self.provider_languages or
                                self.convert_language(use_language) == 'en'):
                            title = remove_accents(title)
                        # Remove characters, filled in 'remove_special_characters' field definition.
                        if 'remove_special_characters' in definition and definition['remove_special_characters']:
                            for char in definition['remove_special_characters']:
                                title = title.replace(char, "")
                            title = " ".join(title.split())

                        log.info("[%s] Using translated '%s' title %s" % (provider, use_language, repr(title)))
                    else:
                        log.debug("[%s] Skipping the query '%s' due to missing '%s' language title" % (provider, text, use_language))
                        # If title for specific language cannot be read - cancel this query
                        return ""
                except Exception as e:
                    import traceback
                    log.error("%s failed with: %s" % (provider, repr(e)))
                    # Explicit loop: map() is lazy on Python 3 and would log nothing
                    for trace_line in traceback.format_exc().split("\n"):
                        log.debug(trace_line)
            text = text.replace('{%s}' % keyword, title)

        if 'year' in keyword:
            text = text.replace('{%s}' % keyword, str(self.info["year"]))

        # Order matters: 'show_tmdb_id' also contains 'tmdb_id'
        for id_key in ('show_tmdb_id', 'tmdb_id', 'tvdb_id', 'imdb_id'):
            if id_key in keyword:
                if id_key not in self.info:
                    self.info[id_key] = ''
                text = text.replace('{%s}' % keyword, str(self.info[id_key]))

        if 'season' in keyword:
            season = format_number(keyword, self.info["season"])
            text = text.replace('{%s}' % keyword, season)

        if 'episode' in keyword and 'absolute' not in keyword:
            episode = format_number(keyword, self.info["episode"])
            text = text.replace('{%s}' % keyword, episode)

        if 'absolute_episode' in keyword:
            if 'absolute_number' not in self.info or not self.info['absolute_number']:
                log.debug("Skipping query '%s' due to missing absolute_number" % text)
                return ""
            episode = format_number(keyword, self.info["absolute_number"])
            text = text.replace('{%s}' % keyword, episode)

    if replacing:
        text = text.replace(u"'", '')

    return text
def process(provider, generator, filtering, has_special, verify_name=True, verify_size=True, skip_auth=False, start_time=None, timeout=None):
    """ Method for processing provider results using its generator and Filtering class instance

    For every (query, extra, priority) triple of ``filtering`` this builds the search
    URL and payloads, obtains a token and/or logs in when the definition asks for it,
    opens the search URL and appends the formatted results to ``filtering.results``.

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special    (bool): Whether title contains special chars
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes
        skip_auth      (bool): Initial value of the "logged in" flag for providers with a ``login_object``
        start_time    (float): Start of the overall task, compared against ``time.time()``
        timeout         (int): Time budget in seconds; queries are skipped once less than
                               3 seconds of it remain since ``start_time``

    Returns:
        list: ``filtering.results`` with everything collected so far
    """
    log.debug("[%s] execute_process for %s with %s" % (provider, provider, repr(generator)))
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    client = Client(info=filtering.info, request_charset=definition['charset'], response_charset=definition['response_charset'], is_api='is_api' in definition and definition['is_api'])
    # State shared by all queries of this call, so token/login happen only once
    token = None
    logged_in = False
    token_auth = False
    used_queries = set()

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
        language_exceptions = get_setting('language_exceptions')
        if language_exceptions.strip().lower():
            filtering.language_exceptions = re.split(r',\s?', language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras: %s" % (provider, filtering.extras))

    last_priority = 1
    # NOTE(review): the eval() calls below run definition strings in this scope,
    # so they may depend on the local names used here - keep names stable.
    for query, extra, priority in zip(filtering.queries, filtering.extras, filtering.queries_priorities):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if has_special:
            # Removing quotes, surrounding {title*} keywords, when title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)

        query = filtering.process_keywords(provider, query, definition)
        extra = filtering.process_keywords(provider, extra, definition)

        if not query:
            # process_keywords returned nothing usable for this query
            continue
        elif query + extra in used_queries:
            # Make sure we don't run same query for this provider
            continue
        elif priority > last_priority and filtering.results:
            # Skip fallbacks if there are results
            log.debug("[%s] Skip fallback as there are already results" % provider)
            continue
        elif start_time and timeout and time.time() - start_time + 3 >= timeout:
            # Stop doing requests if there is 3 seconds left for the overall task
            continue

        used_queries.add(query + extra)
        last_priority = priority

        try:
            # Non-UTF providers get the query encoded in their own charset before quoting
            if 'charset' in definition and definition['charset'] and 'utf' not in definition['charset'].lower():
                query = quote(query.encode(definition['charset']))
                extra = quote(extra.encode(definition['charset']))
            else:
                query = quote(py2_encode(query))
                extra = quote(py2_encode(extra))
        except Exception as e:
            # Best effort: the unquoted query is used when quoting fails
            log.debug("[%s] Could not quote the query (%s): %s" % (provider, query, e))
            pass

        log.debug("[%s] After keywords - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        url_search = filtering.url.replace('QUERY', query)
        if extra:
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')

        # Spaces (raw and already quoted as %20) are replaced by the provider's separator
        url_search = url_search.replace(' ', definition['separator'])
        if definition['separator'] != '%20':
            url_search = url_search.replace('%20', definition['separator'])

        # MagnetDL fix...
        url_search = url_search.replace('FIRSTLETTER', query[:1])

        # Creating the payload for POST method
        if 'post_data' in definition and not filtering.post_data:
            filtering.post_data = eval(definition['post_data'])

        payload = dict()
        for key, value in iteritems(filtering.post_data):
            if 'QUERY' in value:
                payload[key] = filtering.post_data[key].replace('QUERY', query)
            else:
                payload[key] = filtering.post_data[key]
            # POST values are sent unquoted (the query was quoted above)
            payload[key] = urllib.unquote(payload[key])

        # Creating the payload for GET method
        headers = None
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in iteritems(filtering.get_data):
                if 'QUERY' in value:
                    data[key] = filtering.get_data[key].replace('QUERY', query)
                else:
                    data[key] = filtering.get_data[key]

        log.debug("- %s query: %s" % (provider, repr(query)))
        log.debug("-- %s url_search before token: %s" % (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" % (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" % (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        # Some providers need a first page opened before anything else
        if 'initial_url' in definition and definition['initial_url']:
            url = definition['initial_url']
            if not url.startswith('http'):
                url = definition['root_url'] + url
            client.open(url)

        # Search token: fetched once, then reused for the following queries
        if token:
            log.info('[%s] Reusing existing token' % provider)
            url_search = url_search.replace('TOKEN', token)
        elif 'token' in definition:
            token_url = definition['base_url'] + definition['token']
            log.debug("[%s] Getting token for %s at %s" % (provider, provider, repr(token_url)))
            client.open(py2_encode(token_url))
            try:
                token_data = json.loads(client.content)
            except:
                log.error('%s: Failed to get token for %s' % (provider, repr(url_search)))
                return filtering.results
            log.debug("[%s] Token response for %s: %s" % (provider, provider, repr(token_data)))
            if 'token' in token_data:
                token = token_data['token']
                log.debug("[%s] Got token for %s: %s" % (provider, provider, repr(token)))
                url_search = url_search.replace('TOKEN', token)
            else:
                log.warning('%s: Unable to get token for %s' % (provider, repr(url_search)))

        # Authentication for private providers: passkey, token auth or regular login
        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif token_auth:
            log.info("[%s] Reusing previous token authorization" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider, unicode)
            password = get_setting('%s_password' % provider, unicode)
            passkey = get_setting('%s_passkey' % provider, unicode)
            if not username and not password and not passkey:
                # No credentials stored here: try to copy them from the script.magnetic.* add-ons
                for addon_name in ('script.magnetic.%s' % provider, 'script.magnetic.%s-mc' % provider):
                    for setting in ('username', 'password'):
                        try:
                            value = xbmcaddon.Addon(addon_name).getSetting(setting)
                            set_setting('%s_%s' % (provider, setting), value)
                            if setting == 'username':
                                username = value
                            if setting == 'password':
                                password = value
                        except:
                            pass

            if username:
                client.username = username
                url_search = url_search.replace('USERNAME', username)

            if passkey:
                # A passkey counts as being logged in, no login request is made
                logged_in = True
                client.passkey = passkey
                url_search = url_search.replace('PASSKEY', passkey)

            elif 'login_object' in definition and definition['login_object']:
                login_object = None
                login_headers = None
                logged_in = skip_auth

                try:
                    login_object = definition['login_object'].replace('USERNAME', 'u"%s"' % username).replace('PASSWORD', 'u"%s"' % password)
                except Exception as e:
                    log.error("Could not make login object for %s: %s" % (provider, e))
                try:
                    if 'login_headers' in definition and definition['login_headers']:
                        login_headers = eval(definition['login_headers'])
                except Exception as e:
                    log.error("Could not make login headers for %s: %s" % (provider, e))

                # TODO generic flags in definitions for those...
                if 'csrf_token' in definition and definition['csrf_token']:
                    client.open(definition['root_url'] + definition['login_path'])
                    if client.content:
                        csrf_token = re.search(r'name=\"_?csrf_token\" value=\"(.*?)\"', client.content)
                        if csrf_token:
                            login_object = login_object.replace('CSRF_TOKEN', '"%s"' % csrf_token.group(1))
                        else:
                            # No CSRF field on the login page is treated as "already logged in"
                            logged_in = True

                if 'token_auth' in definition:
                    # log.debug("[%s] logging in with: %s" % (provider, login_object))
                    if client.open(definition['root_url'] + definition['token_auth'], post_data=eval(login_object)):
                        try:
                            token_data = json.loads(client.content)
                        except:
                            log.error('%s: Failed to get token from %s' % (provider, definition['token_auth']))
                            return filtering.results
                        log.debug("[%s] Token response for %s: %s" % (provider, provider, repr(token_data)))
                        if 'token' in token_data:
                            client.token = token_data['token']
                            log.debug("[%s] Auth token for %s: %s" % (provider, provider, repr(client.token)))
                        else:
                            log.error('[%s] Unable to get auth token for %s' % (provider, repr(url_search)))
                            return filtering.results
                        log.info('[%s] Token auth successful' % provider)
                        token_auth = True
                    else:
                        log.error("[%s] Token auth failed with response: %s" % (provider, repr(client.content)))
                        return filtering.results
                elif not logged_in and client.login(definition['root_url'], definition['login_path'],
                                                    eval(login_object), login_headers, definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider, client.status)
                    log.debug("[%s] Failed login content: %s", provider, repr(client.content))
                    return filtering.results

                if logged_in:
                    if provider == 'hd-torrents':
                        client.open(definition['root_url'] + '/torrents.php')
                        # NOTE(review): re.search() may return None here, which would
                        # raise AttributeError on .group(1) - confirm the page always has it.
                        csrf_token = re.search(r'name="csrfToken" value="(.*?)"', client.content)
                        url_search = url_search.replace("CSRF_TOKEN", csrf_token.group(1))
                    client.save_cookies()

        log.info("[%s] > %s search URL: %s" % (provider, definition['name'].rjust(longest), url_search))

        # Optional per-provider request headers, built from the definition
        if 'headers' in definition and definition['headers']:
            headers = eval(definition['headers'])
            log.info("[%s] > %s headers: %s" % (provider, definition['name'].rjust(longest), headers))

        client.open(py2_encode(url_search), post_data=payload, get_data=data, headers=headers)
        filtering.results.extend(
            generate_payload(provider,
                             generator(provider, client),
                             filtering,
                             verify_name,
                             verify_size))
    return filtering.results
available_providers = 0 request_time = time.time() auto_timeout = get_setting("auto_timeout", bool) timeout = get_setting("timeout", int) special_chars = "()\"':.[]<>/\\?" if auto_timeout: elementum_addon = xbmcaddon.Addon(id='plugin.video.elementum') if elementum_addon: if elementum_addon.getSetting( 'custom_provider_timeout_enabled') == "true": timeout = int( elementum_addon.getSetting('custom_provider_timeout')) - 2 else: timeout = 28 log.debug("Using timeout from Elementum: %d seconds" % (timeout)) def search(payload, method="general"): """ Main search entrypoint Args: payload (dict): Search payload from Elementum. method (str): Type of search, can be ``general``, ``movie``, ``show``, ``season`` or ``anime`` Returns: list: All filtered results in the format Elementum expects """ log.debug("Searching with payload (%s): %s" % (method, repr(payload))) if method == 'general':
def open(self, url, language='en', post_data=None, get_data=None, headers=None):
    """ Opens a connection to a webpage and saves its HTML content in ``self.content``

    On a completed request the response headers, status code and final URL
    are stored in ``self.headers``, ``self.status`` and ``self.url``, and the
    headers actually sent are kept in ``self.request_headers``.

    Args:
        url        (str): The URL to open
        language   (str): The language code for the ``Content-Language`` header
        post_data (dict): POST data for the request; a POST is sent when this is truthy, a GET otherwise
        get_data  (dict): GET data, urlencoded and appended to ``url`` as a query string
        headers   (dict): Extra request headers. A truthy value overrides the default header,
                          a falsy value removes the matching default header, and the special
                          key ``:path`` is filled with the path component of ``url``

    Returns:
        bool: ``True`` if the response status code is 200, or if the URL had no connection
        adapter (e.g. a ``magnet:`` link), in which case the link itself is stored in
        ``self.content``. ``False`` otherwise, including when the request raised.
    """
    import traceback

    def _log_failure(error):
        # Must be called from inside an ``except`` block so that
        # ``traceback.format_exc()`` still sees the active exception.
        log.error("%s failed with %s:" % (repr(url), repr(error)))
        # Explicit loop: ``map()`` is lazy on Python 3 and would log nothing.
        for line in traceback.format_exc().split("\n"):
            log.debug(line)

    if get_data:
        url += '?' + urlencode(get_data)

    log.debug("Opening URL: %s" % repr(url))
    if self.session.proxies:
        log.debug("Proxies: %s" % (repr(self.session.proxies)))

    self._read_cookies(url)
    self.session.cookies = self._cookies

    # Default headers for any request. Pretend like we are the usual browser.
    req_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7,uk;q=0.6,pl;q=0.5',
        'Cache-Control': 'max-age=0',
        'Content-Language': language,
        'Origin': url,
        'Referer': url,
        'User-Agent': self.user_agent
    }

    # Remove referer for API providers
    if self.is_api:
        del req_headers['Referer']

    # If headers passed to open() call - we overwrite headers.
    if headers:
        for key, value in iteritems(headers):
            if key == ':path':
                value = urlparse(url).path

            if value:
                req_headers[key] = value
            elif key.capitalize() in req_headers:
                # NOTE(review): ``capitalize()`` lowercases everything after
                # the first character, so only single-word defaults
                # ('Accept', 'Origin', 'Referer') can be removed this way.
                del req_headers[key.capitalize()]

    if self.token:
        req_headers["Authorization"] = self.token

    if post_data:
        req = requests.Request('POST', url, data=post_data, headers=req_headers)
    else:
        req = requests.Request('GET', url, headers=req_headers)

    prepped = self.session.prepare_request(req)
    self.request_headers = prepped.headers

    # Forget the status of any previous request, otherwise a request that
    # raises below would be reported as successful because of a stale 200.
    self.status = None

    try:
        self._good_spider()
        with self.session.send(prepped) as response:
            self.headers = response.headers
            self.status = response.status_code
            self.url = response.url

            if self.response_charset:
                self.content = response.content.decode(
                    self.response_charset, 'ignore')
            else:
                self.content = response.text

    except requests.exceptions.InvalidSchema as e:
        # If link points to a magnet: then it can be used as a content
        matches = re.findall(
            'No connection adapters were found for \'(.*?)\'', str(e))
        if matches:
            self.content = matches[0]
            return True

        _log_failure(e)
    except Exception as e:
        # Best effort: any failure is logged and reported through the
        # return value instead of being raised to the caller.
        _log_failure(e)

    log.debug("Status for %s : %s" % (repr(url), str(self.status)))

    return self.status == 200
def search(payload, method="general"):
    """ Main search entrypoint

    Normalizes the payload, starts one thread per enabled provider, waits
    until every provider has reported back or the module-level ``timeout``
    has elapsed, and returns the filtered results.

    Args:
        payload (dict): Search payload from Elementum. For ``general`` searches a
                        bare query (anything that is not a dict with a ``query`` key)
                        is also accepted and wrapped into a payload dict.
        method   (str): Type of search, can be ``general``, ``movie``, ``show``, ``season`` or ``anime``

    Returns:
        list: All filtered results in the format Elementum expects, or an
        empty list when no providers are enabled
    """
    global request_time
    global provider_names
    global provider_results
    global available_providers

    log.debug("Searching with payload (%s): %s" % (method, repr(payload)))

    if method == 'general':
        # The isinstance() guard matters: for a plain string payload,
        # ``'query' in payload`` would be a substring test.
        if isinstance(payload, dict) and 'query' in payload:
            payload['title'] = payload['query']
            payload['titles'] = {'source': payload['query']}
        else:
            payload = {
                'title': payload,
                'titles': {
                    'source': payload
                },
            }

    # Lower-case the title keys; tolerate a payload without any titles.
    # ``.items()`` works on both Python 2 and Python 3.
    payload['titles'] = dict(
        (k.lower(), v) for k, v in (payload.get('titles') or {}).items())

    # If there are special chars in titles[source] then we set a flag
    # to possibly modify the search query
    source_title = payload['titles'].get('source') or ''
    payload['has_special'] = any(c in source_title for c in special_chars)
    if payload['has_special']:
        log.debug(
            "Query title contains special chars, so removing any quotes in the search query"
        )

    for url_key in ('proxy_url', 'internal_proxy_url', 'elementum_url'):
        if url_key not in payload:
            payload[url_key] = ''

    # Reset the module-level state for this search.
    provider_names = []
    provider_results = []
    available_providers = 0
    request_time = time.time()

    providers = get_enabled_providers(method)

    if not providers:
        notify(translation(32060), image=get_icon_path())
        log.error("No providers enabled")
        return []

    log.info(
        "Burstin' with %s" %
        ", ".join([definitions[provider]['name'] for provider in providers]))

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if not kodi_language:
            log.warning("Kodi returned empty language code...")
        elif not payload['titles']:
            log.info("No translations available...")
        elif kodi_language not in payload['titles']:
            log.info("No '%s' translation available..." % kodi_language)

    p_dialog = xbmcgui.DialogProgressBG()
    p_dialog.create('Elementum [COLOR FFFF6B00]Burst[/COLOR]',
                    translation(32061))
    try:
        for provider in providers:
            available_providers += 1
            provider_names.append(definitions[provider]['name'])
            task = Thread(target=run_provider,
                          args=(provider, payload, method))
            task.start()

        providers_time = time.time()
        total = float(available_providers)

        # Exit if all providers have returned results or timeout reached,
        # check every 250ms
        while available_providers > 0:
            timer = time.time() - providers_time
            if timer >= timeout:
                break
            log.debug("Timer: %ds / %ds" % (timer, timeout))
            if available_providers > 1:
                message = translation(32062) % available_providers
            else:
                message = translation(32063)
            p_dialog.update(int((total - available_providers) / total * 100),
                            message=message)
            time.sleep(0.25)
    finally:
        # Always dismiss the background progress dialog, even if starting
        # a provider or updating the dialog raised.
        p_dialog.close()
        del p_dialog

    if available_providers > 0:
        # Some providers did not report back in time; name them in a
        # warning. Presumably finished providers remove themselves from
        # provider_names elsewhere -- verify against run_provider.
        message = u', '.join(provider_names)
        message = message + translation(32064)
        log.warning(message.encode('utf-8'))
        notify(message, ADDON_ICON)

    log.debug("all provider_results: %s" % repr(provider_results))

    filtered_results = apply_filters(provider_results)

    log.debug("all filtered_results: %s" % repr(filtered_results))

    log.info("Providers returned %d results in %s seconds" %
             (len(filtered_results), round(time.time() - request_time, 2)))

    return filtered_results
def _parse_item(self, item):
    """ Converts one torznab result element into an Elementum result dict

    Child elements are mapped onto result keys through
    ``self._torznab_elementum_mappings``: torznab ``attr`` elements via the
    ``torznab_attrs`` table (keyed by their ``name`` attribute) and all other
    elements via the ``tags`` table (keyed by tag name).

    Args:
        item (Element): The XML element of a single search result

    Returns:
        dict: The parsed result, or ``None`` when no name or no URI could be
        determined for the item

    Raises:
        ValueError: If seeds, peers or size are not integer strings, or if the
            detected resolution is not a key of ``utils.resolutions``
    """
    result = {
        "name": None,
        "provider": "Unknown",
        "size": "Unknown",
        "uri": None,
        "seeds": "0",
        "peers": "0",
        "info_hash": "",
        "language": None,
        # todo would be nice to assign correct icons but that can be very time consuming due to the number
        #  of indexers in Jackett
        "icon": get_icon_path(),
        "_size_bytes": -1
    }

    attr_tag = "{" + self._torznab_ns + "}attr"
    attr_map = self._torznab_elementum_mappings["torznab_attrs"]
    tag_map = self._torznab_elementum_mappings["tags"]

    for ref in item:
        if ref.tag == attr_tag:
            # .get() instead of indexing: an attr element without a
            # "name" or "value" attribute is skipped, not a KeyError.
            attr_name = ref.attrib.get("name")
            val = ref.attrib.get("value")
            # ``bytes`` is ``str`` on Python 2, and on Python 3 text is
            # already unicode and has no ``decode``.
            if isinstance(val, bytes):
                val = val.decode("utf-8")
            if attr_name and val and attr_name in attr_map:
                result[attr_map[attr_name]] = val
            continue

        if ref.tag in tag_map and ref.text is not None:
            val = ref.text.strip()
            if isinstance(val, bytes):
                val = val.decode("utf-8")
            result[tag_map[ref.tag]] = val

    # if we didn't get a magnet uri, attempt to resolve the magnet uri.
    # todo for some reason Elementum cannot resolve the link that gets proxied through Jackett.
    #  So we will resolve it manually for Elementum for now.
    #  In actuality, this should be fixed within Elementum
    if result["uri"] is None:
        link = item.find('link')
        # An empty <link/> has ``text`` None; treat it as "no link".
        jackett_uri = (link.text or "") if link is not None else ""
        if not jackett_uri:
            enclosure = item.find('enclosure')
            if enclosure is not None:
                jackett_uri = enclosure.attrib.get('url', "")

        if jackett_uri:
            result["uri"] = get_magnet_from_jackett(jackett_uri)

    if result["name"] is None or result["uri"] is None:
        log.warning("Could not parse item; name = %s; uri = %s",
                    result["name"], result["uri"])
        log.debug("Failed item is: %s",
                  ElementTree.tostring(item, encoding='utf8'))
        return None

    result["seeds"] = int(result["seeds"])
    result["peers"] = int(result["peers"])

    resolution = get_resolution(result["name"])
    # list(): dict.keys() is a non-sliceable view on Python 3.
    result["resolution"] = list(
        utils.resolutions.keys())[::-1].index(resolution)
    result["_resolution"] = resolution
    result["release_type"] = get_release_type(result["name"])

    if result["size"] != "Unknown":
        result["_size_bytes"] = int(result["size"])
        result["size"] = human_size(result["_size_bytes"])

    return result