def __init__(self,
                 info=None,
                 request_charset='utf-8',
                 response_charset=None):
        self._counter = 0
        self._cookies_filename = ''
        self._cookies = LWPCookieJar()
        self.url = None
        self.user_agent = USER_AGENT
        self.content = None
        self.status = None
        self.token = None
        self.passkey = None
        self.info = info
        self.proxy_url = None
        self.request_charset = request_charset
        self.response_charset = response_charset

        self.needs_proxylock = False

        self.headers = dict()
        self.request_headers = None

        self.session = requests.session()
        self.session.verify = False

        # Enabling retrying on failed requests
        retries = Retry(total=2,
                        read=2,
                        connect=2,
                        redirect=3,
                        backoff_factor=0.1,
                        status_forcelist=[429, 500, 502, 503, 504])

        self.session.mount('http://', HTTPAdapter(max_retries=retries))
        self.session.mount('https://', HTTPAdapter(max_retries=retries))
        # self.session = cfscrape.create_scraper()
        # self.scraper = cfscrape.create_scraper()
        # self.session = self.scraper.session()

        global dns_public_list
        global dns_opennic_list
        dns_public_list = get_setting("public_dns_list",
                                      unicode).replace(" ", "").split(",")
        dns_opennic_list = get_setting("opennic_dns_list",
                                       unicode).replace(" ", "").split(",")
        # socket.setdefaulttimeout(60)

        # Parsing proxy information
        proxy = {
            'enabled': get_setting("proxy_enabled", bool),
            'use_type': get_setting("proxy_use_type", int),
            'type': proxy_types[0],
            'host': get_setting("proxy_host", unicode),
            'port': get_setting("proxy_port", int),
            'login': get_setting("proxy_login", unicode),
            'password': get_setting("proxy_password", unicode),
        }

        try:
            proxy['type'] = proxy_types[get_setting("proxy_type", int)]
        except:
            pass

        if get_setting("use_public_dns", bool):
            connection.create_connection = patched_create_connection

        if get_setting("use_elementum_proxy", bool):
            elementum_addon = xbmcaddon.Addon(id='plugin.video.elementum')
            if elementum_addon and elementum_addon.getSetting(
                    'internal_proxy_enabled') == "true":
                self.proxy_url = "{0}://{1}:{2}".format(
                    "http", "127.0.0.1", "65222")
                if info and "internal_proxy_url" in info:
                    self.proxy_url = info["internal_proxy_url"]

                self.session.proxies = {
                    'http': self.proxy_url,
                    'https': self.proxy_url,
                }
        elif proxy['enabled']:
            if proxy['use_type'] == 0 and info and "proxy_url" in info:
                log.debug("Setting proxy from Elementum: %s" %
                          (info["proxy_url"]))
                self.proxy_url = info["proxy_url"]
            elif proxy['use_type'] == 1:
                log.debug("Setting proxy with custom settings: %s" %
                          (repr(proxy)))

                if proxy['login'] or proxy['password']:
                    self.proxy_url = "{0}://{1}:{2}@{3}:{4}".format(
                        proxy['type'], proxy['login'], proxy['password'],
                        proxy['host'], proxy['port'])
                else:
                    self.proxy_url = "{0}://{1}:{2}".format(
                        proxy['type'], proxy['host'], proxy['port'])

            if self.proxy_url:
                self.session.proxies = {
                    'http': self.proxy_url,
                    'https': self.proxy_url,
                }
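
For reference, the retry wiring above can be reproduced outside Kodi with nothing but requests and urllib3. A minimal standalone sketch, assuming only those two packages are installed:

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

session = requests.session()
retries = Retry(total=2,
                read=2,
                connect=2,
                redirect=3,
                backoff_factor=0.1,  # sleeps ~0.1s, 0.2s, ... between attempts
                status_forcelist=[429, 500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retries))
session.mount('https://', HTTPAdapter(max_retries=retries))
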
Example #2
def process(provider,
            generator,
            filtering,
            has_special,
            verify_name=True,
            verify_size=True):
    """ Method for processing provider results using its generator and Filtering class instance

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special    (bool): Whether title contains special chars
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes
    """
    log.debug("execute_process for %s with %s" % (provider, repr(generator)))
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    client = Client()
    token = None
    logged_in = False
    token_auth = False

    if get_setting("use_cloudhole", bool):
        client.clearance = get_setting('clearance')
        client.user_agent = get_setting('user_agent')

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
        language_exceptions = get_setting('language_exceptions')
        if language_exceptions.strip().lower():
            filtering.language_exceptions = re.split(r',\s?',
                                                     language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras:  %s" % (provider, filtering.extras))

    for query, extra in zip(filtering.queries, filtering.extras):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if has_special:
            # Remove quotes surrounding {title*} keywords when the title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)

        query = filtering.process_keywords(provider, query)
        extra = filtering.process_keywords(provider, extra)
        if 'charset' in definition and 'utf' not in definition[
                'charset'].lower():
            try:
                query = urllib.quote(query.encode(definition['charset']))
                extra = urllib.quote(extra.encode(definition['charset']))
            except:
                pass

        log.debug("[%s] After keywords  - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        url_search = filtering.url.replace('QUERY', query)
        if extra:
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')
        url_search = url_search.replace(' ', definition['separator'])

        # MagnetDL fix...
        url_search = url_search.replace('FIRSTLETTER', query[:1])

        # Creating the payload for POST method
        payload = dict()
        for key, value in filtering.post_data.iteritems():
            if 'QUERY' in value:
                payload[key] = filtering.post_data[key].replace('QUERY', query)
            else:
                payload[key] = filtering.post_data[key]

        # Creating the payload for GET method
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in filtering.get_data.iteritems():
                if 'QUERY' in value:
                    data[key] = filtering.get_data[key].replace('QUERY', query)
                else:
                    data[key] = filtering.get_data[key]

        log.debug("-   %s query: %s" % (provider, repr(query)))
        log.debug("--  %s url_search before token: %s" %
                  (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" % (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" %
                  (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        if token:
            log.info('[%s] Reusing existing token' % provider)
            url_search = url_search.replace('TOKEN', token)
        elif 'token' in definition:
            token_url = definition['base_url'] + definition['token']
            log.debug("Getting token for %s at %s" %
                      (provider, repr(token_url)))
            client.open(token_url.encode('utf-8'))
            try:
                token_data = json.loads(client.content)
            except:
                log.error('%s: Failed to get token for %s' %
                          (provider, repr(url_search)))
                return filtering.results
            log.debug("Token response for %s: %s" %
                      (provider, repr(token_data)))
            if 'token' in token_data:
                token = token_data['token']
                log.debug("Got token for %s: %s" % (provider, repr(token)))
                url_search = url_search.replace('TOKEN', token)
            else:
                log.warning('%s: Unable to get token for %s' %
                            (provider, repr(url_search)))

        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif token_auth:
            log.info("[%s] Reusing previous token authorization" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider)
            password = get_setting('%s_password' % provider)
            passkey = get_setting('%s_passkey' % provider)
            if not username and not password and not passkey:
                for addon_name in ('script.magnetic.%s' % provider,
                                   'script.magnetic.%s-mc' % provider):
                    for setting in ('username', 'password'):
                        try:
                            value = xbmcaddon.Addon(addon_name).getSetting(
                                setting)
                            set_setting('%s_%s' % (provider, setting), value)
                            if setting == 'username':
                                username = value
                            if setting == 'password':
                                password = value
                        except:
                            pass

            if passkey:
                logged_in = True
                client.passkey = passkey
                url_search = url_search.replace('PASSKEY', passkey)

            elif 'login_object' in definition and definition['login_object']:
                logged_in = False
                login_object = definition['login_object'].replace(
                    'USERNAME',
                    '"%s"' % username).replace('PASSWORD', '"%s"' % password)

                # TODO generic flags in definitions for those...
                if provider == 'hd-torrents':
                    client.open(definition['root_url'] +
                                definition['login_path'])
                    if client.content:
                        csrf_token = re.search(
                            r'name="csrfToken" value="(.*?)"', client.content)
                        if csrf_token:
                            login_object = login_object.replace(
                                'CSRF_TOKEN', '"%s"' % csrf_token.group(1))
                        else:
                            logged_in = True
                if provider == 'lostfilm':
                    client.open(definition['root_url'] +
                                '/v_search.php?c=111&s=1&e=1')
                    if client.content and 'log in first' not in client.content:
                        logged_in = True

                if 'token_auth' in definition:
                    # log.debug("[%s] logging in with: %s" % (provider, login_object))
                    if client.open(definition['root_url'] +
                                   definition['token_auth'],
                                   post_data=eval(login_object)):
                        try:
                            token_data = json.loads(client.content)
                        except:
                            log.error('%s: Failed to get token from %s' %
                                      (provider, definition['token_auth']))
                            return filtering.results
                        log.debug("Token response for %s: %s" %
                                  (provider, repr(token_data)))
                        if 'token' in token_data:
                            client.token = token_data['token']
                            log.debug("Auth token for %s: %s" %
                                      (provider, repr(client.token)))
                        else:
                            log.error('%s: Unable to get auth token for %s' %
                                      (provider, repr(url_search)))
                            return filtering.results
                        log.info('[%s] Token auth successful' % provider)
                        token_auth = True
                    else:
                        log.error("[%s] Token auth failed with response: %s" %
                                  (provider, repr(client.content)))
                        return filtering.results
                elif not logged_in and client.login(
                        definition['root_url'] + definition['login_path'],
                        eval(login_object), definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider, client.status)
                    log.debug("[%s] Failed login content: %s", provider,
                              repr(client.content))
                    return filtering.results

                if logged_in:
                    if provider == 'hd-torrents':
                        client.open(definition['root_url'] + '/torrents.php')
                        csrf_token = re.search(
                            r'name="csrfToken" value="(.*?)"', client.content)
                        url_search = url_search.replace(
                            "CSRF_TOKEN", csrf_token.group(1))

                    if provider == 'lostfilm':
                        log.info('[%s] Need open page before search', provider)
                        client.open(url_search.encode('utf-8'),
                                    post_data=payload,
                                    get_data=data)
                        search_info = re.search(r'PlayEpisode\((.*?)\)">',
                                                client.content)
                        if search_info:
                            series_details = re.search(
                                r"'(\d+)','(\d+)','(\d+)'",
                                search_info.group(1))
                            client.open(definition['root_url'] +
                                        '/v_search.php?c=%s&s=%s&e=%s' %
                                        (series_details.group(1),
                                         series_details.group(2),
                                         series_details.group(3)))
                            redirect_url = re.search(ur'url=(.*?)">',
                                                     client.content)
                            if redirect_url is not None:
                                url_search = redirect_url.group(1)
                        else:
                            return filtering.results

        log.info(">  %s search URL: %s" %
                 (definition['name'].rjust(longest), url_search))

        client.open(url_search.encode('utf-8'),
                    post_data=payload,
                    get_data=data)
        filtering.results.extend(
            generate_payload(provider, generator(provider, client), filtering,
                             verify_name, verify_size))
    return filtering.results
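
The TOKEN flow above boils down to: parse a token out of the first response, then substitute it into every later search URL. An offline sketch with stand-in data (the JSON string and URL are hypothetical):

import json

token_data = json.loads('{"token": "abc123"}')  # stand-in for client.content
url_search = 'https://example.org/api?token=TOKEN&q=ubuntu'  # hypothetical URL
if 'token' in token_data:
    url_search = url_search.replace('TOKEN', token_data['token'])
print(url_search)  # https://example.org/api?token=abc123&q=ubuntu
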
Example #3
def get_magnet_from_jackett(original_uri):
    magnet_prefix = 'magnet:'
    uri = original_uri
    while True:
        if uri.startswith(magnet_prefix):
            return uri

        response = requests.get(uri, allow_redirects=False)
        if response.is_redirect:
            uri = response.headers['Location']
        elif response.status_code == httplib.OK and response.headers.get(
                'Content-Type') == 'application/x-bittorrent':
            torrent = Torrent.from_string(response.content)
            return torrent.get_magnet(True)
        else:
            log.warning(
                "Could not get final redirect location for URI %s. Response was: %d %s",
                original_uri, response.status_code, response.reason)
            log.debug("Response for failed redirect %s is", original_uri)
            log.debug("=" * 50)
            for header, value in response.headers.iteritems():
                log.debug("%s: %s", header, value)
            log.debug("")
            log.debug("%s", base64.standard_b64encode(response.content))
            log.debug("=" * 50)
            break

    return None
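
The loop above condenses to a small helper. This sketch keeps only the magnet-detection branch (the application/x-bittorrent branch needs the Torrent helper) and caps the number of hops instead of looping forever:

import requests

def follow_to_magnet(uri, max_hops=10):
    # Follow redirects by hand until a magnet link (or a dead end) appears.
    for _ in range(max_hops):
        if uri.startswith('magnet:'):
            return uri
        response = requests.get(uri, allow_redirects=False)
        if not response.is_redirect:
            return None
        uri = response.headers['Location']
    return None
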
Example #4
    def open(self,
             url,
             language='en',
             post_data=None,
             get_data=None,
             headers=None):
        """ Opens a connection to a webpage and saves its HTML content in ``self.content``

        Args:
            url        (str): The URL to open
            language   (str): The language code for the ``Content-Language`` header
            post_data (dict): POST data for the request
            get_data  (dict): GET data for the request
            headers   (dict): Extra headers that override or remove the defaults
        """

        if get_data:
            url += '?' + urlencode(get_data)

        log.debug("Opening URL: %s" % repr(url))
        if self.session.proxies:
            log.debug("Proxies: %s" % (repr(self.session.proxies)))

        self._read_cookies(url)
        self.session.cookies = self._cookies

        log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

        # Default headers for any request. Pretend like we are the usual browser.
        req_headers = {
            'User-Agent': self.user_agent,
            'Content-Language': language,
            'Cache-Control': 'no-cache',
            'Accept-Encoding': 'deflate, compress, gzip',
            'Origin': url,
            'Referer': url
        }

        # Headers passed to open() override the defaults; an empty value removes that header.
        if headers:
            for key, value in headers.iteritems():
                if value:
                    req_headers[key] = value
                elif key.capitalize() in req_headers:
                    del req_headers[key.capitalize()]

        if self.token:
            req_headers["Authorization"] = self.token

        req = None
        if post_data:
            req = requests.Request('POST',
                                   url,
                                   data=post_data,
                                   headers=req_headers)
        else:
            req = requests.Request('GET', url, headers=req_headers)

        prepped = self.session.prepare_request(req)
        self.request_headers = prepped.headers

        try:
            self._good_spider()
            with self.session.send(prepped) as response:
                self.headers = response.headers
                self.status = response.status_code
                self.url = response.url

                self._save_cookies()

                if self.response_charset:
                    self.content = response.content.decode(
                        self.response_charset, 'ignore')
                else:
                    self.content = response.text

        except Exception as e:
            import traceback
            log.error("%s failed with %s:" % (repr(url), repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))

        log.debug("Status for %s : %s" % (repr(url), str(self.status)))

        return self.status == 200
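
Client.open() uses requests' prepare/send split so the final merged headers can be captured before the request goes out. A minimal sketch of that pattern:

import requests

session = requests.session()
req = requests.Request('GET',
                       'https://example.org/',
                       headers={'User-Agent': 'Mozilla/5.0'})
prepped = session.prepare_request(req)  # merges session cookies and headers
print(prepped.headers)                  # what Client stores in request_headers
response = session.send(prepped)
print(response.status_code)
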
Example #5
def cleanup_results(results_list):
    """ Remove duplicate results, hash results without an info_hash, and sort by seeders

    Args:
        results_list (list): Results to clean-up

    Returns:
        list: De-duplicated, hashed and sorted results
    """
    if len(results_list) == 0:
        return []

    hashes = []
    filtered_list = []
    for result in results_list:
        if not result['seeds'] and not use_allow_noseeds:
            log.debug('[%s] Skipping due to no seeds: %s' %
                      (result['provider'][16:-8], repr(result['name'])))
            continue

        if not result['uri']:
            log.debug('[%s] Skipping due to empty uri: %s' %
                      (result['provider'][16:-8], repr(result)))
            continue

        hash_ = result['info_hash'].upper()

        if not hash_:
            try:
                if result['uri'] and result['uri'].startswith('magnet'):
                    hash_ = Magnet(result['uri']).info_hash.upper()
                else:
                    hash_ = py2_encode(result['uri'].split("|")[0])
                    try:
                        hash_ = hash_.encode()
                    except:
                        pass
                    hash_ = hashlib.md5(hash_).hexdigest()
            except:
                pass

        # Make sure all are upper-case and provider-scoped
        hash_ = result['provider'] + hash_.upper()

        # try:
        #     log.debug("[%s] Hash for %s: %s" % (result['provider'][16:-8], repr(result['name']), hash_))
        # except Exception as e:
        #     import traceback
        #     log.warning("%s logging failed with: %s" % (result['provider'], repr(e)))
        #     map(log.debug, traceback.format_exc().split("\n"))

        if hash_ not in hashes:
            filtered_list.append(result)
            hashes.append(hash_)
        else:
            log.debug('[%s] Skipping due to repeating hash: %s' %
                      (result['provider'][16:-8], repr(result)))

    return sorted(filtered_list,
                  key=lambda r: (get_int(r['seeds'])),
                  reverse=True)
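
A toy illustration of the provider-scoped de-duplication and seeders sort performed above, with hypothetical results:

results = [
    {'provider': 'A', 'info_hash': 'abc', 'seeds': 5},
    {'provider': 'A', 'info_hash': 'ABC', 'seeds': 9},  # same hash, case-folded away
    {'provider': 'B', 'info_hash': 'abc', 'seeds': 1},  # other provider, kept
]
seen, unique = set(), []
for result in results:
    key = result['provider'] + result['info_hash'].upper()
    if key not in seen:
        seen.add(key)
        unique.append(result)
unique.sort(key=lambda r: r['seeds'], reverse=True)
print([r['seeds'] for r in unique])  # [5, 1]: first occurrence wins, then sorted
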
Example #6
def extract_torrents(provider, client):
    """ Main torrent extraction generator for non-API based providers

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("Extracting torrents from %s using definitions: %s" %
              (provider, repr(definition)))

    if not client.content:
        if get_setting("use_debug_parser", bool):
            log.debug("[%s] Parser debug | Page content is empty" % provider)

        return

    dom = Html().feed(client.content)

    key_search = get_search_query(definition, "key")
    row_search = get_search_query(definition, "row")
    name_search = get_search_query(definition, "name")
    torrent_search = get_search_query(definition, "torrent")
    info_hash_search = get_search_query(definition, "infohash")
    size_search = get_search_query(definition, "size")
    seeds_search = get_search_query(definition, "seeds")
    peers_search = get_search_query(definition, "peers")
    referer_search = get_search_query(definition, "referer")

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:

        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash,
                            referer):
            try:
                log.debug("[%s] Getting subpage at %s" %
                          (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" %
                          (provider, repr(e)))
                map(log.debug, traceback.format_exc().split("\n"))

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey
            headers = {}

            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''

            if referer:
                headers['Referer'] = referer

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'), headers=headers)

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' %
                          (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(
                            uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error(
                        "[%s] Subpage extraction for %s failed with: %s" %
                        (provider, repr(uri[0]), repr(e)))
                    map(log.debug, traceback.format_exc().split("\n"))

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        if get_setting("use_debug_parser", bool):
            log.debug(
                "[%s] Parser debug | Could not parse DOM from page content" %
                provider)

        return

    if get_setting("use_debug_parser", bool):
        log.debug(
            "[%s] Parser debug | Page content: %s" %
            (provider, client.content.replace('\r', '').replace('\n', '')))

    key = eval(key_search) if key_search else ""
    if key_search and get_setting("use_debug_parser", bool):
        key_str = key.__str__()
        log.debug(
            "[%s] Parser debug | Matched '%s' iteration for query '%s': %s" %
            (provider, 'key', key_search, key_str.replace('\r', '').replace(
                '\n', '')))

    items = eval(row_search)
    if get_setting("use_debug_parser", bool):
        log.debug("[%s] Parser debug | Matched %d items for '%s' query '%s'" %
                  (provider, len(items), 'row', row_search))

    for item in items:
        if get_setting("use_debug_parser", bool):
            item_str = item.__str__()
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'row', row_search, item_str.replace(
                    '\r', '').replace('\n', '')))

        if not item:
            continue

        name = eval(name_search) if name_search else ""
        torrent = eval(torrent_search) if torrent_search else ""
        size = eval(size_search) if size_search else ""
        seeds = eval(seeds_search) if seeds_search else ""
        peers = eval(peers_search) if peers_search else ""
        info_hash = eval(info_hash_search) if info_hash_search else ""
        referer = eval(referer_search) if referer_search else ""

        if 'magnet:?' in torrent:
            torrent = torrent[torrent.find('magnet:?'):]

        if get_setting("use_debug_parser", bool):
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'name', name_search, name))
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'torrent', torrent_search, torrent))
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'size', size_search, size))
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'seeds', seeds_search, seeds))
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'peers', peers_search, peers))
            if info_hash_search:
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'info_hash', info_hash_search, info_hash))
            if referer_search:
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'referer', referer_search, referer))

        # Pass client cookies with torrent if private
        if definition['private'] and not torrent.startswith('magnet'):
            user_agent = USER_AGENT

            if client.passkey:
                torrent = torrent.replace('PASSKEY', client.passkey)
            elif client.token:
                headers = {
                    'Authorization': client.token,
                    'User-Agent': user_agent
                }
                log.debug("[%s] Appending headers: %s" %
                          (provider, repr(headers)))
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" %
                          (provider, repr(torrent)))
            else:
                log.debug("[%s] Cookies: %s" %
                          (provider, repr(client.cookies())))
                parsed_url = urlparse(definition['root_url'])
                cookie_domain = '{uri.netloc}'.format(uri=parsed_url)
                cookie_domain = re.sub(r'www\d*\.', '', cookie_domain)
                cookies = []
                for cookie in client._cookies:
                    if cookie_domain in cookie.domain:
                        cookies.append(cookie)
                headers = {}
                if cookies:
                    headers = {
                        'Cookie':
                        ";".join(
                            ["%s=%s" % (c.name, c.value) for c in cookies]),
                        'User-Agent':
                        user_agent
                    }
                    if client.request_headers:
                        headers.update(client.request_headers)
                    if client.url:
                        headers['Referer'] = client.url
                        headers['Origin'] = client.url
                else:
                    headers = {'User-Agent': user_agent}

                torrent = append_headers(torrent, headers)

        if name and torrent and needs_subpage and not torrent.startswith(
                'magnet'):
            if not torrent.startswith('http'):
                torrent = definition['root_url'] + torrent.encode('utf-8')
            t = Thread(target=extract_subpage,
                       args=(q, name, torrent, size, seeds, peers, info_hash,
                             referer))
            threads.append(t)
        else:
            yield (name, info_hash, torrent, size, seeds, peers)

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        log.debug("[%s] Threads returned: %s" % (provider, repr(threads)))

        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
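
The subpage fan-out above is a plain thread-per-item pattern draining into a Queue. A self-contained sketch (Python 2 module names, matching this codebase):

from Queue import Queue        # 'queue' on Python 3
from threading import Thread

q = Queue()

def fetch(q, item):
    q.put_nowait(item * 2)     # stand-in for the subpage request

threads = [Thread(target=fetch, args=(q, i)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
while not q.empty():
    print(q.get_nowait())
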
Example #7
    def open(self, url, language='en', post_data=None, get_data=None):
        """ Opens a connection to a webpage and saves its HTML content in ``self.content``

        Args:
            url        (str): The URL to open
            language   (str): The language code for the ``Content-Language`` header
            post_data (dict): POST data for the request
            get_data  (dict): GET data for the request
        """
        if not post_data:
            post_data = {}
        if get_data:
            url += '?' + urlencode(get_data)

        log.debug("Opening URL: %s" % repr(url))
        result = False

        data = urlencode(post_data) if len(post_data) > 0 else None
        req = urllib2.Request(url, data)

        self._read_cookies(url)
        log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

        opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self._cookies))
        req.add_header('User-Agent', self.user_agent)
        req.add_header('Content-Language', language)
        req.add_header("Accept-Encoding", "gzip")
        req.add_header("Origin", url)
        req.add_header("Referer", url)

        if self.token:
            req.add_header("Authorization", self.token)

        try:
            self._good_spider()
            with closing(opener.open(req)) as response:
                self.headers = response.headers
                self._save_cookies()
                if response.headers.get("Content-Encoding", "") == "gzip":
                    import zlib
                    self.content = zlib.decompressobj(
                        16 + zlib.MAX_WBITS).decompress(response.read())
                else:
                    self.content = response.read()

                charset = response.headers.getparam('charset')

                if not charset:
                    match = re.search(
                        r"""<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>]*)""",
                        self.content)
                    if match:
                        charset = match.group(1)

                if charset and charset.lower() == 'utf-8':
                    charset = 'utf-8-sig'  # Changing to utf-8-sig to remove BOM if found on decode from utf-8

                if charset:
                    log.debug('Decoding charset from %s for %s' %
                              (charset, repr(url)))
                    self.content = self.content.decode(charset, 'replace')

                self.status = response.getcode()
            result = True

        except urllib2.HTTPError as e:
            self.status = e.code
            log.warning("Status for %s : %s" % (repr(url), str(self.status)))
            if e.code == 403 or e.code == 503:
                log.warning("CloudFlared at %s, try enabling CloudHole" % url)

        except urllib2.URLError as e:
            self.status = repr(e.reason)
            log.warning("Status for %s : %s" % (repr(url), self.status))

        except Exception as e:
            import traceback
            log.error("%s failed with %s:" % (repr(url), repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))

        log.debug("Status for %s : %s" % (repr(url), str(self.status)))

        return result
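
The gzip branch above works because wbits = 16 + MAX_WBITS tells zlib to expect a gzip header. A round-trip demo:

import zlib

co = zlib.compressobj(9, zlib.DEFLATED, 16 + zlib.MAX_WBITS)
gzipped = co.compress(b"hello world") + co.flush()
plain = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(gzipped)
assert plain == b"hello world"
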
Example #8
    def process_keywords(self, provider, text):
        """ Processes the query payload from a provider's keyword definitions

        Args:
            provider (str): Provider ID
            text     (str): Keyword placeholders from definitions, ie. {title}

        Returns:
            str: Processed query keywords
        """
        keywords = self.read_keywords(text)
        replacing = get_setting("filter_quotes", bool)

        for keyword in keywords:
            keyword = keyword.lower()
            if 'title' in keyword:
                title = self.info["title"]
                language = definitions[provider]['language']
                use_language = None
                if ':' in keyword:
                    use_language = keyword.split(':')[1].lower()
                if provider not in self.language_exceptions and \
                   (use_language or self.kodi_language) and \
                   'titles' in self.info and self.info['titles']:
                    try:
                        if self.kodi_language and self.kodi_language in self.info[
                                'titles']:
                            use_language = self.kodi_language
                        if use_language not in self.info['titles']:
                            use_language = language
                            if 'original' in self.info['titles']:
                                title = self.info['titles']['original']
                        if use_language in self.info['titles'] and self.info[
                                'titles'][use_language]:
                            title = self.info['titles'][use_language]
                            title = normalize_string(title)
                            log.info("[%s] Using translated '%s' title %s" %
                                     (provider, use_language, repr(title)))
                            log.debug(
                                "[%s] Translated titles from Elementum: %s" %
                                (provider, repr(self.info['titles'])))
                    except Exception as e:
                        import traceback
                        log.error("%s failed with: %s" % (provider, repr(e)))
                        map(log.debug, traceback.format_exc().split("\n"))
                text = text.replace('{%s}' % keyword, title)

            if 'year' in keyword:
                text = text.replace('{%s}' % keyword, str(self.info["year"]))

            if 'season' in keyword:
                if '+' in keyword:
                    keys = keyword.split('+')
                    season = str(self.info["season"] + get_int(keys[1]))
                elif ':' in keyword:
                    keys = keyword.split(':')
                    season = ('%%.%sd' % keys[1]) % self.info["season"]
                else:
                    season = '%s' % self.info["season"]
                text = text.replace('{%s}' % keyword, season)

            if 'episode' in keyword:
                if '+' in keyword:
                    keys = keyword.split('+')
                    episode = str(self.info["episode"] + get_int(keys[1]))
                elif ':' in keyword:
                    keys = keyword.split(':')
                    episode = ('%%.%sd' % keys[1]) % self.info["episode"]
                else:
                    episode = '%s' % self.info["episode"]
                text = text.replace('{%s}' % keyword, episode)

        if replacing:
            text = text.replace(u"'", '')

        return text
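
A toy demo of the zero-padding trick above, where '{season:2}' expands via ('%.2d' % value); the info values are hypothetical:

info = {'title': 'Some Show', 'season': 3, 'episode': 7}
text = '{title} S{season:2}E{episode:2}'
for keyword, width in (('season', 2), ('episode', 2)):
    text = text.replace('{%s:%d}' % (keyword, width),
                        ('%%.%dd' % width) % info[keyword])
text = text.replace('{title}', info['title'])
print(text)  # Some Show S03E07
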
Example #9
import json
import ssl
import sys
import urllib2
from time import sleep
from urlparse import urlparse
from contextlib import closing
from elementum.provider import log, get_setting
from cookielib import Cookie, LWPCookieJar
from urllib import urlencode
from utils import encode_dict

from xbmc import translatePath

try:
    ssl._create_default_https_context = ssl._create_unverified_context
except AttributeError:
    log.debug("Skipping SSL workaround due to old Python version")

USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 " \
             "(KHTML, like Gecko) Chrome/53.0.2785.21 Safari/537.36"
try:
    PATH_TEMP = translatePath("special://temp").decode(
        sys.getfilesystemencoding(), 'ignore')
except:
    PATH_TEMP = translatePath("special://temp").decode('utf-8')

if get_setting("use_opennic_dns", bool):
    import socket
    prv_getaddrinfo = socket.getaddrinfo
    dns_cache = {
        ('nnm-club.lib', 80, 0, 1): [(2, 1, 0, '', ('81.17.30.22', 80))],
Example #10
    def _save_cookies(self):
        try:
            self._cookies.save(self._cookies_filename)
        except Exception as e:
            log.debug("Saving cookies error: %s" % repr(e))
Example #11
    def __init__(self,
                 proxy_url=None,
                 request_charset='utf-8',
                 response_charset=None):
        self._counter = 0
        self._cookies_filename = ''
        self._cookies = LWPCookieJar()
        self.url = None
        self.user_agent = USER_AGENT
        self.clearance = None
        self.content = None
        self.status = None
        self.token = None
        self.passkey = None
        self.proxy_url = proxy_url
        self.request_charset = request_charset
        self.response_charset = response_charset

        self.use_antizapret = False
        self.needs_proxylock = False

        self.headers = dict()
        self.request_headers = None

        self.session = requests.session()
        self.session.verify = False

        # Enabling retrying on failed requests
        retries = Retry(total=2,
                        read=2,
                        connect=2,
                        redirect=3,
                        backoff_factor=0.1
                        # status_forcelist=[ 500, 502, 503, 504 ])
                        )

        self.session.mount('http://', HTTPAdapter(max_retries=retries))
        self.session.mount('https://', HTTPAdapter(max_retries=retries))
        # self.session = cfscrape.create_scraper()
        # self.scraper = cfscrape.create_scraper()
        # self.session = self.scraper.session()

        global dns_public_list
        global dns_opennic_list
        dns_public_list = get_setting("public_dns_list",
                                      unicode).replace(" ", "").split(",")
        dns_opennic_list = get_setting("opennic_dns_list",
                                       unicode).replace(" ", "").split(",")
        # socket.setdefaulttimeout(60)

        # Parsing proxy information
        proxy = {
            'enabled': get_setting("proxy_enabled", bool),
            'use_type': get_setting("proxy_use_type", int),
            'type': proxy_types[0],
            'host': get_setting("proxy_host", unicode),
            'port': get_setting("proxy_port", int),
            'login': get_setting("proxy_login", unicode),
            'password': get_setting("proxy_password", unicode),
        }

        try:
            proxy['type'] = proxy_types[get_setting("proxy_type", int)]
        except:
            pass

        if get_setting("use_public_dns", bool):
            connection.create_connection = patched_create_connection

        if proxy['enabled']:
            if proxy['use_type'] == 0 and proxy_url:
                log.debug("Setting proxy from Elementum: %s" % (proxy_url))
            elif proxy['use_type'] == 1:
                log.debug("Setting proxy with custom settings: %s" %
                          (repr(proxy)))

                if proxy['login'] or proxy['password']:
                    proxy_url = "{}://{}:{}@{}:{}".format(
                        proxy['type'], proxy['login'], proxy['password'],
                        proxy['host'], proxy['port'])
                else:
                    proxy_url = "{}://{}:{}".format(proxy['type'],
                                                    proxy['host'],
                                                    proxy['port'])
            elif proxy['use_type'] == 2:
                log.debug("Setting proxy to Antizapret resolver")
                self.use_antizapret = True
                proxy_url = None

            if proxy_url:
                self.session.proxies = {
                    'http': proxy_url,
                    'https': proxy_url,
                }
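
The proxy-URL assembly above, extracted into a standalone snippet with hypothetical settings:

proxy = {'type': 'socks5', 'host': '127.0.0.1', 'port': 9050,
         'login': '', 'password': ''}
if proxy['login'] or proxy['password']:
    proxy_url = "{}://{}:{}@{}:{}".format(proxy['type'], proxy['login'],
                                          proxy['password'], proxy['host'],
                                          proxy['port'])
else:
    proxy_url = "{}://{}:{}".format(proxy['type'], proxy['host'], proxy['port'])
print(proxy_url)  # socks5://127.0.0.1:9050
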
Example #12
def process(provider,
            generator,
            filtering,
            has_special,
            verify_name=True,
            verify_size=True):
    """ Method for processing provider results using its generator and Filtering class instance

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special    (bool): Whether title contains special chars
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes
    """
    log.debug("execute_process for %s with %s" % (provider, repr(generator)))
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    client = Client(info=filtering.info)
    logged_in = False

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
        language_exceptions = get_setting('language_exceptions')
        if language_exceptions.strip().lower():
            filtering.language_exceptions = re.split(r',\s?',
                                                     language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras:  %s" % (provider, filtering.extras))

    for query, extra in zip(filtering.queries, filtering.extras):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if has_special:
            # Remove quotes surrounding {title*} keywords when the title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)

        query = filtering.process_keywords(provider, query)
        extra = filtering.process_keywords(provider, extra)
        if 'charset' in definition and 'utf' not in definition[
                'charset'].lower():
            try:
                query = urllib.quote(query.encode(definition['charset']))
                extra = urllib.quote(extra.encode(definition['charset']))
            except:
                pass

        log.debug("[%s] After keywords  - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        url_search = filtering.url.replace('QUERY', query)
        if extra:
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')
        url_search = url_search.replace(' ', definition['separator'])

        if 'post_data' in definition and not filtering.post_data:
            filtering.post_data = eval(definition['post_data'])

        # Creating the payload for POST method
        payload = dict()
        for key, value in filtering.post_data.iteritems():
            if 'QUERY' in value:
                payload[key] = filtering.post_data[key].replace('QUERY', query)
            else:
                payload[key] = filtering.post_data[key]

        # Creating the payload for GET method
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in filtering.get_data.iteritems():
                if 'QUERY' in value:
                    data[key] = filtering.get_data[key].replace('QUERY', query)
                else:
                    data[key] = filtering.get_data[key]

        log.debug("-   %s query: %s" % (provider, repr(query)))
        log.debug("--  %s url_search before token: %s" %
                  (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" % (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" %
                  (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider)
            password = get_setting('%s_password' % provider)

            if 'login_object' in definition and definition['login_object']:
                logged_in = False
                try:
                    login_object = definition['login_object'].replace(
                        'USERNAME',
                        '"%s"' % username).replace('PASSWORD',
                                                   '"%s"' % password)
                except Exception as e:
                    log.error("[{0}] Building login_object failed: {1}".format(
                        provider, e))
                    return filtering.results

                # TODO generic flags in definitions for those...
                if provider == 'lostfilm':
                    client.open(definition['root_url'] +
                                '/v_search.php?c=110&s=1&e=1')
                    if u'Вход. – LostFilm.TV.' not in client.content:  # login page title means we are still logged out
                        log.info('[%s] Login successful' % provider)
                        logged_in = True

                if not logged_in and client.login(
                        definition['root_url'] + definition['login_path'],
                        eval(login_object), definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider, client.status)
                    log.debug("[%s] Failed login content: %s", provider,
                              repr(client.content))
                    notify(translation(32089).format(provider),
                           image=get_icon_path())
                    return filtering.results

                if logged_in:
                    if provider == 'lostfilm':
                        log.info('[%s] Search lostfilm serial ID...', provider)
                        url_search = fix_lf(url_search)
                        client.open(url_search.encode('utf-8'),
                                    post_data=payload,
                                    get_data=data)
                        series_details = re.search(
                            r'"mark-rate-pane" rel="(\d+),(\d+),(\d+)">',
                            client.content)
                        if series_details:
                            client.open(definition['root_url'] +
                                        '/v_search.php?a=%s%s%s' %
                                        (series_details.group(1),
                                         series_details.group(2).zfill(3),
                                         series_details.group(3).zfill(3)))
                            redirect_url = re.search(ur'url=(.*?)">',
                                                     client.content)
                            if redirect_url is not None:
                                url_search = redirect_url.group(1)
                        else:
                            log.info('[%s] ID not found in %s' %
                                     (provider, url_search))
                            return filtering.results

        log.info(">  %s search URL: %s" %
                 (definition['name'].rjust(longest), url_search))

        client.open(url_search.encode('utf-8'),
                    post_data=payload,
                    get_data=data)
        filtering.results.extend(
            generate_payload(provider, generator(provider, client), filtering,
                             verify_name, verify_size))
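
The charset branch above matters for trackers that expect non-UTF query encodings. A Python 2 demo (urllib.parse.quote on Python 3):

import urllib

query = u'название'  # hypothetical Cyrillic query
print(urllib.quote(query.encode('windows-1251')))  # %ED%E0%E7%E2%E0%ED%E8%E5
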
Example #13
    def open(self, url, language='en', post_data=None, get_data=None):
        """ Opens a connection to a webpage and saves its HTML content in ``self.content``

        Args:
            url        (str): The URL to open
            language   (str): The language code for the ``Content-Language`` header
            post_data (dict): POST data for the request
            get_data  (dict): GET data for the request
        """
        if not post_data:
            post_data = {}
        if get_data:
            url += '?' + urlencode(get_data)

        log.debug("Opening URL: %s" % repr(url))
        result = False

        data = urlencode(post_data) if len(post_data) > 0 else None
        req = urllib2.Request(url, data)

        self._read_cookies(url)
        log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

        handlers = []

        if get_setting("use_elementum_proxy", bool) and self.proxy_url:
            if self.proxy_type:
                if self.proxy_type == 2:
                    proxyHandler = urllib2.ProxyHandler({
                        'http':
                        self.proxy_url,
                        'https':
                        self.proxy_url,
                    })
                    handlers.append(proxyHandler)
                elif self.proxy_type == 1:
                    import proxy.socks as socks
                    from proxy.sockshandler import SocksiPyHandler
                    prx_info = self.proxy_url.split(':')
                    handlers.append(
                        SocksiPyHandler(socks.PROXY_TYPE_SOCKS5,
                                        prx_info[1].replace("//", ''),
                                        int(prx_info[2])))
            else:
                proxyHandler = urllib2.ProxyHandler({
                    'http': self.proxy_url,
                    'https': self.proxy_url,
                })
                handlers.append(proxyHandler)

        cookieHandler = urllib2.HTTPCookieProcessor(self._cookies)
        handlers.append(cookieHandler)

        opener = urllib2.build_opener(*handlers)
        req.add_header('User-Agent', self.user_agent)
        req.add_header('Content-Language', language)
        req.add_header("Accept-Encoding", "gzip")
        req.add_header("Origin", url)
        req.add_header("Referer", url)

        try:
            self._good_spider()
            with closing(opener.open(req)) as response:
                self.headers = response.headers
                self._save_cookies()
                if response.headers.get("Content-Encoding", "") == "gzip":
                    import zlib
                    self.content = zlib.decompressobj(
                        16 + zlib.MAX_WBITS).decompress(response.read())
                else:
                    self.content = response.read()

                charset = response.headers.getparam('charset')

                if not charset:
                    match = re.search(
                        r"""<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>]*)""",
                        self.content)
                    if match:
                        charset = match.group(1)

                if charset and charset.lower() == 'utf-8':
                    charset = 'utf-8-sig'  # Changing to utf-8-sig to remove BOM if found on decode from utf-8

                if charset:
                    log.debug('Decoding charset from %s for %s' %
                              (charset, repr(url)))
                    self.content = self.content.decode(charset, 'replace')

                self.status = response.getcode()
            result = True

        except urllib2.HTTPError as e:
            self.status = e.code
            log.warning("Status for %s : %s" % (repr(url), str(self.status)))

        except urllib2.URLError as e:
            self.status = repr(e.reason)
            log.warning("Status for %s : %s" % (repr(url), self.status))

        except Exception as e:
            import traceback
            log.error("%s failed with %s:" % (repr(url), repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))

        log.debug("Status for %s : %s" % (repr(url), str(self.status)))

        return result
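
The handler chain above composes like any urllib2 opener. A minimal standalone sketch with a hypothetical local proxy (urllib.request on Python 3):

import urllib2
import cookielib

cookies = cookielib.LWPCookieJar()
opener = urllib2.build_opener(
    urllib2.ProxyHandler({'http': 'http://127.0.0.1:8080',    # hypothetical proxy
                          'https': 'http://127.0.0.1:8080'}),
    urllib2.HTTPCookieProcessor(cookies))
# response = opener.open('http://example.org/')  # would go through the proxy
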
Example #14
def extract_from_api(provider, client):
    """ Main API parsing generator for API-based providers

    An almost clever API parser, mostly just for YTS, RARBG and T411

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result
    """
    try:
        data = json.loads(client.content)
    except:
        data = []
    log.debug("[%s] JSON response from API: %s" %
              (unquote(provider), repr(data)))

    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    api_format = definition['api_format']

    results = []
    # If 'results' is empty, treat the whole response as the array of results.
    # Useful when the API returns results without any wrapper data.
    if not api_format['results']:
        results = data
    else:
        result_keys = api_format['results'].split('.')
        log.debug("[%s] result_keys: %s" % (provider, repr(result_keys)))
        for key in result_keys:
            if key in data:
                data = data[key]
            else:
                data = []
        results = data
    log.debug("[%s] results: %s" % (provider, repr(results)))

    if 'subresults' in api_format:
        from copy import deepcopy
        for result in results:  # A little too specific to YTS but who cares...
            result['name'] = result[api_format['name']]
        subresults = []
        subresults_keys = api_format['subresults'].split('.')
        for key in subresults_keys:
            for result in results:
                if key in result:
                    for subresult in result[key]:
                        sub = deepcopy(result)
                        sub.update(subresult)
                        subresults.append(sub)
        results = subresults
        log.debug("[%s] with subresults: %s" % (provider, repr(results)))

    for result in results:
        if not result or not isinstance(result, dict):
            continue
        name = ''
        info_hash = ''
        torrent = ''
        size = ''
        seeds = ''
        peers = ''
        if 'name' in api_format:
            name = result[api_format['name']]
        if 'description' in api_format:
            if name:
                name += ' '
            name += result[api_format['description']]
        if 'torrent' in api_format:
            torrent = result[api_format['torrent']]
            if 'download_path' in definition:
                torrent = definition['base_url'] + definition[
                    'download_path'] + torrent
            if client.token:
                user_agent = USER_AGENT
                headers = {
                    'Authorization': client.token,
                    'User-Agent': user_agent
                }
                log.debug("[%s] Appending headers: %s" %
                          (provider, repr(headers)))
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" %
                          (provider, repr(torrent)))
        if 'info_hash' in api_format:
            info_hash = result[api_format['info_hash']]
        if 'quality' in api_format:  # Again quite specific to YTS...
            name = "%s - %s" % (name, result[api_format['quality']])
        if 'size' in api_format:
            size = result[api_format['size']]
            if isinstance(size, (long, int)):
                size = sizeof(size)
            elif isinstance(size, basestring) and size.isdigit():
                size = sizeof(int(size))
        if 'seeds' in api_format:
            seeds = result[api_format['seeds']]
            if isinstance(seeds, basestring) and seeds.isdigit():
                seeds = int(seeds)
        if 'peers' in api_format:
            peers = result[api_format['peers']]
            if isinstance(peers, basestring) and peers.isdigit():
                peers = int(peers)
        yield (name, info_hash, torrent, size, seeds, peers)
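A minimal consumption sketch for the generator above (hypothetical: assumes 'yts' is a configured API provider and the client has already fetched the API response):

def consume_api_results(client):
    # Each yielded tuple follows the order documented in the docstring
    for name, info_hash, torrent, size, seeds, peers in extract_from_api('yts', client):
        log.debug("%s | size=%s seeds=%s peers=%s" % (name, size, seeds, peers))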
Example #15
def extract_torrents(provider, client):
    """ Main torrent extraction generator for non-API based providers

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("Extracting torrents from %s using definitions: %s" %
              (provider, repr(definition)))

    if not client.content:
        return  # plain return; raising StopIteration inside a generator is a RuntimeError on Python 3.7+ (PEP 479)

    dom = Html().feed(client.content)

    row_search = "dom." + definition['parser']['row']
    name_search = definition['parser']['name']
    torrent_search = definition['parser']['torrent']
    info_hash_search = definition['parser']['infohash']
    size_search = definition['parser']['size']
    seeds_search = definition['parser']['seeds']
    peers_search = definition['parser']['peers']

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:

        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash):
            try:
                log.debug("[%s] Getting subpage at %s" %
                          (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" %
                          (provider, repr(e)))
                map(log.debug, traceback.format_exc().split("\n"))

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey

            if get_setting("use_cloudhole", bool):
                subclient.clearance = get_setting('clearance')
                subclient.user_agent = get_setting('user_agent')

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'))

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' %
                          (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(
                            uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error(
                        "[%s] Subpage extraction for %s failed with: %s" %
                        (provider, repr(uri[0]), repr(e)))
                    map(log.debug, traceback.format_exc().split("\n"))

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        return  # see PEP 479 note above

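    # Each parser field from the definitions is a Python expression that is
    # eval()'d against the current row element ('item') below.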
    for item in eval(row_search):
        if not item:
            continue
        name = eval(name_search)
        torrent = eval(torrent_search) if torrent_search else ""
        size = eval(size_search) if size_search else ""
        seeds = eval(seeds_search) if seeds_search else ""
        peers = eval(peers_search) if peers_search else ""
        info_hash = eval(info_hash_search) if info_hash_search else ""

        # Pass client cookies with torrent if private
        if (definition['private'] or get_setting(
                "use_cloudhole", bool)) and not torrent.startswith('magnet'):
            user_agent = USER_AGENT
            if get_setting("use_cloudhole", bool):
                user_agent = get_setting("user_agent")

            if client.passkey:
                torrent = torrent.replace('PASSKEY', client.passkey)
            elif client.token:
                headers = {
                    'Authorization': client.token,
                    'User-Agent': user_agent
                }
                log.debug("[%s] Appending headers: %s" %
                          (provider, repr(headers)))
                torrent = append_headers(torrent, headers)
                log.debug("[%s] Torrent with headers: %s" %
                          (provider, repr(torrent)))
            else:
                log.debug("[%s] Cookies: %s" %
                          (provider, repr(client.cookies())))
                parsed_url = urlparse(definition['root_url'])
                cookie_domain = '{uri.netloc}'.format(uri=parsed_url).replace(
                    'www.', '')
                cookies = []
                log.debug("[%s] cookie_domain: %s" % (provider, cookie_domain))
                for cookie in client._cookies:
                    log.debug(
                        "[%s] cookie for domain: %s (%s=%s)" %
                        (provider, cookie.domain, cookie.name, cookie.value))
                    if cookie_domain in cookie.domain:
                        cookies.append(cookie)
                if cookies:
                    headers = {
                        'Cookie':
                        ";".join(
                            ["%s=%s" % (c.name, c.value) for c in cookies]),
                        'User-Agent':
                        user_agent
                    }
                    log.debug("[%s] Appending headers: %s" %
                              (provider, repr(headers)))
                    torrent = append_headers(torrent, headers)
                    log.debug("[%s] Torrent with headers: %s" %
                              (provider, repr(torrent)))

        if name and torrent and needs_subpage:
            if not torrent.startswith('http'):
                torrent = definition['root_url'] + torrent.encode('utf-8')
            t = Thread(target=extract_subpage,
                       args=(q, name, torrent, size, seeds, peers, info_hash))
            threads.append(t)
        else:
            yield (name, info_hash, torrent, size, seeds, peers)

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        log.debug("[%s] Threads returned: %s" % (provider, repr(threads)))

        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
Example #16
def extract_from_page(provider, content):
    """ Sub-page extraction method

    Args:
        provider (str): Provider ID
        content  (str): Page content from Client instance

    Returns:
        str: Torrent or magnet link extracted from sub-page
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    try:
        matches = re.findall(r'magnet:\?[^\'"\s<>\[\]]+', content)
        if matches:
            result = matches[0]
            log.debug('[%s] Matched magnet link: %s' %
                      (provider, repr(result)))
            return result

        matches = re.findall(r'http(.*?)\.torrent["\']', content)
        if matches:
            result = 'http' + matches[0] + '.torrent'
            result = result.replace('torcache.net', 'itorrents.org')
            log.debug('[%s] Matched torrent link: %s' %
                      (provider, repr(result)))
            return result

        matches = re.findall(r'/download\?token=[A-Za-z0-9%]+', content)
        if matches:
            result = definition['root_url'] + matches[0]
            log.debug('[%s] Matched download link with token: %s' %
                      (provider, repr(result)))
            return result

        matches = re.findall('"(/download/[A-Za-z0-9]+)"', content)
        if matches:
            result = definition['root_url'] + matches[0]
            log.debug('[%s] Matched download link: %s' %
                      (provider, repr(result)))
            return result

        matches = re.findall(r'/torrents/download/\?id=[a-z0-9-_.]+',
                             content)  # t411
        if matches:
            result = definition['root_url'] + matches[0]
            log.debug('[%s] Matched download link with an ID: %s' %
                      (provider, repr(result)))
            return result

        matches = re.findall(r': ([A-Fa-f0-9]{40})', content)
        if matches:
            result = "magnet:?xt=urn:btih:" + matches[0]
            log.debug('[%s] Matched magnet info_hash search: %s' %
                      (provider, repr(result)))
            return result

        matches = re.findall(r'/download\.php\?id=([A-Za-z0-9]{40})\W', content)
        if matches:
            result = "magnet:?xt=urn:btih:" + matches[0]
            log.debug('[%s] Matched download link: %s' %
                      (provider, repr(result)))
            return result

        matches = re.findall(r'(/download\.php\?id=[A-Za-z0-9]+[^\s\'"]*)',
                             content)
        if matches:
            result = definition['root_url'] + matches[0]
            log.debug('[%s] Matched download link: %s' %
                      (provider, repr(result)))
            return result
    except:
        pass

    return None
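A toy illustration of the matching precedence above; magnet links are tried before any provider-relative pattern, so a synthetic page with only a magnet link resolves the same way for any configured provider ID (hypothetical 'yts' here):

sample = '<a href="magnet:?xt=urn:btih:0123456789abcdef0123456789abcdef01234567">get</a>'
assert extract_from_page('yts', sample).startswith('magnet:?')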
def generate_payload(provider,
                     generator,
                     filtering,
                     verify_name=True,
                     verify_size=True):
    """ Payload formatter to format results the way Elementum expects them

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes

    Returns:
        list: Formatted results
    """
    filtering.information(provider)
    results = []

    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    for name, info_hash, uri, size, seeds, peers in generator:
        size = clean_size(size)
        # uri, info_hash = clean_magnet(uri, info_hash)
        v_name = name if verify_name else filtering.title
        v_size = size if verify_size else None
        if filtering.verify(provider, v_name, v_size):
            sort_seeds = get_int(seeds)
            sort_resolution = filtering.determine_resolution(v_name)[1] + 1
            sort_balance = (sort_seeds + 1) * 3 * sort_resolution

            results.append({
                "name": name,
                "uri": uri,
                "info_hash": info_hash,
                "size": size,
                "seeds": sort_seeds,
                "peers": get_int(peers),
                "language": definition["language"] if 'language' in definition else 'en',
                "provider": '[COLOR %s]%s[/COLOR]' % (definition['color'], definition['name']),
                "icon": os.path.join(ADDON_PATH, 'burst', 'providers', 'icons',
                                     '%s.png' % provider),
                "sort_resolution": sort_resolution,
                "sort_balance": sort_balance
            })
        else:
            log.debug(filtering.reason)

    log.debug('[%s] >>>>>> %s would send %d torrents to Elementum <<<<<<<' %
              (provider, provider, len(results)))
    results = cleanup_results(results)
    log.debug(
        '[%s] >>>>>> %s would send %d torrents to Elementum after cleanup <<<<<<<'
        % (provider, provider, len(results)))

    return results
Example #18
    def process_keywords(self, provider, text, definition):
        """ Processes the query payload from a provider's keyword definitions

        Args:
            provider   (str): Provider ID
            text       (str): Keyword placeholders from definitions, i.e. {title}
            definition (dict): Provider definition with language and character-stripping rules

        Returns:
            str: Processed query keywords
        """
        keywords = self.read_keywords(text)
        replacing = use_filter_quotes

        for keyword in keywords:
            keyword = keyword.lower()
            if 'title' in keyword:
                title = self.info["title"]
                language = definitions[provider]['language']
                use_language = None
                if ':' in keyword:
                    use_language = keyword.split(':')[1].lower()
                if provider not in self.language_exceptions and \
                   (use_language or self.kodi_language) and \
                   'titles' in self.info and self.info['titles']:
                    try:
                        if not use_language and self.kodi_language and self.kodi_language in self.info[
                                'titles']:
                            use_language = self.kodi_language
                        if not use_language and language and language in self.info[
                                'titles']:
                            use_language = language

                        if use_language in self.info['titles'] and self.info[
                                'titles'][use_language]:
                            title = self.info['titles'][use_language]
                            title = normalize_string(title)
                            # For non-original titles whose language is not one of the tracker's base languages, or is English, strip accents from the title.
                            if use_language != 'original' and (
                                    self.convert_language(use_language)
                                    not in self.provider_languages
                                    or self.convert_language(use_language)
                                    == 'en'):
                                title = remove_accents(title)
                            # Remove any characters listed in the definition's 'remove_special_characters' field.
                            if 'remove_special_characters' in definition and definition[
                                    'remove_special_characters']:
                                for char in definition[
                                        'remove_special_characters']:
                                    title = title.replace(char, "")
                                title = " ".join(title.split())

                            log.info("[%s] Using translated '%s' title %s" %
                                     (provider, use_language, repr(title)))
                        else:
                            log.debug(
                                "[%s] Skipping the query '%s' due to missing '%s' language title"
                                % (provider, text, use_language))
                            # If title for specific language cannot be read - cancel this query
                            return ""
                    except Exception as e:
                        import traceback
                        log.error("%s failed with: %s" % (provider, repr(e)))
                        map(log.debug, traceback.format_exc().split("\n"))
                text = text.replace('{%s}' % keyword, title)

            if 'year' in keyword:
                text = text.replace('{%s}' % keyword, str(self.info["year"]))

            if 'show_tmdb_id' in keyword:
                if 'show_tmdb_id' not in self.info:
                    self.info['show_tmdb_id'] = ''

                text = text.replace('{%s}' % keyword,
                                    str(self.info["show_tmdb_id"]))

            if 'tmdb_id' in keyword:
                if 'tmdb_id' not in self.info:
                    self.info['tmdb_id'] = ''

                text = text.replace('{%s}' % keyword,
                                    str(self.info["tmdb_id"]))

            if 'tvdb_id' in keyword:
                if 'tvdb_id' not in self.info:
                    self.info['tvdb_id'] = ''

                text = text.replace('{%s}' % keyword,
                                    str(self.info["tvdb_id"]))

            if 'imdb_id' in keyword:
                if 'imdb_id' not in self.info:
                    self.info['imdb_id'] = ''

                text = text.replace('{%s}' % keyword,
                                    str(self.info["imdb_id"]))

            if 'season' in keyword:
                if '+' in keyword:
                    keys = keyword.split('+')
                    season = str(self.info["season"] + get_int(keys[1]))
                elif ':' in keyword:
                    keys = keyword.split(':')
                    season = ('%%.%sd' % keys[1]) % self.info["season"]
                else:
                    season = '%s' % self.info["season"]
                text = text.replace('{%s}' % keyword, season)

            if 'episode' in keyword and 'absolute' not in keyword:
                if '+' in keyword:
                    keys = keyword.split('+')
                    episode = str(self.info["episode"] + get_int(keys[1]))
                elif ':' in keyword:
                    keys = keyword.split(':')
                    episode = ('%%.%sd' % keys[1]) % self.info["episode"]
                else:
                    episode = '%s' % self.info["episode"]
                text = text.replace('{%s}' % keyword, episode)

            if 'absolute_episode' in keyword:
                if 'absolute_number' not in self.info or not self.info[
                        'absolute_number']:
                    log.debug(
                        "Skipping query '%s' due to missing absolute_number" %
                        text)
                    return ""
                if '+' in keyword:
                    keys = keyword.split('+')
                    episode = str(self.info["absolute_number"] +
                                  get_int(keys[1]))
                elif ':' in keyword:
                    keys = keyword.split(':')
                    episode = ('%%.%sd' %
                               keys[1]) % self.info["absolute_number"]
                else:
                    episode = '%s' % self.info["absolute_number"]
                text = text.replace('{%s}' % keyword, episode)

        if replacing:
            text = text.replace(u"'", '')

        return text
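The '{season:2}'-style modifiers above build a zero-padding format string at runtime; a standalone illustration of that mechanism (helper name is illustrative, not part of the Burst API):

def _pad(value, width):
    # '%%.%sd' % 2 -> '%.2d', and '%.2d' % 5 -> '05'
    return ('%%.%sd' % width) % value

assert _pad(5, 2) == '05'
assert _pad(5, 3) == '005'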
def process(provider,
            generator,
            filtering,
            has_special,
            verify_name=True,
            verify_size=True,
            skip_auth=False,
            start_time=None,
            timeout=None):
    """ Method for processing provider results using its generator and Filtering class instance

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special    (bool): Whether title contains special chars
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes
        skip_auth      (bool): Whether to skip the login step
        start_time    (float): Overall task start time, used together with ``timeout``
        timeout         (int): Overall task timeout in seconds
    """
    log.debug("[%s] execute_process for %s with %s" %
              (provider, provider, repr(generator)))
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    client = Client(info=filtering.info,
                    request_charset=definition['charset'],
                    response_charset=definition['response_charset'],
                    is_api='is_api' in definition and definition['is_api'])
    token = None
    logged_in = False
    token_auth = False
    used_queries = set()

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
        language_exceptions = get_setting('language_exceptions')
        if language_exceptions.strip().lower():
            filtering.language_exceptions = re.split(r',\s?',
                                                     language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras:  %s" % (provider, filtering.extras))

    last_priority = 1
    for query, extra, priority in zip(filtering.queries, filtering.extras,
                                      filtering.queries_priorities):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if has_special:
            # Remove quotes surrounding {title*} keywords when the title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)

        query = filtering.process_keywords(provider, query, definition)
        extra = filtering.process_keywords(provider, extra, definition)

        if not query:
            continue
        elif query + extra in used_queries:
            # Make sure we don't run same query for this provider
            continue
        elif priority > last_priority and filtering.results:
            # Skip fallbacks if there are results
            log.debug("[%s] Skip fallback as there are already results" %
                      provider)
            continue
        elif start_time and timeout and time.time() - start_time + 3 >= timeout:
            # Stop doing requests if there is 3 seconds left for the overall task
            continue

        used_queries.add(query + extra)
        last_priority = priority

        try:
            if 'charset' in definition and definition[
                    'charset'] and 'utf' not in definition['charset'].lower():
                query = quote(query.encode(definition['charset']))
                extra = quote(extra.encode(definition['charset']))
            else:
                query = quote(py2_encode(query))
                extra = quote(py2_encode(extra))
        except Exception as e:
            log.debug("[%s] Could not quote the query (%s): %s" %
                      (provider, query, e))

        log.debug("[%s] After keywords  - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        url_search = filtering.url.replace('QUERY', query)
        if extra:
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')

        url_search = url_search.replace(' ', definition['separator'])
        if definition['separator'] != '%20':
            url_search = url_search.replace('%20', definition['separator'])

        # MagnetDL fix...
        url_search = url_search.replace('FIRSTLETTER', query[:1])

        # Creating the payload for POST method
        if 'post_data' in definition and not filtering.post_data:
            filtering.post_data = eval(definition['post_data'])

        payload = dict()
        for key, value in iteritems(filtering.post_data):
            if 'QUERY' in value:
                payload[key] = filtering.post_data[key].replace('QUERY', query)
            else:
                payload[key] = filtering.post_data[key]
            payload[key] = unquote(payload[key])

        # Creating the payload for GET method
        headers = None
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in iteritems(filtering.get_data):
                if 'QUERY' in value:
                    data[key] = filtering.get_data[key].replace('QUERY', query)
                else:
                    data[key] = filtering.get_data[key]

        log.debug("-   %s query: %s" % (provider, repr(query)))
        log.debug("--  %s url_search before token: %s" %
                  (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" % (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" %
                  (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        if 'initial_url' in definition and definition['initial_url']:
            url = definition['initial_url']
            if not url.startswith('http'):
                url = definition['root_url'] + url
            client.open(url)

        if token:
            log.info('[%s] Reusing existing token' % provider)
            url_search = url_search.replace('TOKEN', token)
        elif 'token' in definition:
            token_url = definition['base_url'] + definition['token']
            log.debug("[%s] Getting token for %s at %s" %
                      (provider, provider, repr(token_url)))
            client.open(py2_encode(token_url))
            try:
                token_data = json.loads(client.content)
            except:
                log.error('%s: Failed to get token for %s' %
                          (provider, repr(url_search)))
                return filtering.results
            log.debug("[%s] Token response for %s: %s" %
                      (provider, provider, repr(token_data)))
            if 'token' in token_data:
                token = token_data['token']
                log.debug("[%s] Got token for %s: %s" %
                          (provider, provider, repr(token)))
                url_search = url_search.replace('TOKEN', token)
            else:
                log.warning('%s: Unable to get token for %s' %
                            (provider, repr(url_search)))

        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif token_auth:
            log.info("[%s] Reusing previous token authorization" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider, unicode)
            password = get_setting('%s_password' % provider, unicode)
            passkey = get_setting('%s_passkey' % provider, unicode)
            if not username and not password and not passkey:
                for addon_name in ('script.magnetic.%s' % provider,
                                   'script.magnetic.%s-mc' % provider):
                    for setting in ('username', 'password'):
                        try:
                            value = xbmcaddon.Addon(addon_name).getSetting(
                                setting)
                            set_setting('%s_%s' % (provider, setting), value)
                            if setting == 'username':
                                username = value
                            if setting == 'password':
                                password = value
                        except:
                            pass

            if username:
                client.username = username
                url_search = url_search.replace('USERNAME', username)

            if passkey:
                logged_in = True
                client.passkey = passkey
                url_search = url_search.replace('PASSKEY', passkey)

            elif 'login_object' in definition and definition['login_object']:
                login_object = None
                login_headers = None
                logged_in = skip_auth

                try:
                    login_object = definition['login_object'].replace(
                        'USERNAME',
                        'u"%s"' % username).replace('PASSWORD',
                                                    'u"%s"' % password)
                except Exception as e:
                    log.error("Could not make login object for %s: %s" %
                              (provider, e))
                try:
                    if 'login_headers' in definition and definition[
                            'login_headers']:
                        login_headers = eval(definition['login_headers'])
                except Exception as e:
                    log.error("Could not make login headers for %s: %s" %
                              (provider, e))

                # TODO generic flags in definitions for those...
                if 'csrf_token' in definition and definition['csrf_token']:
                    client.open(definition['root_url'] +
                                definition['login_path'])
                    if client.content:
                        csrf_token = re.search(
                            r'name=\"_?csrf_token\" value=\"(.*?)\"',
                            client.content)
                        if csrf_token:
                            login_object = login_object.replace(
                                'CSRF_TOKEN', '"%s"' % csrf_token.group(1))
                        else:
                            logged_in = True

                if 'token_auth' in definition:
                    # log.debug("[%s] logging in with: %s" % (provider, login_object))
                    if client.open(definition['root_url'] +
                                   definition['token_auth'],
                                   post_data=eval(login_object)):
                        try:
                            token_data = json.loads(client.content)
                        except:
                            log.error('%s: Failed to get token from %s' %
                                      (provider, definition['token_auth']))
                            return filtering.results
                        log.debug("[%s] Token response for %s: %s" %
                                  (provider, provider, repr(token_data)))
                        if 'token' in token_data:
                            client.token = token_data['token']
                            log.debug("[%s] Auth token for %s: %s" %
                                      (provider, provider, repr(client.token)))
                        else:
                            log.error('[%s] Unable to get auth token for %s' %
                                      (provider, repr(url_search)))
                            return filtering.results
                        log.info('[%s] Token auth successful' % provider)
                        token_auth = True
                    else:
                        log.error("[%s] Token auth failed with response: %s" %
                                  (provider, repr(client.content)))
                        return filtering.results
                elif not logged_in and client.login(
                        definition['root_url'], definition['login_path'],
                        eval(login_object), login_headers,
                        definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider, client.status)
                    log.debug("[%s] Failed login content: %s", provider,
                              repr(client.content))
                    return filtering.results

                if logged_in:
                    if provider == 'hd-torrents':
                        client.open(definition['root_url'] + '/torrents.php')
                        csrf_token = re.search(
                            r'name="csrfToken" value="(.*?)"', client.content)
                        url_search = url_search.replace(
                            "CSRF_TOKEN", csrf_token.group(1))
                    client.save_cookies()

        log.info("[%s] >  %s search URL: %s" %
                 (provider, definition['name'].rjust(longest), url_search))

        if 'headers' in definition and definition['headers']:
            headers = eval(definition['headers'])
            log.info("[%s] >  %s headers: %s" %
                     (provider, definition['name'].rjust(longest), headers))

        client.open(py2_encode(url_search),
                    post_data=payload,
                    get_data=data,
                    headers=headers)
        filtering.results.extend(
            generate_payload(provider, generator(provider, client), filtering,
                             verify_name, verify_size))
    return filtering.results
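A hedged call sketch for the pipeline above (values illustrative; in the addon this is normally driven by run_provider, and the Filtering setup varies by search method):

# filtering = Filtering()
# filtering.use_movie('yts', payload)   # assumed per-provider setup call
# results = process('yts', extract_from_api, filtering,
#                   has_special=False, start_time=time.time(), timeout=28)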
available_providers = 0
request_time = time.time()
auto_timeout = get_setting("auto_timeout", bool)
timeout = get_setting("timeout", int)
special_chars = "()\"':.[]<>/\\?"

if auto_timeout:
    elementum_addon = xbmcaddon.Addon(id='plugin.video.elementum')
    if elementum_addon:
        if elementum_addon.getSetting(
                'custom_provider_timeout_enabled') == "true":
            timeout = int(
                elementum_addon.getSetting('custom_provider_timeout')) - 2
        else:
            timeout = 28
        log.debug("Using timeout from Elementum: %d seconds" % (timeout))


Example #21
    def open(self,
             url,
             language='en',
             post_data=None,
             get_data=None,
             headers=None):
        """ Opens a connection to a webpage and saves its HTML content in ``self.content``

        Args:
            url        (str): The URL to open
            language   (str): The language code for the ``Content-Language`` header
            post_data (dict): POST data for the request
            get_data  (dict): GET data for the request
        """

        if get_data:
            url += '?' + urlencode(get_data)

        log.debug("Opening URL: %s" % repr(url))
        if self.session.proxies:
            log.debug("Proxies: %s" % (repr(self.session.proxies)))

        self._read_cookies(url)
        self.session.cookies = self._cookies

        # log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

        # Default headers for any request. Pretend like we are the usual browser.
        req_headers = {
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Language':
            'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7,uk;q=0.6,pl;q=0.5',
            'Cache-Control': 'max-age=0',
            'Content-Language': language,
            'Origin': url,
            'Referer': url,
            'User-Agent': self.user_agent
        }

        # Remove referer for API providers
        if self.is_api:
            del req_headers['Referer']

        # If headers passed to open() call - we overwrite headers.
        if headers:
            for key, value in iteritems(headers):
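                # ':path' is treated as a pseudo-header: its value is replaced
                # with the path component of the requested URL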
                if key == ':path':
                    u = urlparse(url)
                    value = u.path
                if value:
                    req_headers[key] = value
                elif key.capitalize() in req_headers:
                    del req_headers[key.capitalize()]

        if self.token:
            req_headers["Authorization"] = self.token

        req = None
        if post_data:
            req = requests.Request('POST',
                                   url,
                                   data=post_data,
                                   headers=req_headers)
        else:
            req = requests.Request('GET', url, headers=req_headers)

        prepped = self.session.prepare_request(req)
        self.request_headers = prepped.headers

        try:
            self._good_spider()
            with self.session.send(prepped) as response:
                self.headers = response.headers
                self.status = response.status_code
                self.url = response.url

                if self.response_charset:
                    self.content = response.content.decode(
                        self.response_charset, 'ignore')
                else:
                    self.content = response.text

        except requests.exceptions.InvalidSchema as e:
            # If link points to a magnet: then it can be used as a content
            matches = re.findall(
                'No connection adapters were found for \'(.*?)\'', str(e))
            if matches:
                self.content = matches[0]
                return True

            import traceback
            log.error("%s failed with %s:" % (repr(url), repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))
        except Exception as e:
            import traceback
            log.error("%s failed with %s:" % (repr(url), repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))

        log.debug("Status for %s : %s" % (repr(url), str(self.status)))

        return self.status == 200
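The InvalidSchema branch above relies on requests rejecting non-HTTP schemes with the offending URI embedded in the error message; a standalone check of that behavior:

import requests

try:
    requests.get('magnet:?xt=urn:btih:0000000000000000000000000000000000000000')
except requests.exceptions.InvalidSchema as e:
    assert "magnet:" in str(e)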
def search(payload, method="general"):
    """ Main search entrypoint

    Args:
        payload (dict): Search payload from Elementum.
        method   (str): Type of search, can be ``general``, ``movie``, ``show``, ``season`` or ``anime``

    Returns:
        list: All filtered results in the format Elementum expects
    """
    log.debug("Searching with payload (%s): %s" % (method, repr(payload)))

    if method == 'general':
        if 'query' in payload:
            payload['title'] = payload['query']
            payload['titles'] = {'source': payload['query']}
        else:
            payload = {
                'title': payload,
                'titles': {
                    'source': payload
                },
            }

    payload['titles'] = dict(
        (k.lower(), v) for k, v in iteritems(payload['titles']))

    # If titles[] exists in payload and there are special chars in titles[source]
    #   then we set a flag to possibly modify the search query
    payload['has_special'] = 'titles' in payload and \
                             bool(payload['titles']) and \
                             'source' in payload['titles'] and \
                             any(c in payload['titles']['source'] for c in special_chars)
    if payload['has_special']:
        log.debug(
            "Query title contains special chars, so removing any quotes in the search query"
        )

    if 'proxy_url' not in payload:
        payload['proxy_url'] = ''
    if 'internal_proxy_url' not in payload:
        payload['internal_proxy_url'] = ''
    if 'elementum_url' not in payload:
        payload['elementum_url'] = ''

    global request_time
    global provider_names
    global provider_results
    global available_providers

    provider_names = []
    provider_results = []
    available_providers = 0
    request_time = time.time()

    providers = get_enabled_providers(method)

    if len(providers) == 0:
        notify(translation(32060), image=get_icon_path())
        log.error("No providers enabled")
        return []

    log.info(
        "Burstin' with %s" %
        ", ".join([definitions[provider]['name'] for provider in providers]))

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if not kodi_language:
            log.warning("Kodi returned empty language code...")
        elif 'titles' not in payload or not payload['titles']:
            log.info("No translations available...")
        elif payload['titles'] and kodi_language not in payload['titles']:
            log.info("No '%s' translation available..." % kodi_language)

    p_dialog = xbmcgui.DialogProgressBG()
    p_dialog.create('Elementum [COLOR FFFF6B00]Burst[/COLOR]',
                    translation(32061))
    for provider in providers:
        available_providers += 1
        provider_names.append(definitions[provider]['name'])
        task = Thread(target=run_provider, args=(provider, payload, method))
        task.start()

    providers_time = time.time()
    total = float(available_providers)

    # Exit when all providers have returned or the timeout is reached; poll every 250ms
    while time.time() - providers_time < timeout and available_providers > 0:
        timer = time.time() - providers_time
        log.debug("Timer: %ds / %ds" % (timer, timeout))
        if timer > timeout:
            break
        message = translation(32062) % available_providers \
            if available_providers > 1 else translation(32063)
        p_dialog.update(int((total - available_providers) / total * 100),
                        message=message)
        time.sleep(0.25)

    p_dialog.close()
    del p_dialog

    if available_providers > 0:
        message = u', '.join(provider_names)
        message = message + translation(32064)
        log.warning(message.encode('utf-8'))
        notify(message, ADDON_ICON)

    log.debug("all provider_results: %s" % repr(provider_results))

    filtered_results = apply_filters(provider_results)

    log.debug("all filtered_results: %s" % repr(filtered_results))

    log.info("Providers returned %d results in %s seconds" %
             (len(filtered_results), round(time.time() - request_time, 2)))

    return filtered_results
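A minimal payload sketch for the entrypoint above (field names as used in this function; values are illustrative):

payload = {
    'title': 'Big Buck Bunny',
    'titles': {'source': 'Big Buck Bunny', 'en': 'Big Buck Bunny'},
    'year': 2008,
}
# results = search(payload, method='movie')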
Example #23
    def _parse_item(self, item):
        result = {
            "name": None,
            "provider": "Unknown",
            "size": "Unknown",
            "uri": None,
            "seeds": "0",
            "peers": "0",
            "info_hash": "",
            "language": None,

            # todo would be nice to assign correct icons but that can be very time consuming due to the number
            #  of indexers in Jackett
            "icon": get_icon_path(),
            "_size_bytes": -1
        }

        for ref in item:
            tag = ref.tag
            attrib = ref.attrib
            if tag == "{" + self._torznab_ns + "}attr":
                val = attrib["value"]
                if isinstance(val, str):
                    val = val.decode("utf-8")
                if "name" in attrib and "value" in attrib and attrib["name"] and val and \
                        attrib["name"] in self._torznab_elementum_mappings["torznab_attrs"]:
                    json = self._torznab_elementum_mappings["torznab_attrs"][
                        attrib["name"]]
                    result[json] = val
                continue

            if ref.tag in self._torznab_elementum_mappings[
                    "tags"] and ref.text is not None:
                json = self._torznab_elementum_mappings["tags"][ref.tag]
                val = ref.text.strip()

                if isinstance(val, str):
                    val = val.decode("utf-8")

                result[json] = val

        # if we didn't get a magnet uri, attempt to resolve the magnet uri.
        # todo for some reason Elementum cannot resolve the link that gets proxied through Jackett.
        #  So we will resolve it manually for Elementum for now.
        #  In actuality, this should be fixed within Elementum
        if result["uri"] is None:
            link = item.find('link')
            jackett_uri = ""
            if link is not None:
                jackett_uri = link.text
            else:
                enclosure = item.find('enclosure')
                if enclosure is not None:
                    jackett_uri = enclosure.attrib['url']

            if jackett_uri != "":
                result["uri"] = get_magnet_from_jackett(jackett_uri)

        if result["name"] is None or result["uri"] is None:
            log.warning("Could not parse item; name = %s; uri = %s",
                        result["name"], result["uri"])
            log.debug("Failed item is: %s",
                      ElementTree.tostring(item, encoding='utf8'))
            return None

        # result["name"] = result["name"].decode("utf-8") # might be needed for non-english items
        result["seeds"] = int(result["seeds"])
        result["peers"] = int(result["peers"])
        resolution = get_resolution(result["name"])
        result["resolution"] = utils.resolutions.keys()[::-1].index(resolution)
        result["_resolution"] = resolution
        result["release_type"] = get_release_type(result["name"])

        if result["size"] != "Unknown":
            result["_size_bytes"] = int(result["size"])
            result["size"] = human_size(result["_size_bytes"])

        return result
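For reference, a synthetic torznab <item> of the shape this parser consumes; attribute names follow the common torznab convention, while the exact accepted set comes from self._torznab_elementum_mappings (not shown here):

# <item>
#   <title>Some.Release.1080p.WEB</title>
#   <size>734003200</size>
#   <link>http://localhost:9117/dl/example</link>
#   <torznab:attr name="seeders" value="12"/>
#   <torznab:attr name="peers" value="3"/>
#   <torznab:attr name="infohash" value="0123456789abcdef0123456789abcdef01234567"/>
# </item>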