def url_get(url, params=None, headers=None, post=None):

    if params:
        import urllib
        url = "%s?%s" % (url, urllib.urlencode(params))

    if post:
        import urllib
        post = urllib.urlencode(post)

    req = urllib2.Request(url, post)
    req.add_header("User-Agent", USER_AGENT)

    for k, v in (headers or {}).items():
        req.add_header(k, v)

    try:
        with closing(urllib2.urlopen(req)) as response:
            data = response.read()
            if response.headers.get("Content-Encoding", "") == "gzip":
                import zlib
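                # wbits of 16 + MAX_WBITS tells zlib to expect a gzip wrapper (header and CRC trailer)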
                return zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(data)
            return data
    except urllib2.HTTPError as e:
        # urllib2.HTTPError carries .code and .msg, not .errno/.strerror
        log.error("HTTP Error(%s): %s" % (e.code, e.msg))
        return None
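
# Usage sketch for url_get (hypothetical URL and params; assumes the module
# defines USER_AGENT and log, which the function above relies on):
#
#   data = url_get("https://example.com/api",
#                  params={"q": "ubuntu"},
#                  headers={"Accept": "application/json"})
#   if data is not None:
#       print(len(data))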
Example #2
    def _do_search_request(self, request_params):
        censored_params = request_params.copy()
        censored_key = censored_params['apikey']
        censored_params['apikey'] = "{}{}{}".format(censored_key[0:2],
                                                    "*" * 26,
                                                    censored_key[-4:])
        log.debug('Making a request to Jackett using params %s',
                  repr(censored_params))

        search_resp = self._session.get("all/results/torznab",
                                        params=request_params)
        if search_resp.status_code != httplib.OK:
            notify(translation(32700).format(search_resp.reason),
                   image=get_icon_path())
            log.error("Jackett returned %s", search_resp.reason)
            return []

        err = self.get_error(search_resp.content)
        if err is not None:
            notify(translation(32700).format(err["description"]),
                   image=get_icon_path())
            log.error("got code %s: %s", err["code"], err["description"])
            return []

        log.debug("Jackett returned below response")
        log.debug("===============================")
        log.debug(search_resp.content)
        log.debug("===============================")

        return self._parse_items(search_resp.content)
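
# Masking sketch: for a 32-character key the code above logs the first 2 and
# last 4 characters with 26 asterisks in between, e.g. "ab" + "*" * 26 + "wxyz".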
Example #3
def load_overrides(path, custom=False):
    """ Overrides loader for Python files

    Note:
        Overrides must be in an ``overrides`` dictionary.

    Args:
        path    (str): Path to Python file to be loaded
        custom (bool): Boolean flag to specify if this is a custom overrides file
    """
    try:
        if custom:
            sys.path.append(path)
            from overrides import overrides
        else:
            from burst_overrides import overrides
        if custom:
            log.debug("Imported overrides: %s", repr(overrides))
        for provider in overrides:
            update_definitions(provider, overrides[provider])
        if custom:
            log.info("Successfully loaded overrides from %s",
                     os.path.join(path, "overrides.py"))
    except Exception as e:
        import traceback
        log.error("Failed importing %soverrides: %s",
                  "custom " if custom else "", repr(e))
        map(log.error, traceback.format_exc().split("\n"))
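
# A minimal custom overrides.py matching the loader above; the dictionary maps
# provider IDs to definition overrides (the inner keys shown are hypothetical):
#
#   overrides = {
#       "some_provider": {
#           "base_url": "https://mirror.example.com",
#       },
#   }
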
def get_cloudhole_clearance(cloudhole_key):
    """ CloudHole clearance fetcher

    Args:
        cloudhole_key (str): The CloudHole API key saved in settings or from ``get_cloudhole_key`` directly
    Returns:
        tuple: A CloudHole clearance cookie and user-agent string
    """
    user_agent = USER_AGENT
    clearance = None
    if cloudhole_key:
        try:
            r = urllib2.Request("https://cloudhole.herokuapp.com/clearances")
            r.add_header('Content-type', 'application/json')
            r.add_header('Authorization', cloudhole_key)
            with closing(urllib2.urlopen(r)) as response:
                content = response.read()
            log.debug("CloudHole returned: %s" % content)
            data = json.loads(content)
            user_agent = data[0]['userAgent']
            clearance = data[0]['cookies']
            log.info("New UA and clearance: %s / %s" % (user_agent, clearance))
        except Exception as e:
            log.error("CloudHole error: %s" % repr(e))
    return clearance, user_agent
    def __init__(self):
        try:
            self.config = config()
        except Exception as e:
            log.error("Got an exception from config composer: %s" % repr(e))
            self.config = None
Example #6
        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash,
                            referer):
            try:
                log.debug("[%s] Getting subpage at %s" %
                          (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" %
                          (provider, repr(e)))
                map(log.debug, traceback.format_exc().split("\n"))

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey
            headers = {}

            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''

            if referer:
                headers['Referer'] = referer

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(py2_encode(uri[0]), headers=headers)

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' %
                          (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(
                            uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error(
                        "[%s] Subpage extraction for %s failed with: %s" %
                        (provider, repr(uri[0]), repr(e)))
                    map(log.debug, traceback.format_exc().split("\n"))

            log.debug("[%s] Subpage torrent for %s: %s" %
                      (provider, repr(uri[0]), torrent))
            ret = (name, info_hash, torrent, size, seeds, peers)

            # Cache this subpage result in case another query needs to request the same url.
            provider_cache[uri[0]] = torrent
            q.put_nowait(ret)
def load_providers(path, custom=False, fix_seasons=False):
    """ Definitions loader for json files

    Args:
        path         (str): Path to json file to be loaded
        custom      (bool): Boolean flag to specify if this is a custom provider
        fix_seasons (bool): Boolean flag to apply default fix to seasons keywords
    """
    try:
        with open(path) as file:
            providers = json.load(file)
        for provider in providers:
            update_definitions(provider, providers[provider], custom,
                               fix_seasons)
    except Exception as e:
        import traceback
        log.error("Failed importing providers from %s: %s", path, repr(e))
        map(log.error, traceback.format_exc().split("\n"))
def get_cloudhole_key():
    """ CloudHole API key fetcher

    Returns:
        str: A CloudHole API key
    """
    cloudhole_key = None
    try:
        r = urllib2.Request("https://cloudhole.herokuapp.com/key")
        r.add_header('Content-type', 'application/json')
        with closing(urllib2.urlopen(r)) as response:
            content = response.read()
        log.info("CloudHole key: %s" % content)
        data = json.loads(content)
        cloudhole_key = data['key']
    except Exception as e:
        log.error("Getting CloudHole key error: %s" % repr(e))
    return cloudhole_key
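
# Combined usage sketch for the two CloudHole helpers above:
#
#   key = get_cloudhole_key()
#   clearance, user_agent = get_cloudhole_clearance(key)
#   # clearance stays None if no key is available or the request failed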
Example #9
def load_providers(path, custom=False):
    """ Definitions loader for json files

    Args:
        path         (str): Path to json file to be loaded
        custom      (bool): Boolean flag to specify if this is a custom provider
    """
    if not os.path.exists(path):
        return

    try:
        with open(path, encoding="utf-8") as file:
            providers = json.load(file)
        for provider in providers:
            update_definitions(provider, providers[provider], custom)
    except Exception as e:
        import traceback
        log.error("Failed importing providers from %s: %s", path, repr(e))
        map(log.error, traceback.format_exc().split("\n"))
Example #10
    def get_caps(self):
        caps_resp = self._session.get("all/results/torznab",
                                      params={
                                          "t": "caps",
                                          "apikey": self._api_key
                                      })

        if caps_resp.status_code != httplib.OK:
            notify(translation(32700).format(caps_resp.reason),
                   image=get_icon_path())
            log.error("Jackett return %s", caps_resp.reason)
            set_setting('settings_validated', caps_resp.reason)
            return

        err = self.get_error(caps_resp.content)
        if err is not None:
            notify(translation(32700).format(err["description"]),
                   image=get_icon_path())
            log.error("got code %s: %s", err["code"], err["description"])
            set_setting('settings_validated', err["description"])
            return

        set_setting('settings_validated', 'Success')

        xml = ET.ElementTree(ET.fromstring(caps_resp.content)).getroot()

        # todo handle gracefully, doesn't exist for individual trackers
        # self._caps["limits"] = xml.find("limits").attrib

        self._caps["search_tags"] = {}
        for type_tag in xml.findall('searching/*'):
            self._caps["search_tags"][type_tag.tag] = {
                "enabled":
                type_tag.attrib["available"] == "yes",
                "params": [
                    p for p in type_tag.attrib['supportedParams'].split(",")
                    if p
                ],
            }

        log.info("Found capabilities: %s", repr(self._caps))
Example #11
        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash):
            try:
                log.debug("[%s] Getting subpage at %s" %
                          (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" %
                          (provider, repr(e)))
                map(log.debug, traceback.format_exc().split("\n"))

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey

            if get_setting("use_cloudhole", bool):
                subclient.clearance = get_setting('clearance')
                subclient.user_agent = get_setting('user_agent')

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'))

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' %
                          (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(
                            uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error(
                        "[%s] Subpage extraction for %s failed with: %s" %
                        (provider, repr(uri[0]), repr(e)))
                    map(log.debug, traceback.format_exc().split("\n"))

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)
Example #12
def load():
    from elementum.provider import log
    from utils import get_setting

    if get_setting("enable_debugger", bool):
        import pkgutil
        import re
        from os import path
        import sys

        additional_libraries = get_setting("debugger_additional_libraries")
        if additional_libraries != "":
            if not path.exists(additional_libraries):
                log.error(
                    "Debugger has been enabled but the additional libraries directory does not exist, skipping debugger loading"
                )
                return
            sys.path.append(additional_libraries)

        if pkgutil.find_loader("pydevd_pycharm") is None:
            log.error(
                "Debugger currently only supports IntelliJ IDEA and derivatives (pydevd_pycharm was not found)"
            )
            return

        host = get_setting("debugger_host")
        valid_host_regex = re.compile(
            r'''
                    ^
                      (?:
                        (?:(?:(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}(?:[0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))
                      |
                        (?:(?:(?:[a-zA-Z]|[a-zA-Z][a-zA-Z0-9\-]*[a-zA-Z0-9])\.)+(?:[A-Za-z]|[A-Za-z][A-Za-z0-9\-]*[A-Za-z0-9]))
                      )
                    $
        ''', re.VERBOSE)
        if not valid_host_regex.match(host):
            log.error("debugger: invalid host detected, skipping")
            return

        try:
            port = get_setting("debugger_port", int)
        except ValueError:
            log.exception("debugger: invalid port detected")
            return

        if not (0 < int(port) <= 65535):
            log.error("debugger: port must be between 1 and 65535")
            return

        import pydevd_pycharm
        pydevd_pycharm.settrace(host,
                                port=port,
                                stdoutToServer=True,
                                stderrToServer=True)
        log.info("pycharm debugger successfully loaded")
Example #13
    def process_keywords(self, provider, text, definition):
        """ Processes the query payload from a provider's keyword definitions

        Args:
            provider   (str): Provider ID
            text       (str): Keyword placeholders from definitions, ie. {title}
            definition (dict): Provider definition dictionary

        Returns:
            str: Processed query keywords
        """
        keywords = self.read_keywords(text)
        replacing = use_filter_quotes

        for keyword in keywords:
            keyword = keyword.lower()
            if 'title' in keyword:
                title = self.info["title"]
                language = definitions[provider]['language']
                use_language = None
                if ':' in keyword:
                    use_language = keyword.split(':')[1].lower()
                if provider not in self.language_exceptions and \
                   (use_language or self.kodi_language) and \
                   'titles' in self.info and self.info['titles']:
                    try:
                        if not use_language and self.kodi_language and self.kodi_language in self.info[
                                'titles']:
                            use_language = self.kodi_language
                        if not use_language and language and language in self.info[
                                'titles']:
                            use_language = language

                        if use_language in self.info['titles'] and self.info[
                                'titles'][use_language]:
                            title = self.info['titles'][use_language]
                            title = normalize_string(title)
                            # For all non-original titles, try to remove accents from the title.
                            if use_language != 'original':
                                title = remove_accents(title)
                            # Remove characters, filled in 'remove_special_characters' field definition.
                            if 'remove_special_characters' in definition and definition[
                                    'remove_special_characters']:
                                for char in definition[
                                        'remove_special_characters']:
                                    title = title.replace(char, "")
                                title = " ".join(title.split())

                            log.info("[%s] Using translated '%s' title %s" %
                                     (provider, use_language, repr(title)))
                        else:
                            log.debug(
                                "[%s] Skipping the query '%s' due to missing '%s' language title"
                                % (provider, text, use_language))
                            # If title for specific language cannot be read - cancel this query
                            return ""
                    except Exception as e:
                        import traceback
                        log.error("%s failed with: %s" % (provider, repr(e)))
                        map(log.debug, traceback.format_exc().split("\n"))
                text = text.replace('{%s}' % keyword, title)

            if 'year' in keyword:
                text = text.replace('{%s}' % keyword, str(self.info["year"]))

            if 'show_tmdb_id' in keyword:
                if 'show_tmdb_id' not in self.info:
                    self.info['show_tmdb_id'] = ''

                text = text.replace('{%s}' % keyword,
                                    str(self.info["show_tmdb_id"]))

            if 'tmdb_id' in keyword:
                if 'tmdb_id' not in self.info:
                    self.info['tmdb_id'] = ''

                text = text.replace('{%s}' % keyword,
                                    str(self.info["tmdb_id"]))

            if 'tvdb_id' in keyword:
                if 'tvdb_id' not in self.info:
                    self.info['tvdb_id'] = ''

                text = text.replace('{%s}' % keyword,
                                    str(self.info["tvdb_id"]))

            if 'imdb_id' in keyword:
                if 'imdb_id' not in self.info:
                    self.info['imdb_id'] = ''

                text = text.replace('{%s}' % keyword,
                                    str(self.info["imdb_id"]))

            if 'season' in keyword:
                if '+' in keyword:
                    keys = keyword.split('+')
                    season = str(self.info["season"] + get_int(keys[1]))
                elif ':' in keyword:
                    keys = keyword.split(':')
                    season = ('%%.%sd' % keys[1]) % self.info["season"]
                else:
                    season = '%s' % self.info["season"]
                text = text.replace('{%s}' % keyword, season)

            if 'episode' in keyword:
                if '+' in keyword:
                    keys = keyword.split('+')
                    episode = str(self.info["episode"] + get_int(keys[1]))
                elif ':' in keyword:
                    keys = keyword.split(':')
                    episode = ('%%.%sd' % keys[1]) % self.info["episode"]
                else:
                    episode = '%s' % self.info["episode"]
                text = text.replace('{%s}' % keyword, episode)

        if replacing:
            text = text.replace(u"'", '')

        return text
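
# Worked expansion sketch for the keyword grammar above, assuming
# self.info = {"title": "Some Show", "season": 1, "episode": 2, "year": 2020}:
#
#   "{title} s{season:2}e{episode:2}" -> "Some Show s01e02"  (':N' zero-pads to N digits)
#   "{title} {year}"                  -> "Some Show 2020"
#   "{season+1}"                      -> "2"                 ('+N' adds an offset)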
Example #14
    def open(self, url, language='en', post_data=None, get_data=None):
        """ Opens a connection to a webpage and saves its HTML content in ``self.content``

        Args:
            url        (str): The URL to open
            language   (str): The language code for the ``Content-Language`` header
            post_data (dict): POST data for the request
            get_data  (dict): GET data for the request
        """
        if not post_data:
            post_data = {}
        if get_data:
            url += '?' + urlencode(get_data)

        log.debug("Opening URL: %s" % repr(url))
        result = False

        data = urlencode(post_data) if len(post_data) > 0 else None
        req = urllib2.Request(url, data)

        self._read_cookies(url)
        log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

        handlers = []

        if get_setting("use_elementum_proxy", bool) and self.proxy_url:
            if self.proxy_type:
                if self.proxy_type == 2:
                    proxyHandler = urllib2.ProxyHandler({
                        'http': self.proxy_url,
                        'https': self.proxy_url,
                    })
                    handlers.append(proxyHandler)
                elif self.proxy_type == 1:
                    import proxy.socks as socks
                    from proxy.sockshandler import SocksiPyHandler
                    prx_info = self.proxy_url.split(':')
                    handlers.append(SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, prx_info[1].replace("//", ''), int(prx_info[2])))
            else:
                proxyHandler = urllib2.ProxyHandler({
                    'http': self.proxy_url,
                    'https': self.proxy_url,
                })
                handlers.append(proxyHandler)

        cookieHandler = urllib2.HTTPCookieProcessor(self._cookies)
        handlers.append(cookieHandler)

        opener = urllib2.build_opener(*handlers)
        req.add_header('User-Agent', self.user_agent)
        req.add_header('Content-Language', language)
        req.add_header("Accept-Encoding", "gzip")
        req.add_header("Origin", url)
        req.add_header("Referer", url)

        try:
            self._good_spider()
            with closing(opener.open(req)) as response:
                self.headers = response.headers
                self._save_cookies()
                if response.headers.get("Content-Encoding", "") == "gzip":
                    import zlib
                    self.content = zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(response.read())
                else:
                    self.content = response.read()

                charset = response.headers.getparam('charset')

                if not charset:
                    match = re.search(r"""<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>]*)""", self.content)
                    if match:
                        charset = match.group(1)

                if charset and charset.lower() == 'utf-8':
                    charset = 'utf-8-sig'  # Changing to utf-8-sig to remove BOM if found on decode from utf-8

                if charset:
                    log.debug('Decoding charset from %s for %s' % (charset, repr(url)))
                    self.content = self.content.decode(charset, 'replace')

                self.status = response.getcode()
            result = True

        except urllib2.HTTPError as e:
            self.status = e.code
            log.warning("Status for %s : %s" % (repr(url), str(self.status)))

        except urllib2.URLError as e:
            self.status = repr(e.reason)
            log.warning("Status for %s : %s" % (repr(url), self.status))

        except Exception as e:
            import traceback
            log.error("%s failed with %s:" % (repr(url), repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))

        log.debug("Status for %s : %s" % (repr(url), str(self.status)))

        return result
    def open(self, url, language='en', post_data=None, get_data=None):
        """ Opens a connection to a webpage and saves its HTML content in ``self.content``

        Args:
            url        (str): The URL to open
            language   (str): The language code for the ``Content-Language`` header
            post_data (dict): POST data for the request
            get_data  (dict): GET data for the request
        """
        if not post_data:
            post_data = {}
        if get_data:
            url += '?' + urlencode(get_data)

        log.debug("Opening URL: %s" % repr(url))
        result = False

        data = urlencode(post_data) if len(post_data) > 0 else None
        req = urllib2.Request(url, data)

        self._read_cookies(url)
        log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

        opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self._cookies))
        if get_setting("use_public_dns", bool):
            opener = urllib2.build_opener(
                urllib2.HTTPCookieProcessor(self._cookies), MyHTTPHandler)
        urllib2.install_opener(opener)

        req.add_header('User-Agent', self.user_agent)
        req.add_header('Content-Language', language)
        req.add_header("Accept-Encoding", "gzip")
        req.add_header("Origin", url)
        req.add_header("Referer", url)

        if self.token:
            req.add_header("Authorization", self.token)

        try:
            self._good_spider()
            with closing(opener.open(req)) as response:
                self.headers = response.headers
                self._save_cookies()
                if response.headers.get("Content-Encoding", "") == "gzip":
                    import zlib
                    self.content = zlib.decompressobj(
                        16 + zlib.MAX_WBITS).decompress(response.read())
                else:
                    self.content = response.read()

                charset = response.headers.getparam('charset')

                if not charset:
                    match = re.search(
                        r"""<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>]*)""",
                        self.content)
                    if match:
                        charset = match.group(1)

                if charset and charset.lower() == 'utf-8':
                    charset = 'utf-8-sig'  # Changing to utf-8-sig to remove BOM if found on decode from utf-8

                if charset:
                    log.debug('Decoding charset from %s for %s' %
                              (charset, repr(url)))
                    self.content = self.content.decode(charset, 'replace')

                self.status = response.getcode()
            result = True

        except urllib2.HTTPError as e:
            self.status = e.code
            log.warning("Status for %s : %s" % (repr(url), str(self.status)))
            if e.code == 403 or e.code == 503:
                log.warning("CloudFlared at %s, try enabling CloudHole" % url)

        except urllib2.URLError as e:
            self.status = repr(e.reason)
            log.warning("Status for %s : %s" % (repr(url), self.status))

        except Exception as e:
            import traceback
            log.error("%s failed with %s:" % (repr(url), repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))

        log.debug("Status for %s : %s" % (repr(url), str(self.status)))

        return result
    def open(self,
             url,
             language='en',
             post_data=None,
             get_data=None,
             headers=None):
        """ Opens a connection to a webpage and saves its HTML content in ``self.content``

        Args:
            url        (str): The URL to open
            language   (str): The language code for the ``Content-Language`` header
            post_data (dict): POST data for the request
            get_data  (dict): GET data for the request
            headers   (dict): Optional extra headers that override the defaults
        """

        if get_data:
            url += '?' + urlencode(get_data)

        log.debug("Opening URL: %s" % repr(url))
        if self.session.proxies:
            log.debug("Proxies: %s" % (repr(self.session.proxies)))

        self._read_cookies(url)
        self.session.cookies = self._cookies

        # log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

        # Default headers for any request. Pretend to be a regular browser.
        req_headers = {
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Accept-Language':
            'en-EN,en;q=0.9,en-US;q=0.8,en;q=0.7,uk;q=0.6,pl;q=0.5',
            'Cache-Control': 'no-cache',
            'Content-Language': language,
            'Origin': url,
            'Referer': url,
            'User-Agent': self.user_agent
        }

        # Headers passed to the open() call override these defaults.
        if headers:
            for key, value in headers.iteritems():
                if key == ':path':
                    u = urlparse(url)
                    value = u.path
                if value:
                    req_headers[key] = value
                elif key.capitalize() in req_headers:
                    del req_headers[key.capitalize()]

        if self.token:
            req_headers["Authorization"] = self.token

        req = None
        if post_data:
            req = requests.Request('POST',
                                   url,
                                   data=post_data,
                                   headers=req_headers)
        else:
            req = requests.Request('GET', url, headers=req_headers)

        prepped = self.session.prepare_request(req)
        self.request_headers = prepped.headers

        try:
            self._good_spider()
            with self.session.send(prepped) as response:
                self.headers = response.headers
                self.status = response.status_code
                self.url = response.url

                self._save_cookies()

                if self.response_charset:
                    self.content = response.content.decode(
                        self.response_charset, 'ignore')
                else:
                    self.content = response.text

        except requests.exceptions.InvalidSchema as e:
            # If the link points to a magnet: URI, it can be used directly as the content
            matches = re.findall(
                'No connection adapters were found for \'(.*?)\'', str(e))
            if matches:
                self.content = matches[0]
                return True

            import traceback
            log.error("%s failed with %s:" % (repr(url), repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))
        except Exception as e:
            import traceback
            log.error("%s failed with %s:" % (repr(url), repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))

        log.debug("Status for %s : %s" % (repr(url), str(self.status)))

        return self.status == 200
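
# Usage sketch for the requests-based open() above (Client construction details
# are assumed; only the open() contract shown in the docstring is exercised):
#
#   client = Client()
#   if client.open("https://example.com/search",
#                  get_data={"q": "ubuntu"},
#                  headers={"X-Requested-With": "XMLHttpRequest"}):
#       print(client.status, len(client.content))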
Example #17
def process(provider,
            generator,
            filtering,
            has_special,
            verify_name=True,
            verify_size=True):
    """ Method for processing provider results using its generator and Filtering class instance

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special    (bool): Whether title contains special chars
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes
    """
    log.debug("execute_process for %s with %s" % (provider, repr(generator)))
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    client = Client(info=filtering.info)
    logged_in = False

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
        language_exceptions = get_setting('language_exceptions')
        if language_exceptions.strip().lower():
            filtering.language_exceptions = re.split(r',\s?',
                                                     language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras:  %s" % (provider, filtering.extras))

    for query, extra in zip(filtering.queries, filtering.extras):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if has_special:
            # Remove quotes surrounding {title*} keywords when the title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)

        query = filtering.process_keywords(provider, query)
        extra = filtering.process_keywords(provider, extra)
        if 'charset' in definition and 'utf' not in definition[
                'charset'].lower():
            try:
                query = urllib.quote(query.encode(definition['charset']))
                extra = urllib.quote(extra.encode(definition['charset']))
            except:
                pass

        log.debug("[%s] After keywords  - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        query = urllib.quote(query.encode('utf-8'))
        url_search = filtering.url.replace('QUERY', query)
        if extra:
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')
        url_search = url_search.replace(' ', definition['separator'])

        if 'post_data' in definition and not filtering.post_data:
            filtering.post_data = eval(definition['post_data'])

        # Creating the payload for POST method
        payload = dict()
        for key, value in filtering.post_data.iteritems():
            if 'QUERY' in value:
                payload[key] = filtering.post_data[key].replace('QUERY', query)
            else:
                payload[key] = filtering.post_data[key]

        # Creating the payload for GET method
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in filtering.get_data.iteritems():
                if 'QUERY' in value:
                    data[key] = filtering.get_data[key].replace('QUERY', query)
                else:
                    data[key] = filtering.get_data[key]

        log.debug("-   %s query: %s" % (provider, repr(query)))
        log.debug("--  %s url_search before token: %s" %
                  (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" % (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" %
                  (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider)
            password = get_setting('%s_password' % provider)

            if 'login_object' in definition and definition['login_object']:
                logged_in = False
                try:
                    login_object = definition['login_object'].replace(
                        'USERNAME',
                        '"%s"' % username).replace('PASSWORD',
                                                   '"%s"' % password)
                except Exception as e:
                    log.error("[{0}] Building login_object failed: {1}".format(
                        provider, e))
                    return filtering.results

                # TODO generic flags in definitions for those...
                if provider == 'lostfilm':
                    client.open(definition['root_url'] +
                                '/v_search.php?c=110&s=1&e=1')
                    # The login page title is only present when not logged in
                    if u'Вход. – LostFilm.TV.' not in client.content:
                        log.info('[%s] Login successful' % provider)
                        logged_in = True

                if not logged_in and client.login(
                        definition['root_url'] + definition['login_path'],
                        eval(login_object), definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider, client.status)
                    log.debug("[%s] Failed login content: %s", provider,
                              repr(client.content))
                    notify(translation(32089).format(provider),
                           image=get_icon_path())
                    return filtering.results

                if logged_in:
                    if provider == 'lostfilm':
                        log.info('[%s] Searching for lostfilm serial ID...', provider)
                        url_search = fix_lf(url_search)
                        client.open(url_search.encode('utf-8'),
                                    post_data=payload,
                                    get_data=data)
                        series_details = re.search(
                            r'"mark-rate-pane" rel="(\d+),(\d+),(\d+)">',
                            client.content)
                        if series_details:
                            client.open(definition['root_url'] +
                                        '/v_search.php?a=%s%s%s' %
                                        (series_details.group(1),
                                         series_details.group(2).zfill(3),
                                         series_details.group(3).zfill(3)))
                            redirect_url = re.search(ur'url=(.*?)">',
                                                     client.content)
                            if redirect_url is not None:
                                url_search = redirect_url.group(1)
                        else:
                            log.info('[%s] Serial ID not found in %s' %
                                     (provider, url_search))
                            return filtering.results

        log.info(">  %s search URL: %s" %
                 (definition['name'].rjust(longest), url_search))

        client.open(url_search, post_data=payload, get_data=data)
        filtering.results.extend(
            generate_payload(provider, generator(provider, client), filtering,
                             verify_name, verify_size))
Example #18
def process(provider,
            generator,
            filtering,
            has_special,
            verify_name=True,
            verify_size=True):
    """ Method for processing provider results using its generator and Filtering class instance

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special    (bool): Whether title contains special chars
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes
    """
    log.debug("execute_process for %s with %s" % (provider, repr(generator)))
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    client = Client()
    token = None
    logged_in = False
    token_auth = False

    if get_setting("use_cloudhole", bool):
        client.clearance = get_setting('clearance')
        client.user_agent = get_setting('user_agent')

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
        language_exceptions = get_setting('language_exceptions')
        if language_exceptions.strip().lower():
            filtering.language_exceptions = re.split(r',\s?',
                                                     language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras:  %s" % (provider, filtering.extras))

    for query, extra in zip(filtering.queries, filtering.extras):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if has_special:
            # Remove quotes surrounding {title*} keywords when the title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)

        query = filtering.process_keywords(provider, query)
        extra = filtering.process_keywords(provider, extra)
        if 'charset' in definition and 'utf' not in definition[
                'charset'].lower():
            try:
                query = urllib.quote(query.encode(definition['charset']))
                extra = urllib.quote(extra.encode(definition['charset']))
            except:
                pass

        log.debug("[%s] After keywords  - Query: %s - Extra: %s" %
                  (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        url_search = filtering.url.replace('QUERY', query)
        if extra:
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')
        url_search = url_search.replace(' ', definition['separator'])

        # MagnetDL fix...
        url_search = url_search.replace('FIRSTLETTER', query[:1])

        # Creating the payload for POST method
        if 'post_data' in definition and not filtering.post_data:
            filtering.post_data = eval(definition['post_data'])

        payload = dict()
        for key, value in filtering.post_data.iteritems():
            if 'QUERY' in value:
                payload[key] = filtering.post_data[key].replace('QUERY', query)
            else:
                payload[key] = filtering.post_data[key]

        # Creating the payload for GET method
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in filtering.get_data.iteritems():
                if 'QUERY' in value:
                    data[key] = filtering.get_data[key].replace('QUERY', query)
                else:
                    data[key] = filtering.get_data[key]

        log.debug("-   %s query: %s" % (provider, repr(query)))
        log.debug("--  %s url_search before token: %s" %
                  (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" % (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" %
                  (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        if token:
            log.info('[%s] Reusing existing token' % provider)
            url_search = url_search.replace('TOKEN', token)
        elif 'token' in definition:
            token_url = definition['base_url'] + definition['token']
            log.debug("Getting token for %s at %s" %
                      (provider, repr(token_url)))
            client.open(token_url.encode('utf-8'))
            try:
                token_data = json.loads(client.content)
            except:
                log.error('%s: Failed to get token for %s' %
                          (provider, repr(url_search)))
                return filtering.results
            log.debug("Token response for %s: %s" %
                      (provider, repr(token_data)))
            if 'token' in token_data:
                token = token_data['token']
                log.debug("Got token for %s: %s" % (provider, repr(token)))
                url_search = url_search.replace('TOKEN', token)
            else:
                log.warning('%s: Unable to get token for %s' %
                            (provider, repr(url_search)))

        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif token_auth:
            log.info("[%s] Reusing previous token authorization" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider)
            password = get_setting('%s_password' % provider)
            passkey = get_setting('%s_passkey' % provider)
            if not username and not password and not passkey:
                for addon_name in ('script.magnetic.%s' % provider,
                                   'script.magnetic.%s-mc' % provider):
                    for setting in ('username', 'password'):
                        try:
                            value = xbmcaddon.Addon(addon_name).getSetting(
                                setting)
                            set_setting('%s_%s' % (provider, setting), value)
                            if setting == 'username':
                                username = value
                            if setting == 'password':
                                password = value
                        except:
                            pass

            if passkey:
                logged_in = True
                client.passkey = passkey
                url_search = url_search.replace('PASSKEY', passkey)

            elif 'login_object' in definition and definition['login_object']:
                logged_in = False
                login_object = definition['login_object'].replace(
                    'USERNAME',
                    '"%s"' % username).replace('PASSWORD', '"%s"' % password)

                # TODO generic flags in definitions for those...
                if provider == 'hd-torrents':
                    client.open(definition['root_url'] +
                                definition['login_path'])
                    if client.content:
                        csrf_token = re.search(
                            r'name="csrfToken" value="(.*?)"', client.content)
                        if csrf_token:
                            login_object = login_object.replace(
                                'CSRF_TOKEN', '"%s"' % csrf_token.group(1))
                        else:
                            logged_in = True
                if provider == 'lostfilm':
                    client.open(definition['root_url'] +
                                '/v_search.php?c=111&s=1&e=1')
                    if client.content != 'log in first':
                        logged_in = True

                if 'token_auth' in definition:
                    # log.debug("[%s] logging in with: %s" % (provider, login_object))
                    if client.open(definition['root_url'] +
                                   definition['token_auth'],
                                   post_data=eval(login_object)):
                        try:
                            token_data = json.loads(client.content)
                        except:
                            log.error('%s: Failed to get token from %s' %
                                      (provider, definition['token_auth']))
                            return filtering.results
                        log.debug("Token response for %s: %s" %
                                  (provider, repr(token_data)))
                        if 'token' in token_data:
                            client.token = token_data['token']
                            log.debug("Auth token for %s: %s" %
                                      (provider, repr(client.token)))
                        else:
                            log.error('%s: Unable to get auth token for %s' %
                                      (provider, repr(url_search)))
                            return filtering.results
                        log.info('[%s] Token auth successful' % provider)
                        token_auth = True
                    else:
                        log.error("[%s] Token auth failed with response: %s" %
                                  (provider, repr(client.content)))
                        return filtering.results
                elif not logged_in and client.login(
                        definition['root_url'] + definition['login_path'],
                        eval(login_object), definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider, client.status)
                    log.debug("[%s] Failed login content: %s", provider,
                              repr(client.content))
                    return filtering.results

                if logged_in:
                    if provider == 'hd-torrents':
                        client.open(definition['root_url'] + '/torrents.php')
                        csrf_token = re.search(
                            r'name="csrfToken" value="(.*?)"', client.content)
                        url_search = url_search.replace(
                            "CSRF_TOKEN", csrf_token.group(1))

                    if provider == 'lostfilm':
                        log.info('[%s] Need to open page before search', provider)
                        client.open(url_search.encode('utf-8'),
                                    post_data=payload,
                                    get_data=data)
                        search_info = re.search(r'PlayEpisode\((.*?)\)">',
                                                client.content)
                        if search_info:
                            series_details = re.search(
                                r'\'(\d+)\',\'(\d+)\',\'(\d+)\'',
                                search_info.group(1))
                            client.open(definition['root_url'] +
                                        '/v_search.php?c=%s&s=%s&e=%s' %
                                        (series_details.group(1),
                                         series_details.group(2),
                                         series_details.group(3)))
                            redirect_url = re.search(ur'url=(.*?)">',
                                                     client.content)
                            if redirect_url is not None:
                                url_search = redirect_url.group(1)
                        else:
                            return filtering.results

        log.info(">  %s search URL: %s" %
                 (definition['name'].rjust(longest), url_search))

        client.open(url_search.encode('utf-8'),
                    post_data=payload,
                    get_data=data)
        filtering.results.extend(
            generate_payload(provider, generator(provider, client), filtering,
                             verify_name, verify_size))
    return filtering.results
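
# Sketch of the provider definition keys that process() consults above
# (key names are taken from the code; the values are hypothetical):
#
#   definitions["some_provider"] = {
#       "name": "SomeProvider",
#       "base_url": "https://tracker.example.com",
#       "root_url": "https://tracker.example.com",
#       "separator": "%20",
#       "token": "/api/token",    # optional: replaces TOKEN in the search URL
#       "private": True,          # triggers the login flow
#       "login_path": "/login",
#       "login_object": "{'username': USERNAME, 'password': PASSWORD}",
#       "login_failed": "Invalid login",
#   }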
Example #19
def search(payload, method="general"):
    """ Main search entrypoint

    Args:
        payload (dict): Search payload from Elementum.
        method   (str): Type of search, can be ``general``, ``movie``, ``show``, ``season`` or ``anime``

    Returns:
        list: All filtered results in the format Elementum expects
    """
    log.debug("Searching with payload (%s): %s" % (method, repr(payload)))

    if method == 'general':
        if 'query' in payload:
            payload['title'] = payload['query']
            payload['titles'] = {'source': payload['query']}
        else:
            payload = {
                'title': payload,
                'titles': {
                    'source': payload
                },
            }

    payload['titles'] = dict(
        (k.lower(), v) for k, v in payload['titles'].iteritems())

    # If titles[] exists in payload and there are special chars in titles[source]
    #   then we set a flag to possibly modify the search query
    payload['has_special'] = 'titles' in payload and \
                             bool(payload['titles']) and \
                             'source' in payload['titles'] and \
                             any(c in payload['titles']['source'] for c in special_chars)
    if payload['has_special']:
        log.debug(
            "Query title contains special chars, so removing any quotes in the search query"
        )

    if 'proxy_url' not in payload:
        payload['proxy_url'] = ''
    if 'internal_proxy_url' not in payload:
        payload['internal_proxy_url'] = ''
    if 'elementum_url' not in payload:
        payload['elementum_url'] = ''
    if 'silent' not in payload:
        payload['silent'] = False
    if 'skip_auth' not in payload:
        payload['skip_auth'] = False

    global request_time
    global provider_names
    global provider_results
    global available_providers

    provider_names = []
    provider_results = []
    available_providers = 0
    request_time = time.time()

    providers = get_enabled_providers(method)

    if len(providers) == 0:
        if not payload['silent']:
            notify(translation(32060), image=get_icon_path())
        log.error("No providers enabled")
        return []

    log.info(
        "Burstin' with %s" %
        ", ".join([definitions[provider]['name'] for provider in providers]))

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if not kodi_language:
            log.warning("Kodi returned empty language code...")
        elif 'titles' not in payload or not payload['titles']:
            log.info("No translations available...")
        elif payload['titles'] and kodi_language not in payload['titles']:
            log.info("No '%s' translation available..." % kodi_language)

    p_dialog = xbmcgui.DialogProgressBG()
    if not payload['silent']:
        p_dialog.create('Elementum [COLOR FFFF6B00]Burst[/COLOR]',
                        translation(32061))

    for provider in providers:
        available_providers += 1
        provider_names.append(definitions[provider]['name'])
        task = Thread(target=run_provider, args=(provider, payload, method))
        task.start()

    providers_time = time.time()
    total = float(available_providers)

    # Exit if all providers have returned results or timeout reached, check every 250ms
    while time.time() - providers_time < timeout and available_providers > 0:
        timer = time.time() - providers_time
        log.debug("Timer: %ds / %ds" % (timer, timeout))
        if timer > timeout:
            break
        if available_providers > 1:
            message = translation(32062) % available_providers
        else:
            message = translation(32063)
        if not payload['silent']:
            p_dialog.update(int((total - available_providers) / total * 100),
                            message=message)
        time.sleep(0.25)

    if not payload['silent']:
        p_dialog.close()
    del p_dialog

    if available_providers > 0:
        message = u', '.join(provider_names)
        message = message + translation(32064)
        log.warning(message.encode('utf-8'))
        if not payload['silent']:
            notify(message, ADDON_ICON)

    log.debug("all provider_results: %s" % repr(provider_results))

    filtered_results = apply_filters(provider_results)

    log.debug("all filtered_results: %s" % repr(filtered_results))

    log.info("Providers returned %d results in %s seconds" %
             (len(filtered_results), round(time.time() - request_time, 2)))

    return filtered_results
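
# Sketch of a 'general' search payload after the normalization above
# (keys are taken from the code; values are hypothetical):
#
#   {
#       "title": "ubuntu",
#       "titles": {"source": "ubuntu"},
#       "has_special": False,
#       "proxy_url": "", "internal_proxy_url": "", "elementum_url": "",
#       "silent": False, "skip_auth": False,
#   }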
Example #20
import os
import sys

from kodi_six import xbmcgui
from elementum.provider import register, log

sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'resources',
                                'libs'))
sys.path.insert(0, os.path.dirname(__file__))

if __name__ == '__main__':
    from src import debugger, utils
    from src.jackett import search, validate_client

    if len(sys.argv) == 1:
        log.error("Elementum Jackett plugin must be run through Elementum")
        p_dialog = xbmcgui.Dialog()
        try:
            p_dialog.ok('Elementum [COLOR FFFF6B00]Jackett[/COLOR]',
                        utils.translation(32800))
        finally:
            del p_dialog

        sys.exit(1)

    if sys.argv[1] == "validate_settings":
        validate_client()
    else:
        debugger.load()
        register(
            lambda q: search(q),
            # The remaining callbacks were truncated in this snippet; assuming
            # Elementum's register(search, search_movie, search_episode,
            # search_season) signature, a plausible completion is:
            lambda q: search(q, 'movie'),
            lambda q: search(q, 'episode'),
            lambda q: search(q, 'season'))
    def open(self,
             url,
             language='en',
             post_data=None,
             get_data=None,
             headers=None):
        """ Opens a connection to a webpage and saves its HTML content in ``self.content``

        Args:
            url        (str): The URL to open
            language   (str): The language code for the ``Content-Language`` header
            post_data (dict): POST data for the request
            get_data  (dict): GET data for the request
            headers   (dict): Extra headers; truthy values are merged over the
                              browser-like defaults, falsy values remove them
        """

        if get_data:
            url += '?' + urlencode(get_data)

        log.debug("Opening URL: %s" % repr(url))
        if self.session.proxies:
            log.debug("Proxies: %s" % (repr(self.session.proxies)))

        self._read_cookies(url)
        self.session.cookies = self._cookies

        log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

        # Default headers for any request. Pretend like we are the usual browser.
        req_headers = {
            'User-Agent': self.user_agent,
            'Content-Language': language,
            'Cache-Control': 'no-cache',
            'Accept-Encoding': 'deflate, compress, gzip',
            'Origin': url,
            'Referer': url
        }

        # If headers passed to open() call - we overwrite headers.
        if headers:
            for key, value in headers.iteritems():
                if value:
                    req_headers[key] = value
                elif key.capitalize() in req_headers:
                    del req_headers[key.capitalize()]

        if self.token:
            req_headers["Authorization"] = self.token

        req = None
        if post_data:
            req = requests.Request('POST',
                                   url,
                                   data=post_data,
                                   headers=req_headers)
        else:
            req = requests.Request('GET', url, headers=req_headers)

        prepped = self.session.prepare_request(req)
        self.request_headers = prepped.headers

        try:
            self._good_spider()
            with self.session.send(prepped) as response:
                self.headers = response.headers
                self.status = response.status_code
                self.url = response.url

                self._save_cookies()

                if self.response_charset:
                    self.content = response.content.decode(
                        self.response_charset, 'ignore')
                else:
                    self.content = response.text

        except Exception as e:
            import traceback
            log.error("%s failed with %s:" % (repr(url), repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))

        log.debug("Status for %s : %s" % (repr(url), str(self.status)))

        return self.status == 200
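
# Minimal usage sketch for the requests-based open() above; the URL and query
# are placeholders. get_data is urlencoded onto the URL and headers are merged
# over (or, when falsy, removed from) the browser-like defaults.
client = Client()
if client.open('http://example.com/search',
               get_data={'q': 'ubuntu'},
               headers={'Accept': 'text/html'}):
    log.debug("Fetched %d characters from %s" % (len(client.content), client.url))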

# Module-level cache setup for the antizapret helper. The stdlib imports below
# are implied by this excerpt; translatePath, log, LOCKS and CACHE come from
# the surrounding add-on modules.
import os
import string
import sys
import threading
from contextlib import closing, contextmanager

digs = string.digits + string.ascii_letters

try:
    PATH_TEMP = translatePath("special://temp").decode(
        sys.getfilesystemencoding(), 'ignore')
except:
    PATH_TEMP = translatePath("special://temp").decode('utf-8')

CACHE_DIR = os.path.join(PATH_TEMP, 'burst')
if not os.path.exists(CACHE_DIR):
    try:
        os.makedirs(CACHE_DIR)
    except Exception as e:
        log.debug("Error creating cache directory: %s" % repr(e))
if not os.access(CACHE_DIR, os.W_OK):
    log.error("Directory '%s' is not writable, antizapret will not work" %
              CACHE_DIR)
    CACHE_DIR = ""

# Use these values for debugging
# CACHE_DIR="/var/tmp/pac/"
# CACHE = 1


@contextmanager
def shelf(filename, ttl=0):
    import shelve
    filename = os.path.join(CACHE_DIR, filename)
    # setdefault stores the lock on first use so that concurrent callers share
    # it (.get() with a default would hand each caller a brand-new lock)
    with LOCKS.setdefault(filename, threading.RLock()):
        with closing(shelve.open(filename, writeback=True)) as d:
            import time
            if not d:
                # Assumed completion of this truncated snippet: initialize the
                # entry on first use...
                d["created_at"] = time.time()
                d["data"] = {}
            elif ttl > 0 and time.time() - d["created_at"] > ttl:
                # ...and expire the cached data once it is older than ttl
                d["data"] = {}
            yield d["data"]
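
# Usage sketch for the shelf() cache above (relying on the completion shown):
# the yielded dict persists in CACHE_DIR between runs and is wiped once it is
# older than ttl seconds. expensive_lookup() is a hypothetical helper.
def cached_lookup():
    with shelf("antizapret.example", ttl=3600) as cached:
        if "value" not in cached:
            cached["value"] = expensive_lookup()
        return cached["value"]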
Exemple #23

# Load providers
load_providers(os.path.join(ADDON_PATH, 'burst', 'providers',
                            'providers.json'))

# Load providers overrides
load_overrides(os.path.join(ADDON_PATH, 'burst', 'providers'))

# Load user's custom providers
custom_providers = os.path.join(xbmc.translatePath(ADDON_PROFILE), "providers")
if not os.path.exists(custom_providers):
    try:
        os.makedirs(custom_providers)
    except Exception as e:
        log.error("Unable to create custom providers folder: %s", repr(e))
        pass
for provider_file in glob(os.path.join(custom_providers, "*.json")):
    log.info("Importing and enabling %s" % provider_file)
    load_providers(provider_file, custom=True)

# Load user's custom overrides
custom_overrides = xbmc.translatePath(ADDON_PROFILE)
if os.path.exists(os.path.join(custom_overrides, 'overrides.py')):
    load_overrides(custom_overrides, custom=True)
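
# A minimal sketch of what such a custom overrides.py could contain: a
# module-level ``overrides`` dict keyed by provider ID, whose entries replace
# fields of the matching definition (provider ID and fields below invented).
#
#     overrides = {
#         'some_provider': {
#             'root_url': 'https://mirror.example.com',
#             'separator': '%20',
#         },
#     }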

# Load json overrides
load_providers(
    os.path.join(xbmc.translatePath(ADDON_PROFILE), 'overrides.json'))

def config():
    global _config
    if not _config and CACHE_DIR:
        with shelf("antizapret.pac_config", ttl=CACHE) as pac:
            if not pac.get("value"):
                data = None
                log.info("Fetching Antizapret PAC file")
                try:
                    req = urllib2.Request(PAC_URL,
                                          headers={
                                              'User-Agent': USER_AGENT,
                                              'Accept-Encoding': 'gzip',
                                              'Origin': PAC_URL,
                                              'Referer': PAC_URL
                                          })
                    response = urllib2.urlopen(req)
                    if response.headers.get("Content-Encoding", "") == "gzip":
                        import zlib
                        data = zlib.decompressobj(16 +
                                                  zlib.MAX_WBITS).decompress(
                                                      response.read())
                    else:
                        data = response.read()

                except Exception as e:
                    log.error("Fetching Antizapret PAC failed with code: %s" %
                              (repr(e)))
                    data = ""

                pac["value"] = {
                    "servers_usual": [],
                    "servers_special": [],
                    "domains": [],
                    "specials": [],
                    "ips": [],
                    "dn": [],
                }

                # Find servers for specific cases
                r_usual = re.search(
                    r'if \(yip === 1 \|\| shost === curarr\[i\]\) \{.*?return "(.*?)".*?\}',
                    data,
                    flags=re.DOTALL)
                r_special = re.search(
                    r'if \(isInNet\(oip, special\[i\]\[0\], special\[i\]\[1\]\)\) \{return "(.*?)";\}',
                    data,
                    flags=re.DOTALL)

                # Find arrays containing domains
                res = re.findall(r'(d_\w+) = "(.*?)"', data, flags=re.DOTALL)

                r_list_special = re.search(r'special = \[(.*?)\];',
                                           data,
                                           flags=re.DOTALL)
                r_dn = re.search(r'dn = \{(.*?)\};', data, flags=re.DOTALL)

                if r_usual:
                    pac["value"]["servers_usual"] = get_servers(
                        r_usual.group(1))
                if r_special:
                    pac["value"]["servers_special"] = get_servers(
                        r_special.group(1))

                if r_dn:
                    ary = r_dn.group(1).replace("'", "").split(", ")
                    for i in ary:
                        # ary2 = i.split(":")
                        # pac["value"]["dn"].append([i[1], i[0]])
                        pac["value"]["dn"].append(i.split(":"))

                if r_list_special:
                    for i in r_list_special.group(1).replace(
                            '],[', ']  ,  [').replace('"', '').split("  ,  "):
                        s = i.replace('[', '').replace(']', '').rstrip(',')
                        ary = s.strip().rstrip(',').split(', ')
                        pac["value"]["specials"].append([ary[0], int(ary[1])])

                if res:
                    domains = ""
                    ips = ""

                    for r in res:
                        l = r[1].replace('\\', '')

                        if r[0] == 'd_ipaddr':
                            ips += l
                        else:
                            domains += l

                    pac["value"]["domains"] = domains.split(" ")
                    pac["value"]["ips"] = re.findall(r'.{8}',
                                                     ips,
                                                     flags=re.DOTALL)

            _config = pac["value"]
    return _config
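
# Worked example of the d_* extraction above on a made-up PAC fragment: the
# matched payloads are concatenated, then split into space-separated domains
# or fixed-width 8-character IP tokens.
sample = 'd_abc = "site1.org site2.net"; d_ipaddr = "0a0b0c0d1e1f2a2b";'
pairs = re.findall(r'(d_\w+) = "(.*?)"', sample, flags=re.DOTALL)
# pairs == [('d_abc', 'site1.org site2.net'), ('d_ipaddr', '0a0b0c0d1e1f2a2b')]
# domains -> ['site1.org', 'site2.net']
# ips     -> ['0a0b0c0d', '1e1f2a2b']   # via re.findall(r'.{8}', ips)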
Exemple #25
def extract_torrents(provider, client):
    """ Main torrent extraction generator for non-API based providers

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("[%s] Extracting torrents from %s using definitions: %s" %
              (provider, provider, repr(definition)))

    if not client.content:
        if get_setting("use_debug_parser", bool):
            log.debug("[%s] Parser debug | Page content is empty" % provider)

        return  # end the generator (PEP 479-safe replacement for raising StopIteration)

    dom = Html().feed(client.content)

    key_search = get_search_query(definition, "key")
    row_search = get_search_query(definition, "row")
    name_search = get_search_query(definition, "name")
    torrent_search = get_search_query(definition, "torrent")
    info_hash_search = get_search_query(definition, "infohash")
    size_search = get_search_query(definition, "size")
    seeds_search = get_search_query(definition, "seeds")
    peers_search = get_search_query(definition, "peers")
    referer_search = get_search_query(definition, "referer")

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:

        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash,
                            referer):
            try:
                log.debug("[%s] Getting subpage at %s" %
                          (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" %
                          (provider, repr(e)))
                map(log.debug, traceback.format_exc().split("\n"))

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey
            headers = {}

            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''

            if referer:
                headers['Referer'] = referer

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(uri[0].encode('utf-8'), headers=headers)

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' %
                          (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(
                            uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error(
                        "[%s] Subpage extraction for %s failed with: %s" %
                        (provider, repr(uri[0]), repr(e)))
                    map(log.debug, traceback.format_exc().split("\n"))

            ret = (name, info_hash, torrent, size, seeds, peers)
            q.put_nowait(ret)

    if not dom:
        if get_setting("use_debug_parser", bool):
            log.debug(
                "[%s] Parser debug | Could not parse DOM from page content" %
                provider)

        return  # end the generator (PEP 479-safe replacement for raising StopIteration)

    if get_setting("use_debug_parser", bool):
        log.debug(
            "[%s] Parser debug | Page content: %s" %
            (provider, client.content.replace('\r', '').replace('\n', '')))

    key = eval(key_search) if key_search else ""
    if key_search and get_setting("use_debug_parser", bool):
        key_str = key.__str__()
        log.debug(
            "[%s] Parser debug | Matched '%s' iteration for query '%s': %s" %
            (provider, 'key', key_search, key_str.replace('\r', '').replace(
                '\n', '')))

    items = eval(row_search)
    if get_setting("use_debug_parser", bool):
        log.debug("[%s] Parser debug | Matched %d items for '%s' query '%s'" %
                  (provider, len(items), 'row', row_search))

    for item in items:
        if get_setting("use_debug_parser", bool):
            item_str = item.__str__()
            log.debug(
                "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                % (provider, 'row', row_search, item_str.replace(
                    '\r', '').replace('\n', '')))

        if not item:
            continue

        try:
            name = eval(name_search) if name_search else ""
            torrent = eval(torrent_search) if torrent_search else ""
            size = eval(size_search) if size_search else ""
            seeds = eval(seeds_search) if seeds_search else ""
            peers = eval(peers_search) if peers_search else ""
            info_hash = eval(info_hash_search) if info_hash_search else ""
            referer = eval(referer_search) if referer_search else ""

            if 'magnet:?' in torrent:
                torrent = torrent[torrent.find('magnet:?'):]

            if get_setting("use_debug_parser", bool):
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'name', name_search, name))
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'torrent', torrent_search, torrent))
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'size', size_search, size))
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'seeds', seeds_search, seeds))
                log.debug(
                    "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                    % (provider, 'peers', peers_search, peers))
                if info_hash_search:
                    log.debug(
                        "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                        % (provider, 'info_hash', info_hash_search, info_hash))
                if referer_search:
                    log.debug(
                        "[%s] Parser debug | Matched '%s' iteration for query '%s': %s"
                        % (provider, 'referer', referer_search, referer))

            # Pass client cookies with torrent if private
            if not torrent.startswith('magnet'):
                user_agent = USER_AGENT

                if client.passkey:
                    torrent = torrent.replace('PASSKEY', client.passkey)
                elif client.token:
                    headers = {
                        'Authorization': client.token,
                        'User-Agent': user_agent
                    }
                    log.debug("[%s] Appending headers: %s" %
                              (provider, repr(headers)))
                    torrent = append_headers(torrent, headers)
                    log.debug("[%s] Torrent with headers: %s" %
                              (provider, repr(torrent)))
                else:
                    parsed_url = urlparse(torrent.split('|')[0])
                    cookie_domain = '{uri.netloc}'.format(uri=parsed_url)
                    cookie_domain = re.sub('www\d*\.', '', cookie_domain)
                    cookies = []
                    for cookie in client._cookies:
                        if cookie_domain in cookie.domain:
                            cookies.append(cookie)
                    headers = {}
                    if cookies:
                        headers = {'User-Agent': user_agent}
                        log.debug("[%s] Cookies res: %s / %s" %
                                  (provider, repr(headers),
                                   repr(client.request_headers)))
                        if client.request_headers:
                            headers.update(client.request_headers)
                        if client.url:
                            headers['Referer'] = client.url
                            headers['Origin'] = client.url
                        # Need to set Cookie afterwards to avoid rewriting it with session Cookies
                        headers['Cookie'] = ";".join(
                            ["%s=%s" % (c.name, c.value) for c in cookies])
                    else:
                        headers = {'User-Agent': user_agent}

                    torrent = append_headers(torrent, headers)

            if name and torrent and needs_subpage and not torrent.startswith(
                    'magnet'):
                if not torrent.startswith('http'):
                    torrent = definition['root_url'] + torrent.encode('utf-8')
                t = Thread(target=extract_subpage,
                           args=(q, name, torrent, size, seeds, peers,
                                 info_hash, referer))
                threads.append(t)
            else:
                yield (name, info_hash, torrent, size, seeds, peers)
        except Exception as e:
            log.error("[%s] Got an exception while parsing results: %s" %
                      (provider, repr(e)))

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret
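
# Consumption sketch for the generator above; the tuple layout matches its
# yield statements. 'some_provider' is a placeholder ID and client is assumed
# to have already opened the provider's results page.
for name, info_hash, torrent, size, seeds, peers in extract_torrents(
        'some_provider', client):
    log.debug("%s | size=%s seeds=%s peers=%s" % (name, size, seeds, peers))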
def process(provider, generator, filtering, has_special, verify_name=True, verify_size=True, skip_auth=False, start_time=None, timeout=None):
    """ Method for processing provider results using its generator and Filtering class instance

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special    (bool): Whether title contains special chars
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes
        skip_auth      (bool): Whether to skip logging in to private providers
        start_time    (float): Timestamp of when the overall search started
        timeout         (int): Overall search time budget in seconds; querying
                               stops when ~3 seconds of it remain
    """
    log.debug("[%s] execute_process for %s with %s" % (provider, provider, repr(generator)))
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    client = Client(info=filtering.info, request_charset=definition['charset'], response_charset=definition['response_charset'])
    token = None
    logged_in = False
    token_auth = False

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
        language_exceptions = get_setting('language_exceptions')
        if language_exceptions.strip().lower():
            filtering.language_exceptions = re.split(r',\s?', language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras:  %s" % (provider, filtering.extras))

    for query, extra in zip(filtering.queries, filtering.extras):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if has_special:
            # Removing quotes, surrounding {title*} keywords, when title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)

        query = filtering.process_keywords(provider, query)
        extra = filtering.process_keywords(provider, extra)

        if not query:
            continue
        elif extra == '-' and filtering.results:
            continue
        elif start_time and timeout and time.time() - start_time + 3 >= timeout:
            # Stop doing requests if there is 3 seconds left for the overall task
            continue

        try:
            if 'charset' in definition and definition['charset'] and 'utf' not in definition['charset'].lower():
                query = quote(query.encode(definition['charset']))
                extra = quote(extra.encode(definition['charset']))
            else:
                query = quote(py2_encode(query))
                extra = quote(py2_encode(extra))
        except Exception as e:
            log.debug("[%s] Could not quote the query (%s): %s" % (provider, query, e))
            pass

        log.debug("[%s] After keywords  - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        url_search = filtering.url.replace('QUERY', query)
        if extra and extra != '-':
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')

        url_search = url_search.replace(' ', definition['separator'])
        if definition['separator'] != '%20':
            url_search = url_search.replace('%20', definition['separator'])

        # MagnetDL fix...
        url_search = url_search.replace('FIRSTLETTER', query[:1])

        # Creating the payload for POST method
        if 'post_data' in definition and not filtering.post_data:
            filtering.post_data = eval(definition['post_data'])

        payload = dict()
        for key, value in iteritems(filtering.post_data):
            if 'QUERY' in value:
                payload[key] = filtering.post_data[key].replace('QUERY', query)
            else:
                payload[key] = filtering.post_data[key]
            payload[key] = urllib.unquote(payload[key])

        # Creating the payload for GET method
        headers = None
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in iteritems(filtering.get_data):
                if 'QUERY' in value:
                    data[key] = filtering.get_data[key].replace('QUERY', query)
                else:
                    data[key] = filtering.get_data[key]

        log.debug("-   %s query: %s" % (provider, repr(query)))
        log.debug("--  %s url_search before token: %s" % (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" % (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" % (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        if 'initial_url' in definition and definition['initial_url']:
            url = definition['initial_url']
            if not url.startswith('http'):
                url = definition['root_url'] + url
            client.open(url)

        if token:
            log.info('[%s] Reusing existing token' % provider)
            url_search = url_search.replace('TOKEN', token)
        elif 'token' in definition:
            token_url = definition['base_url'] + definition['token']
            log.debug("[%s] Getting token for %s at %s" % (provider, provider, repr(token_url)))
            client.open(py2_encode(token_url))
            try:
                token_data = json.loads(client.content)
            except:
                log.error('%s: Failed to get token for %s' % (provider, repr(url_search)))
                return filtering.results
            log.debug("[%s] Token response for %s: %s" % (provider, provider, repr(token_data)))
            if 'token' in token_data:
                token = token_data['token']
                log.debug("[%s] Got token for %s: %s" % (provider, provider, repr(token)))
                url_search = url_search.replace('TOKEN', token)
            else:
                log.warning('%s: Unable to get token for %s' % (provider, repr(url_search)))

        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif token_auth:
            log.info("[%s] Reusing previous token authorization" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider, unicode)
            password = get_setting('%s_password' % provider, unicode)
            passkey = get_setting('%s_passkey' % provider, unicode)
            if not username and not password and not passkey:
                for addon_name in ('script.magnetic.%s' % provider, 'script.magnetic.%s-mc' % provider):
                    for setting in ('username', 'password'):
                        try:
                            value = xbmcaddon.Addon(addon_name).getSetting(setting)
                            set_setting('%s_%s' % (provider, setting), value)
                            if setting == 'username':
                                username = value
                            if setting == 'password':
                                password = value
                        except:
                            pass

            if username:
                client.username = username
                url_search = url_search.replace('USERNAME', username)

            if passkey:
                logged_in = True
                client.passkey = passkey
                url_search = url_search.replace('PASSKEY', passkey)

            elif 'login_object' in definition and definition['login_object']:
                login_object = None
                login_headers = None
                logged_in = skip_auth

                try:
                    login_object = definition['login_object'].replace('USERNAME', 'u"%s"' % username).replace('PASSWORD', 'u"%s"' % password)
                except Exception as e:
                    log.error("Could not make login object for %s: %s" % (provider, e))
                try:
                    if 'login_headers' in definition and definition['login_headers']:
                        login_headers = eval(definition['login_headers'])
                except Exception as e:
                    log.error("Could not make login headers for %s: %s" % (provider, e))

                # TODO generic flags in definitions for those...
                if 'csrf_token' in definition and definition['csrf_token']:
                    client.open(definition['root_url'] + definition['login_path'])
                    if client.content:
                        csrf_token = re.search(r'name=\"_?csrf_token\" value=\"(.*?)\"', client.content)
                        if csrf_token:
                            login_object = login_object.replace('CSRF_TOKEN', '"%s"' % csrf_token.group(1))
                        else:
                            logged_in = True

                if 'token_auth' in definition:
                    # log.debug("[%s] logging in with: %s" % (provider, login_object))
                    if client.open(definition['root_url'] + definition['token_auth'], post_data=eval(login_object)):
                        try:
                            token_data = json.loads(client.content)
                        except:
                            log.error('%s: Failed to get token from %s' % (provider, definition['token_auth']))
                            return filtering.results
                        log.debug("[%s] Token response for %s: %s" % (provider, provider, repr(token_data)))
                        if 'token' in token_data:
                            client.token = token_data['token']
                            log.debug("[%s] Auth token for %s: %s" % (provider, provider, repr(client.token)))
                        else:
                            log.error('[%s] Unable to get auth token for %s' % (provider, repr(url_search)))
                            return filtering.results
                        log.info('[%s] Token auth successful' % provider)
                        token_auth = True
                    else:
                        log.error("[%s] Token auth failed with response: %s" % (provider, repr(client.content)))
                        return filtering.results
                elif not logged_in and client.login(definition['root_url'], definition['login_path'],
                                                    eval(login_object), login_headers, definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider, client.status)
                    log.debug("[%s] Failed login content: %s", provider, repr(client.content))
                    return filtering.results

                if logged_in:
                    if provider == 'hd-torrents':
                        client.open(definition['root_url'] + '/torrents.php')
                        csrf_token = re.search(r'name="csrfToken" value="(.*?)"', client.content)
                        url_search = url_search.replace("CSRF_TOKEN", csrf_token.group(1))
                    client.save_cookies()

        log.info("[%s] >  %s search URL: %s" % (provider, definition['name'].rjust(longest), url_search))

        if 'headers' in definition and definition['headers']:
            headers = eval(definition['headers'])
            log.info("[%s] >  %s headers: %s" % (provider, definition['name'].rjust(longest), headers))

        client.open(py2_encode(url_search), post_data=payload, get_data=data, headers=headers)
        filtering.results.extend(
            generate_payload(provider,
                             generator(provider, client),
                             filtering,
                             verify_name,
                             verify_size))
    return filtering.results
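
# Placeholder substitution sketch mirroring the steps above: QUERY/EXTRA come
# from the processed keywords, spaces are normalized to the provider's
# separator, and FIRSTLETTER is the query's first character (values invented).
url_search = "/search?q=QUERY&extra=EXTRA&letter=FIRSTLETTER"
query, extra, separator = "some show", "2019", "%20"
url_search = url_search.replace('QUERY', query).replace('EXTRA', extra)
url_search = url_search.replace(' ', separator)
url_search = url_search.replace('FIRSTLETTER', query[:1])
# -> "/search?q=some%20show&extra=2019&letter=s"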
    def process_keywords(self, provider, text):
        """ Processes the query payload from a provider's keyword definitions

        Args:
            provider (str): Provider ID
            text     (str): Keyword placeholders from definitions, e.g. {title}

        Returns:
            str: Processed query keywords
        """
        keywords = self.read_keywords(text)
        replacing = get_setting("filter_quotes", bool)

        for keyword in keywords:
            keyword = keyword.lower()
            if 'title' in keyword:
                title = self.info["title"]
                language = definitions[provider]['language']
                use_language = None
                if ':' in keyword:
                    use_language = keyword.split(':')[1].lower()
                if provider not in self.language_exceptions and \
                   (use_language or self.kodi_language) and \
                   'titles' in self.info and self.info['titles']:
                    try:
                        if self.kodi_language and self.kodi_language in self.info[
                                'titles']:
                            use_language = self.kodi_language
                        if use_language not in self.info['titles']:
                            use_language = language
                            if 'original' in self.info['titles']:
                                title = self.info['titles']['original']
                        if use_language in self.info['titles'] and self.info[
                                'titles'][use_language]:
                            title = self.info['titles'][use_language]
                            title = normalize_string(title)
                            log.info("[%s] Using translated '%s' title %s" %
                                     (provider, use_language, repr(title)))
                            log.debug(
                                "[%s] Translated titles from Elementum: %s" %
                                (provider, repr(self.info['titles'])))
                    except Exception as e:
                        import traceback
                        log.error("%s failed with: %s" % (provider, repr(e)))
                        map(log.debug, traceback.format_exc().split("\n"))
                text = text.replace('{%s}' % keyword, title)

            if 'year' in keyword:
                text = text.replace('{%s}' % keyword, str(self.info["year"]))

            if 'season' in keyword:
                if '+' in keyword:
                    keys = keyword.split('+')
                    season = str(self.info["season"] + get_int(keys[1]))
                elif ':' in keyword:
                    keys = keyword.split(':')
                    season = ('%%.%sd' % keys[1]) % self.info["season"]
                else:
                    season = '%s' % self.info["season"]
                text = text.replace('{%s}' % keyword, season)

            if 'episode' in keyword:
                if '+' in keyword:
                    keys = keyword.split('+')
                    episode = str(self.info["episode"] + get_int(keys[1]))
                elif ':' in keyword:
                    keys = keyword.split(':')
                    episode = ('%%.%sd' % keys[1]) % self.info["episode"]
                else:
                    episode = '%s' % self.info["episode"]
                text = text.replace('{%s}' % keyword, episode)

        if replacing:
            text = text.replace(u"'", '')

        return text
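
# Worked example of the season/episode branches above, using the same
# ('%.Nd' % value) construction (keyword and values invented):
keys = 'season:2'.split(':')            # from a '{season:2}' keyword
season = ('%%.%sd' % keys[1]) % 1       # -> '01'
episode = ('%%.%sd' % '2') % 4          # -> '04'
# so "{title} S{season:2}E{episode:2}" expands to e.g. "Some Show S01E04"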
    def open(self,
             url,
             language='en',
             post_data=None,
             get_data=None,
             headers=None,
             proxy_url=None,
             charset='utf8'):
        """ Opens a connection to a webpage and saves its HTML content in ``self.content``

        Args:
            url        (str): The URL to open
            language   (str): The language code for the ``Content-Language`` header
            post_data (dict): POST data for the request
            get_data  (dict): GET data for the request
            headers   (dict): Extra headers; falsy values remove the matching
                              default header
            proxy_url  (str): Proxy URL passed down from Elementum
            charset    (str): Preferred response charset; the response's own
                              headers/meta tags take precedence
        """
        if not post_data:
            post_data = {}
        if get_data:
            url += '?' + urlencode(get_data)

        log.debug("Opening URL: %s" % repr(url))
        result = False

        data = urlencode(post_data) if len(post_data) > 0 else None
        req = urllib2.Request(url, data)

        self._read_cookies(url)
        log.debug("Cookies for %s: %s" % (repr(url), repr(self._cookies)))

        # Parsing proxy information
        proxy = {
            'enabled': get_setting("proxy_enabled", bool),
            'use_type': get_setting("proxy_use_type", int),
            'type': proxy_types[0],
            'host': get_setting("proxy_host", unicode),
            'port': get_setting("proxy_port", int),
            'login': get_setting("proxy_login", unicode),
            'password': get_setting("proxy_password", unicode),
        }

        try:
            proxy['type'] = proxy_types[get_setting("proxy_type", int)]
        except:
            pass

        handlers = [urllib2.HTTPCookieProcessor(self._cookies)]

        if get_setting("use_public_dns", bool):
            handlers.append(MyHTTPHandler)

        if proxy['enabled']:
            if proxy['use_type'] == 0 and proxy_url:
                log.debug("Setting proxy from Elementum: %s" % (proxy_url))
                handlers.append(parse_proxy_url(proxy_url))
            elif proxy['use_type'] == 1:
                log.debug("Setting proxy with custom settings: %s" %
                          (repr(proxy)))
                handlers.append(
                    SocksiPyHandler(proxytype=proxy['type'],
                                    proxyaddr=proxy['host'],
                                    proxyport=int(proxy['port']),
                                    username=proxy['login'],
                                    password=proxy['password'],
                                    rdns=True))
            elif proxy['use_type'] == 2:
                try:
                    handlers.append(antizapret.AntizapretProxyHandler())
                except Exception as e:
                    log.info("Could not create antizapret configuration: %s" %
                             (e))

        opener = urllib2.build_opener(*handlers)

        req.add_header('User-Agent', self.user_agent)
        req.add_header('Content-Language', language)
        req.add_header("Accept-Encoding", "gzip")
        req.add_header("Origin", url)
        req.add_header("Referer", url)

        if headers:
            for key, value in headers.iteritems():
                if value:
                    req.add_header(key, value)
                elif key.capitalize() in req.headers:
                    del req.headers[key.capitalize()]

        if self.token:
            req.add_header("Authorization", self.token)

        try:
            self._good_spider()
            with closing(opener.open(req)) as response:
                self.headers = response.headers
                self._save_cookies()
                if response.headers.get("Content-Encoding", "") == "gzip":
                    import zlib
                    self.content = zlib.decompressobj(
                        16 + zlib.MAX_WBITS).decompress(response.read())
                else:
                    self.content = response.read()

                charset = response.headers.getparam('charset')

                if not charset:
                    match = re.search(
                        """<meta(?!\s*(?:name|value)\s*=)[^>]*?charset\s*=[\s"']*([^\s"'/>]*)""",
                        self.content)
                    if match:
                        charset = match.group(1)

                # We try to remove non-utf chars. Should we?
                if (charset and charset.lower() == 'utf-8') or charset is None:
                    charset = 'utf-8-sig'  # Changing to utf-8-sig to remove BOM if found on decode from utf-8

                if charset:
                    log.debug('Decoding charset from %s for %s' %
                              (charset, repr(url)))
                    self.content = self.content.decode(charset, 'replace')

                self.status = response.getcode()
            result = True

        except urllib2.HTTPError as e:
            self.status = e.code
            log.warning("Status for %s : %s" % (repr(url), str(self.status)))
            if e.code == 403 or e.code == 503:
                log.warning("CloudFlared at %s, try enabling CloudHole" % url)

        except urllib2.URLError as e:
            self.status = repr(e.reason)
            log.warning("Status for %s : %s" % (repr(url), self.status))

        except Exception as e:
            import traceback
            log.error("%s failed with %s:" % (repr(url), repr(e)))
            map(log.debug, traceback.format_exc().split("\n"))

        log.debug("Status for %s : %s" % (repr(url), str(self.status)))

        return result
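
# Usage sketch for the urllib2-based open() above: post_data is form-encoded
# into a POST body, gzip responses are inflated, and the boolean result
# reports success (URL and credentials invented).
client = Client()
ok = client.open('http://example.com/login',
                 post_data={'username': 'user', 'password': 'secret'})
if ok:
    log.debug("Response length: %d" % len(client.content))
else:
    log.warning("Request failed, status: %s" % str(client.status))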