Beispiel #1
0
 def urlencode(query):
     # Dict[str, str] -> str
     return py2_decode(
         _urlencode({
             py2_encode(param): py2_encode(arg)
             for param, arg in query.items()
         }))
Beispiel #2
0
def reinterpret_windows1252_as_utf8(wrong_text):
    """
        Maybe this was always meant to be in a single-byte encoding, and it
        makes the most sense in utf-8.
    :param wrong_text: text with problems
    :type: str or unicode
    :return: corrected text
    :rtype: str or unicode
    """
    altered_bytes = []
    for char in wrong_text:
        if ord(char) in WINDOWS_1252_GREMLINS:
            altered_bytes.append(py2_encode(char, 'WINDOWS_1252'))

        else:
            altered_bytes.append(py2_encode(char, 'latin-1', 'replace'))

    return py2_decode(''.join(altered_bytes), 'utf-8', 'replace')
Beispiel #3
0
def reinterpret_latin1_as_windows1252(wrong_text):
    """
        Maybe this was always meant to be in a single-byte encoding, and it
        makes the most sense in Windows-1252.
    :param wrong_text: text with problems
    :type: str or unicode
    :return: corrected text
    :rtype: str or unicode
    """
    return py2_decode(py2_encode(wrong_text, 'latin-1'), 'WINDOWS_1252', 'replace')
Beispiel #4
0
        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash,
                            referer):
            try:
                log.debug("[%s] Getting subpage at %s" %
                          (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" %
                          (provider, repr(e)))
                map(log.debug, traceback.format_exc().split("\n"))

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey
            headers = {}

            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''

            if referer:
                headers['Referer'] = referer

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(py2_encode(uri[0]), headers=headers)

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' %
                          (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(
                            uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error(
                        "[%s] Subpage extraction for %s failed with: %s" %
                        (provider, repr(uri[0]), repr(e)))
                    map(log.debug, traceback.format_exc().split("\n"))

            log.debug("[%s] Subpage torrent for %s: %s" %
                      (provider, repr(uri[0]), torrent))
            ret = (name, info_hash, torrent, size, seeds, peers)

            # Cache this subpage result if another query would need to request same url.
            provider_cache[uri[0]] = torrent
            q.put_nowait(ret)
 def _execute(execPath):
     process = subprocess.Popen(py2_encode(execPath),
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                stdin=subprocess.PIPE)
     stdout_value, stderr_value = process.communicate()
     stdout_value = stdout_value.decode()
     stderr_value = stderr_value.decode()
     retCode = process.returncode
     if len(stderr_value) > 2:
         debug.logError(stderr_value)
     return stdout_value
Beispiel #6
0
def remove_accents(string):
    """
        Remove any accent in the string
    :param string: string to remove accents
    :type string: str or unicode
    :return: string without accents
    :rtype: unicode
    """
    if not isinstance(string, unicode):
        string = normalize_string(string)

    nfkd_form = unicodedata.normalize('NFKD', string)
    only_ascii = py2_encode(nfkd_form, 'ASCII', 'ignore').strip()
    return string if only_ascii == u'' else only_ascii
Beispiel #7
0
def reinterpret_latin1_as_utf8(wrong_text):
    new_bytes = py2_encode(wrong_text, 'latin-1', 'replace')
    return py2_decode(new_bytes, 'utf-8', 'replace')
def process(provider, generator, filtering, has_special, verify_name=True, verify_size=True, skip_auth=False, start_time=None, timeout=None):
    """ Method for processing provider results using its generator and Filtering class instance

    Args:
        provider        (str): Provider ID
        generator  (function): Generator method, can be either ``extract_torrents`` or ``extract_from_api``
        filtering (Filtering): Filtering class instance
        has_special    (bool): Whether title contains special chars
        verify_name    (bool): Whether to double-check the results' names match the query or not
        verify_size    (bool): Whether to check the results' file sizes
    """
    log.debug("[%s] execute_process for %s with %s" % (provider, provider, repr(generator)))
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))

    client = Client(info=filtering.info, request_charset=definition['charset'], response_charset=definition['response_charset'])
    token = None
    logged_in = False
    token_auth = False

    if get_setting('kodi_language', bool):
        kodi_language = xbmc.getLanguage(xbmc.ISO_639_1)
        if kodi_language:
            filtering.kodi_language = kodi_language
        language_exceptions = get_setting('language_exceptions')
        if language_exceptions.strip().lower():
            filtering.language_exceptions = re.split(r',\s?', language_exceptions)

    log.debug("[%s] Queries: %s" % (provider, filtering.queries))
    log.debug("[%s] Extras:  %s" % (provider, filtering.extras))

    for query, extra in zip(filtering.queries, filtering.extras):
        log.debug("[%s] Before keywords - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if has_special:
            # Removing quotes, surrounding {title*} keywords, when title contains special chars
            query = re.sub("[\"']({title.*?})[\"']", '\\1', query)

        query = filtering.process_keywords(provider, query)
        extra = filtering.process_keywords(provider, extra)

        if not query:
            continue
        elif extra == '-' and filtering.results:
            continue
        elif start_time and timeout and time.time() - start_time + 3 >= timeout:
            # Stop doing requests if there is 3 seconds left for the overall task
            continue

        try:
            if 'charset' in definition and definition['charset'] and 'utf' not in definition['charset'].lower():
                query = quote(query.encode(definition['charset']))
                extra = quote(extra.encode(definition['charset']))
            else:
                query = quote(py2_encode(query))
                extra = quote(py2_encode(extra))
        except Exception as e:
            log.debug("[%s] Could not quote the query (%s): %s" % (provider, query, e))
            pass

        log.debug("[%s] After keywords  - Query: %s - Extra: %s" % (provider, repr(query), repr(extra)))
        if not query:
            return filtering.results

        url_search = filtering.url.replace('QUERY', query)
        if extra and extra != '-':
            url_search = url_search.replace('EXTRA', extra)
        else:
            url_search = url_search.replace('EXTRA', '')

        url_search = url_search.replace(' ', definition['separator'])
        if definition['separator'] != '%20':
            url_search = url_search.replace('%20', definition['separator'])

        # MagnetDL fix...
        url_search = url_search.replace('FIRSTLETTER', query[:1])

        # Creating the payload for POST method
        if 'post_data' in definition and not filtering.post_data:
            filtering.post_data = eval(definition['post_data'])

        payload = dict()
        for key, value in iteritems(filtering.post_data):
            if 'QUERY' in value:
                payload[key] = filtering.post_data[key].replace('QUERY', query)
            else:
                payload[key] = filtering.post_data[key]
            payload[key] = urllib.unquote(payload[key])

        # Creating the payload for GET method
        headers = None
        data = None
        if filtering.get_data:
            data = dict()
            for key, value in iteritems(filtering.get_data):
                if 'QUERY' in value:
                    data[key] = filtering.get_data[key].replace('QUERY', query)
                else:
                    data[key] = filtering.get_data[key]

        log.debug("-   %s query: %s" % (provider, repr(query)))
        log.debug("--  %s url_search before token: %s" % (provider, repr(url_search)))
        log.debug("--- %s using POST payload: %s" % (provider, repr(payload)))
        log.debug("----%s filtering with post_data: %s" % (provider, repr(filtering.post_data)))

        # Set search's "title" in filtering to double-check results' names
        if 'filter_title' in definition and definition['filter_title']:
            filtering.filter_title = True
            filtering.title = query

        if 'initial_url' in definition and definition['initial_url']:
            url = definition['initial_url']
            if not url.startswith('http'):
                url = definition['root_url'] + url
            client.open(url)

        if token:
            log.info('[%s] Reusing existing token' % provider)
            url_search = url_search.replace('TOKEN', token)
        elif 'token' in definition:
            token_url = definition['base_url'] + definition['token']
            log.debug("[%s] Getting token for %s at %s" % (provider, provider, repr(token_url)))
            client.open(py2_encode(token_url))
            try:
                token_data = json.loads(client.content)
            except:
                log.error('%s: Failed to get token for %s' % (provider, repr(url_search)))
                return filtering.results
            log.debug("[%s] Token response for %s: %s" % (provider, provider, repr(token_data)))
            if 'token' in token_data:
                token = token_data['token']
                log.debug("[%s] Got token for %s: %s" % (provider, provider, repr(token)))
                url_search = url_search.replace('TOKEN', token)
            else:
                log.warning('%s: Unable to get token for %s' % (provider, repr(url_search)))

        if logged_in:
            log.info("[%s] Reusing previous login" % provider)
        elif token_auth:
            log.info("[%s] Reusing previous token authorization" % provider)
        elif 'private' in definition and definition['private']:
            username = get_setting('%s_username' % provider, unicode)
            password = get_setting('%s_password' % provider, unicode)
            passkey = get_setting('%s_passkey' % provider, unicode)
            if not username and not password and not passkey:
                for addon_name in ('script.magnetic.%s' % provider, 'script.magnetic.%s-mc' % provider):
                    for setting in ('username', 'password'):
                        try:
                            value = xbmcaddon.Addon(addon_name).getSetting(setting)
                            set_setting('%s_%s' % (provider, setting), value)
                            if setting == 'username':
                                username = value
                            if setting == 'password':
                                password = value
                        except:
                            pass

            if username:
                client.username = username
                url_search = url_search.replace('USERNAME', username)

            if passkey:
                logged_in = True
                client.passkey = passkey
                url_search = url_search.replace('PASSKEY', passkey)

            elif 'login_object' in definition and definition['login_object']:
                login_object = None
                login_headers = None
                logged_in = skip_auth

                try:
                    login_object = definition['login_object'].replace('USERNAME', 'u"%s"' % username).replace('PASSWORD', 'u"%s"' % password)
                except Exception as e:
                    log.error("Could not make login object for %s: %s" % (provider, e))
                try:
                    if 'login_headers' in definition and definition['login_headers']:
                        login_headers = eval(definition['login_headers'])
                except Exception as e:
                    log.error("Could not make login headers for %s: %s" % (provider, e))

                # TODO generic flags in definitions for those...
                if 'csrf_token' in definition and definition['csrf_token']:
                    client.open(definition['root_url'] + definition['login_path'])
                    if client.content:
                        csrf_token = re.search(r'name=\"_?csrf_token\" value=\"(.*?)\"', client.content)
                        if csrf_token:
                            login_object = login_object.replace('CSRF_TOKEN', '"%s"' % csrf_token.group(1))
                        else:
                            logged_in = True

                if 'token_auth' in definition:
                    # log.debug("[%s] logging in with: %s" % (provider, login_object))
                    if client.open(definition['root_url'] + definition['token_auth'], post_data=eval(login_object)):
                        try:
                            token_data = json.loads(client.content)
                        except:
                            log.error('%s: Failed to get token from %s' % (provider, definition['token_auth']))
                            return filtering.results
                        log.debug("[%s] Token response for %s: %s" % (provider, provider, repr(token_data)))
                        if 'token' in token_data:
                            client.token = token_data['token']
                            log.debug("[%s] Auth token for %s: %s" % (provider, provider, repr(client.token)))
                        else:
                            log.error('[%s] Unable to get auth token for %s' % (provider, repr(url_search)))
                            return filtering.results
                        log.info('[%s] Token auth successful' % provider)
                        token_auth = True
                    else:
                        log.error("[%s] Token auth failed with response: %s" % (provider, repr(client.content)))
                        return filtering.results
                elif not logged_in and client.login(definition['root_url'], definition['login_path'],
                                                    eval(login_object), login_headers, definition['login_failed']):
                    log.info('[%s] Login successful' % provider)
                    logged_in = True
                elif not logged_in:
                    log.error("[%s] Login failed: %s", provider, client.status)
                    log.debug("[%s] Failed login content: %s", provider, repr(client.content))
                    return filtering.results

                if logged_in:
                    if provider == 'hd-torrents':
                        client.open(definition['root_url'] + '/torrents.php')
                        csrf_token = re.search(r'name="csrfToken" value="(.*?)"', client.content)
                        url_search = url_search.replace("CSRF_TOKEN", csrf_token.group(1))
                    client.save_cookies()

        log.info("[%s] >  %s search URL: %s" % (provider, definition['name'].rjust(longest), url_search))

        if 'headers' in definition and definition['headers']:
            headers = eval(definition['headers'])
            log.info("[%s] >  %s headers: %s" % (provider, definition['name'].rjust(longest), headers))

        client.open(py2_encode(url_search), post_data=payload, get_data=data, headers=headers)
        filtering.results.extend(
            generate_payload(provider,
                             generator(provider, client),
                             filtering,
                             verify_name,
                             verify_size))
    return filtering.results
def extract_torrents(provider, client):
    """ Main torrent extraction generator for non-API based providers

    Args:
        provider  (str): Provider ID
        client (Client): Client class instance

    Yields:
        tuple: A torrent result
    """
    definition = definitions[provider]
    definition = get_alias(definition, get_setting("%s_alias" % provider))
    log.debug("[%s] Extracting torrents from %s using definitions: %s" % (provider, provider, repr(definition)))

    if not client.content:
        if debug_parser:
            log.debug("[%s] Parser debug | Page content is empty" % provider)

        raise StopIteration

    dom = Html().feed(client.content)

    key_search = get_search_query(definition, "key")
    row_search = get_search_query(definition, "row")
    name_search = get_search_query(definition, "name")
    torrent_search = get_search_query(definition, "torrent")
    info_hash_search = get_search_query(definition, "infohash")
    size_search = get_search_query(definition, "size")
    seeds_search = get_search_query(definition, "seeds")
    peers_search = get_search_query(definition, "peers")
    referer_search = get_search_query(definition, "referer")

    log.debug("[%s] Parser: %s" % (provider, repr(definition['parser'])))

    q = Queue()
    threads = []
    needs_subpage = 'subpage' in definition and definition['subpage']

    if needs_subpage:
        def extract_subpage(q, name, torrent, size, seeds, peers, info_hash, referer):
            try:
                log.debug("[%s] Getting subpage at %s" % (provider, repr(torrent)))
            except Exception as e:
                import traceback
                log.error("[%s] Subpage logging failed with: %s" % (provider, repr(e)))
                map(log.debug, traceback.format_exc().split("\n"))

            # New client instance, otherwise it's race conditions all over the place
            subclient = Client()
            subclient.passkey = client.passkey
            headers = {}

            if "subpage_mode" in definition:
                if definition["subpage_mode"] == "xhr":
                    headers['X-Requested-With'] = 'XMLHttpRequest'
                    headers['Content-Language'] = ''

            if referer:
                headers['Referer'] = referer

            uri = torrent.split('|')  # Split cookies for private trackers
            subclient.open(py2_encode(uri[0]), headers=headers)

            if 'bittorrent' in subclient.headers.get('content-type', ''):
                log.debug('[%s] bittorrent content-type for %s' % (provider, repr(torrent)))
                if len(uri) > 1:  # Stick back cookies if needed
                    torrent = '%s|%s' % (torrent, uri[1])
            else:
                try:
                    torrent = extract_from_page(provider, subclient.content)
                    if torrent and not torrent.startswith('magnet') and len(uri) > 1:  # Stick back cookies if needed
                        torrent = '%s|%s' % (torrent, uri[1])
                except Exception as e:
                    import traceback
                    log.error("[%s] Subpage extraction for %s failed with: %s" % (provider, repr(uri[0]), repr(e)))
                    map(log.debug, traceback.format_exc().split("\n"))

            log.debug("[%s] Subpage torrent for %s: %s" % (provider, repr(uri[0]), torrent))
            ret = (name, info_hash, torrent, size, seeds, peers)

            # Cache this subpage result if another query would need to request same url.
            provider_cache[uri[0]] = torrent
            q.put_nowait(ret)

    if not dom:
        if debug_parser:
            log.debug("[%s] Parser debug | Could not parse DOM from page content" % provider)

        raise StopIteration

    if debug_parser:
        log.debug("[%s] Parser debug | Page content: %s" % (provider, client.content.replace('\r', '').replace('\n', '')))

    key = eval(key_search) if key_search else ""
    if key_search and debug_parser:
        key_str = key.__str__()
        log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'key', key_search, key_str.replace('\r', '').replace('\n', '')))

    items = eval(row_search)
    if debug_parser:
        log.debug("[%s] Parser debug | Matched %d items for '%s' query '%s'" % (provider, len(items), 'row', row_search))

    for item in items:
        if debug_parser:
            item_str = item.__str__()
            log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'row', row_search, item_str.replace('\r', '').replace('\n', '')))

        if not item:
            continue

        try:
            name = eval(name_search) if name_search else ""
            torrent = eval(torrent_search) if torrent_search else ""
            size = eval(size_search) if size_search else ""
            seeds = eval(seeds_search) if seeds_search else ""
            peers = eval(peers_search) if peers_search else ""
            info_hash = eval(info_hash_search) if info_hash_search else ""
            referer = eval(referer_search) if referer_search else ""

            if 'magnet:?' in torrent:
                torrent = torrent[torrent.find('magnet:?'):]

            if debug_parser:
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'name', name_search, name))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'torrent', torrent_search, torrent))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'size', size_search, size))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'seeds', seeds_search, seeds))
                log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'peers', peers_search, peers))
                if info_hash_search:
                    log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'info_hash', info_hash_search, info_hash))
                if referer_search:
                    log.debug("[%s] Parser debug | Matched '%s' iteration for query '%s': %s" % (provider, 'info_hash', referer_search, referer))

            # Pass client cookies with torrent if private
            if not torrent.startswith('magnet'):
                user_agent = USER_AGENT

                if client.passkey:
                    torrent = torrent.replace('PASSKEY', client.passkey)
                elif client.token:
                    headers = {'Authorization': client.token, 'User-Agent': user_agent}
                    log.debug("[%s] Appending headers: %s" % (provider, repr(headers)))
                    torrent = append_headers(torrent, headers)
                    log.debug("[%s] Torrent with headers: %s" % (provider, repr(torrent)))
                else:
                    parsed_url = urlparse(torrent.split('|')[0])
                    cookie_domain = '{uri.netloc}'.format(uri=parsed_url)
                    cookie_domain = re.sub('www\d*\.', '', cookie_domain)
                    cookies = []
                    for cookie in client._cookies:
                        if cookie_domain in cookie.domain:
                            cookies.append(cookie)
                    headers = {}
                    if cookies:
                        headers = {'User-Agent': user_agent}
                        if client.request_headers:
                            headers.update(client.request_headers)
                        if client.url:
                            headers['Referer'] = client.url
                            headers['Origin'] = client.url
                        # Need to set Cookie afterwards to avoid rewriting it with session Cookies
                        headers['Cookie'] = ";".join(["%s=%s" % (c.name, c.value) for c in cookies])
                    else:
                        headers = {'User-Agent': user_agent}

                    torrent = append_headers(torrent, headers)

            if name and torrent and needs_subpage and not torrent.startswith('magnet'):
                if not torrent.startswith('http'):
                    torrent = definition['root_url'] + py2_encode(torrent)
                # Check if this url was previously requested, to avoid doing same job again.
                uri = torrent.split('|')
                if uri and uri[0] and uri[0] in provider_cache and provider_cache[uri[0]]:
                    yield (name, info_hash, provider_cache[uri[0]], size, seeds, peers)
                    continue

                t = Thread(target=extract_subpage, args=(q, name, torrent, size, seeds, peers, info_hash, referer))
                threads.append(t)
            else:
                yield (name, info_hash, torrent, size, seeds, peers)
        except Exception as e:
            log.error("[%s] Got an exception while parsing results: %s" % (provider, repr(e)))

    if needs_subpage:
        log.debug("[%s] Starting subpage threads..." % provider)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        for i in range(q.qsize()):
            ret = q.get_nowait()
            log.debug("[%s] Queue %d got: %s" % (provider, i, repr(ret)))
            yield ret

    # Save cookies in cookie jar
    client.save_cookies()
 def strftime(format, t=None):
     return py2_decode(_strftime(py2_encode(format)))
 def parse_qs(qs):
     # str -> Dict[str, List[str]]
     return {py2_decode(param): [py2_decode(a) for a in args]
             for param, args in _parse_qs(py2_encode(qs)).items()}