Example 1
    def httpRequest(self, url):
        headers = {
            'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
            'Accept-Encoding': 'none',
            'Accept-Language': 'en-US,en;q=0.8',
            'Connection': 'keep-alive'
        }

        req = urllib.request.Request(url, headers=headers)

        if config.trackers_proxy == "tor":
            tor_manager = self.site.connection_server.tor_manager
            handler = sockshandler.SocksiPyHandler(socks.SOCKS5,
                                                   tor_manager.proxy_ip,
                                                   tor_manager.proxy_port)
            opener = urllib.request.build_opener(handler)
            return opener.open(req, timeout=50)
        elif config.trackers_proxy == "disable":
            return urllib.request.urlopen(req, timeout=25)
        else:
            proxy_ip, proxy_port = config.trackers_proxy.split(":")
            handler = sockshandler.SocksiPyHandler(socks.SOCKS5, proxy_ip,
                                                   int(proxy_port))
            opener = urllib.request.build_opener(handler)
            return opener.open(req, timeout=50)
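Stripped of the project-specific configuration above, the core pattern shared by the examples on this page fits in a few lines. A minimal, self-contained sketch, assuming PySocks is installed and a SOCKS5 proxy (e.g. a local Tor client) listens on 127.0.0.1:9050:

import socks           # PySocks
import sockshandler    # ships with PySocks
import urllib.request

# Route the request through the SOCKS5 proxy instead of connecting directly.
handler = sockshandler.SocksiPyHandler(socks.SOCKS5, '127.0.0.1', 9050)
opener = urllib.request.build_opener(handler)
with opener.open('http://example.com', timeout=30) as response:
    print(response.status)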
Example 2
    def _additional_handlers(self):

        handlers = []

        if self.session.get('proxy'):
            protocol, host, port = self._get_proxy()

            if protocol and host and port:
                handlers.append(
                    sockshandler.SocksiPyHandler(
                        protocol,
                        host,
                        port
                    )
                )
            else:
                raise ChannelException(messages.channels.error_proxy_format)

        # Skip certificate checks
        ctx = ssl.create_default_context()
        ctx.check_hostname = False
        ctx.verify_mode = ssl.CERT_NONE
        
        handlers.append(urllib2.HTTPSHandler(context=ctx))

        return handlers
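The returned handlers are presumably combined into an opener by the caller; a hypothetical usage, reusing the urllib2 alias from the code above:

# Hypothetical caller-side code; _additional_handlers and url are
# assumed to come from the surrounding class.
opener = urllib2.build_opener(*self._additional_handlers())
response = opener.open(url)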
Example 3
def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cookiejar_send=None, cookiejar_receive=None, use_tor=True, return_response=False):
    '''
    When cookiejar_send is set to a CookieJar object,
     those cookies will be sent in the request (but cookies in response will not be merged into it)
    When cookiejar_receive is set to a CookieJar object,
     cookies received in the response will be merged into the object (nothing will be sent from it)
    When both are set to the same object, cookies will be sent from the object,
     and response cookies will be merged into it.
    '''
    headers = dict(headers)     # Note: Calling dict() on a dict will make a copy
    headers['Accept-Encoding'] = 'gzip, br'

    # prevent the Python version from being leaked by urllib if no User-Agent
    #  is provided (urllib will use e.g. Python-urllib/3.6 otherwise)
    if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers:
        headers['User-Agent'] = 'Python-urllib'

    method = "GET"
    if data is not None:
        method = "POST"
        if isinstance(data, str):
            data = data.encode('ascii')
        elif not isinstance(data, bytes):
            data = urllib.parse.urlencode(data).encode('ascii')

    start_time = time.time()

    if cookiejar_send is not None or cookiejar_receive is not None:     # Use urllib
        req = urllib.request.Request(url, data=data, headers=headers)

        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)

        if use_tor and settings.route_tor:
            opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9150), cookie_processor)
        else:
            opener = urllib.request.build_opener(cookie_processor)

        response = opener.open(req, timeout=timeout)
        response_time = time.time()

        content = response.read()

    else:           # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
        pool = get_pool(use_tor and settings.route_tor)

        response = pool.request(method, url, headers=headers, timeout=timeout, preload_content=False, decode_content=False)
        response_time = time.time()

        content = response.read()
        response.release_conn()

    read_finish = time.time()
    if report_text:
        print(report_text, '    Latency:', round(response_time - start_time,3), '    Read time:', round(read_finish - response_time,3))
    content = decode_content(content, response.getheader('Content-Encoding', default='identity'))

    if return_response:
        return content, response
    return content
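HTTPAsymmetricCookieProcessor is referenced but not defined in this example. A minimal sketch matching the send/receive semantics described in the docstring, modeled on urllib.request.HTTPCookieProcessor (an assumption, not necessarily the project's actual implementation):

import urllib.request

class HTTPAsymmetricCookieProcessor(urllib.request.BaseHandler):
    '''Use separate cookiejars for sending and receiving cookies.'''
    def __init__(self, cookiejar_send=None, cookiejar_receive=None):
        self.cookiejar_send = cookiejar_send
        self.cookiejar_receive = cookiejar_receive

    def http_request(self, request):
        # Attach cookies from the send jar to the outgoing request.
        if self.cookiejar_send is not None:
            self.cookiejar_send.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        # Merge cookies from the response into the receive jar.
        if self.cookiejar_receive is not None:
            self.cookiejar_receive.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response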
Example 4
def fetch_url_response(url, headers=(), timeout=15, data=None,
                       cookiejar_send=None, cookiejar_receive=None,
                       use_tor=True, max_redirects=None):
    '''
    returns response, cleanup_function
    When cookiejar_send is set to a CookieJar object,
     those cookies will be sent in the request (but cookies in response will not be merged into it)
    When cookiejar_receive is set to a CookieJar object,
     cookies received in the response will be merged into the object (nothing will be sent from it)
    When both are set to the same object, cookies will be sent from the object,
     and response cookies will be merged into it.
    '''
    headers = dict(headers)     # Note: Calling dict() on a dict will make a copy
    if have_brotli:
        headers['Accept-Encoding'] = 'gzip, br'
    else:
        headers['Accept-Encoding'] = 'gzip'

    # prevent the Python version from being leaked by urllib if no User-Agent
    #  is provided (urllib will use e.g. Python-urllib/3.6 otherwise)
    if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers:
        headers['User-Agent'] = 'Python-urllib'

    method = "GET"
    if data is not None:
        method = "POST"
        if isinstance(data, str):
            data = data.encode('utf-8')
        elif not isinstance(data, bytes):
            data = urllib.parse.urlencode(data).encode('utf-8')

    if cookiejar_send is not None or cookiejar_receive is not None:     # Use urllib
        req = urllib.request.Request(url, data=data, headers=headers)

        cookie_processor = HTTPAsymmetricCookieProcessor(cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)

        if use_tor and settings.route_tor:
            opener = urllib.request.build_opener(sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", settings.tor_port), cookie_processor)
        else:
            opener = urllib.request.build_opener(cookie_processor)

        response = opener.open(req, timeout=timeout)
        cleanup_func = (lambda r: None)

    else:           # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
        # default: Retry.DEFAULT = Retry(3)
        # (in connectionpool.py in urllib3)
        # According to the documentation for urlopen, a redirect counts as a
        # retry. So there are 3 redirects max by default.
        if max_redirects:
            retries = urllib3.Retry(3+max_redirects, redirect=max_redirects)
        else:
            retries = urllib3.Retry(3)
        pool = get_pool(use_tor and settings.route_tor)
        response = pool.request(method, url, headers=headers, body=data,
                                timeout=timeout, preload_content=False,
                                decode_content=False, retries=retries)
        cleanup_func = (lambda r: r.release_conn())

    return response, cleanup_func
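Because the cleanup step differs between the urllib branch (a no-op) and the urllib3 branch (releasing the pooled connection), callers are expected to invoke the returned function once they are done reading. A hypothetical usage:

response, cleanup_func = fetch_url_response('https://example.com',
                                            max_redirects=5)
try:
    content = response.read()
finally:
    cleanup_func(response)  # no-op for urllib; releases the urllib3 connection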
Example 5
    def __init__(self,
                 proxy_address='127.0.0.1',
                 proxy_port=9050,
                 *,
                 user_agent=None):
        logger.info('Starting dummy browser')
        handler = sockshandler.SocksiPyHandler(socks.SOCKS5, proxy_address,
                                               proxy_port)
        self._opener = urllib.request.build_opener(handler)
        self._user_agent = user_agent
        self._open = True
        logger.info('Started dummy browser')
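The opener and user agent are stored but not yet used in __init__. A hypothetical companion method on the same class, purely illustrative, might look like this:

    def get(self, url, timeout=30):
        # Send the stored User-Agent if one was given; otherwise let
        # urllib use its default.
        headers = {'User-Agent': self._user_agent} if self._user_agent else {}
        req = urllib.request.Request(url, headers=headers)
        return self._opener.open(req, timeout=timeout)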
Example 6
    def test_urllib2_socks5_handler(self):
        content = b'zzz'
        self.test_server.response['data'] = content
        opener = urllib2.build_opener(
            sockshandler.SocksiPyHandler(socks.SOCKS5, PROXY_HOST_IP,
                                         SOCKS5_PROXY_PORT))
        res = opener.open(self.test_server.get_url())
        body = res.read()
        self.assertTrue(self.test_server.request['headers']
                        ['user-agent'].startswith('Python-urllib'))
        self.assertEqual('%s:%d' % (TEST_SERVER_HOST, TEST_SERVER_PORT),
                         self.test_server.request['headers']['host'])
        self.assertEqual(200, res.getcode())
        self.assertEqual(content, body)
Example 7
    def _additional_handlers(self):

        handlers = []

        if self.session.get('proxy'):
            protocol, host, port = self._get_proxy()

            if protocol and host and port:
                handlers.append(
                    sockshandler.SocksiPyHandler(protocol, host, port))
            else:
                raise ChannelException(messages.channels.error_proxy_format)

        return handlers
Example 8
def urllib2_handler_SOCKS5_test():
    opener = urllib2.build_opener(
        sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5, "127.0.0.1",
                                     1081))
    status = opener.open("http://api.externalip.net/ip/").getcode()
    assert status == 200
Example 9
def fetch_url(url,
              headers=(),
              timeout=15,
              report_text=None,
              data=None,
              cookiejar_send=None,
              cookiejar_receive=None,
              use_tor=True,
              return_response=False,
              debug_name=None):
    '''
    When cookiejar_send is set to a CookieJar object,
     those cookies will be sent in the request (but cookies in response will not be merged into it)
    When cookiejar_receive is set to a CookieJar object,
     cookies received in the response will be merged into the object (nothing will be sent from it)
    When both are set to the same object, cookies will be sent from the object,
     and response cookies will be merged into it.
    '''
    headers = dict(headers)  # Note: Calling dict() on a dict will make a copy
    if have_brotli:
        headers['Accept-Encoding'] = 'gzip, br'
    else:
        headers['Accept-Encoding'] = 'gzip'

    # prevent the Python version from being leaked by urllib if no User-Agent
    #  is provided (urllib will use e.g. Python-urllib/3.6 otherwise)
    if 'User-Agent' not in headers and 'user-agent' not in headers and 'User-agent' not in headers:
        headers['User-Agent'] = 'Python-urllib'

    method = "GET"
    if data is not None:
        method = "POST"
        if isinstance(data, str):
            data = data.encode('ascii')
        elif not isinstance(data, bytes):
            data = urllib.parse.urlencode(data).encode('ascii')

    start_time = time.time()

    if cookiejar_send is not None or cookiejar_receive is not None:  # Use urllib
        req = urllib.request.Request(url, data=data, headers=headers)

        cookie_processor = HTTPAsymmetricCookieProcessor(
            cookiejar_send=cookiejar_send, cookiejar_receive=cookiejar_receive)

        if use_tor and settings.route_tor:
            opener = urllib.request.build_opener(
                sockshandler.SocksiPyHandler(socks.PROXY_TYPE_SOCKS5,
                                             "127.0.0.1", 9150),
                cookie_processor)
        else:
            opener = urllib.request.build_opener(cookie_processor)

        response = opener.open(req, timeout=timeout)
        response_time = time.time()

        content = response.read()

    else:  # Use a urllib3 pool. Cookies can't be used since urllib3 doesn't have easy support for them.
        pool = get_pool(use_tor and settings.route_tor)

        response = pool.request(method,
                                url,
                                headers=headers,
                                timeout=timeout,
                                preload_content=False,
                                decode_content=False)
        response_time = time.time()

        content = response.read()
        response.release_conn()

    if (response.status == 429 and content.startswith(b'<!DOCTYPE')
            and b'Our systems have detected unusual traffic' in content):
        ip = re.search(br'IP address: ((?:[\da-f]*:)+[\da-f]+|(?:\d+\.)+\d+)',
                       content)
        ip = ip.group(1).decode('ascii') if ip else None
        raise FetchError('429', reason=response.reason, ip=ip)

    elif response.status >= 400:
        raise FetchError(str(response.status), reason=response.reason, ip=None)

    read_finish = time.time()
    if report_text:
        print(report_text, '    Latency:', round(response_time - start_time,
                                                 3), '    Read time:',
              round(read_finish - response_time, 3))
    content = decode_content(
        content, response.getheader('Content-Encoding', default='identity'))

    if settings.debugging_save_responses and debug_name is not None:
        save_dir = os.path.join(settings.data_dir, 'debug')
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        with open(os.path.join(save_dir, debug_name), 'wb') as f:
            f.write(content)

    if return_response:
        return content, response
    return content
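FetchError is raised above but not defined in the example. One plausible definition, consistent with how it is constructed (an assumption, not the project's actual class):

class FetchError(Exception):
    def __init__(self, code, reason='', ip=None):
        Exception.__init__(self,
                           'HTTP error during request: ' + code + ' ' + reason)
        self.code = code
        self.reason = reason
        self.ip = ip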
Example 10
def urllib2_handler_HTTP_test():
    opener = urllib2.build_opener(sockshandler.SocksiPyHandler(socks.HTTP, "127.0.0.1", 8080))
    status = opener.open("http://api.externalip.net/ip/").getcode()
    assert status == 200
Example 11
def build_urllib_opener(
        proxies=None, ssl_check_hostname=None,
        extra_handlers=[], extra_pre_handlers=[]):
    """
    A replacement for :py:func:`urllib.request.build_opener` that takes care of
    using the current user's global settings (Keypirinha's and/or the system's)
    regarding network connections, by inserting and configuring one or several
    connection handlers (derived from :py:class:`urllib.request.BaseHandler`).

    Examples::

        # example 1
        opener = build_urllib_opener()
        with opener.open("http://httpbin.org/user-agent") as response:
            print(response.read())

        # example 2: HTTP proxy
        proxies = {'http': "http://*****:*****@127.0.0.1:8080"}
        opener = build_urllib_opener(proxies)
        with opener.open("http://httpbin.org/ip") as response:
            print(response.read())

    Args:
        proxies (dict): A dictionary of proxies to pass to the constructor of
            :py:class:`urllib.request.ProxyHandler`, if any. Notes:

            * ``None`` (default; **recommended**) means the proxies configured
              by the user at Keypirinha's level, or failing that at system
              level, will be used.
            * An empty dictionary (i.e. ``{}``) means **no** proxy will be
              configured, regardless of user or machine settings. Note that
              going against the user's will is against Keypirinha's design
              policy!
            * See the notes below about ``SOCKS`` proxies.
            * See :py:func:`proxies_list_to_dict` if you need to convert a list
              of proxy URLs into a dictionary.

        extra_handlers (list): A list/tuple of extra handlers to **append** to
            the final handlers chain before passing it to
            :py:func:`urllib.request.build_opener`.

        extra_pre_handlers (list): A list/tuple of extra handlers to **prepend**
            to the final handlers chain before passing it to
            :py:func:`urllib.request.build_opener`.

            **CAUTION:** This parameter is here for convenience and you should
            use it only if you know what you are doing as it may interfere with
            the handlers added by this function.

        ssl_check_hostname (bool): Should the hostname be checked against the
            received security certificate? This argument is equivalent to
            tweaking :py:attr:`ssl.SSLContext.check_hostname` and
            :py:attr:`ssl.SSLContext.verify_mode`.

            Default behavior of the ``urllib`` module (i.e. ``None`` value) is
            to check the hostname unless explicitly specified here (boolean),
            in which case this function will either add an
            :py:class:`urllib.request.HTTPSHandler` handler with the appropriate
            arguments to the chain, or, if caller already added a
            :py:class:`urllib.request.HTTPSHandler` handler (either in the
            *extra_handlers* or *extra_pre_handlers* list), it will be modified
            accordingly.

    Returns:
        UrllibOpener: A
        :py:class:`urllib.request.OpenerDirector`-compatible opener object.

    Note:
        Notes about ``SOCKS`` proxy support:

        * Support for ``SOCKS`` proxy (v4 and v5) is **experimental** and uses
          the `PySocks <https://github.com/Anorov/PySocks>`_ third-party module
          under the hood.
        * DNS requests do not go through the proxy server.
        * IPv6 connections through the proxy server are not supported.
        * Tests have shown that if proxies for several schemes have been
          specified, an ``UNKNOWN_PROTOCOL`` SSL error may occur under some
          circumstances. For that reason, if a ``SOCKS`` proxy is specified, it
          takes precedence over any other proxy servers in the dictionary,
          which are simply ignored by this function in favor of the ``SOCKS``
          proxy.
    """
    def _has_handler(handler_type):
        for h in (*extra_pre_handlers, *extra_handlers):
            if isinstance(h, handler_type):
                return h
        return None

    own_handlers = []

    # get proxies from the application if needed
    if proxies is None:
        proxies = proxies_to_dict(kp.settings().get_multiline(
            "proxy", section="network", fallback=[], keep_empty_lines=False))

    # proxy servers
    if proxies is not None:
        # socks proxy
    # in case the user specified a "socks" proxy, we have to extract it from
    # the dict and insert it as a different handler in the final handlers chain
    # since it is not supported by the standard urllib module
        got_socks_proxy = False
        for scheme, proxy_url in proxies.items():
            scheme_lc = scheme.lower()
            if scheme_lc not in ("socks", "socks4", "socks5"):
                continue

            if scheme_lc == "socks4":
                proxy_type = socks.PROXY_TYPE_SOCKS4
            else:
                proxy_type = socks.PROXY_TYPE_SOCKS5

            proxy_info = urllib.parse.urlsplit(proxy_url)
            if not proxy_info.hostname:
                raise ValueError("malformed proxy url: {}".format(proxy_url))
            if not proxy_info.port:
                raise ValueError("port number required for proxy: {}".format(proxy_url))

            # SOCKS5 only: DNS queries should be performed on the remote side
            # (default behavior in the "socks" module). Unfortunately, in
            # practice, that does not prevent DNS requests from being made
            # outside of the SOCKS tunnel, as that would require
            # monkey-patching the "socket" module and
            # would not work in some cases anyway.
            # More info: https://github.com/Anorov/PySocks/issues/22
            proxy_rdns = True

            # note to self: sockshandler.SocksiPyHandler is derived from
            # urllib.request.HTTPSHandler!!!
            own_handlers.append(sockshandler.SocksiPyHandler(
                        proxy_type, proxy_info.hostname, proxy_info.port,
                        proxy_rdns, proxy_info.username, proxy_info.password))

            got_socks_proxy = True
            break

        # Tests have shown that if mixed proxies are specified (i.e. "http" +
        # "https" + "socks") and there's a SOCKS proxy in the list, "SSL:
        # UNKNOWN_PROTOCOL" errors occur with HTTPS urls. As a result, when a
        # SOCKS proxy is specified, it must be the only proxy in the list.
        if not got_socks_proxy:
            own_handlers.append(urllib.request.ProxyHandler(proxies))

    if ssl_check_hostname is None:
        # allow user to override default behavior if needed
        ssl_check_hostname = kp.settings().get_bool(
                        "ssl_check_hostname", section="network", fallback=None)

    if ssl_check_hostname is not None:
        https_handler = _has_handler(urllib.request.HTTPSHandler)
        if https_handler is not None:
            if ssl_check_hostname:
                https_handler._context.check_hostname = True
                https_handler._context.verify_mode = ssl.CERT_REQUIRED
            else:
                https_handler._context.check_hostname = False
                https_handler._context.verify_mode = ssl.CERT_NONE
        else:
            ssl_ctx = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH)
            if ssl_check_hostname:
                # This is the default behavior when create_default_context() is
                # passed the SERVER_AUTH purpose.
                # "Explicit is better than implicit" (c)
                ssl_ctx.check_hostname = True
                ssl_ctx.verify_mode = ssl.CERT_REQUIRED
            else:
                ssl_ctx.check_hostname = False
                ssl_ctx.verify_mode = ssl.CERT_NONE
            own_handlers.append(urllib.request.HTTPSHandler(context=ssl_ctx))

    return UrllibOpener(urllib.request.build_opener(
                        *extra_pre_handlers, *own_handlers, *extra_handlers))
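Building on the two examples in the docstring, a third usage sketch covering the SOCKS path and the ssl_check_hostname switch (the proxy address is hypothetical):

# example 3 (hypothetical): SOCKS5 proxy, certificate hostname checks disabled
proxies = {'socks5': "socks5://127.0.0.1:1080"}
opener = build_urllib_opener(proxies, ssl_check_hostname=False)
with opener.open("https://httpbin.org/ip") as response:
    print(response.read())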
Example 12
def urllib2_handler_SOCKS5_test():
    opener = urllib2.build_opener(sockshandler.SocksiPyHandler(socks.SOCKS5, "127.0.0.1", 1081))
    status = opener.open("http://ifconfig.me/ip").getcode()
    assert status == 200