Example #1
0
    def fetch(self, request, opener=None, summary=None):
        """Open ``request`` and return a ``(response, body)`` tuple.

        Returns ``(None, None)`` without opening anything when HTTP access
        is disabled.  When ``opener`` is omitted, a minimal opener holding
        only ``HTTPDefaultErrorHandler`` and ``HTTPSHandler`` is built.

        The elapsed time and URL are always logged.  When HTTP logging is
        enabled, the URL, request data and response body are also written to
        a timestamped file in ``<config dir>/http-log``; ``summary``, if
        given, is appended to that file name after ``_safe_str``.
        """
        if not self.__enable_http:
            return (None, None)

        # Imported locally so the block stays self-contained.  default_timer
        # is a wall-clock timer on every platform, whereas time.clock()
        # measured CPU time on Unix and was removed in Python 3.8.
        from timeit import default_timer

        if opener is None:
            opener = OpenerDirector()
            opener.add_handler(HTTPDefaultErrorHandler())
            opener.add_handler(HTTPSHandler())

        started = default_timer()
        response = opener.open(request)
        body = response.read()
        elapsed = timedelta(seconds=default_timer() - started)
        url = request.get_full_url()
        self.__context.get_logger().info('HTTP time: %s\n%s' % (elapsed, url))

        if self.__log_http:
            log_dir = os.path.join(self.__context.get_config_dir(), 'http-log')
            makedirs(log_dir)
            log_file = os.path.join(log_dir,
                                    datetime.utcnow().strftime(
                                        '%Y-%m-%d-%H-%M-%S-%f'))
            if summary is not None:
                log_file += '-' + _safe_str(summary)
            # ``with`` guarantees the handle is closed even if write() fails.
            with open(log_file, 'w') as fp:
                fp.write('\n\n'.join([
                    url,
                    request.get_data() or 'No request data',
                    body or 'No response body',
                ]))

        return (response, body)
Example #2
0
 def test_proxy_https(self):
     # After opening an https URL through a ProxyHandler, the request's host
     # must be the proxy and the https_open mock must have been called.
     director = OpenerDirector()
     director.add_handler(
         urllib.request.ProxyHandler({"https": "proxy.example.com:3128"}))
     mock_handlers = add_ordered_mock_handlers(
         director, [[("https_open", "return response")]])
     request = Request("https://www.example.com/")
     self.assertEqual(request.get_host(), "www.example.com")
     director.open(request)
     self.assertEqual(request.get_host(), "proxy.example.com:3128")
     recorded = [call[0:2] for call in director.calls]
     self.assertEqual([(mock_handlers[0], "https_open")], recorded)
Example #3
0
def post_info(url, **kwargs):
    """POST ``kwargs`` to ``url`` as a URL-encoded form; return the response.

    The request goes through a bare ``OpenerDirector`` with only an
    ``HTTPHandler`` installed, so no other handler (redirect, proxy, HTTP
    error processing, HTTPS) takes part.
    """
    opener = OpenerDirector()
    opener.add_handler(HTTPHandler())
    # urlencode() returns text, but Python 3's Request only accepts bytes as
    # POST data.  The output is percent-encoded, so ASCII always suffices.
    form_data = urlencode(kwargs).encode('ascii')
    req = Request(
        url,
        data=form_data,
        headers={
            'Content-Type': 'application/x-www-form-urlencoded',
        }
    )
    return opener.open(req)
Example #4
0
    def test_proxy_https(self):
        """An https request opened via ProxyHandler ends up aimed at the proxy."""
        proxy_map = dict(https="proxy.example.com:3128")
        opener = OpenerDirector()
        opener.add_handler(urllib.request.ProxyHandler(proxy_map))
        spec = [[("https_open", "return response")]]
        mocks = add_ordered_mock_handlers(opener, spec)

        target = Request("https://www.example.com/")
        self.assertEqual(target.get_host(), "www.example.com")
        opener.open(target)
        self.assertEqual(target.get_host(), "proxy.example.com:3128")

        expected_calls = [(mocks[0], "https_open")]
        self.assertEqual(expected_calls, [entry[0:2] for entry in opener.calls])
Example #5
0
 def test_proxy_no_proxy(self):
     """Hosts matching ``no_proxy`` keep their own host; others get the proxy."""
     # Remember any pre-existing value so the environment is left exactly as
     # it was found, even when one of the assertions below fails.
     previous = os.environ.get('no_proxy')
     os.environ['no_proxy'] = 'python.org'
     try:
         o = OpenerDirector()
         ph = urllib.request.ProxyHandler(dict(http="proxy.example.com"))
         o.add_handler(ph)
         # Not covered by no_proxy: the request is redirected to the proxy.
         req = Request("http://www.perl.org/")
         self.assertEqual(req.get_host(), "www.perl.org")
         o.open(req)
         self.assertEqual(req.get_host(), "proxy.example.com")
         # Covered by no_proxy: the request host is left untouched.
         req = Request("http://www.python.org")
         self.assertEqual(req.get_host(), "www.python.org")
         o.open(req)
         self.assertEqual(req.get_host(), "www.python.org")
     finally:
         if previous is None:
             os.environ.pop('no_proxy', None)
         else:
             os.environ['no_proxy'] = previous
Example #6
0
 def test_basic_auth(self, quote_char='"'):
     # Sets up a 401 "Basic" challenge whose realm is wrapped in quote_char
     # and delegates the actual checks to _test_basic_auth.
     protected_url = "http://acme.example.com/protected"
     realm = "ACME Widget Store"
     opener = OpenerDirector()
     password_manager = MockPasswordManager()
     auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
     challenge = ('WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
                  (quote_char, realm, quote_char))
     http_handler = MockHTTPHandler(401, challenge)
     for handler in (auth_handler, http_handler):
         opener.add_handler(handler)
     self._test_basic_auth(opener, auth_handler, "Authorization",
                           realm, http_handler, password_manager,
                           protected_url, protected_url)
Example #7
0
 def test_proxy_basic_auth(self):
     # Sets up a 407 proxy challenge behind a ProxyHandler and delegates the
     # actual checks to _test_basic_auth.
     realm = "ACME Networks"
     opener = OpenerDirector()
     opener.add_handler(
         urllib.request.ProxyHandler({"http": "proxy.example.com:3128"}))
     password_manager = MockPasswordManager()
     auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
     challenge = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm
     http_handler = MockHTTPHandler(407, challenge)
     for handler in (auth_handler, http_handler):
         opener.add_handler(handler)
     self._test_basic_auth(opener, auth_handler, "Proxy-authorization",
                           realm, http_handler, password_manager,
                           "http://acme.example.com:3128/protected",
                           "proxy.example.com:3128")
Example #8
0
    def test_badly_named_methods(self):
        # Exercises the work-around for three handler methods whose names
        # happen to look like protocol hooks
        # (*_open() / *_request() / *_response()).
        #
        # Those methods used to be invoked as if they were hooks, which
        # raised TypeError in real code; with these mocks returning self,
        # the symptom would be no exception at all, or AttributeError.

        from urllib.error import URLError

        opener = OpenerDirector()
        add_ordered_mock_handlers(opener, [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
        ])
        opener.add_handler(urllib.request.UnknownHandler())
        for scheme in ("do", "proxy", "redirect"):
            bogus_url = scheme + "://example.com/"
            self.assertRaises(URLError, opener.open, bogus_url)
Example #9
0
    def test_handler_order(self):
        """Handlers are invoked by ``handler_order``, not by insertion order."""
        opener = OpenerDirector()
        registered = []
        specs = [
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
        ]
        for methods, order in specs:
            # A fresh subclass per iteration, as each handler gets its own type.
            class MockHandlerSubclass(MockHandler):
                pass

            handler = MockHandlerSubclass(methods)
            handler.handler_order = order
            registered.append(handler)
            opener.add_handler(handler)

        opener.open("http://example.com/")
        # The handler added second has the lower order, so it is called first.
        first_called, second_called = opener.calls[0][0], opener.calls[1][0]
        self.assertEqual(first_called, registered[1])
        self.assertEqual(second_called, registered[0])
Example #10
0
 def test_basic_auth(self, quote_char='"'):
     """Run the shared basic-auth checks against a 401 challenge.

     ``quote_char`` is placed on both sides of the realm in the
     ``WWW-Authenticate`` header.
     """
     realm = "ACME Widget Store"
     header = 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' % (
         quote_char, realm, quote_char)
     opener = OpenerDirector()
     password_manager = MockPasswordManager()
     auth_handler = urllib.request.HTTPBasicAuthHandler(password_manager)
     http_handler = MockHTTPHandler(401, header)
     opener.add_handler(auth_handler)
     opener.add_handler(http_handler)
     check_args = (
         opener,
         auth_handler,
         "Authorization",
         realm,
         http_handler,
         password_manager,
         "http://acme.example.com/protected",
         "http://acme.example.com/protected",
     )
     self._test_basic_auth(*check_args)
Example #11
0
    def test_handler_order(self):
        # Two mock handlers are added in one order but carry handler_order
        # values that sort them the other way round.
        director = OpenerDirector()
        added = []
        cases = (
            ([("http_open", "return self")], 500),
            (["http_open"], 0),
        )
        for method_spec, priority in cases:

            class MockHandlerSubclass(MockHandler):
                pass

            mock = MockHandlerSubclass(method_spec)
            mock.handler_order = priority
            director.add_handler(mock)
            added.append(mock)

        director.open("http://example.com/")
        # Calls are recorded in sorted (reverse-of-insertion) order.
        callers = [director.calls[0][0], director.calls[1][0]]
        self.assertEqual(callers[0], added[1])
        self.assertEqual(callers[1], added[0])
Example #12
0
    def test_badly_named_methods(self):
        """Methods that merely look like protocol hooks must not be dispatched.

        ``do_open``, ``proxy_open`` and ``redirect_request`` accidentally
        follow the ``*_open()`` / ``*_request()`` / ``*_response()`` naming
        convention.  They used to be called as hooks, causing ``TypeError``
        in real code; with mocks that return ``self`` the failure would show
        up as no exception at all, or as ``AttributeError``.
        """
        from urllib.error import URLError

        director = OpenerDirector()
        mock_methods = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
        ]
        add_ordered_mock_handlers(director, mock_methods)
        director.add_handler(urllib.request.UnknownHandler())
        urls = [scheme + "://example.com/"
                for scheme in ("do", "proxy", "redirect")]
        for url in urls:
            self.assertRaises(URLError, director.open, url)
Example #13
0
 def test_proxy_https_proxy_authorization(self):
     """Proxy-Authorization stays on the request, not on the tunneled headers."""
     proxy_host = "proxy.example.com:3128"
     opener = OpenerDirector()
     opener.add_handler(urllib.request.ProxyHandler(dict(https=proxy_host)))
     https_handler = MockHTTPSHandler()
     opener.add_handler(https_handler)
     req = Request("https://www.example.com/")
     for header, value in (("Proxy-Authorization", "FooBar"),
                           ("User-Agent", "Grail")):
         req.add_header(header, value)
     self.assertEqual(req.get_host(), "www.example.com")
     self.assertTrue(req._tunnel_host is None)
     opener.open(req)
     # The headers recorded on the mock connection must carry User-Agent but
     # not Proxy-Authorization; the latter must still be on the request.
     sent_headers = https_handler.httpconn.req_headers
     self.assertFalse(("Proxy-Authorization", "FooBar") in sent_headers)
     self.assertTrue(("User-Agent", "Grail") in sent_headers)
     self.assertFalse(req._tunnel_host is None)
     self.assertEqual(req.get_host(), proxy_host)
     self.assertEqual(req.get_header("Proxy-authorization"), "FooBar")
Example #14
0
 def test_proxy_basic_auth(self):
     """Run the shared basic-auth checks against a 407 proxy challenge."""
     realm = "ACME Networks"
     opener = OpenerDirector()
     proxy_handler = urllib.request.ProxyHandler(
         dict(http="proxy.example.com:3128"))
     opener.add_handler(proxy_handler)
     password_manager = MockPasswordManager()
     auth_handler = urllib.request.ProxyBasicAuthHandler(password_manager)
     header = 'Proxy-Authenticate: Basic realm="%s"\r\n\r\n' % realm
     http_handler = MockHTTPHandler(407, header)
     opener.add_handler(auth_handler)
     opener.add_handler(http_handler)
     check_args = (
         opener,
         auth_handler,
         "Proxy-authorization",
         realm,
         http_handler,
         password_manager,
         "http://acme.example.com:3128/protected",
         "proxy.example.com:3128",
     )
     self._test_basic_auth(*check_args)
Example #15
0
def build_tor_opener(guard, hops_count=3, debuglevel=0):
    """Build an ``OpenerDirector`` that uses the Tor HTTP/HTTPS handlers.

    The stock proxy, unknown-scheme, default-error, redirect and
    error-processor handlers are installed first.  ``TorHTTPHandler`` and
    ``TorHTTPSHandler`` follow, each constructed with ``guard``,
    ``hops_count`` and ``debuglevel``.  ``addheaders`` is set to an empty
    list before the opener is returned.
    """
    tor_opener = OpenerDirector()
    stock_handlers = (
        ProxyHandler,
        UnknownHandler,
        HTTPDefaultErrorHandler,
        HTTPRedirectHandler,
        HTTPErrorProcessor,
    )
    for handler_cls in stock_handlers:
        tor_opener.add_handler(handler_cls())
    for tor_cls in (TorHTTPHandler, TorHTTPSHandler):
        tor_opener.add_handler(
            tor_cls(guard, hops_count, debuglevel=debuglevel))
    tor_opener.addheaders = []
    return tor_opener
def build_opener(*handlers, **kw):
    """Assemble an ``OpenerDirector`` from defaults plus supplied handlers.

    A default handler class is installed unless one of ``handlers`` is that
    class, a subclass of it, or an instance of it.  The same rule decides
    whether an ``HTTPSContextHandler`` is added; it is built from the
    optional ``ssl_context`` keyword.  Finally every entry of ``handlers``
    is registered, instantiating it first when it is a class.
    """
    def isclass(obj):
        return isinstance(obj, class_type_) or hasattr(obj, "__bases__")

    def overrides(candidate, klass):
        # True when a caller-supplied handler should replace ``klass``.
        if isclass(candidate):
            return issubclass(candidate, klass)
        return isinstance(candidate, klass)

    default_classes = [
        ProxyHandler, UnknownHandler, HTTPHandler, HTTPDefaultErrorHandler,
        HTTPRedirectHandler, FTPHandler, FileHandler, HTTPErrorProcessor
    ]
    check_classes = default_classes + [HTTPSContextHandler]
    skip = [
        klass for klass in check_classes
        if any(overrides(supplied, klass) for supplied in handlers)
    ]

    opener = OpenerDirector()
    for klass in default_classes:
        if klass not in skip:
            opener.add_handler(klass())

    # The HTTPS handler is configured from the ``ssl_context`` keyword.
    ssl_context = kw.get('ssl_context')
    if HTTPSContextHandler not in skip:
        opener.add_handler(HTTPSContextHandler(ssl_context))

    for supplied in handlers:
        opener.add_handler(supplied() if isclass(supplied) else supplied)

    return opener
Example #17
0
def build_opener(*handlers, **kw):
    """Return an opener built from default handlers and ``handlers``.

    Defaults cover HTTP and FTP among others.  A default is left out when
    any supplied handler is that class, a subclass, or an instance of it.
    An ``HTTPSContextHandler`` created from ``kw['ssl_context']`` (``None``
    when absent) is added under the same rule.  Supplied handlers are added
    last; classes are instantiated with no arguments.
    """
    def isclass(obj):
        if isinstance(obj, class_type_):
            return True
        return hasattr(obj, "__bases__")

    director = OpenerDirector()
    defaults = [ProxyHandler, UnknownHandler, HTTPHandler,
                HTTPDefaultErrorHandler, HTTPRedirectHandler,
                FTPHandler, FileHandler, HTTPErrorProcessor]
    candidates = list(defaults)
    candidates.append(HTTPSContextHandler)

    # Collect every default that a supplied handler replaces.
    skipped = []
    for default_cls in candidates:
        for supplied in handlers:
            if isclass(supplied):
                replaced = issubclass(supplied, default_cls)
            else:
                replaced = isinstance(supplied, default_cls)
            if replaced:
                skipped.append(default_cls)

    for default_cls in defaults:
        if default_cls in skipped:
            continue
        director.add_handler(default_cls())

    # HTTPS support takes its SSL context from the keyword settings.
    ssl_context = kw.get('ssl_context')
    if HTTPSContextHandler not in skipped:
        director.add_handler(HTTPSContextHandler(ssl_context))

    for supplied in handlers:
        instance = supplied() if isclass(supplied) else supplied
        director.add_handler(instance)

    return director
Example #18
0
def opener_setup():
    """Build an opener with a fixed handler set and pass it to ``install_opener``.

    The handler classes are ``ProxyHandler``, ``UnknownHandler``,
    ``HTTPHandler``, ``HTTPDefaultErrorHandler`` and ``DataHandler``, each
    instantiated with no arguments.
    """
    from urllib.request import (
        DataHandler,
        HTTPDefaultErrorHandler,
        HTTPHandler,
        OpenerDirector,
        ProxyHandler,
        UnknownHandler,
    )

    director = OpenerDirector()
    handler_classes = (
        ProxyHandler,
        UnknownHandler,
        HTTPHandler,
        HTTPDefaultErrorHandler,
        DataHandler,
    )
    for handler_cls in handler_classes:
        director.add_handler(handler_cls())
    install_opener(director)
                                      type=socket.SOCK_STREAM,
                                      proto=0,
                                      fileno=None)
            self.sock.settimeout(self.timeout)
            self.sock.connect(self.socket_file)

    def socket_open(self, req):
        """Open a request whose selector has the form ``<socket file>:<path>``.

        The text before the first ``:`` becomes ``req.host`` and the rest
        becomes ``req.selector``.  The request is then passed to ``do_open``
        together with ``SocketFileToHttpConnectionAdaptor``.
        """
        req.host, req.selector = req.selector.split(':', 1)
        return self.do_open(self.SocketFileToHttpConnectionAdaptor, req)


# Shared opener: default headers plus HTTP, HTTPS and socket-file handlers.
better_urllib_get = OpenerDirector()
# A copy is stored, so the opener does not hold DEFAULT_HEADERS itself.
better_urllib_get.addheaders = DEFAULT_HEADERS.copy()
for _handler_cls in (HTTPHandler, HTTPSHandler, SocketFileHandler):
    better_urllib_get.add_handler(_handler_cls())
del _handler_cls  # keep the loop variable out of the module namespace

# Util functions
#############################################################################################


@lru_cache()
def get_url(url):
    """Open ``url`` with ``better_urllib_get``; return ``(processed, status)``.

    ``processed`` is whatever ``process_urllib_response`` returns for the
    response.  The timeout comes from the module-level ``timeout`` name, and
    results are memoized per ``url`` by ``lru_cache``.
    """
    resp = better_urllib_get.open(url, timeout=timeout)
    processed = process_urllib_response(resp)
    return processed, resp.status


def process_urllib_response(response):
    response_bytes = response.read()
Example #20
0
        auth_token = self._get_outh2_token(www_authenticate_header)

        request.add_unredirected_header('Authorization',
                                        'Bearer ' + auth_token)
        return self.parent.open(request, timeout=request.timeout)


# Got some help from this example https://gist.github.com/FiloSottile/2077115
class HeadRequest(Request):
    """``Request`` subclass whose HTTP method is always ``HEAD``."""

    def get_method(self):
        """Return the literal method name ``"HEAD"``."""
        return "HEAD"


# Shared opener: default headers plus HTTP(S), redirect, socket-file and
# OAuth2 token handlers, added in that order.
better_urllib_get = OpenerDirector()
better_urllib_get.addheaders = DEFAULT_HEADERS.copy()
for _handler_cls in (
    HTTPHandler,
    HTTPSHandler,
    HTTPRedirectHandler,
    SocketFileHandler,
    Oauth2TokenAuthHandler,
):
    better_urllib_get.add_handler(_handler_cls())
del _handler_cls  # keep the loop variable out of the module namespace


class RegistryError(Exception):
    """Exception that carries the response it was raised for."""

    def __init__(self, response):
        """Store ``response`` on the instance as ``response_obj``."""
        self.response_obj = response


# Util functions
#############################################################################################
def parse_thresholds(spec, include_units=True, units_required=True):
    """
Example #21
0
def build_test_opener(*handler_instances):
    """Return a bare ``OpenerDirector`` holding only ``handler_instances``.

    Handlers are registered in the order given; no defaults are added.
    """
    director = OpenerDirector()
    register = director.add_handler
    for instance in handler_instances:
        register(instance)
    return director
Example #22
0
def build_test_opener(*handler_instances):
    """Create an ``OpenerDirector`` and add each given handler instance to it."""
    test_opener = OpenerDirector()
    pending = iter(handler_instances)
    for handler in pending:
        test_opener.add_handler(handler)
    return test_opener
Example #23
0
 def resolve(self, item, captcha_cb=None, select_cb=None):
     """Resolve ``item['url']`` to a file URL by submitting the captcha form.

     The page is downloaded and its ``iframe_dwn`` form and captcha image
     are extracted.  The image is saved to ``<tmp_dir>/captcha.png`` and
     ``captcha_cb`` is asked for the code, which is then submitted.

     Returns a copy of ``item`` with ``'url'`` replaced on success.
     Returns ``None`` when an ``HTTPError`` occurs, when the form or image
     is not found, or when no captcha code is supplied.  When the result
     URL is not recognised, the whole procedure is retried recursively.

     Raises ``ResolveException`` when the response carries no PHPSESSID
     cookie or when the submit lands back on the form's action URL.
     """
     item = item.copy()
     util.init_urllib()
     url = self._url(item['url'])
     page = ''
     try:
         opener = OpenerDirector()
         opener.add_handler(HTTPHandler())
         opener.add_handler(UnknownHandler())
         install_opener(opener)
         request = Request(url)
         request.add_header('User-Agent', util.UA)
         response = urlopen(request)
         page = response.read()
         response.close()
     except HTTPError:
         traceback.print_exc()
         return
     data = util.substr(page, '<form method=post target="iframe_dwn"',
                        '</form>')
     action = re.search(r'action=(?P<url>[^>]+)', data,
                        re.IGNORECASE | re.DOTALL)
     img = re.search(r'<img src="(?P<url>[^"]+)', data,
                     re.IGNORECASE | re.DOTALL)
     if img and action:
         # headers.get() returns None when no Set-Cookie header was sent;
         # fall back to '' so the search below simply finds nothing.
         cookie_header = response.headers.get('Set-Cookie') or ''
         sessid = [
             cookie.group(1)
             for cookie in re.finditer(r'(PHPSESSID=[^\;]+)', cookie_header,
                                       re.IGNORECASE | re.DOTALL)
         ]
         if not sessid:
             # Without a session cookie the captcha cannot be submitted;
             # fail with the exception type this method already raises.
             self.error('no PHPSESSID cookie in response')
             raise ResolveException(
                 'Nelze ziskat soubor, zkuste to znovu')
         # we have to download image ourselves
         image = util.request(self._url(img.group('url')),
                              headers={
                                  'Referer': url,
                                  'Cookie': sessid[-1]
                              })
         img_file = os.path.join(self.tmp_dir, 'captcha.png')
         util.save_data_to_file(image, img_file)
         code = None
         if captcha_cb:
             code = captcha_cb({'id': '0', 'img': img_file})
         if not code:
             self.info('No captcha received, exit')
             return
         # NOTE(review): urllib.urlencode is the Python 2 spelling -- confirm
         # the target interpreter before porting.
         request = urllib.urlencode({'code': code})
         req = Request(self._url(action.group('url')), request)
         req.add_header('User-Agent', util.UA)
         req.add_header('Referer', url)
         req.add_header('Cookie', sessid[-1])
         try:
             resp = urlopen(req)
             if resp.code == 302:
                 file_url = resp.headers.get('location')
             else:
                 file_url = resp.geturl()
             if file_url.find(action.group('url')) > 0:
                 # Landed back on the form: surface the page's alert text.
                 msg = resp.read()
                 resp.close()
                 js_msg = re.search(r"alert\('(?P<msg>[^']+)", msg,
                                    re.IGNORECASE | re.DOTALL)
                 if js_msg:
                     raise ResolveException(js_msg.group('msg'))
                 self.error(msg)
                 raise ResolveException(
                     'Nelze ziskat soubor, zkuste to znovu')
             resp.close()
             if file_url.find('data') >= 0 or file_url.find(
                     'download_free') > 0:
                 item['url'] = file_url
                 return item
             self.error('wrong captcha, retrying')
             return self.resolve(item, captcha_cb, select_cb)
         except HTTPError:
             traceback.print_exc()
             return
Example #24
0
        auth_token = self._get_outh2_token(www_authenticate_header)

        request.add_unredirected_header('Authorization', 'Bearer ' + auth_token)
        return self.parent.open(request, timeout=request.timeout)


# Got some help from this example https://gist.github.com/FiloSottile/2077115
class HeadRequest(Request):
    """A ``Request`` that reports ``HEAD`` as its HTTP method."""

    def get_method(self):
        # Always the same verb, regardless of how the request was built.
        return "HEAD"


# Module-wide opener used for outgoing requests.  Handlers are added in the
# order listed; each class is instantiated with no arguments.
better_urllib_get = OpenerDirector()
better_urllib_get.addheaders = DEFAULT_HEADERS.copy()
for _factory in (HTTPHandler, HTTPSHandler, HTTPRedirectHandler,
                 SocketFileHandler, Oauth2TokenAuthHandler):
    better_urllib_get.add_handler(_factory())
del _factory  # do not leave the loop variable behind as a module global


class RegistryError(Exception):
    """Error type that keeps the originating response as ``response_obj``."""

    def __init__(self, response):
        # The response is kept as-is so handlers can inspect it later.
        self.response_obj = response


# Util functions
#############################################################################################
def parse_thresholds(spec, include_units=True, units_required=True):
    """
Example #25
0
import sys
from urllib.request import OpenerDirector, HTTPRedirectHandler, HTTPSHandler, urlretrieve

REPO_URL = 'https://github.com/facebook/rocksdb'

# Validate the command line explicitly: ``assert`` statements are stripped
# under ``python -O``, which would let a bad invocation fall through.
if len(sys.argv) < 2:
    sys.exit('Please provide a download directory, e.g. /build')
if len(sys.argv) > 2:
    sys.exit(f'Please omit the unexpected arguments: {sys.argv[2:]}')
download_dir = sys.argv[1]

# No HTTPErrorProcessor is installed, so the redirect response is returned
# as-is instead of being followed; its Location header ends in the tag name.
od = OpenerDirector()
od.add_handler(HTTPSHandler())
od.add_handler(HTTPRedirectHandler())

# Only the headers are needed, so the response is closed right away.
with od.open(f'{REPO_URL}/releases/latest/download/') as resp:
    location = resp.headers['location']
if not location:
    # headers[...] yields None for a missing header; fail with a clear message.
    sys.exit('No Location header received; cannot determine the latest tag')
tag_name = location.split('/')[-1]

release_url = f'{REPO_URL}/archive/{tag_name}.tar.gz'
file_path, headers = urlretrieve(release_url, f'{download_dir}/latest.tar.gz')

print(f'RocksDB {tag_name} was downloaded to {file_path}', file=sys.stderr)
# The tag minus its first character goes to stdout for the caller to capture.
print(tag_name[1:])
Example #26
0
        def connect(self):
            """Connect ``self.sock`` to the Unix socket at ``self.socket_file``.

            A new ``AF_UNIX`` stream socket is stored on the instance, given
            ``self.timeout`` as its timeout, and then connected.
            """
            unix_sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0, None)
            self.sock = unix_sock
            unix_sock.settimeout(self.timeout)
            unix_sock.connect(self.socket_file)

    def socket_open(self, req):
        # The selector arrives as "<socket file>:<path>".  Split on the first
        # colon only, so any later colons stay in the path.
        pieces = req.selector.split(':', 1)
        socket_path, request_path = pieces
        req.host = socket_path
        req.selector = request_path
        adaptor = self.SocketFileToHttpConnectionAdaptor
        return self.do_open(adaptor, req)


# Module-wide opener with the default headers and three handlers: plain
# HTTP, HTTPS and the socket-file handler.
better_urllib_get = OpenerDirector()
better_urllib_get.addheaders = DEFAULT_HEADERS.copy()
for _factory in (HTTPHandler, HTTPSHandler, SocketFileHandler):
    better_urllib_get.add_handler(_factory())
del _factory  # do not leave the loop variable behind as a module global


# Util functions
#############################################################################################


@lru_cache()
def get_url(url):
    """Fetch ``url`` and return the processed response with its status.

    The request goes through ``better_urllib_get`` using the module-level
    ``timeout``.  ``lru_cache`` memoizes the returned tuple per ``url``.
    """
    fetched = better_urllib_get.open(url, timeout=timeout)
    result = (process_urllib_response(fetched), fetched.status)
    return result


def process_urllib_response(response):