Exemple #1
0
def main():
    """Fetch one or two URLs through local SOCKS proxies, then run the reactor.

    ``sys.argv[1]`` (mandatory) is fetched via the SOCKS proxy on
    localhost:9050.  ``sys.argv[2]`` (optional) is fetched via the SOCKS
    proxy on localhost:1080.  Each page is delivered to ``thing.clientcb``,
    each wrapper connection result to ``thing.wrappercb``, and
    ``thing.npages`` is incremented once per request started.
    """
    thing = mything()

    # Mandatory first argument is a URL to fetch over Tor (or whatever
    # SOCKS proxy that is running on localhost:9050).
    url = sys.argv[1]
    proxy = (None, 'localhost', 9050, True, None, None)

    f = client.HTTPClientFactory(url)
    f.deferred.addCallback(thing.clientcb)
    sw = thing.sockswrapper(proxy, url)
    d = sw.connect(f)
    d.addCallback(thing.wrappercb)
    thing.npages += 1

    # Optional second argument is a URL to fetch over whatever SOCKS
    # proxy that is running on localhost:1080 (possibly `twistd -n socks').
    if len(sys.argv) > 2:
        url2 = sys.argv[2]
        proxy2 = (None, 'localhost', 1080, True, None, None)
        # The factory must request url2; it previously reused the first URL,
        # so the second proxy connection fetched the wrong page.
        f2 = client.HTTPClientFactory(url2)
        f2.deferred.addCallback(thing.clientcb)
        sw2 = thing.sockswrapper(proxy2, url2)
        d2 = sw2.connect(f2)
        d2.addCallback(thing.wrappercb)
        thing.npages += 1

    reactor.run()
Exemple #2
0
    def ooni_main(self, cmd):
        # We don't have the Command object so cheating for now.
        url = cmd.hostname

        # FIXME: validate that url is on the form scheme://host[:port]/path
        scheme, host, port, path = client._parse(url)

        ctrl_dest = self.endpoint(scheme, host, port)
        if not ctrl_dest:
            raise Exception('unsupported scheme %s in %s' % (scheme, url))
        if cmd.controlproxy:
            assert scheme != 'https', "no support for proxied https atm, sorry"
            _, proxy_host, proxy_port, _ = client._parse(cmd.controlproxy)
            control = SOCKSWrapper(reactor, proxy_host, proxy_port, ctrl_dest)
            print "proxy: ", proxy_host, proxy_port
        else:
            control = ctrl_dest
        f = client.HTTPClientFactory(url)
        f.deferred.addCallback(lambda x: self.cb('control', x))
        control.connect(f)

        exp_dest = self.endpoint(scheme, host, port)
        if not exp_dest:
            raise Exception('unsupported scheme %s in %s' % (scheme, url))
        # FIXME: use the experiment proxy if there is one
        experiment = exp_dest
        f = client.HTTPClientFactory(url)
        f.deferred.addCallback(lambda x: self.cb('experiment', x))
        experiment.connect(f)

        reactor.run()
Exemple #3
0
    def ooni_main(self):
        """Fetch a hard-coded URL twice: as 'control' and as 'experiment'.

        The control request is sent through ``self.controlproxy`` (set here
        to a local socks4a proxy) wrapped in a SOCKSWrapper; the experiment
        request goes straight to the endpoint.  Each page is handed to
        ``self.cb`` tagged with its role, and the reactor is run at the end.

        Returns None without doing anything when the URL does not look like
        ``scheme://host``.

        Raises:
            Exception: if ``self.endpoint`` returns nothing for the scheme.
        """
        # We don't have the Command object so cheating for now.
        url = 'http://check.torproject.org/'
        self.controlproxy = 'socks4a://127.0.0.1:9050'
        self.experimentalproxy = ''

        # Raw string: the pattern text is unchanged, but it no longer relies
        # on Python passing the invalid escapes "\:" and "\/" through, which
        # triggers an invalid-escape-sequence warning.
        if not re.match(r"[a-zA-Z0-9]+\:\/\/[a-zA-Z0-9]+", url):
            return None
        scheme, host, port, path = client._parse(url)

        ctrl_dest = self.endpoint(scheme, host, port)
        if not ctrl_dest:
            raise Exception('unsupported scheme %s in %s' % (scheme, url))
        if self.controlproxy:
            _, proxy_host, proxy_port, _ = client._parse(self.controlproxy)
            control = SOCKSWrapper(reactor, proxy_host, proxy_port, ctrl_dest)
        else:
            control = ctrl_dest
        f = client.HTTPClientFactory(url)
        f.deferred.addCallback(lambda x: self.cb('control', x))
        control.connect(f)

        exp_dest = self.endpoint(scheme, host, port)
        if not exp_dest:
            raise Exception('unsupported scheme %s in %s' % (scheme, url))
        # FIXME: use the experiment proxy if there is one
        experiment = exp_dest
        f = client.HTTPClientFactory(url)
        f.deferred.addCallback(lambda x: self.cb('experiment', x))
        experiment.connect(f)

        reactor.run()
Exemple #4
0
def getPagePrxoy(url, proxy=None, contextFactory=None, *args, **kwargs):
    '''
    Fetch ``url`` and return the factory's deferred.

    ``proxy`` is either None (connect to the URL's own host) or a mapping
    with "host" and "port" keys, e.g.
    {"host": "192.168.1.111", "port": 6666}, in which case a plain TCP
    connection is made to that address instead.

    The "timeout" keyword is always set to 60 before the factory is built.
    '''
    kwargs["timeout"] = 60

    # Proxy case: always a plain TCP connection to the proxy address.
    if proxy is not None:
        factory = client.HTTPClientFactory(url, *args, **kwargs)
        reactor.connectTCP(proxy["host"], proxy["port"], factory)
        return factory.deferred

    scheme, host, port, path = client._parse(url)
    factory = client.HTTPClientFactory(url, *args, **kwargs)
    if scheme != b'https':
        reactor.connectTCP(client.nativeString(host), port, factory)
        return factory.deferred

    from twisted.internet import ssl
    if contextFactory is None:
        contextFactory = ssl.ClientContextFactory()
    reactor.connectSSL(client.nativeString(host), port, factory,
                       contextFactory)
    return factory.deferred
Exemple #5
0
def get_page_with_header(url, contextFactory=None, *args, **kwargs):
    """
    Download a web page together with its response headers.

    Return a deferred, which will callback with a ``(page,
    response_headers)`` tuple or errback with a description of the error.

    ``url`` may be text or bytes; text is UTF-8 encoded before it is handed
    to HTTPClientFactory.  The connection uses SSL for ``https`` URLs
    (default port 443) and plain TCP otherwise (default port 80).

    See HTTPClientFactory to see what extra args can be passed.
    """
    if isinstance(url, str):
        url = url.encode("utf-8")
    # Parse a text copy of the URL.  Parsing the bytes form yields bytes
    # components, so the comparison with 'https' below could never match and
    # HTTPS URLs were fetched over plain TCP on port 80.
    purl = urllib.parse.urlparse(url.decode("utf-8"))
    factory = client.HTTPClientFactory(url, *args, **kwargs)
    if purl.scheme == 'https':
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(purl.hostname, purl.port or 443, factory,
                           contextFactory)
    else:
        reactor.connectTCP(purl.hostname, purl.port or 80, factory)

    def cb(page):
        # Pair the body with the headers recorded on the factory.
        return defer.succeed((page, factory.response_headers))

    factory.deferred.addCallback(cb)
    return factory.deferred
    def getProxyPage(url,
                     contextFactory=None,
                     host=None,
                     port=None,
                     status=None,
                     *args,
                     **kwargs):
        """Download a web page as a string. (modified from twisted.web.client.getPage)

        Download a page. Return a deferred, which will callback with a
        page (as a string) or errback with a description of the error.

        ``host`` overrides the host to connect to (defaults to the host in
        ``url``).  ``port`` is used only when the factory reports no port
        of its own.  When ``status`` is 301, 302 or 303 a
        RedirHTTPClientFactory is used instead of the plain
        HTTPClientFactory; ``status=None`` selects the plain factory.

        See HTTPClientFactory to see what extra args can be passed.
        """
        # Guard against the default status=None: comparing None with an int
        # raises TypeError on Python 3.
        if status is not None and 300 < status < 304:
            factory = RedirHTTPClientFactory(url, *args, **kwargs)
        else:
            factory = client.HTTPClientFactory(url, *args, **kwargs)

        host = host or factory.host
        # We should connect to different port taken from URI if specified
        port = factory.port or port

        if factory.scheme == 'https':
            if contextFactory is None:
                contextFactory = SSLClientContextFactory(factory.host)
            reactor.connectSSL(host, port, factory, contextFactory)
        else:
            reactor.connectTCP(host, port, factory)
        return factory.deferred
Exemple #7
0
    def get_page(self, contextFactory=None, description=None, *args, **kwargs):
        """Request ``self.url`` and return the factory's deferred.

        The connection goes to ``self.proxy_host``/``self.proxy_port`` when
        ``self.use_proxy`` is set, otherwise to ``self.host``/``self.port``;
        SSL is used when the URL scheme is 'https'.  When
        ``self.return_headers`` is set the deferred fires with
        ``(page, response_headers)`` instead of just the page.
        """
        if description is None:
            description = self.url

        scheme, _host, _port, _path = self.url_parse(self.url)
        factory = txwebclient.HTTPClientFactory(self.url, *args, **kwargs)

        use_ssl = scheme == 'https'
        if use_ssl:
            from twisted.internet import ssl
            if contextFactory is None:
                contextFactory = ssl.ClientContextFactory()

        # Pick the peer to connect to: the proxy if configured, else the
        # target itself.
        if self.use_proxy:
            peer_host, peer_port = self.proxy_host, self.proxy_port
        else:
            peer_host, peer_port = self.host, self.port

        if use_ssl:
            reactor.connectSSL(peer_host, peer_port, factory, contextFactory)
        else:
            reactor.connectTCP(peer_host, peer_port, factory)

        if not self.return_headers:
            return factory.deferred

        def with_headers(page):
            return (page, factory.response_headers)

        return factory.deferred.addCallback(with_headers)
 def testCookieHeaderParsing(self):
     """
     Set-Cookie response headers are collected into C{factory.cookies}.

     A canned response is fed to the protocol line by line; the test then
     checks the request that was written to the fake transport and the
     cookie name/value pairs recorded on the factory.
     """
     factory = client.HTTPClientFactory('http://foo.example.com/')
     proto = factory.buildProtocol('127.42.42.42')
     proto.transport = FakeTransport()
     proto.connectionMade()
     for line in [
             '200 Ok',
             'Squash: yes',
             'Hands: stolen',
             'Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/; expires=Wednesday, 09-Nov-99 23:12:40 GMT',
             'Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/',
             'Set-Cookie: SHIPPING=FEDEX; path=/foo',
             '',
             'body',
             'more body',
     ]:
         proto.dataReceived(line + '\r\n')
     # assertEqual rather than the deprecated assertEquals alias.
     self.assertEqual(proto.transport.data, [
         'GET / HTTP/1.0\r\n', 'Host: foo.example.com\r\n',
         'User-Agent: Twisted PageGetter\r\n', '\r\n'
     ])
     self.assertEqual(
         factory.cookies, {
             'CUSTOMER': 'WILE_E_COYOTE',
             'PART_NUMBER': 'ROCKET_LAUNCHER_0001',
             'SHIPPING': 'FEDEX',
         })
 def testFactoryInfo(self):
     """Fetch the 'file' URL over SSL and check the factory's info."""
     target = self.getURL('file')
     scheme, host, port, path = client._parse(target)
     factory = client.HTTPClientFactory(target)
     context = ssl.ClientContextFactory()
     reactor.connectSSL(host, port, factory, context)
     # The base class defines _cbFactoryInfo correctly for this
     finished = factory.deferred
     return finished.addCallback(self._cbFactoryInfo, factory)
Exemple #10
0
 def test_earlyHeaders(self):
     """
     Check the request headers L{client.HTTPPageGetter} writes as soon as
     its connection is made.

     The factory is built with explicit C{agent}, C{cookies} and
     C{postdata} values plus a C{headers} dict that also names I{Host},
     I{User-Agent}, I{Cookie} and I{Content-Length}.  The bytes written to
     the transport must contain every header line listed in
     C{expected_lines} below.
     """
     request_headers = {
         b'Host': b'example.net',
         b'User-Agent': b'fooble',
         b'Cookie': b'blah blah',
         b'Content-Length': b'12981',
         b'Useful': b'value'}
     factory = client.HTTPClientFactory(
         b'http://foo/bar',
         agent=b"foobar",
         cookies={b'baz': b'quux'},
         postdata=b"some data",
         headers=request_headers)

     sink = StringTransport()
     getter = client.HTTPPageGetter()
     getter.factory = factory
     getter.makeConnection(sink)
     written = sink.value()

     expected_lines = [
         b"Host: example.net\r\n",
         b"User-Agent: foobar\r\n",
         b"Content-Length: 9\r\n",
         b"Useful: value\r\n",
         b"connection: close\r\n",
         b"Cookie: blah blah; baz=quux\r\n"]
     for header_line in expected_lines:
         self.assertIn(header_line, written)
    def getProxyPage(url,
                     contextFactory=None,
                     host=None,
                     port=None,
                     *args,
                     **kwargs):
        """Fetch a page, optionally via an explicit host and port.

        Variant of twisted.web.client.getPage: ``host`` and ``port`` name
        the address to connect to and fall back to the values the factory
        derived from ``url`` when they are not given.

        Returns a deferred which will callback with the page or errback
        with a description of the error.  Extra positional and keyword
        arguments are passed on to HTTPClientFactory.
        """
        factory = client.HTTPClientFactory(url, *args, **kwargs)

        connect_host = host if host else factory.host
        connect_port = port if port else factory.port

        if factory.scheme != 'https':
            reactor.connectTCP(connect_host, connect_port, factory)
            return factory.deferred

        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(connect_host, connect_port, factory,
                           contextFactory)
        return factory.deferred
Exemple #12
0
 def testCookieHeaderParsing(self):
     """
     Set-Cookie response headers are collected into C{factory.cookies}.

     A canned response is delivered one line at a time; afterwards the
     request written to the transport and the cookies recorded on the
     factory are checked.
     """
     factory = client.HTTPClientFactory(b'http://foo.example.com/')
     proto = factory.buildProtocol('127.42.42.42')
     transport = StringTransport()
     proto.makeConnection(transport)

     response_lines = (
         b'200 Ok',
         b'Squash: yes',
         b'Hands: stolen',
         b'Set-Cookie: CUSTOMER=WILE_E_COYOTE; path=/; expires=Wednesday, 09-Nov-99 23:12:40 GMT',
         b'Set-Cookie: PART_NUMBER=ROCKET_LAUNCHER_0001; path=/',
         b'Set-Cookie: SHIPPING=FEDEX; path=/foo',
         b'',
         b'body',
         b'more body',
     )
     for raw_line in response_lines:
         proto.dataReceived(raw_line + b'\r\n')

     expected_request = (b'GET / HTTP/1.0\r\n'
                         b'Host: foo.example.com\r\n'
                         b'User-Agent: Twisted PageGetter\r\n'
                         b'\r\n')
     self.assertEqual(transport.value(), expected_request)

     expected_cookies = {
         b'CUSTOMER': b'WILE_E_COYOTE',
         b'PART_NUMBER': b'ROCKET_LAUNCHER_0001',
         b'SHIPPING': b'FEDEX',
     }
     self.assertEqual(factory.cookies, expected_cookies)
Exemple #13
0
def getPage(url, contextFactory=None, *args, **kwargs):
    """Start downloading a web page and return its HTTPClientFactory.

    The host and port to connect to are taken from ``url``
    (``scheme://host[:port][/path]``).  When the authority carries no
    usable ``:port`` the scheme's default is used: 443 for https, 80
    otherwise.  ``contextFactory`` is only used for https URLs; a default
    ssl.ClientContextFactory is created when it is None.

    See HTTPClientFactory to see what extra args can be passed.

    Returns the HTTPClientFactory itself (not its deferred).

    Raises:
        ValueError: if ``url`` contains no ``://`` separator.
    """
    scheme, remainder = url.split('://', 1)

    # Only the authority part is needed here.  partition() also copes with
    # URLs that have no path at all (e.g. "http://example.com"), which
    # previously failed to unpack.
    host_port = remainder.partition('/')[0]
    try:
        host, port = host_port.split(':')
        port = int(port)
    except ValueError:
        # No port, a non-numeric port, or more than one colon: fall back to
        # the whole authority and the scheme's default port.
        host = host_port
        port = 443 if scheme == 'https' else 80

    factory = client.HTTPClientFactory(url, *args, **kwargs)
    factory.noisy = False
    if scheme == 'https':
        from twisted.internet import ssl
        if contextFactory is None:
            contextFactory = ssl.ClientContextFactory()
        reactor.connectSSL(host, port, factory, contextFactory)
    else:
        reactor.connectTCP(host, port, factory)
    return factory
Exemple #14
0
 def _getPage(self):
     """Request the page through the proxy at proxyHost:proxyPort.

     Failures go to C{self.noPage}; the result then goes to C{self.page}.
     """
     # NOTE(review): the factory receives (proxyHost, url) positionally --
     # confirm this matches the HTTPClientFactory signature in use.
     factory = client.HTTPClientFactory(self.proxyHost, self.url)
     factory.headers = dict(pragma='no-cache')
     reactor.connectTCP(self.proxyHost, self.proxyPort, factory)
     pending = factory.deferred
     for register, handler in ((pending.addErrback, self.noPage),
                               (pending.addCallback, self.page)):
         register(handler)
Exemple #15
0
    def request(self, method, uri, headers=None, bodyProducer=None):
        """Issue ``method`` on ``uri`` via HTTPClientFactory.

        Returns the factory's deferred, which fires with a
        ``_HTTP10Agent._FakeResponse`` built from the factory's status
        whether the request succeeded or failed.  ``bodyProducer`` is
        accepted but not used.
        """
        parsed = urlparse.urlparse(uri, scheme='http')
        is_https = parsed.scheme == 'https'
        host = parsed.hostname
        port = parsed.port

        if port is None:
            if is_https:
                port = 443
            else:
                port = 80

        # Translate from Agent's Headers object back into a dict, keeping
        # only the first value of each header.
        if headers is not None:
            headers = dict(
                (name, values[0])
                for name, values in headers.getAllRawHeaders())

        factory = client.HTTPClientFactory(uri,
                                           method=method,
                                           headers=headers,
                                           timeout=2)

        def to_fake_response(_result):
            return _HTTP10Agent._FakeResponse(int(factory.status))

        factory.deferred.addBoth(to_fake_response)

        if is_https:
            self._reactor.connectSSL(host, port, factory,
                                     ClientContextFactory())
        else:
            self._reactor.connectTCP(host, port, factory)

        return factory.deferred
Exemple #16
0
 def testFactoryInfo(self):
     """Fetch the 'file' URL over SSL and check the factory's info."""
     target = self.getURL('file')
     parsed = client.URI.fromBytes(target)
     factory = client.HTTPClientFactory(target)
     host = nativeString(parsed.host)
     context = ssl.ClientContextFactory()
     reactor.connectSSL(host, parsed.port, factory, context)
     # The base class defines _cbFactoryInfo correctly for this
     finished = factory.deferred
     return finished.addCallback(self._cbFactoryInfo, factory)
Exemple #17
0
 def test_setURLRelativePath(self):
     """
     L{client.HTTPClientFactory.setURL} with a relative URL changes only
     the path; scheme, host and port stay as they were.
     """
     factory = client.HTTPClientFactory(b'http://example.com')
     relative = b'/hello'
     factory.setURL(relative)
     expected = (relative, b'http', b'example.com', 80, b'/hello')
     actual = (factory.url, factory.scheme, factory.host, factory.port,
               factory.path)
     self.assertEqual(expected, actual)
Exemple #18
0
 def test_setURL(self):
     """
     A factory built from an absolute URL exposes that URL's scheme, host,
     port and path.
     """
     absolute = b'http://example.com'
     factory = client.HTTPClientFactory(absolute)
     expected = (absolute, b'http', b'example.com', 80, b'/')
     actual = (factory.url, factory.scheme, factory.host, factory.port,
               factory.path)
     self.assertEqual(expected, actual)
Exemple #19
0
 def test_HTTPPort80(self):
     """
     The host header carries no port when the URL explicitly names the
     default HTTP port.
     """
     factory = client.HTTPClientFactory(b'http://foo.example.com:80/')
     proto = factory.buildProtocol('127.42.42.42')
     proto.makeConnection(StringTransport())
     request_bytes = proto.transport.value()
     host_header = self._getHost(request_bytes)
     self.assertEqual(host_header, b'foo.example.com')
Exemple #20
0
def request_url(url):
    """Request ``url`` with REQUEST_DEFAULTS and return the factory's deferred.

    The connection uses SSL when the parsed scheme equals 'https' and
    plain TCP otherwise.
    """
    parsed = client.URI.fromBytes(url)
    factory = client.HTTPClientFactory(url, **REQUEST_DEFAULTS)

    # NOTE(review): URI.fromBytes suggests bytes components; comparing the
    # scheme with the text literal 'https' may never match on Python 3 --
    # confirm.
    if parsed.scheme != 'https':
        reactor.connectTCP(parsed.host, parsed.port, factory)
    else:
        context = ssl.ClientContextFactory()
        reactor.connectSSL(parsed.host, parsed.port, factory, context)

    return factory.deferred
Exemple #21
0
 def test_HTTPSDefaultPort(self):
     """
     No port should be included in the host header when connecting to the
     default HTTPS port.
     """
     factory = client.HTTPClientFactory('https://foo.example.com/')
     proto = factory.buildProtocol('127.42.42.42')
     proto.makeConnection(StringTransport())
     # assertEqual rather than the deprecated assertEquals alias.
     self.assertEqual(self._getHost(proto.transport.value()),
                      'foo.example.com')
Exemple #22
0
 def test_setURLRemovesFragment(self):
     """
     L{client.HTTPClientFactory.setURL} drops the fragment identifier when
     it computes the path, while keeping the full URL in C{url}.
     """
     factory = client.HTTPClientFactory(b'http://example.com')
     with_fragment = b'https://foo.com:8443/bar;123?a#frag'
     factory.setURL(with_fragment)
     expected = (with_fragment, b'https', b'foo.com', 8443, b'/bar;123?a')
     actual = (factory.url, factory.scheme, factory.host, factory.port,
               factory.path)
     self.assertEqual(expected, actual)
Exemple #23
0
 def test_HTTPSNotPort443(self):
     """
     The port is included in the host header when connecting to a
     non-default port.
     """
     # NOTE(review): the test name says HTTPS but the URL uses http --
     # confirm which scheme is intended.
     factory = client.HTTPClientFactory(b'http://foo.example.com:8080/')
     proto = factory.buildProtocol('127.42.42.42')
     proto.makeConnection(StringTransport())
     request_bytes = proto.transport.value()
     host_header = self._getHost(request_bytes)
     self.assertEqual(host_header, b'foo.example.com:8080')
Exemple #24
0
 def test_setURLRelativeScheme(self):
     """
     L{client.HTTPClientFactory.setURL} with a scheme-relative URL keeps
     the scheme and takes the host and path from the new URL.
     """
     factory = client.HTTPClientFactory(b'http://example.com')
     scheme_relative = b'//foo.com:81/bar'
     factory.setURL(scheme_relative)
     # NOTE(review): the URL names port 81 but port 80 is expected here --
     # confirm this is the intended behaviour for scheme-relative URLs.
     expected = (scheme_relative, b'http', b'foo.com', 80, b'/bar')
     actual = (factory.url, factory.scheme, factory.host, factory.port,
               factory.path)
     self.assertEqual(expected, actual)
Exemple #25
0
    def fetch(self, uri, req_headers=None):
        """Fetch ``uri`` over plain TCP and parse the body as XML.

        On success ``self.site`` is updated ('map', 'last_check_elapsed'
        and, when the caching headers parse, 'expires') and passed to
        ``self.done_cb``.  On failure ``self.error_cb`` is called with
        ``self.site`` and a short quoted message, except that HTTP 404/410
        responses are treated as a successful fetch with no map.

        ``req_headers`` is passed to HTTPClientFactory as request headers.
        Nothing is returned; results are delivered through the callbacks.
        """
        # TODO: ims
        c = client.HTTPClientFactory(
            str(uri),
            timeout=self.fetch_timeout,
            headers=req_headers,
            #            proxy=self.http_proxy  # TODO: proxy support
        )
        # Only host and port are used below (for connectTCP).
        scheme, host, port, path = client._parse(uri)

        def callback(data):
            # data is the response body, or None when called from errback
            # for a 404/410 response.
            if data is None:
                self.site['map'] = None
            else:
                self.site['map'] = minidom.parseString(data)
            self.site['last_check_elapsed'] = time.time() - self.start_time
            # NOTE(review): remaining_lifetime is assigned but never read.
            remaining_lifetime = 0
            try:
                # expires = now + max-age - age, taken from the response's
                # Cache-Control and Age headers.
                cc_str = ", ".join(c.response_headers.get('cache-control', ''))
                max_age = int(parse_cc(cc_str).get('max-age', "0"))
                age = int(c.response_headers.get('age', ["0"])[-1])
                self.site['expires'] = time.time() + max_age - age
            except ValueError:
                # 'expires' is left untouched when a header value is not
                # an integer.
                logging.info("Bad CC or Age header on <%s>" % uri)

            self.done_cb(self.site)

        c.deferred.addCallback(callback)

        def errback(data):
            # data exposes .type and .value -- presumably a Twisted Failure;
            # confirm.  Registered after callback, so it presumably also
            # receives errors raised inside callback (hence the ExpatError
            # branch).
            if data.type == web_error.Error:
                if data.value[0] in ["404", "410"]:
                    # Missing/gone resource: report success with no map.
                    logging.warning("%s: %s" % (data.value[0], uri))
                    return callback(None)
                else:
                    msg = '"%s"' % (data.value)
            elif data.type == expat.ExpatError:
                msg = '"XML parsing error (%s)"' % data.value
            elif data.type == internet_error.DNSLookupError:
                msg = '"DNS lookup error"'
            elif data.type == internet_error.TimeoutError:
                msg = '"Timeout"'
            elif data.type == internet_error.ConnectionRefusedError:
                msg = '"Connection refused"'
            elif data.type == internet_error.ConnectError:
                msg = '"Connection error"'
            else:
                # NOTE(review): format_exc() formats the exception currently
                # being handled; whether one is active inside this errback
                # is not visible here -- confirm the traceback is useful.
                msg = '"Unknown error (%s)"' % traceback.format_exc()
            self.error_cb(self.site, msg)

        c.deferred.addErrback(errback)
        # Always plain TCP: the parsed scheme is not consulted.
        self.reactor.connectTCP(host, port, c)
def makeRequest(address_spec, data, callback, errback):
    """POST ``data`` as JSON to '/' on the given ``(host, port)``.

    ``callback`` receives the response; on failure ``errback`` is called
    with ``address_spec`` first and the error second.
    """
    def report_failure(error):
        # Change the signature of the errback
        errback(address_spec, error)

    host, port = address_spec
    payload = json.dumps(data)
    factory = client.HTTPClientFactory('/',
                                       agent='PasswordChunker',
                                       method='POST',
                                       postdata=payload)
    pending = factory.deferred
    pending.addCallback(callback)
    pending.addErrback(report_failure)
    reactor.connectTCP(host, port, factory)
Exemple #27
0
def _make_factory(url, *args, **kwargs):
    """Build an HTTPClientFactory for ``url``, connect it, and return it.

    A "timeout" keyword, when given, is passed both to the factory and to
    the reactor's connect call.  https URLs are connected over SSL with a
    default ssl.ClientContextFactory.
    """
    scheme, host, port, path = client._parse(url)
    factory = client.HTTPClientFactory(url, *args, **kwargs)

    # Forward only the timeout to the connect call.
    if "timeout" in kwargs:
        connect_options = {"timeout": kwargs["timeout"]}
    else:
        connect_options = {}

    if scheme != "https":
        reactor.connectTCP(host, port, factory, **connect_options)
        return factory

    from twisted.internet import ssl
    context = ssl.ClientContextFactory()
    reactor.connectSSL(host, port, factory, context, **connect_options)
    return factory
Exemple #28
0
def getPage(url):
    """Fetch ``url`` and disconnect the connector once the request ends.

    Variant of the standard twisted.web.client.getPage that disconnects
    its connector whether the request succeeds or fails.  Returns the
    factory's deferred; the result or failure passes through unchanged.
    """
    from twisted.web import client
    scheme, host, port, path = client._parse(url)
    factory = client.HTTPClientFactory(url)
    connector = reactor.connectTCP(host, port, factory)

    def disconnect_then_pass(result, conn):
        conn.disconnect()
        return result

    finished = factory.deferred
    finished.addBoth(disconnect_then_pass, connector)
    return finished
Exemple #29
0
    def twoperation(self, *IClist, **Options):
        '''Send one OPC XML-DA operation as a SOAP POST request.

        Builds the ``<op>SoapIn`` message, fills it from ``IClist`` and
        ``Options`` via ``self.fill_tc``, serializes it and posts it to
        ``self.OPCServerAddress`` (a trailing '/' is appended to that
        attribute if missing).  ``op`` and ``handleFault`` come from the
        enclosing scope.

        Returns the factory's deferred, with ``self.twhandle<op>`` attached
        as callback and ``handleFault`` as errback.

        Raises:
            TypeError: if ``Options`` still has entries after
                ``self.fill_tc`` has run.
        '''

        x = getattr(OpcSrv, op + 'SoapIn')()

        # Apply General attributes (Options)
        self.fill_tc(x, IClist, Options)

        # All Options should be gone, if not raise error
        if Options:
            # Format the message; it was previously passed as a second
            # exception argument and never interpolated.
            raise TypeError('Unknown options given: %s' % str(Options))

        # Serialize typecode
        SOAPMessage = str(ZSI.SoapWriter().serialize(x, unique=True))

        headers = {
            'SOAPAction':
            'http://opcfoundation.org/webservices/XMLDA/1.0/' + op,
            'content-type': 'text/xml; charset=utf-8',
            'content-length': str(len(SOAPMessage))
        }

        # If '/' is not the end of the server address, the operation
        # fails. This should better be handled by the server
        if self.OPCServerAddress[-1] != '/':
            self.OPCServerAddress += '/'

        scheme, host, port, path = twclient._parse(self.OPCServerAddress)

        factory = twclient.HTTPClientFactory(self.OPCServerAddress,
                                             method='POST',
                                             postdata=SOAPMessage,
                                             headers=headers,
                                             agent='Twisted OPC XMLDA Client',
                                             timeout=0)
        if scheme == 'https':
            from twisted.internet import ssl
            # The old code tested and assigned a name "contextFactory" that
            # is not a parameter; the assignment made it local, so the test
            # always raised UnboundLocalError.  Build the context directly.
            ssl_context = ssl.ClientContextFactory()
            reactor.connectSSL(host, port, factory, ssl_context)
        else:
            reactor.connectTCP(host, port, factory)

        # Add handle___Response to the callback chain
        n = getattr(self, 'twhandle' + op)
        factory.deferred.addCallback(n)
        factory.deferred.addErrback(handleFault)
        return factory.deferred
Exemple #30
0
    def test_duplicateHeaderCGI(self):
        """
        When a CGI script emits the same header twice, both values appear
        in the response headers.
        """
        script_path = self.writeCGI(DUAL_HEADER_CGI)
        portnum = self.startServer(script_path)

        factory = client.HTTPClientFactory(
            "http://localhost:%d/cgi" % (portnum,))
        reactor.connectTCP('localhost', portnum, factory)

        def verify_headers(_ignored):
            received = factory.response_headers['header']
            self.assertEqual(received, ['spam', 'eggs'])

        finished = factory.deferred
        finished.addCallback(verify_headers)
        return finished