Example #1
0
    def test_proxy_negotiate_fail(self, cr_conn_mock):
        """A SocksError coming from the patched connection factory must
        propagate out of ``connector.connect``."""
        # The patched factory takes its behaviour from a fake coroutine
        # built around a SocksError instance.
        failing_factory = fake_coroutine(aiosocks.SocksError())
        cr_conn_mock.side_effect = failing_factory.side_effect

        fake_loop = mock.Mock()
        proxy_addr = aiosocks.Socks5Addr('127.0.0.1')
        connector = SocksConnector(proxy_addr, None, loop=fake_loop)
        fake_loop.getaddrinfo = fake_coroutine([mock.MagicMock()])

        request = ClientRequest('GET', 'http://python.org', loop=self.loop)

        with self.assertRaises(aiosocks.SocksError):
            self.loop.run_until_complete(connector.connect(request))
Example #2
0
def get_tor_connector(string):
    """Build a remote-resolving SOCKS5 connector for the proxy in ``string``.

    The host and port come from ``parse_proxy_address(string)`` and the
    login/password pair from ``generate_credentials()``.
    """
    host, port = parse_proxy_address(string)
    username, secret = generate_credentials()
    return SocksConnector(
        proxy=aiosocks.Socks5Addr(host, int(port)),
        proxy_auth=aiosocks.Socks5Auth(username, password=secret),
        remote_resolve=True,
    )
Example #3
0
    def test_connect_remote_resolve(self, cr_conn_mock):
        """With ``remote_resolve=True``, connect() must call
        ``_resolve_host`` exactly once."""
        transport = mock.Mock(name='transport')
        protocol = mock.Mock(name='protocol')
        cr_conn_mock.side_effect = \
            fake_coroutine((transport, protocol)).side_effect

        connector = SocksConnector(
            aiosocks.Socks5Addr('127.0.0.1'), None,
            loop=self.loop, remote_resolve=True)
        # Replace host resolution so its call count can be inspected.
        connector._resolve_host = fake_coroutine([mock.MagicMock()])

        request = ClientRequest('GET', 'http://python.org', loop=self.loop)
        connection = self.loop.run_until_complete(connector.connect(request))

        self.assertEqual(connector._resolve_host.call_count, 1)

        connection.close()
Example #4
0
    def test_fingerprint_success(self):
        """An HTTPS GET through the fake SOCKS4 proxy, made with the
        fingerprint below, returns 200 and the expected body."""
        expected_fp = (b's\x93\xfd:\xed\x08\x1do\xa9\xaeq9'
                       b'\x1a\xe3\xc5\x7f\x89\xe7l\xf9')

        with fake_socks4_srv(self.loop) as proxy_port:
            connector = SocksConnector(
                proxy=aiosocks.Socks4Addr('127.0.0.1', proxy_port),
                proxy_auth=None,
                loop=self.loop,
                remote_resolve=False,
                verify_ssl=False,
                fingerprint=expected_fp)

            with http_srv(self.loop, use_ssl=True) as url, \
                    aiohttp.ClientSession(connector=connector,
                                          loop=self.loop) as session:

                @asyncio.coroutine
                def fetch():
                    response = yield from session.request('get', url=url)
                    return response

                response = self.loop.run_until_complete(fetch())
                self.assertEqual(response.status, 200)

                body = self.loop.run_until_complete(response.text())
                self.assertEqual(body, 'Test message')

                response.close()
Example #5
0
    def test_https_connect(self):
        """An HTTPS GET through the fake SOCKS4 proxy returns 200 and the
        expected body."""
        with fake_socks4_srv(self.loop) as proxy_port:
            proxy_addr = aiosocks.Socks4Addr('127.0.0.1', proxy_port)
            connector = SocksConnector(proxy=proxy_addr, proxy_auth=None,
                                       loop=self.loop, remote_resolve=False,
                                       verify_ssl=False)

            with http_srv(self.loop, use_ssl=True) as url, \
                    aiohttp.ClientSession(connector=connector,
                                          loop=self.loop) as session:

                @asyncio.coroutine
                def fetch():
                    return (yield from session.request('get', url=url))

                response = self.loop.run_until_complete(fetch())
                self.assertEqual(response.status, 200)

                text = self.loop.run_until_complete(response.text())
                self.assertEqual(text, 'Test message')

                response.close()
Example #6
0
async def worker(urls_q, proxies_q, proxies_q_good):
    """Fetch result pages for queued keywords through SOCKS5 proxies.

    Each iteration takes a proxy (preferring ``proxies_q_good`` over
    ``proxies_q``), takes a keyword from ``urls_q``, requests ``base_link``
    formatted with that keyword and appends ``keyword<TAB>position`` to
    ``data/google_positions_result.txt``.

    When the request or its validation fails, the keyword is put back on
    ``urls_q`` and the proxy is not re-queued; on success the proxy is put
    on ``proxies_q_good``.  The coroutine returns once ``urls_q`` is empty.
    """
    while True:
        # Prefer a proxy that has already worked.
        if not proxies_q_good.empty():
            proxy = await proxies_q_good.get()
        else:
            proxy = await proxies_q.get()

        if proxy is None:
            await asyncio.sleep(1)
            continue
        row = 'http://{host}:{port}'.format(host=proxy.host, port=proxy.port)

        # No keywords left: this worker is done.
        if urls_q.empty():
            return
        keyword = await urls_q.get()

        url = base_link.format(keyword.replace(' ', '+'))
        # NOTE(review): the literal 'url' (not the ``url`` variable) is
        # printed here; kept unchanged -- confirm whether that is intended.
        print(row, '-->', 'url', '| WPQ:', proxies_q_good.qsize(), '| APQ:',
              proxies_q.qsize())

        # Make http request with SOCKS proxy
        try:
            addr = aiosocks.Socks5Addr(proxy.host, proxy.port)
            conn = SocksConnector(proxy=addr)
            with async_timeout.timeout(30):
                async with aiohttp.ClientSession(
                        connector=conn) as http_client:
                    async with http_client.get(url, headers=headers) as resp:
                        # Explicit checks instead of ``assert``: asserts are
                        # removed under ``python -O`` and a bad response
                        # would then be treated as a success.
                        if resp.status != 200:
                            raise RuntimeError(
                                'unexpected HTTP status {}'.format(
                                    resp.status))
                        code = await resp.text()
            if 'body' not in code:
                raise RuntimeError("response does not contain 'body'")
        except Exception as e:
            print(type(e), e, '[worker, http_client.Exception]')
            # Give the keyword back so another proxy can retry it.
            await urls_q.put(keyword)
            continue

        # If proxy is working put it into good (working) proxies Queue again
        proxies_q_good.put_nowait(proxy)

        # Extract the position and append it to the result file.
        try:
            position = get_position(code)
            with open('data/google_positions_result.txt',
                      'a',
                      encoding='utf-8') as result:
                result.write('{}\t{}\n'.format(keyword, position))
        except Exception as e:
            print(type(e), e, '[data_formatting Exception]')
            continue

        await asyncio.sleep(1)
Example #7
0
    def test_connect_proxy_domain(self, cr_conn_mock):
        """connect() through a proxy given by domain name resolves a host
        exactly once and yields the mocked transport."""
        tr, proto = mock.Mock(name='transport'), mock.Mock(name='protocol')
        cr_conn_mock.side_effect = \
            fake_coroutine((tr, proto)).side_effect
        loop_mock = mock.Mock()

        req = ClientRequest('GET', 'http://python.org', loop=self.loop)
        connector = SocksConnector(aiosocks.Socks5Addr('proxy.example'),
                                   None, loop=loop_mock)

        # Replace host resolution so its calls can be inspected.
        connector._resolve_host = fake_coroutine([mock.MagicMock()])

        conn = self.loop.run_until_complete(connector.connect(req))

        # ``called`` is the real Mock flag; the previous ``is_called`` is
        # not a Mock attribute, so on a Mock it is auto-created and always
        # truthy, which made the assertion vacuous.
        self.assertTrue(connector._resolve_host.called)
        self.assertEqual(connector._resolve_host.call_count, 1)
        self.assertIs(conn._transport, tr)

        conn.close()
async def main():
  """Print the IP reported by icanhazip.com for a direct request, then for
  a request made through the SOCKS5 proxy at 127.0.0.1:9050.

  Responses and the SOCKS connector are closed even when a request or
  body read fails.
  """
  response = await aiohttp.get('http://icanhazip.com/')
  try:
    body = await response.text()
  finally:
    response.close()
  print('ip: {}'.format(body.strip()))

  addr = aiosocks.Socks5Addr('127.0.0.1', 9050)
  conn = SocksConnector(proxy=addr, remote_resolve=False)
  try:
    response = await aiohttp.get('http://icanhazip.com/', connector=conn)
    try:
      body = await response.text()
    finally:
      response.close()
  finally:
    # The connector was created here, so it is released here as well.
    conn.close()
  print('tor ip: {}'.format(body.strip()))
Example #9
0
    def test_connect_proxy_ip(self, cr_conn_mock):
        """connect() through a proxy given as an IP address yields the
        mocked transport."""
        transport = mock.Mock(name='transport')
        protocol = mock.Mock(name='protocol')
        cr_conn_mock.side_effect = \
            fake_coroutine((transport, protocol)).side_effect

        fake_loop = mock.Mock()
        connector = SocksConnector(aiosocks.Socks5Addr('127.0.0.1'),
                                   None, loop=fake_loop)
        fake_loop.getaddrinfo = fake_coroutine([mock.MagicMock()])

        request = ClientRequest('GET', 'http://python.org', loop=self.loop)
        connection = self.loop.run_until_complete(connector.connect(request))

        # NOTE(review): if fake_coroutine returns a Mock, ``is_called`` is an
        # auto-created attribute and always truthy (the real flag is
        # ``called``) -- confirm whether getaddrinfo is expected to be
        # called for an IP proxy before tightening this assertion.
        self.assertTrue(fake_loop.getaddrinfo.is_called)
        self.assertIs(connection._transport, transport)

        connection.close()
Example #10
0
async def worker(urls_q, proxies_q, proxies_q_good):
    """Check queued links against ``base_link`` through SOCKS5 proxies.

    Each iteration takes a proxy (preferring ``proxies_q_good`` over
    ``proxies_q``), takes a ``(page_url, link_text, link_url)`` item from
    ``urls_q``, requests ``base_link`` formatted with the quoted
    ``'cache:' + link_url`` and appends ``link_url; indexed`` or
    ``link_url; no`` to ``data/google_cache_result.txt`` depending on
    ``get_data``.

    When the request or its validation fails, the item is put back on
    ``urls_q`` and the proxy is not re-queued; on success the proxy is put
    on ``proxies_q_good``.  The coroutine returns once ``urls_q`` is empty.
    """
    while True:
        # Prefer a proxy that has already worked.
        if not proxies_q_good.empty():
            proxy = await proxies_q_good.get()
        else:
            proxy = await proxies_q.get()

        if proxy is None:
            await asyncio.sleep(1)
            continue
        row = 'http://{host}:{port}'.format(host=proxy.host, port=proxy.port)

        # No items left: this worker is done.
        if urls_q.empty():
            return
        page_url, link_text, link_url = await urls_q.get()

        url = base_link.format(quote('cache:' + link_url))
        print(row, '-->', url, '| WPQ:', proxies_q_good.qsize(), '| APQ:', proxies_q.qsize())

        # Make http request with SOCKS proxy
        try:
            addr = aiosocks.Socks5Addr(proxy.host, proxy.port)
            conn = SocksConnector(proxy=addr)
            with async_timeout.timeout(30):
                async with aiohttp.ClientSession(connector=conn) as http_client:
                    async with http_client.get(url, headers=headers) as resp:
                        code = await resp.text()
            # Explicit check instead of ``assert``: asserts are removed
            # under ``python -O`` and any response would then be accepted.
            if not (link_url in code or 'Not Found' in code):
                raise RuntimeError(
                    "response contains neither the link nor 'Not Found'")
        except Exception as e:
            print(type(e), e, '[worker, http_client.Exception]', link_url)
            # Re-queue the item as a plain tuple.  The previous
            # ``tuple(a, b, c)`` call raised TypeError inside this handler,
            # which ended the worker instead of retrying.
            await urls_q.put((page_url, link_text, link_url))
            continue

        # If proxy is working put it into good (working) proxies Queue again
        proxies_q_good.put_nowait(proxy)

        # Record whether the link counts as indexed.
        try:
            indexed = get_data(code, page_url)
            with open('data/google_cache_result.txt', 'a', encoding='utf-8') as result:
                if indexed:
                    result.write('{}; {}\n'.format(link_url, 'indexed'))
                else:
                    result.write('{}; {}\n'.format(link_url, 'no'))
        except Exception as e:
            print(type(e), e, '[data_formatting Exception]', link_url)
            continue

        await asyncio.sleep(1)
Example #11
0
def socks_connector(proxy, loop=None):
    """Create a ``SocksConnector`` from a proxy URL.

    A ``socks4`` scheme produces a ``Socks4Addr`` without auth; any other
    scheme produces a ``Socks5Addr``, with ``Socks5Auth`` only when the URL
    carries both a user and a password.  ``loop`` falls back to
    ``get_event_loop()`` when not given.
    """
    event_loop = loop or get_event_loop()
    proxy = URL(proxy)

    if proxy.scheme == 'socks4':
        address, credentials = Socks4Addr(proxy.host, proxy.port), None
    else:
        address = Socks5Addr(proxy.host, proxy.port)
        credentials = None
        if proxy.user and proxy.password:
            credentials = Socks5Auth(proxy.user, proxy.password)

    connector_options = dict(
        proxy=address,
        proxy_auth=credentials,
        limit=300,
        loop=event_loop,
        remote_resolve=False,
        verify_ssl=False,
    )
    return SocksConnector(**connector_options)
Example #12
0
 def __init__(self,
              adress,
              port,
              login=None,
              password=None,
              timeout=10,
              loop=None):
     """Set up ``self.session`` to go through a SOCKS5 proxy.

     ``adress`` and ``port`` locate the proxy.  ``Socks5Auth`` is used only
     when both ``login`` and ``password`` are truthy.  ``timeout`` is passed
     to the parent initializer and ``loop`` to the connector.
     """
     super().__init__(timeout)
     # NOTE(review): presumably discards whatever the parent initializer
     # opened before the proxied session is created -- confirm.
     self.close()

     credentials = (aiosocks.Socks5Auth(login, password=password)
                    if login and password else None)
     connector = SocksConnector(
         proxy=aiosocks.Socks5Addr(adress, port),
         proxy_auth=credentials,
         loop=loop)
     self.session = aiohttp.ClientSession(
         connector=connector, response_class=CustomClientResponse)
Example #13
0
 async def get_session(self, *args, **kwargs):
     """Return a ``ClientSession`` bound to the local SOCKS5 proxy and
     tagged with a circuit id.

     Recognised keyword arguments: ``circuit_id``, ``skip_auto_headers``
     and ``headers``.  When ``self.user_agent`` is set and ``headers`` has
     no ``'User-Agent'`` entry, one is added (calling ``self.user_agent``
     if it is callable).  Without an explicit ``circuit_id`` a random
     member of ``self._circuit_ids`` is used.  The session's ``get`` is
     replaced by ``self.get`` pre-bound to that session.

     Raises:
         IndexError: no ``circuit_id`` was given and ``self._circuit_ids``
             is empty.
     """
     circuit_id = kwargs.pop('circuit_id', None)
     skip_auto = kwargs.pop('skip_auto_headers', [])
     headers = kwargs.pop('headers', {})
     if self.user_agent is not None and 'User-Agent' not in headers:
         skip_auto.append('User-Agent')
         headers['User-Agent'] = self.user_agent() if callable(self.user_agent) else self.user_agent

     # Pick the circuit before any connector/session exists, so the
     # "no circuits" error cannot leave an unclosed ClientSession behind.
     if not circuit_id:
         if not self._circuit_ids and circuit_id is None:
             raise IndexError('No circuits in self.circuit_ids')
         circuit_id = random.choice(tuple(self._circuit_ids))

     socks_proxy = aiosocks.Socks5Addr('127.0.0.1', self.socks_port)
     # note very important limit parameter, thanks to this session can be bound to particular circuit id
     connector = SocksConnector(proxy=socks_proxy, remote_resolve=False, force_close=False, limit=1)
     session = aiohttp.ClientSession(connector=connector, skip_auto_headers=skip_auto, headers=headers)
     session.circuit_id = circuit_id
     session.get = functools.partial(self.get, session=session)
     return session
Example #14
0
def cli(url, debug, force_update, conn_count, proxy):
    """Command-line entry point: configure logging and networking, then run.

    Creates ``network.session`` on a ``TCPConnector`` or, when ``proxy`` is
    given, on a SOCKS5 ``SocksConnector`` built from ``proxy[0]`` and
    ``proxy[1]``.  With a proxy, ``verify_proxy()`` runs first; if it fails
    with one of the handled errors a message is printed and ``start`` is
    skipped.  The session is always closed at the end.
    """
    click.echo('version: {}'.format(__version__))

    if debug:
        logging.root.setLevel(logging.DEBUG)
        asyncio.get_event_loop().set_debug(True)

    for message, value in (('conn number is %s', conn_count),
                           ('proxy is %s', proxy),
                           ('force-update is %s', force_update)):
        logging.info(message, value)

    logging.debug('settings: {}'.format(settings))

    connector_options = dict(use_dns_cache=True, limit=conn_count,
                             conn_timeout=60)
    if proxy:
        connector = SocksConnector(aiosocks.Socks5Addr(proxy[0], proxy[1]),
                                   **connector_options)
    else:
        connector = aiohttp.TCPConnector(**connector_options)

    network.session = aiohttp.ClientSession(connector=connector)

    try:
        if proxy:
            logging.info('Test whether proxy config is correct')
            asyncio.get_event_loop().run_until_complete(verify_proxy())
    except (aiohttp.errors.ProxyConnectionError, ConnectionRefusedError,
            AssertionError) as e:
        print('Proxy config is wrong!\n {}'.format(e))
    else:
        start(url, force_update)
    finally:
        network.session.close()
        if bundle_env:
            click.echo('\n')
            input('Press any key to exit')
Example #15
0
    def __init__(self,
                 take_ownership=True, # Tor dies when the Sorter does
                 torrc_config=None,
                 socks_port=9050,
                 page_load_timeout=20,
                 max_tasks=10,
                 db_handler=None):
        """Start a tor process and create an aiohttp session that uses it.

        Args:
            take_ownership: passed to ``launch_tor_with_config``.
            torrc_config: tor configuration mapping; defaults to
                ``{"ControlPort": "9051", "CookieAuth": "1"}``.  It is
                copied, so neither the caller's mapping nor the default is
                modified when ``SocksPort`` is added.
            socks_port: port handed to ``find_free_port``; the port it
                returns is used for both tor and the connector.
            page_load_timeout: stored as ``self.page_load_timeout``.
            max_tasks: stored as ``self.max_tasks``.
            db_handler: stored as ``self.db_handler``.
        """
        if torrc_config is None:
            torrc_config = {"ControlPort": "9051",
                            "CookieAuth": "1"}

        self.logger = setup_logging(_log_dir, "sorter")
        self.db_handler = db_handler

        self.logger.info("Opening event loop for Sorter...")
        self.loop = asyncio.get_event_loop()
        self.max_tasks = max_tasks
        self.logger.info("Creating Sorter queue...")
        self.q = asyncio.Queue()

        # Start tor and create an aiohttp tor connector
        # Work on a private copy: updating a shared default (or the
        # caller's dict) would leak one instance's SocksPort into others.
        self.torrc_config = dict(torrc_config)
        self.socks_port = str(find_free_port(socks_port))
        self.torrc_config.update({"SocksPort": self.socks_port})
        self.logger.info("Starting tor process with config "
                         "{self.torrc_config}.".format(self=self))
        self.tor_process = launch_tor_with_config(config=self.torrc_config,
                                                  take_ownership=take_ownership)
        # Connect to the port tor was actually started on, not the requested
        # one (they differ when the requested port was busy).
        # NOTE(review): assumes find_free_port returns an integer port.
        onion_proxy = aiosocks.Socks5Addr('127.0.0.1', int(self.socks_port))
        conn = SocksConnector(proxy=onion_proxy, remote_resolve=True)

        # aiohttp's ClientSession does connection pooling and HTTP keep-alives
        # for us
        self.logger.info("Creating aiohttp ClientSession with our event loop "
                         "and tor proxy connector...")
        self.session = aiohttp.ClientSession(loop=self.loop, connector=conn)

        # Pretend we're Tor Browser in order to get rejected by less sites/WAFs
        u = "Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0"
        self.headers = {'user-agent': u}

        self.page_load_timeout = page_load_timeout
Example #16
0
async def cn_proxy_spider(ip_all, ip_num, proxy=None):
    """Scrape http://cn-proxy.com/ and append the listed proxies to ``ip_all``.

    Args:
        ip_all: list that receives one dict per table row with the keys
            ``'ip'`` (``http://<td0>:<td1>``), ``'addr'`` (first word of
            the third cell) and ``'time'`` (last cell).
        ip_num: not used by this function.
        proxy: optional mapping whose first ``host: port`` item is the
            SOCKS5 proxy to fetch the page through.  When it is ``None``
            or empty the page is fetched directly; previously the default
            ``None`` crashed on ``proxy.items()``.
    """
    cn_proxy_url = "http://cn-proxy.com/"
    if proxy:
        proxy_addr, proxy_port = next(iter(proxy.items()))
        conn = SocksConnector(proxy=aiosocks.Socks5Addr(proxy_addr, proxy_port),
                              proxy_auth=None,
                              remote_resolve=True)
    else:
        # connector=None lets ClientSession create its default connector.
        conn = None

    async with aiohttp.ClientSession(connector=conn) as session:
        async with session.get(cn_proxy_url) as resp:
            content = await resp.text()
            soup = BeautifulSoup(content, 'lxml')
            for tbody in soup.find_all('tbody'):
                for row in tbody.find_all('tr'):
                    td = row.find_all('td')
                    ip_all.append({
                        'ip':
                        'http://' + td[0].string + ':' + td[1].string,
                        'addr':
                        td[2].string.split(" ")[0],
                        'time':
                        td[-1].string
                    })
Example #17
0
    def test_fingerprint_fail(self):
        """An HTTPS GET through the fake SOCKS4 proxy, made with the
        fingerprint below, must raise ``aiohttp.FingerprintMismatch``."""
        wrong_fp = (b's\x93\xfd:\xed\x08\x1do\xa9\xaeq9'
                    b'\x1a\xe3\xc5\x7f\x89\xe7l\x10')

        with fake_socks4_srv(self.loop) as proxy_port:
            connector = SocksConnector(
                proxy=aiosocks.Socks4Addr('127.0.0.1', proxy_port),
                proxy_auth=None,
                loop=self.loop,
                remote_resolve=False,
                verify_ssl=False,
                fingerprint=wrong_fp)

            with http_srv(self.loop, use_ssl=True) as url, \
                    aiohttp.ClientSession(connector=connector,
                                          loop=self.loop) as session:

                @asyncio.coroutine
                def fetch():
                    return (yield from session.request('get', url=url))

                with self.assertRaises(aiohttp.FingerprintMismatch):
                    self.loop.run_until_complete(fetch())
Example #18
0
 def test_properties(self):
     """The connector exposes the very proxy address and auth objects it
     was constructed with."""
     proxy_addr = aiosocks.Socks4Addr('localhost')
     proxy_auth = aiosocks.Socks4Auth('login')
     connector = SocksConnector(proxy_addr, proxy_auth, loop=self.loop)

     for attribute, expected in (('proxy', proxy_addr),
                                 ('proxy_auth', proxy_auth)):
         self.assertIs(getattr(connector, attribute), expected)