コード例 #1
0
    def test_get_proxies_with_filter(self):
        """Check how get_proxies() talks to the store when given a filter.

        ``ps.Store`` and ``ps.ProxyResource`` are replaced with mocks. The
        test expects the refreshed proxies to be pushed into the store for
        every resource, the ``{'code': 'us'}`` filter to reach the store with
        its value wrapped in a set, and every proxy to be present in the
        returned collection.
        """
        first = Proxy('host1', 'port1', 'code', 'country', 'anonymous',
                      'type', 'source')
        second = Proxy('host2', 'por2', 'code', 'country', 'anonymous', 'type',
                       'source')
        refreshed = {first, second}

        # Calling each mock returns the mock itself, so the instance the
        # collector creates is the same object inspected below.
        fake_resource = Mock()
        fake_resource.return_value = fake_resource
        fake_resource.refresh.return_value = (True, refreshed)

        fake_store = Mock()
        fake_store.return_value = fake_store
        fake_store.get_proxies.return_value = [first, second]

        ps.ProxyResource = fake_resource
        ps.Store = fake_store

        collector = ps.Collector('http', 10, None)
        result = collector.get_proxies({'code': 'us'})

        for attrs in collector._resource_map.values():
            fake_store.update_store.assert_called_with(attrs['id'], refreshed)

        expected_filter = {'code': {'us'}}
        fake_store.get_proxies.assert_called_once_with(expected_filter,
                                                       collector._blacklist)

        for item in refreshed:
            self.assertIn(item, result)
コード例 #2
0
    def test_free_proxy_list_proxies_success(self):
        """Scrape the saved free-proxy-list page through a stubbed request.

        The open file handle is served as the response content, and every
        proxy returned by the resource must be one of the expected proxies.
        """
        wanted = {
            Proxy('179.124.59.232', '53281', 'br', 'brazil', True, 'https',
                  'free-proxy-list'),
            Proxy('200.107.59.98', '8080', 'ua', 'ukraine', False, 'http',
                  'free-proxy-list'),
            Proxy('217.172.244.7', '8080', 'ru', 'russian federation',
                  True, 'http', 'free-proxy-list'),
        }
        page_path = os.path.join(cwd, 'mock_pages',
                                 'free-proxy-list-proxy.html')

        with open(page_path, 'r') as page:
            fake_response = Mock(content=page, ok=True)

            def fake_get(url):
                return fake_response

            self.requests.get = fake_get

            resource = ProxyResource(RESOURCE_MAP['free-proxy-list'], 10)
            _refreshed, scraped = resource.refresh()

            # Only membership is checked: each scraped proxy must be expected.
            for item in scraped:
                self.assertIn(item, wanted)
コード例 #3
0
    def test_proxyscrape_success(self):
        """Scrape the saved proxyscrape text listing through a stubbed request.

        The file's text is served as ``response.text``. The resource name
        comes from ``get_proxyscrape_resource()``, and every proxy returned
        must be one of the expected proxies tagged with that name.
        """
        listing_path = os.path.join(cwd, 'mock_pages', 'proxyscrape.txt')

        with open(listing_path, 'r') as listing:
            fake_response = Mock(text=listing.read(), ok=True)

            def fake_get(url):
                return fake_response

            self.requests.get = fake_get

            # Created after the request stub is installed, as in the
            # original ordering.
            source = get_proxyscrape_resource()

            wanted = {
                Proxy(host, port, None, None, False, None, source)
                for host, port in (
                    ('179.124.59.232', '53281'),
                    ('200.107.59.98', '8080'),
                    ('217.172.244.7', '8080'),
                )
            }

            resource = ProxyResource(pss.RESOURCE_MAP[source], 10)
            _refreshed, scraped = resource.refresh()

            for item in scraped:
                self.assertIn(item, wanted)
コード例 #4
0
    def test_remove_blacklist_multiple(self):
        """Removing a set of blacklisted proxies leaves the blacklist empty."""
        blacklisted = {
            Proxy(host, 'port', 'code', 'country', 'anonymous', 'type',
                  'source')
            for host in ('host1', 'host2')
        }
        collector = ps.Collector('http', 10, None)

        collector.blacklist_proxy(blacklisted)
        collector.remove_blacklist(blacklisted)

        remaining = len(collector._blacklist)
        self.assertEqual(0, remaining)
コード例 #5
0
    def test_remove_blacklist_removes_correct_proxy(self):
        """Un-blacklisting one of two proxies keeps only the other's entry."""
        removed = Proxy('host1', 'port', 'code', 'country', 'anonymous',
                        'type', 'source')
        kept = Proxy('host2', 'port', 'code', 'country', 'anonymous', 'type',
                     'source')
        collector = ps.Collector('http', 10, None)

        collector.blacklist_proxy({removed, kept})
        collector.remove_blacklist(removed)

        self.assertEqual(1, len(collector._blacklist))
        # Blacklist entries are compared as the proxy's first two fields.
        kept_key = (kept[0], kept[1])
        self.assertIn(kept_key, collector._blacklist)
コード例 #6
0
    def test_blacklist_proxy_multiple(self):
        """Blacklisting a set of proxies records an entry for each of them."""
        batch = {
            Proxy(host, 'port', 'code', 'country', 'anonymous', 'type',
                  'source')
            for host in ('host1', 'host2')
        }
        collector = ps.Collector('http', 10, None)

        collector.blacklist_proxy(batch)

        for item in batch:
            # Entries are looked up by the proxy's first two fields.
            key = (item[0], item[1])
            self.assertIn(key, collector._blacklist)
コード例 #7
0
    def test_refresh_proxies_with_no_force(self):
        """refresh_proxies(False) must call refresh(False) on the resource.

        ``ps.Store`` and ``ps.ProxyResource`` are replaced with mocks. The
        refreshed proxies are expected to be written to the store under each
        resource's id.
        """
        only_proxy = Proxy('host', 'port', 'code', 'country', 'anonymous',
                           'type', 'source')
        refreshed = {only_proxy}

        # Calling each mock returns the mock itself, so the instance the
        # collector creates is the same object inspected below.
        fake_resource = Mock()
        fake_resource.return_value = fake_resource
        fake_resource.refresh.return_value = (True, refreshed)

        fake_store = Mock()
        fake_store.return_value = fake_store
        fake_store.get_proxy.return_value = None

        ps.ProxyResource = fake_resource
        ps.Store = fake_store

        collector = ps.Collector('http', 10, None)
        collector.refresh_proxies(False)

        fake_resource.refresh.assert_called_with(False)

        for attrs in collector._resource_map.values():
            fake_store.update_store.assert_called_with(attrs['id'], refreshed)
コード例 #8
0
    def test_remove_proxy_exception_if_invalid_resource_type(self):
        """remove_proxy() raises InvalidResourceTypeError for this proxy.

        The proxy's last field is 'invalid-source'.
        """
        collector = ps.Collector('http', 10, None)
        stray = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                      'invalid-source')

        self.assertRaises(InvalidResourceTypeError, collector.remove_proxy,
                          stray)
コード例 #9
0
    def test_get_proxy_no_filter(self):
        """get_proxy() with no arguments queries the store with an empty filter.

        ``ps.Store`` and ``ps.ProxyResource`` are replaced with mocks. The
        proxy handed back by the store must be returned unchanged.
        """
        stored = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                       'source')
        refreshed = {stored}

        # Calling each mock returns the mock itself, so the instance the
        # collector creates is the same object inspected below.
        fake_resource = Mock()
        fake_resource.return_value = fake_resource
        fake_resource.refresh.return_value = (True, refreshed)

        fake_store = Mock()
        fake_store.return_value = fake_store
        fake_store.get_proxy.return_value = stored

        ps.ProxyResource = fake_resource
        ps.Store = fake_store

        collector = ps.Collector('http', 10, None)
        result = collector.get_proxy()

        for attrs in collector._resource_map.values():
            fake_store.update_store.assert_called_with(attrs['id'], refreshed)

        fake_store.get_proxy.assert_called_once_with({}, collector._blacklist)
        self.assertEqual(stored, result)
コード例 #10
0
    def test_blacklist_proxy_single(self):
        """Blacklisting one proxy records exactly one entry for it.

        The expected entry is the tuple of the proxy's first two fields (the
        'host' and 'port' values passed to ``Proxy`` here).
        """
        collector = ps.Collector('http', 10, None)
        proxy = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                      'source')
        collector.blacklist_proxy(proxy)

        # Compare the whole set rather than pop() an element: pop() mutates
        # the collector under test and would still pass if extra entries had
        # been added alongside the expected one.
        self.assertEqual({(proxy[0], proxy[1])}, collector._blacklist)
コード例 #11
0
    def test_remove_blacklist_single_with_host_and_port(self):
        """remove_blacklist(host=..., port=...) clears the matching entry."""
        collector = ps.Collector('http', 10, None)
        collector.blacklist_proxy(
            Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                  'source'))

        # Removal is addressed by keyword host/port, not by a Proxy object.
        collector.remove_blacklist(host='host', port='port')

        remaining = len(collector._blacklist)
        self.assertEqual(0, remaining)
コード例 #12
0
    def test_clear_blacklist_clears_correctly(self):
        """clear_blacklist() leaves the collector with an empty blacklist set."""
        entry = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                      'source')
        collector = ps.Collector('http', 10, None)

        collector.blacklist_proxy(entry)
        collector.clear_blacklist()

        nothing = set()
        self.assertSetEqual(nothing, collector._blacklist)
コード例 #13
0
    def test_remove_proxy_multiple(self):
        """remove_proxy() with a set calls the store once per proxy.

        ``ps.Store`` is replaced with a mock. Each proxy must be removed
        under the id of the resource named in its last field.
        """
        # Calling the mock returns the mock itself, so the instance the
        # collector creates is the same object inspected below.
        fake_store = Mock()
        fake_store.return_value = fake_store
        fake_store.get_proxy.return_value = None
        ps.Store = fake_store

        from_anonymous = Proxy('host1', 'port', 'code', 'country',
                               'anonymous', 'type', 'anonymous-proxy')
        from_us = Proxy('host2', 'port', 'code', 'country', 'anonymous',
                        'type', 'us-proxy')

        collector = ps.Collector('http', 10, None)
        collector.remove_proxy({from_anonymous, from_us})

        pairs = (('anonymous-proxy', from_anonymous), ('us-proxy', from_us))
        for source, item in pairs:
            resource_id = collector._resource_map[source]['id']
            fake_store.remove_proxy.assert_any_call(resource_id, item)
コード例 #14
0
    def test_remove_proxy_single(self):
        """remove_proxy() with one proxy forwards it to the store.

        ``ps.Store`` is replaced with a mock. The store call must use the id
        of the 'anonymous-proxy' resource.
        """
        # Calling the mock returns the mock itself, so the instance the
        # collector creates is the same object inspected below.
        fake_store = Mock()
        fake_store.return_value = fake_store
        fake_store.get_proxy.return_value = None
        ps.Store = fake_store

        target = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                       'anonymous-proxy')

        collector = ps.Collector('http', 10, None)
        collector.remove_proxy(target)

        resource_id = collector._resource_map['anonymous-proxy']['id']
        fake_store.remove_proxy.assert_called_with(resource_id, target)
コード例 #15
0
    def test_proxy_daily_http_proxies_success(self):
        """Scrape the saved proxy-daily page for the 'proxy-daily-http' resource.

        The open file handle is served as the response content, and every
        proxy returned by the resource must be one of the expected proxies.
        """
        wanted = {
            Proxy(host, port, None, None, None, 'http', 'proxy-daily-http')
            for host, port in (
                ('93.190.253.50', '80'),
                ('207.154.231.209', '3128'),
                ('88.255.101.177', '53281'),
            )
        }
        page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')

        with open(page_path, 'r') as page:
            fake_response = Mock(content=page, ok=True)

            def fake_get(url):
                return fake_response

            self.requests.get = fake_get

            resource = ProxyResource(RESOURCE_MAP['proxy-daily-http'], 10)
            _refreshed, scraped = resource.refresh()

            for item in scraped:
                self.assertIn(item, wanted)
コード例 #16
0
    def test_proxy_daily_socks4_proxies_success(self):
        """Scrape the saved proxy-daily page for the 'proxy-daily-socks4' resource.

        The open file handle is served as the response content, and every
        proxy returned by the resource must be one of the expected proxies.
        """
        wanted = {
            Proxy(host, port, None, None, None, 'socks4',
                  'proxy-daily-socks4')
            for host, port in (
                ('54.38.156.185', '8888'),
                ('194.85.174.74', '1080'),
                ('41.79.237.135', '1080'),
            )
        }
        page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')

        with open(page_path, 'r') as page:
            fake_response = Mock(content=page, ok=True)

            def fake_get(url):
                return fake_response

            self.requests.get = fake_get

            resource = ProxyResource(RESOURCE_MAP['proxy-daily-socks4'], 10)
            _refreshed, scraped = resource.refresh()

            for item in scraped:
                self.assertIn(item, wanted)
コード例 #17
0
    def test_proxy_daily_socks5_proxies_success(self):
        """Scrape the saved proxy-daily page for the 'proxy-daily-socks5' resource.

        The open file handle is served as the response content, and every
        proxy returned by the resource must be one of the expected proxies.
        """
        wanted = {
            Proxy(host, port, None, None, None, 'socks5',
                  'proxy-daily-socks5')
            for host, port in (
                ('176.9.19.170', '1080'),
                ('188.26.83.105', '1080'),
                ('150.129.151.44', '6667'),
            )
        }
        page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')

        with open(page_path, 'r') as page:
            fake_response = Mock(content=page, ok=True)

            def fake_get(url):
                return fake_response

            self.requests.get = fake_get

            resource = ProxyResource(RESOURCE_MAP['proxy-daily-socks5'], 10)
            _refreshed, scraped = resource.refresh()

            for item in scraped:
                self.assertIn(item, wanted)
コード例 #18
0
    def test_us_proxies_success(self):
        """Scrape the saved us-proxy page through a stubbed request.

        The open file handle is served as the response content, and every
        proxy returned by the resource must be one of the expected proxies.
        """
        wanted = {
            Proxy('179.124.59.232', '53281', 'us', 'united states', True,
                  'https', 'us-proxy'),
            Proxy('200.107.59.98', '8080', 'us', 'united states', True,
                  'http', 'us-proxy'),
            Proxy('217.172.244.7', '8080', 'us', 'united states', False,
                  'http', 'us-proxy'),
        }
        page_path = os.path.join(cwd, 'mock_pages', 'us-proxy.html')

        with open(page_path, 'r') as page:
            fake_response = Mock(content=page, ok=True)

            def fake_get(url):
                return fake_response

            self.requests.get = fake_get

            resource = ProxyResource(RESOURCE_MAP['us-proxy'], 10)
            _refreshed, scraped = resource.refresh()

            for item in scraped:
                self.assertIn(item, wanted)
コード例 #19
0
    def test_doesnt_refresh_if_not_expired(self):
        """A second refresh() straight after the first reports no refresh.

        The resource is built with 5 as its second argument (presumably the
        expiry interval; confirm against ProxyResource). The first call must
        return ``(True, proxies)`` and the immediate second call
        ``(False, None)``.
        """
        canned = [
            Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                  'source'),
        ]
        resource = ProxyResource(lambda: canned, 5)

        did_refresh, result = resource.refresh()
        self.assertEqual(True, did_refresh)
        self.assertEqual(canned[0], result[0])

        did_refresh, result = resource.refresh()
        self.assertEqual(False, did_refresh)
        self.assertEqual(None, result)
コード例 #20
0
    def test_doesnt_refresh_if_lock_check(self):
        """refresh() reports no refresh under a scripted clock sequence.

        After one real refresh, ``proxyscrape.scrapers.time`` is patched so
        that successive ``time()`` calls yield a value ten seconds ahead of
        now, then -1, then 0. With that sequence the second refresh() must
        return ``(False, None)``.
        """
        canned = [
            Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                  'source'),
        ]
        resource = ProxyResource(lambda: canned, 5)

        did_refresh, result = resource.refresh()
        self.assertEqual(True, did_refresh)
        self.assertEqual(canned[0], result[0])

        with patch('proxyscrape.scrapers.time') as fake_time_module:
            # The first value is computed from the real clock; the later
            # values are fixed.
            scripted = iter([time.time() + 10, -1, 0])

            def fake_time():
                return next(scripted)

            fake_time_module.time = fake_time

            did_refresh, result = resource.refresh()
            self.assertEqual(False, did_refresh)
            self.assertIsNone(result)