def test_blacklist_proxy_multiple(self):
        """Blacklisting a set of proxies records every member of that set."""
        to_block = {
            Proxy(host, 'port', 'code', 'country', 'anonymous', 'type', 'source')
            for host in ('host1', 'host2')
        }
        collector = ps.Collector('http', 10, None)

        collector.blacklist_proxy(to_block)

        # Each proxy handed over must now be found in the collector's blacklist.
        for blocked in to_block:
            self.assertIn(blocked, collector._blacklist)
    def test_refresh_proxies_with_no_force(self):
        """``refresh_proxies(False)`` forwards ``False`` to the resource refresh.

        ``ps.Store`` and ``ps.ProxyResource`` are swapped for mocks while the
        test runs. The test then checks that the mocked resource was refreshed
        with ``False`` and that the store was updated with the refreshed
        proxies for the resources registered on the collector.
        """
        proxy = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                      'source')
        proxies = {proxy}

        store_mock = Mock()
        store_mock.return_value = store_mock  # Ensure same instance when initialized
        store_mock.get_proxy.return_value = None

        proxy_resource_mock = Mock()
        proxy_resource_mock.return_value = proxy_resource_mock  # Ensure same instance when initialized
        proxy_resource_mock.refresh.return_value = (True, proxies)

        # Restore the real module attributes when the test ends (pass or fail)
        # so the mocks cannot leak into tests that run afterwards.
        self.addCleanup(setattr, ps, 'Store', ps.Store)
        self.addCleanup(setattr, ps, 'ProxyResource', ps.ProxyResource)
        ps.Store = store_mock
        ps.ProxyResource = proxy_resource_mock

        collector = ps.Collector('http', 10, None)
        collector.refresh_proxies(False)

        proxy_resource_mock.refresh.assert_called_with(False)

        for attrs in collector._resource_map.values():
            store_mock.update_store.assert_called_with(attrs['id'], proxies)
Beispiel #3
0
    def test_update_store_invalid_id_does_nothing(self):
        """Updating with an id not obtained from ``add_store`` yields no proxy."""
        candidate = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
        store = Store()

        # No add_store() call was made in this test, so id 1 was never handed out here.
        store.update_store(1, candidate)

        self.assertIsNone(store.get_proxy())
    def test_blacklist_proxy_single(self):
        """Blacklisting one proxy makes it retrievable from the blacklist."""
        blocked = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                        'source')
        collector = ps.Collector('http', 10, None)

        collector.blacklist_proxy(blocked)

        # pop() removes and returns an entry; it is compared against the proxy.
        recorded = collector._blacklist.pop()
        self.assertEqual(blocked, recorded)
    def test_remove_proxy_exception_if_invalid_resource_type(self):
        """Removing a proxy whose source is 'invalid-source' raises
        ``InvalidResourceTypeError``."""
        collector = ps.Collector('http', 10, None)
        unknown_source_proxy = Proxy('host', 'port', 'code', 'country',
                                     'anonymous', 'type', 'invalid-source')

        self.assertRaises(InvalidResourceTypeError,
                          collector.remove_proxy, unknown_source_proxy)
    def test_get_proxy_with_filter(self):
        """``get_proxy`` combines the caller's filter with the collector type.

        ``ps.Store`` and ``ps.ProxyResource`` are swapped for mocks while the
        test runs. A collector created for ``'http'`` is asked for a proxy
        with ``{'code': 'us'}``. The test expects the store to be queried once
        with both the ``'type'`` and ``'code'`` filters as sets, together with
        the collector's blacklist, and the store's proxy to be returned.
        """
        proxy = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                      'source')
        proxies = {proxy}

        store_mock = Mock()
        store_mock.return_value = store_mock  # Ensure same instance when initialized
        store_mock.get_proxy.return_value = proxy

        proxy_resource_mock = Mock()
        proxy_resource_mock.return_value = proxy_resource_mock  # Ensure same instance when initialized
        proxy_resource_mock.refresh.return_value = (True, proxies)

        # Restore the real module attributes when the test ends (pass or fail)
        # so the mocks cannot leak into tests that run afterwards.
        self.addCleanup(setattr, ps, 'Store', ps.Store)
        self.addCleanup(setattr, ps, 'ProxyResource', ps.ProxyResource)
        ps.Store = store_mock
        ps.ProxyResource = proxy_resource_mock

        collector = ps.Collector('http', 10, None)
        actual = collector.get_proxy({'code': 'us'})

        for attrs in collector._resource_map.values():
            store_mock.update_store.assert_called_with(attrs['id'], proxies)

        expected_filter = {
            'type': {'http'},
            'code': {'us'},
        }
        store_mock.get_proxy.assert_called_once_with(expected_filter,
                                                     collector._blacklist)
        self.assertEqual(proxy, actual)
Beispiel #7
0
    def test_get_proxy_returns_proxy_if_any(self):
        """``get_proxy`` returns the only proxy that was put into the store."""
        expected = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
        store = Store()
        store_id = store.add_store()

        store.update_store(store_id, {expected})

        self.assertEqual(expected, store.get_proxy())
Beispiel #8
0
    def test_get_proxy_returns_proxy_if_not_filtered(self):
        """A stored proxy is returned when the 'code' filter includes its code."""
        stored = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
        store = Store()
        store_id = store.add_store()
        store.update_store(store_id, {stored})

        # The proxy was built with 'us' as its third field, matching the filter.
        matching_filter = {'code': {'us'}}
        actual = store.get_proxy(filter_opts=matching_filter)

        self.assertEqual(actual, stored)
Beispiel #9
0
    def test_get_proxy_returns_empty_if_blacklisted(self):
        """``get_proxy`` returns ``None`` when the only stored proxy is blacklisted."""
        stored = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
        store = Store()
        store_id = store.add_store()
        store.update_store(store_id, {stored})

        # The blacklist entry is a pair made of the proxy's first two fields.
        blacklist_entry = (stored[0], stored[1])
        actual = store.get_proxy(blacklist={blacklist_entry})

        self.assertIsNone(actual)
Beispiel #10
0
    def test_update_store_updates_proxies(self):
        """After ``update_store`` with one proxy, ``get_proxy`` returns it."""
        stored = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
        store = Store()
        store_id = store.add_store()

        store.update_store(store_id, {stored})

        self.assertEqual(stored, store.get_proxy())
    def test_clear_blacklist_clears_correctly(self):
        """``clear_blacklist`` leaves the collector with an empty blacklist set."""
        blocked = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                        'source')
        collector = ps.Collector('http', 10, None)

        # Populate the blacklist first so clearing it has something to remove.
        collector.blacklist_proxy(blocked)
        collector.clear_blacklist()

        nothing_blocked = set()
        self.assertSetEqual(nothing_blocked, collector._blacklist)
Beispiel #12
0
    def test_get_proxy_returns_empty_if_filtered_and_blacklisted(self):
        """``get_proxy`` returns ``None`` when a 'country' filter of 'uk' and a
        blacklist naming the stored proxy are applied together."""
        stored = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
        store = Store()
        store_id = store.add_store()
        store.update_store(store_id, {stored})

        country_filter = {'country': {'uk'}}
        # The blacklist entry is a pair made of the proxy's first two fields.
        blacklist_entry = (stored[0], stored[1])
        actual = store.get_proxy(filter_opts=country_filter,
                                 blacklist={blacklist_entry})

        self.assertIsNone(actual)
    def test_us_proxies_success(self):
        """Proxies refreshed from the 'us-proxy' resource are all among the
        expected ones, using the saved ``us-proxy.html`` page as the response."""
        page_path = os.path.join(cwd, 'mock_pages', 'us-proxy.html')
        with open(page_path, 'r') as html:
            # Fake a successful HTTP response whose content is the open page file.
            response = Mock(content=html, ok=True)
            self.requests.get = lambda url: response

            expected = {
                Proxy('179.124.59.232', '53281', 'us', 'united states', True, 'https', 'us-proxy'),
                Proxy('200.107.59.98', '8080', 'us', 'united states', True, 'http', 'us-proxy'),
                Proxy('217.172.244.7', '8080', 'us', 'united states', False, 'http', 'us-proxy'),
            }

            resource = ProxyResource(RESOURCE_MAP['us-proxy'], 10)
            _refreshed, scraped = resource.refresh()

            for found in scraped:
                self.assertIn(found, expected)
    def test_proxy_daily_socks5_proxies_success(self):
        """Proxies refreshed from the 'proxy-daily-socks5' resource are all among
        the expected ones, using the saved ``proxy-daily-proxy.html`` page."""
        page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')
        with open(page_path, 'r') as html:
            # Fake a successful HTTP response whose content is the open page file.
            response = Mock(content=html, ok=True)
            self.requests.get = lambda url: response

            expected = {
                Proxy('176.9.19.170', '1080', None, None, None, 'socks5', 'proxy-daily-socks5'),
                Proxy('188.26.83.105', '1080', None, None, None, 'socks5', 'proxy-daily-socks5'),
                Proxy('150.129.151.44', '6667', None, None, None, 'socks5', 'proxy-daily-socks5'),
            }

            resource = ProxyResource(RESOURCE_MAP['proxy-daily-socks5'], 10)
            _refreshed, scraped = resource.refresh()

            for found in scraped:
                self.assertIn(found, expected)
    def test_proxy_daily_socks4_proxies_success(self):
        """Proxies refreshed from the 'proxy-daily-socks4' resource are all among
        the expected ones, using the saved ``proxy-daily-proxy.html`` page."""
        page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')
        with open(page_path, 'r') as html:
            # Fake a successful HTTP response whose content is the open page file.
            response = Mock(content=html, ok=True)
            self.requests.get = lambda url: response

            expected = {
                Proxy('54.38.156.185', '8888', None, None, None, 'socks4', 'proxy-daily-socks4'),
                Proxy('194.85.174.74', '1080', None, None, None, 'socks4', 'proxy-daily-socks4'),
                Proxy('41.79.237.135', '1080', None, None, None, 'socks4', 'proxy-daily-socks4'),
            }

            resource = ProxyResource(RESOURCE_MAP['proxy-daily-socks4'], 10)
            _refreshed, scraped = resource.refresh()

            for found in scraped:
                self.assertIn(found, expected)
    def test_proxy_daily_http_proxies_success(self):
        """Proxies refreshed from the 'proxy-daily-http' resource are all among
        the expected ones, using the saved ``proxy-daily-proxy.html`` page."""
        page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')
        with open(page_path, 'r') as html:
            # Fake a successful HTTP response whose content is the open page file.
            response = Mock(content=html, ok=True)
            self.requests.get = lambda url: response

            expected = {
                Proxy('93.190.253.50', '80', None, None, None, 'http', 'proxy-daily-http'),
                Proxy('207.154.231.209', '3128', None, None, None, 'http', 'proxy-daily-http'),
                Proxy('88.255.101.177', '53281', None, None, None, 'http', 'proxy-daily-http'),
            }

            resource = ProxyResource(RESOURCE_MAP['proxy-daily-http'], 10)
            _refreshed, scraped = resource.refresh()

            for found in scraped:
                self.assertIn(found, expected)
    def test_remove_proxy_multiple(self):
        """Removing a set of proxies removes each one from its source's store.

        ``ps.Store`` is swapped for a mock while the test runs. For each proxy
        the test expects ``Store.remove_proxy`` to have been called with the
        id that the collector's resource map holds for that proxy's source.
        """
        store_mock = Mock()
        store_mock.return_value = store_mock  # Ensure same instance when initialized
        store_mock.get_proxy.return_value = None

        # Restore the real Store when the test ends (pass or fail) so the mock
        # cannot leak into tests that run afterwards.
        self.addCleanup(setattr, ps, 'Store', ps.Store)
        ps.Store = store_mock

        proxy1 = Proxy('host1', 'port', 'code', 'country', 'anonymous', 'type',
                       'anonymous-proxy')
        proxy2 = Proxy('host2', 'port', 'code', 'country', 'anonymous', 'type',
                       'us-proxy')
        proxies = {proxy1, proxy2}

        collector = ps.Collector('http', 10, None)
        collector.remove_proxy(proxies)

        # The call order over a set is not fixed, hence assert_any_call
        # rather than assert_called_with.
        for source, proxy in (('anonymous-proxy', proxy1), ('us-proxy', proxy2)):
            store_id = collector._resource_map[source]['id']
            store_mock.remove_proxy.assert_any_call(store_id, proxy)
    def test_socks_proxy_proxies_success(self):
        """Proxies refreshed from the 'socks-proxy' resource are all among the
        expected ones, using the saved ``socks-proxy.html`` page as the response."""
        page_path = os.path.join(cwd, 'mock_pages', 'socks-proxy.html')
        with open(page_path, 'r') as html:
            # Fake a successful HTTP response whose content is the open page file.
            response = Mock(content=html, ok=True)
            self.requests.get = lambda url: response

            expected = {
                Proxy('179.124.59.232', '53281', 'br', 'brazil', True, 'socks4', 'socks-proxy'),
                Proxy('200.107.59.98', '8080', 'ua', 'ukraine', True, 'socks5', 'socks-proxy'),
                Proxy('217.172.244.7', '8080', 'ru', 'russian federation', True, 'socks4', 'socks-proxy'),
            }

            resource = ProxyResource(RESOURCE_MAP['socks-proxy'], 10)
            _refreshed, scraped = resource.refresh()

            for found in scraped:
                self.assertIn(found, expected)
Beispiel #19
0
    def test_remove_proxy_removes_from_set(self):
        """A stored proxy is returned before ``remove_proxy`` and ``None`` after."""
        stored = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
        store = Store()
        store_id = store.add_store()
        store.update_store(store_id, {stored})

        # Take one reading before the removal and one after it.
        before_removal = store.get_proxy()
        store.remove_proxy(store_id, stored)
        after_removal = store.get_proxy()

        self.assertEqual(before_removal, stored)
        self.assertIsNone(after_removal)
Beispiel #20
0
    def test_update_store_clears_if_none(self):
        """Updating a store with ``None`` makes ``get_proxy`` return ``None``."""
        stored = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
        store = Store()
        store_id = store.add_store()

        # Take one reading with the proxy stored and one after updating with None.
        store.update_store(store_id, {stored})
        before_clear = store.get_proxy()
        store.update_store(store_id, None)
        after_clear = store.get_proxy()

        self.assertEqual(stored, before_clear)
        self.assertIsNone(after_clear)
    def test_get_proxies_returns_empty_if_filtered(self):
        """``get_proxies`` returns ``None`` when the 'code' filter is 'uk' and
        the only stored proxy was built with code 'us'."""
        stored = Proxy('host', 'source', 'us', 'united states', True, 'type',
                       'source')
        store = Store()
        store_id = store.add_store()
        store.update_store(store_id, {stored})

        non_matching_filter = {'code': {'uk'}}
        actual = store.get_proxies(filter_opts=non_matching_filter)

        self.assertIsNone(actual)
    def test_remove_proxy_single(self):
        """Removing one proxy removes it from the store of its source.

        ``ps.Store`` is swapped for a mock while the test runs. The test
        expects ``Store.remove_proxy`` to have been called with the id that the
        collector's resource map holds for 'anonymous-proxy', and the proxy.
        """
        store_mock = Mock()
        store_mock.return_value = store_mock  # Ensure same instance when initialized
        store_mock.get_proxy.return_value = None

        # Restore the real Store when the test ends (pass or fail) so the mock
        # cannot leak into tests that run afterwards.
        self.addCleanup(setattr, ps, 'Store', ps.Store)
        ps.Store = store_mock

        proxy = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type',
                      'anonymous-proxy')

        collector = ps.Collector('http', 10, None)
        collector.remove_proxy(proxy)

        store_id = collector._resource_map['anonymous-proxy']['id']
        store_mock.remove_proxy.assert_called_with(store_id, proxy)
    def test_refreshes_if_forced(self):
        """A first ``refresh()`` and an immediate ``refresh(True)`` both report a
        refresh and return the proxies produced by the resource function."""
        expected = [Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')]

        def produce_proxies():
            return expected

        resource = ProxyResource(produce_proxies, 5)

        # Initial refresh.
        did_refresh, proxies = resource.refresh()
        self.assertEqual(True, did_refresh)
        self.assertEqual(expected[0], proxies[0])

        # Immediate second refresh, this time passing True.
        did_refresh, proxies = resource.refresh(True)
        self.assertEqual(True, did_refresh)
        self.assertEqual(expected[0], proxies[0])
    def test_doesnt_refresh_if_lock_check(self):
        """With ``proxyscrape.scrapers.time`` patched to yield a fixed sequence
        of timestamps, a second ``refresh()`` reports no refresh and no proxies."""
        expected = [Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')]

        def produce_proxies():
            return expected

        resource = ProxyResource(produce_proxies, 5)

        # The first refresh runs against the real clock and is expected to succeed.
        did_refresh, proxies = resource.refresh()
        self.assertEqual(True, did_refresh)
        self.assertEqual(expected[0], proxies[0])

        with patch('proxyscrape.scrapers.time') as time_mock:
            # Successive time() calls in the patched module return these values
            # in order. NOTE(review): how refresh() consumes them is not visible
            # here; confirm against ProxyResource.
            fake_times = iter([time.time() + 10, -1, 0])
            time_mock.time = fake_times.__next__

            did_refresh, proxies = resource.refresh()
            self.assertEqual(False, did_refresh)
            self.assertIsNone(proxies)