def test_blacklist_proxy_multiple(self):
    """Blacklisting a set of proxies puts each of them in the collector's blacklist."""
    to_blacklist = {
        Proxy(host, 'port', 'code', 'country', 'anonymous', 'type', 'source')
        for host in ('host1', 'host2')
    }
    collector = ps.Collector('http', 10, None)

    collector.blacklist_proxy(to_blacklist)

    blacklisted = collector._blacklist
    for entry in to_blacklist:
        self.assertIn(entry, blacklisted)
def test_refresh_proxies_with_no_force(self):
    """refresh_proxies(False) passes the flag to the resource and stores the result.

    ``ps.Store`` and ``ps.ProxyResource`` are replaced with mocks for the
    duration of the test; the original attributes are restored through
    ``addCleanup`` so the mocks cannot leak into tests that run afterwards.
    """
    proxy = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')
    proxies = {proxy}

    store_mock = Mock()
    store_mock.return_value = store_mock  # Ensure same instance when initialized
    store_mock.get_proxy.return_value = None

    proxy_resource_mock = Mock()
    proxy_resource_mock.return_value = proxy_resource_mock  # Ensure same instance when initialized
    proxy_resource_mock.refresh.return_value = True, proxies

    # Register the restore *before* swapping, capturing the current attributes.
    self.addCleanup(setattr, ps, 'Store', ps.Store)
    self.addCleanup(setattr, ps, 'ProxyResource', ps.ProxyResource)
    ps.Store = store_mock
    ps.ProxyResource = proxy_resource_mock

    collector = ps.Collector('http', 10, None)
    collector.refresh_proxies(False)

    proxy_resource_mock.refresh.assert_called_with(False)
    for attrs in collector._resource_map.values():
        store_mock.update_store.assert_called_with(attrs['id'], proxies)
def test_update_store_invalid_id_does_nothing(self):
    """Updating with an id that was never handed out leaves the store without proxies."""
    store = Store()
    candidate = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')

    # No add_store() call was made on this store, so id 1 is expected to be unknown.
    store.update_store(1, candidate)

    self.assertIsNone(store.get_proxy())
def test_blacklist_proxy_single(self):
    """Blacklisting a single proxy makes it retrievable from the collector's blacklist."""
    target = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')
    collector = ps.Collector('http', 10, None)

    collector.blacklist_proxy(target)

    popped = collector._blacklist.pop()
    self.assertEqual(target, popped)
def test_remove_proxy_exception_if_invalid_resource_type(self):
    """remove_proxy raises InvalidResourceTypeError for a proxy with source 'invalid-source'."""
    collector = ps.Collector('http', 10, None)
    bad_proxy = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'invalid-source')

    self.assertRaises(InvalidResourceTypeError, collector.remove_proxy, bad_proxy)
def test_get_proxy_with_filter(self):
    """get_proxy merges the caller's filter with the collector type and queries the store.

    ``ps.Store`` and ``ps.ProxyResource`` are replaced with mocks for the
    duration of the test; the original attributes are restored through
    ``addCleanup`` so the mocks cannot leak into tests that run afterwards.
    """
    proxy = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')
    proxies = {proxy}

    store_mock = Mock()
    store_mock.return_value = store_mock  # Ensure same instance when initialized
    store_mock.get_proxy.return_value = proxy

    proxy_resource_mock = Mock()
    proxy_resource_mock.return_value = proxy_resource_mock  # Ensure same instance when initialized
    proxy_resource_mock.refresh.return_value = True, proxies

    # Register the restore *before* swapping, capturing the current attributes.
    self.addCleanup(setattr, ps, 'Store', ps.Store)
    self.addCleanup(setattr, ps, 'ProxyResource', ps.ProxyResource)
    ps.Store = store_mock
    ps.ProxyResource = proxy_resource_mock

    collector = ps.Collector('http', 10, None)
    actual = collector.get_proxy({'code': 'us'})

    for attrs in collector._resource_map.values():
        store_mock.update_store.assert_called_with(attrs['id'], proxies)

    # The string filter value is expected to reach the store wrapped in a set,
    # alongside the collector's own 'http' type filter.
    expected_filter = {'type': {'http'}, 'code': {'us'}}
    store_mock.get_proxy.assert_called_once_with(expected_filter, collector._blacklist)
    self.assertEqual(proxy, actual)
def test_get_proxy_returns_proxy_if_any(self):
    """get_proxy with no arguments returns the only proxy held by the store."""
    expected = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
    store = Store()
    store_id = store.add_store()

    store.update_store(store_id, {expected})

    self.assertEqual(expected, store.get_proxy())
def test_get_proxy_returns_proxy_if_not_filtered(self):
    """A proxy whose code matches the 'code' filter is returned by get_proxy."""
    matching = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
    store = Store()
    store_id = store.add_store()
    store.update_store(store_id, {matching})

    code_filter = {'code': {'us'}}
    actual = store.get_proxy(filter_opts=code_filter)

    self.assertEqual(actual, matching)
def test_get_proxy_returns_empty_if_blacklisted(self):
    """get_proxy returns None when the only stored proxy is on the blacklist."""
    blocked = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
    store = Store()
    store_id = store.add_store()
    store.update_store(store_id, {blocked})

    # The blacklist passed here holds pairs of the proxy's first two fields.
    blacklist_entry = (blocked[0], blocked[1])
    actual = store.get_proxy(blacklist={blacklist_entry})

    self.assertIsNone(actual)
def test_update_store_updates_proxies(self):
    """A proxy written with update_store can be read back with get_proxy."""
    stored = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
    store = Store()
    store_id = store.add_store()

    store.update_store(store_id, {stored})

    self.assertEqual(stored, store.get_proxy())
def test_clear_blacklist_clears_correctly(self):
    """clear_blacklist leaves the collector's blacklist as an empty set."""
    entry = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')
    collector = ps.Collector('http', 10, None)
    collector.blacklist_proxy(entry)

    collector.clear_blacklist()

    empty = set()
    self.assertSetEqual(empty, collector._blacklist)
def test_get_proxy_returns_empty_if_filtered_and_blacklisted(self):
    """get_proxy returns None when the proxy fails the filter and is also blacklisted."""
    candidate = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
    store = Store()
    store_id = store.add_store()
    store.update_store(store_id, {candidate})

    country_filter = {'country': {'uk'}}
    blacklist = {(candidate[0], candidate[1])}
    actual = store.get_proxy(filter_opts=country_filter, blacklist=blacklist)

    self.assertIsNone(actual)
def test_us_proxies_success(self):
    """Every proxy scraped from the mocked us-proxy page is one of the expected ones."""
    expected = {
        Proxy('179.124.59.232', '53281', 'us', 'united states', True, 'https', 'us-proxy'),
        Proxy('200.107.59.98', '8080', 'us', 'united states', True, 'http', 'us-proxy'),
        Proxy('217.172.244.7', '8080', 'us', 'united states', False, 'http', 'us-proxy'),
    }
    page_path = os.path.join(cwd, 'mock_pages', 'us-proxy.html')

    with open(page_path, 'r') as html:
        # The open file object stands in for the response body.
        response = Mock()
        response.content = html
        response.ok = True

        def fake_get(url):
            return response

        self.requests.get = fake_get

        resource = ProxyResource(RESOURCE_MAP['us-proxy'], 10)
        _, scraped = resource.refresh()

        for item in scraped:
            self.assertIn(item, expected)
def test_proxy_daily_socks5_proxies_success(self):
    """Every socks5 proxy scraped from the mocked proxy-daily page is an expected one."""
    expected = {
        Proxy('176.9.19.170', '1080', None, None, None, 'socks5', 'proxy-daily-socks5'),
        Proxy('188.26.83.105', '1080', None, None, None, 'socks5', 'proxy-daily-socks5'),
        Proxy('150.129.151.44', '6667', None, None, None, 'socks5', 'proxy-daily-socks5'),
    }
    page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')

    with open(page_path, 'r') as html:
        # The open file object stands in for the response body.
        response = Mock()
        response.content = html
        response.ok = True

        def fake_get(url):
            return response

        self.requests.get = fake_get

        resource = ProxyResource(RESOURCE_MAP['proxy-daily-socks5'], 10)
        _, scraped = resource.refresh()

        for item in scraped:
            self.assertIn(item, expected)
def test_proxy_daily_socks4_proxies_success(self):
    """Every socks4 proxy scraped from the mocked proxy-daily page is an expected one."""
    expected = {
        Proxy('54.38.156.185', '8888', None, None, None, 'socks4', 'proxy-daily-socks4'),
        Proxy('194.85.174.74', '1080', None, None, None, 'socks4', 'proxy-daily-socks4'),
        Proxy('41.79.237.135', '1080', None, None, None, 'socks4', 'proxy-daily-socks4'),
    }
    page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')

    with open(page_path, 'r') as html:
        # The open file object stands in for the response body.
        response = Mock()
        response.content = html
        response.ok = True

        def fake_get(url):
            return response

        self.requests.get = fake_get

        resource = ProxyResource(RESOURCE_MAP['proxy-daily-socks4'], 10)
        _, scraped = resource.refresh()

        for item in scraped:
            self.assertIn(item, expected)
def test_proxy_daily_http_proxies_success(self):
    """Every http proxy scraped from the mocked proxy-daily page is an expected one."""
    expected = {
        Proxy('93.190.253.50', '80', None, None, None, 'http', 'proxy-daily-http'),
        Proxy('207.154.231.209', '3128', None, None, None, 'http', 'proxy-daily-http'),
        Proxy('88.255.101.177', '53281', None, None, None, 'http', 'proxy-daily-http'),
    }
    page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')

    with open(page_path, 'r') as html:
        # The open file object stands in for the response body.
        response = Mock()
        response.content = html
        response.ok = True

        def fake_get(url):
            return response

        self.requests.get = fake_get

        resource = ProxyResource(RESOURCE_MAP['proxy-daily-http'], 10)
        _, scraped = resource.refresh()

        for item in scraped:
            self.assertIn(item, expected)
def test_remove_proxy_multiple(self):
    """remove_proxy on a set removes each proxy from the store of its own source.

    ``ps.Store`` is replaced with a mock for the duration of the test; the
    original attribute is restored through ``addCleanup`` so the mock cannot
    leak into tests that run afterwards.
    """
    store_mock = Mock()
    store_mock.return_value = store_mock  # Ensure same instance when initialized
    store_mock.get_proxy.return_value = None

    # Register the restore *before* swapping, capturing the current attribute.
    self.addCleanup(setattr, ps, 'Store', ps.Store)
    ps.Store = store_mock

    proxy1 = Proxy('host1', 'port', 'code', 'country', 'anonymous', 'type', 'anonymous-proxy')
    proxy2 = Proxy('host2', 'port', 'code', 'country', 'anonymous', 'type', 'us-proxy')
    proxies = {proxy1, proxy2}

    collector = ps.Collector('http', 10, None)
    collector.remove_proxy(proxies)

    # Set iteration order is arbitrary, so use assert_any_call rather than
    # checking only the most recent call.
    for resource_name, proxy in (('anonymous-proxy', proxy1), ('us-proxy', proxy2)):
        store_id = collector._resource_map[resource_name]['id']
        store_mock.remove_proxy.assert_any_call(store_id, proxy)
def test_socks_proxy_proxies_success(self):
    """Every proxy scraped from the mocked socks-proxy page is one of the expected ones."""
    expected = {
        Proxy('179.124.59.232', '53281', 'br', 'brazil', True, 'socks4', 'socks-proxy'),
        Proxy('200.107.59.98', '8080', 'ua', 'ukraine', True, 'socks5', 'socks-proxy'),
        Proxy('217.172.244.7', '8080', 'ru', 'russian federation', True, 'socks4', 'socks-proxy'),
    }
    page_path = os.path.join(cwd, 'mock_pages', 'socks-proxy.html')

    with open(page_path, 'r') as html:
        # The open file object stands in for the response body.
        response = Mock()
        response.content = html
        response.ok = True

        def fake_get(url):
            return response

        self.requests.get = fake_get

        resource = ProxyResource(RESOURCE_MAP['socks-proxy'], 10)
        _, scraped = resource.refresh()

        for item in scraped:
            self.assertIn(item, expected)
def test_remove_proxy_removes_from_set(self):
    """A stored proxy is returned before remove_proxy and no longer returned after it."""
    stored = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
    store = Store()
    store_id = store.add_store()
    store.update_store(store_id, {stored})

    before_removal = store.get_proxy()
    store.remove_proxy(store_id, stored)
    after_removal = store.get_proxy()

    self.assertEqual(before_removal, stored)
    self.assertIsNone(after_removal)
def test_update_store_clears_if_none(self):
    """Calling update_store with None empties a store that previously held a proxy."""
    stored = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
    store = Store()
    store_id = store.add_store()
    store.update_store(store_id, {stored})

    before_clear = store.get_proxy()
    store.update_store(store_id, None)
    after_clear = store.get_proxy()

    self.assertEqual(stored, before_clear)
    self.assertIsNone(after_clear)
def test_get_proxies_returns_empty_if_filtered(self):
    """get_proxies returns None when the 'code' filter excludes the only stored proxy."""
    stored = Proxy('host', 'source', 'us', 'united states', True, 'type', 'source')
    store = Store()
    store_id = store.add_store()
    store.update_store(store_id, {stored})

    code_filter = {'code': {'uk'}}
    actual = store.get_proxies(filter_opts=code_filter)

    self.assertIsNone(actual)
def test_remove_proxy_single(self):
    """remove_proxy on one proxy removes it from the store of the proxy's source.

    ``ps.Store`` is replaced with a mock for the duration of the test; the
    original attribute is restored through ``addCleanup`` so the mock cannot
    leak into tests that run afterwards.
    """
    store_mock = Mock()
    store_mock.return_value = store_mock  # Ensure same instance when initialized
    store_mock.get_proxy.return_value = None

    # Register the restore *before* swapping, capturing the current attribute.
    self.addCleanup(setattr, ps, 'Store', ps.Store)
    ps.Store = store_mock

    proxy = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'anonymous-proxy')

    collector = ps.Collector('http', 10, None)
    collector.remove_proxy(proxy)

    store_id = collector._resource_map['anonymous-proxy']['id']
    store_mock.remove_proxy.assert_called_with(store_id, proxy)
def test_refreshes_if_forced(self):
    """The first refresh() and a following refresh(True) both report a refresh with data."""
    expected = [Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')]
    resource = ProxyResource(lambda: expected, 5)

    # First pass calls refresh() with no arguments, second pass calls refresh(True).
    for call_args in ((), (True,)):
        refreshed, actual = resource.refresh(*call_args)
        self.assertEqual(True, refreshed)
        self.assertEqual(expected[0], actual[0])
def test_doesnt_refresh_if_lock_check(self):
    """A second refresh() under a scripted clock reports no refresh and returns None."""
    expected = [Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')]
    resource = ProxyResource(lambda: expected, 5)

    was_refreshed, result = resource.refresh()
    self.assertEqual(True, was_refreshed)
    self.assertEqual(expected[0], result[0])

    with patch('proxyscrape.scrapers.time') as time_mock:
        # Successive time() calls inside the scraper get these values in order.
        scripted_times = iter([time.time() + 10, -1, 0])

        def fake_time():
            return next(scripted_times)

        time_mock.time = fake_time

        was_refreshed, result = resource.refresh()
        self.assertEqual(False, was_refreshed)
        self.assertIsNone(result)