def test_get_proxies_with_filter(self):
    # get_proxies() with a filter: each resource's refreshed proxies are handed
    # to the store, and the store is queried with the filter value wrapped in a set.
    first = Proxy('host1', 'port1', 'code', 'country', 'anonymous', 'type', 'source')
    second = Proxy('host2', 'por2', 'code', 'country', 'anonymous', 'type', 'source')
    refreshed = {first, second}

    # Calling either mock returns the mock itself, so the instance the
    # collector builds is the same object the assertions inspect.
    resource_cls = Mock()
    resource_cls.return_value = resource_cls
    resource_cls.refresh.return_value = True, refreshed

    store_cls = Mock()
    store_cls.return_value = store_cls
    store_cls.get_proxies.return_value = [first, second]

    ps.Store = store_cls
    ps.ProxyResource = resource_cls

    collector = ps.Collector('http', 10, None)
    actual = collector.get_proxies({'code': 'us'})

    for attrs in collector._resource_map.values():
        store_cls.update_store.assert_called_with(attrs['id'], refreshed)

    store_cls.get_proxies.assert_called_once_with({'code': {'us'}}, collector._blacklist)

    for wanted in refreshed:
        self.assertIn(wanted, actual)
def test_free_proxy_list_proxies_success(self):
    # Every proxy returned by the 'free-proxy-list' resource for the saved
    # mock page must be one of the expected entries.
    page_path = os.path.join(cwd, 'mock_pages', 'free-proxy-list-proxy.html')
    with open(page_path, 'r') as page:
        fake_response = Mock()
        fake_response.ok = True
        # The open file handle itself is served as the response body.
        fake_response.content = page
        self.requests.get = lambda url: fake_response

        expected = {
            Proxy('179.124.59.232', '53281', 'br', 'brazil', True, 'https', 'free-proxy-list'),
            Proxy('200.107.59.98', '8080', 'ua', 'ukraine', False, 'http', 'free-proxy-list'),
            Proxy('217.172.244.7', '8080', 'ru', 'russian federation', True, 'http',
                  'free-proxy-list'),
        }

        resource = ProxyResource(RESOURCE_MAP['free-proxy-list'], 10)
        _, found = resource.refresh()

        for candidate in found:
            self.assertIn(candidate, expected)
def test_proxyscrape_success(self):
    # Every proxy returned by the proxyscrape resource for the saved text
    # listing must be one of the expected entries.
    listing_path = os.path.join(cwd, 'mock_pages', 'proxyscrape.txt')
    with open(listing_path, 'r') as listing:
        fake_response = Mock()
        fake_response.ok = True
        fake_response.text = listing.read()
        self.requests.get = lambda url: fake_response

        resource_name = get_proxyscrape_resource()

        expected = {
            Proxy('179.124.59.232', '53281', None, None, False, None, resource_name),
            Proxy('200.107.59.98', '8080', None, None, False, None, resource_name),
            Proxy('217.172.244.7', '8080', None, None, False, None, resource_name),
        }

        resource = ProxyResource(pss.RESOURCE_MAP[resource_name], 10)
        _, found = resource.refresh()

        for candidate in found:
            self.assertIn(candidate, expected)
def test_remove_blacklist_multiple(self):
    # Removing the same set of proxies that was blacklisted leaves the
    # collector's blacklist empty.
    collector = ps.Collector('http', 10, None)
    batch = {
        Proxy(host, 'port', 'code', 'country', 'anonymous', 'type', 'source')
        for host in ('host1', 'host2')
    }

    collector.blacklist_proxy(batch)
    collector.remove_blacklist(batch)

    remaining = len(collector._blacklist)
    self.assertEqual(0, remaining)
def test_remove_blacklist_removes_correct_proxy(self):
    # After blacklisting two proxies and removing one, only the other's
    # (host, port) pair remains in the blacklist.
    collector = ps.Collector('http', 10, None)
    removed = Proxy('host1', 'port', 'code', 'country', 'anonymous', 'type', 'source')
    kept = Proxy('host2', 'port', 'code', 'country', 'anonymous', 'type', 'source')

    collector.blacklist_proxy({removed, kept})
    collector.remove_blacklist(removed)

    self.assertEqual(1, len(collector._blacklist))
    kept_key = (kept[0], kept[1])
    self.assertIn(kept_key, collector._blacklist)
def test_blacklist_proxy_multiple(self):
    # Blacklisting a set of proxies records each one's (host, port) pair.
    collector = ps.Collector('http', 10, None)
    batch = {
        Proxy(host, 'port', 'code', 'country', 'anonymous', 'type', 'source')
        for host in ('host1', 'host2')
    }

    collector.blacklist_proxy(batch)

    for entry in batch:
        key = (entry[0], entry[1])
        self.assertIn(key, collector._blacklist)
def test_refresh_proxies_with_no_force(self):
    # refresh_proxies(False) passes False on to the resource's refresh() and
    # hands the refreshed proxies to the store for every resource.
    only_proxy = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')
    refreshed = {only_proxy}

    # Calling either mock returns the mock itself, so the instance the
    # collector builds is the same object the assertions inspect.
    resource_cls = Mock()
    resource_cls.return_value = resource_cls
    resource_cls.refresh.return_value = True, refreshed

    store_cls = Mock()
    store_cls.return_value = store_cls
    store_cls.get_proxy.return_value = None

    ps.Store = store_cls
    ps.ProxyResource = resource_cls

    collector = ps.Collector('http', 10, None)
    collector.refresh_proxies(False)

    resource_cls.refresh.assert_called_with(False)

    for attrs in collector._resource_map.values():
        store_cls.update_store.assert_called_with(attrs['id'], refreshed)
def test_remove_proxy_exception_if_invalid_resource_type(self):
    # remove_proxy() must raise InvalidResourceTypeError for a proxy whose
    # source field is 'invalid-source'.
    collector = ps.Collector('http', 10, None)
    unknown = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'invalid-source')

    self.assertRaises(InvalidResourceTypeError, collector.remove_proxy, unknown)
def test_get_proxy_no_filter(self):
    # get_proxy() without arguments queries the store once with an empty
    # filter dict plus the blacklist, and returns what the store returns.
    expected = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')
    refreshed = {expected}

    # Calling either mock returns the mock itself, so the instance the
    # collector builds is the same object the assertions inspect.
    resource_cls = Mock()
    resource_cls.return_value = resource_cls
    resource_cls.refresh.return_value = True, refreshed

    store_cls = Mock()
    store_cls.return_value = store_cls
    store_cls.get_proxy.return_value = expected

    ps.Store = store_cls
    ps.ProxyResource = resource_cls

    collector = ps.Collector('http', 10, None)
    actual = collector.get_proxy()

    for attrs in collector._resource_map.values():
        store_cls.update_store.assert_called_with(attrs['id'], refreshed)

    store_cls.get_proxy.assert_called_once_with({}, collector._blacklist)
    self.assertEqual(expected, actual)
def test_blacklist_proxy_single(self):
    # Blacklisting one proxy stores its (host, port) pair in the blacklist.
    collector = ps.Collector('http', 10, None)
    target = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')

    collector.blacklist_proxy(target)

    expected_key = (target[0], target[1])
    stored_key = collector._blacklist.pop()
    self.assertEqual(expected_key, stored_key)
def test_remove_blacklist_single_with_host_and_port(self):
    # remove_blacklist() called with host/port keywords clears the matching
    # entry, leaving the blacklist empty.
    collector = ps.Collector('http', 10, None)
    target = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')

    collector.blacklist_proxy(target)
    collector.remove_blacklist(host='host', port='port')

    remaining = len(collector._blacklist)
    self.assertEqual(0, remaining)
def test_clear_blacklist_clears_correctly(self):
    # clear_blacklist() leaves the collector's blacklist equal to an empty set.
    collector = ps.Collector('http', 10, None)
    target = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')

    collector.blacklist_proxy(target)
    collector.clear_blacklist()

    nothing = set()
    self.assertSetEqual(nothing, collector._blacklist)
def test_remove_proxy_multiple(self):
    # remove_proxy() given a set of proxies must call the store's remove_proxy
    # once per proxy, using the id registered for that proxy's source resource.
    store_mock = Mock()
    store_mock.return_value = store_mock  # Ensure same instance when initialized
    store_mock.get_proxy.return_value = None
    ps.Store = store_mock

    proxy1 = Proxy('host1', 'port', 'code', 'country', 'anonymous', 'type', 'anonymous-proxy')
    proxy2 = Proxy('host2', 'port', 'code', 'country', 'anonymous', 'type', 'us-proxy')
    proxies = {proxy1, proxy2}

    collector = ps.Collector('http', 10, None)
    collector.remove_proxy(proxies)

    # `resource_id` rather than `id`, so the builtin is not shadowed.
    for source, proxy in (('anonymous-proxy', proxy1), ('us-proxy', proxy2)):
        resource_id = collector._resource_map[source]['id']
        store_mock.remove_proxy.assert_any_call(resource_id, proxy)
def test_remove_proxy_single(self):
    # remove_proxy() given one proxy must call the store's remove_proxy with
    # the id registered for that proxy's source resource.
    store_mock = Mock()
    store_mock.return_value = store_mock  # Ensure same instance when initialized
    store_mock.get_proxy.return_value = None
    ps.Store = store_mock

    proxy = Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'anonymous-proxy')

    collector = ps.Collector('http', 10, None)
    collector.remove_proxy(proxy)

    # `resource_id` rather than `id`, so the builtin is not shadowed.
    resource_id = collector._resource_map['anonymous-proxy']['id']
    store_mock.remove_proxy.assert_called_with(resource_id, proxy)
def test_proxy_daily_http_proxies_success(self):
    # Every proxy returned by the 'proxy-daily-http' resource for the saved
    # mock page must be one of the expected entries.
    page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')
    with open(page_path, 'r') as page:
        fake_response = Mock()
        fake_response.ok = True
        # The open file handle itself is served as the response body.
        fake_response.content = page
        self.requests.get = lambda url: fake_response

        expected = {
            Proxy('93.190.253.50', '80', None, None, None, 'http', 'proxy-daily-http'),
            Proxy('207.154.231.209', '3128', None, None, None, 'http', 'proxy-daily-http'),
            Proxy('88.255.101.177', '53281', None, None, None, 'http', 'proxy-daily-http'),
        }

        resource = ProxyResource(RESOURCE_MAP['proxy-daily-http'], 10)
        _, found = resource.refresh()

        for candidate in found:
            self.assertIn(candidate, expected)
def test_proxy_daily_socks4_proxies_success(self):
    # Every proxy returned by the 'proxy-daily-socks4' resource for the saved
    # mock page must be one of the expected entries.
    page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')
    with open(page_path, 'r') as page:
        fake_response = Mock()
        fake_response.ok = True
        # The open file handle itself is served as the response body.
        fake_response.content = page
        self.requests.get = lambda url: fake_response

        expected = {
            Proxy('54.38.156.185', '8888', None, None, None, 'socks4', 'proxy-daily-socks4'),
            Proxy('194.85.174.74', '1080', None, None, None, 'socks4', 'proxy-daily-socks4'),
            Proxy('41.79.237.135', '1080', None, None, None, 'socks4', 'proxy-daily-socks4'),
        }

        resource = ProxyResource(RESOURCE_MAP['proxy-daily-socks4'], 10)
        _, found = resource.refresh()

        for candidate in found:
            self.assertIn(candidate, expected)
def test_proxy_daily_socks5_proxies_success(self):
    # Every proxy returned by the 'proxy-daily-socks5' resource for the saved
    # mock page must be one of the expected entries.
    page_path = os.path.join(cwd, 'mock_pages', 'proxy-daily-proxy.html')
    with open(page_path, 'r') as page:
        fake_response = Mock()
        fake_response.ok = True
        # The open file handle itself is served as the response body.
        fake_response.content = page
        self.requests.get = lambda url: fake_response

        expected = {
            Proxy('176.9.19.170', '1080', None, None, None, 'socks5', 'proxy-daily-socks5'),
            Proxy('188.26.83.105', '1080', None, None, None, 'socks5', 'proxy-daily-socks5'),
            Proxy('150.129.151.44', '6667', None, None, None, 'socks5', 'proxy-daily-socks5'),
        }

        resource = ProxyResource(RESOURCE_MAP['proxy-daily-socks5'], 10)
        _, found = resource.refresh()

        for candidate in found:
            self.assertIn(candidate, expected)
def test_us_proxies_success(self):
    # Every proxy returned by the 'us-proxy' resource for the saved mock page
    # must be one of the expected entries.
    page_path = os.path.join(cwd, 'mock_pages', 'us-proxy.html')
    with open(page_path, 'r') as page:
        fake_response = Mock()
        fake_response.ok = True
        # The open file handle itself is served as the response body.
        fake_response.content = page
        self.requests.get = lambda url: fake_response

        expected = {
            Proxy('179.124.59.232', '53281', 'us', 'united states', True, 'https', 'us-proxy'),
            Proxy('200.107.59.98', '8080', 'us', 'united states', True, 'http', 'us-proxy'),
            Proxy('217.172.244.7', '8080', 'us', 'united states', False, 'http', 'us-proxy'),
        }

        resource = ProxyResource(RESOURCE_MAP['us-proxy'], 10)
        _, found = resource.refresh()

        for candidate in found:
            self.assertIn(candidate, expected)
def test_doesnt_refresh_if_not_expired(self):
    # The first refresh() reports (True, proxies); an immediate second call
    # on the same resource reports (False, None).
    expected = [
        Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')
    ]

    def func():
        return expected

    pr = ProxyResource(func, 5)

    refreshed, actual = pr.refresh()
    self.assertEqual(True, refreshed)
    self.assertEqual(expected[0], actual[0])

    refreshed, actual = pr.refresh()
    self.assertEqual(False, refreshed)
    # Identity check for None, consistent with the sibling lock-check test.
    self.assertIsNone(actual)
def test_doesnt_refresh_if_lock_check(self):
    # After one successful refresh, a second refresh() made while
    # proxyscrape.scrapers.time yields the stubbed readings below must
    # report (False, None).
    expected = [
        Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')
    ]

    def func():
        return expected

    resource = ProxyResource(func, 5)

    was_refreshed, result = resource.refresh()
    self.assertEqual(True, was_refreshed)
    self.assertEqual(expected[0], result[0])

    with patch('proxyscrape.scrapers.time') as time_mock:
        # NOTE(review): presumably the first reading makes the resource look
        # expired and the later ones fail the re-check made under the lock;
        # confirm against ProxyResource.refresh.
        readings = iter([time.time() + 10, -1, 0])

        def fake_time():
            return next(readings)

        time_mock.time = fake_time

        was_refreshed, result = resource.refresh()
        self.assertEqual(False, was_refreshed)
        self.assertIsNone(result)