def testInitWithSeconds(self):
    """Tests that specifying seconds sets the correct fields.

    Builds a Requester with a 0.1 pacing interval and ``seconds=2`` and
    checks the private attributes that the constructor is expected to set.
    """
    self.requester = requester.Requester(None, None, None, 0.1, seconds=2)
    subject = self.requester

    # Pacing state: the interval is stored as given and no request has
    # been started yet.
    self.assertEqual(subject._time_between_requests, 0.1)
    self.assertEqual(subject._last_request_start_time, 0.0)

    # Stop condition: the duration is recorded and the request-count stop
    # signal is switched off.
    self.assertEqual(subject._timedelta, 2)
    self.assertFalse(subject._use_requests_as_stop_signal)
def testShouldSendMoreRequestsStopsAfterTimeout(self):
    """Tests that ShouldSendMoreRequests stops after the given timeout value.

    Start() is run with _ShouldSendMoreRequests stubbed out so that it only
    records the start and stop times; the real method is then put back and
    queried against a scripted clock at 1, 9 and 10 seconds.
    """
    self.requester = requester.Requester(MockGenerator(), None, None, 0.1,
                                         seconds=10)

    # Keep the real method around, then stub it so Start() is otherwise a
    # no-op: the stub answers False on its single expected call.
    real_should_send = self.requester._ShouldSendMoreRequests
    self.requester._ShouldSendMoreRequests = MockMethod(
        '_ShouldSendMoreRequests', [()], [False])

    # Scripted clock: the first reading is consumed by Start() for the start
    # time, the remaining three by the _ShouldSendMoreRequests calls below.
    now = time.time()
    clock_readings = [now, now + 1, now + 9, now + 10]
    self.requester._GetCurrentTime = MockMethod(
        '_GetCurrentTime', [()] * len(clock_readings), clock_readings)

    # Sets _start_time and _stop_time.
    self.requester.Start()
    self.assertEqual(self.requester._start_time, now)
    self.assertEqual(self.requester._stop_time, now + 10)

    # Put the real implementation back so it can be exercised.
    self.requester._ShouldSendMoreRequests = real_should_send

    # Clock reads +1s, then +9s: still inside the window.
    for _ in range(2):
        self.assertTrue(self.requester._ShouldSendMoreRequests())
    # Clock reads +10s: the stop time has been reached.
    self.assertFalse(self.requester._ShouldSendMoreRequests())
def testInitWithRequests(self):
    """Tests that specifying requests sets the correct fields.

    Builds a Requester with a 0.1 pacing interval and ``requests=2`` and
    checks the private attributes that the constructor is expected to set.
    """
    self.requester = requester.Requester(None, None, None, 0.1, requests=2)
    subject = self.requester

    # Pacing state: the interval is stored as given and no request has
    # been started yet.
    self.assertEqual(subject._time_between_requests, 0.1)
    self.assertEqual(subject._last_request_start_time, 0.0)

    # Stop condition: the request cap is recorded and the request-count
    # stop signal is switched on.
    self.assertEqual(subject._max_requests, 2)
    self.assertTrue(subject._use_requests_as_stop_signal)
def testGenerateRequest(self):
    """Tests that _GenerateRequest returns the generator's request.

    Also checks that the _generated_requests counter goes from 0 to 1 after
    a single call.
    """
    mock_generator = MockGenerator()
    self.requester = requester.Requester(mock_generator, None, None, 0.1,
                                         requests=2)
    subject = self.requester

    # Nothing has been generated on a freshly built Requester.
    self.assertEqual(0, subject._generated_requests)

    produced = subject._GenerateRequest()

    # One call bumps the counter once and hands back the generator's request.
    self.assertEqual(1, subject._generated_requests)
    self.assertEqual(mock_generator.request, produced)
def build_domain(self, url):
    """Pick the fetcher module for *url* and store its Fetcher on ``self``.

    The authority component of the parsed URL is split on dots and the
    second-to-last label is used as the domain name (for a single-label
    authority that one label is used). The name is handed to
    ``self.is_valid`` and then used to import ``domains.<name>`` through
    ``self.my_import``.

    :param url: URL string, parsed with the ``'IRI'`` rule.
    :return: None; the created fetcher is stored in ``self.fetcher``.
    """
    authority = parse(url, rule='IRI')['authority']
    labels = authority.split('.')
    # Deliberately len - 2 rather than -2: with a single label this wraps
    # around to that label instead of raising IndexError.
    domain = labels[len(labels) - 2]

    # The return value is ignored here.
    # NOTE(review): presumably is_valid raises for an unsupported domain;
    # confirm against its definition.
    self.is_valid(domain)

    domain_module = self.my_import('domains.' + domain)
    self.fetcher = domain_module.Fetcher(requester.Requester())
def testShouldSendMoreRequestsStopsOnMaxRequests(self):
    """Tests that ShouldSendMoreRequests stops at the maximum request count.

    With ``requests=2`` the method must answer True before each of the two
    generated requests and False once both have been generated.
    """
    max_requests = 2
    self.requester = requester.Requester(MockGenerator(), None, None, 0.1,
                                         requests=max_requests)

    # Before the first and before the second request the cap has not been
    # reached yet, so more requests are allowed.
    for _ in range(max_requests):
        self.assertTrue(self.requester._ShouldSendMoreRequests())
        self.requester._GenerateRequest()

    # Both requests have been generated. The lock is ignored since no
    # multi-threading is going on.
    self.assertFalse(self.requester._ShouldSendMoreRequests())
def testWait(self):
    """Tests that _Wait sleeps for the correct amount of time.

    Three cases are exercised in sequence, each with freshly installed
    mocks for ``time.sleep`` and ``_GetCurrentTime``:

    1. First request (``_last_request_start_time`` is 0.0): sleeps for the
       full interval without reading the clock.
    2. Part of the interval has already elapsed: sleeps for the remainder.
    3. More than the interval has elapsed: does not sleep at all.
    """
    # time.sleep is rebound on the shared `time` module below. Register the
    # restore first so the real function is put back even when an assertion
    # fails; otherwise the last strict mock would leak into later tests.
    self.addCleanup(setattr, time, 'sleep', time.sleep)

    time_to_wait = 0.1
    self.requester = requester.Requester(None, None, None, time_to_wait,
                                         seconds=10)

    # Case 1: last_request_start_time = 0.0 (i.e. first request),
    # time_to_wait = 0.1.
    # Wait shouldn't call _GetCurrentTime because this is the first request.
    # Wait should call sleep with time 0.1.
    time.sleep = MockMethod('sleep', [(0.1, )], [None])
    self.requester._GetCurrentTime = MockMethod('_GetCurrentTime', [], [])
    self.requester._Wait()
    self.assertEqual(1, time.sleep._call_count)

    # Case 2: current time = 1.04, last_request_start_time = 1.01,
    # time_to_wait = 0.1.
    # Wait should call _GetCurrentTime once to calculate time to wait.
    # Wait should call sleep with time of 0.1 - (1.04 - 1.01) = 0.07.
    last_request_time = 1.01
    self.requester._last_request_start_time = last_request_time
    current_time = 1.04
    self.requester._GetCurrentTime = MockMethod('_GetCurrentTime', [()],
                                                [current_time])
    # The expected argument is spelled as the same float expression rather
    # than the literal 0.07, so the mock's comparison is not thrown off by
    # rounding.
    time.sleep = MockMethod(
        'sleep', [(time_to_wait - (current_time - last_request_time), )],
        [None])
    self.requester._Wait()
    self.assertEqual(1, time.sleep._call_count)
    self.assertEqual(1, self.requester._GetCurrentTime._call_count)

    # Case 3: current time = 1.12, last_request_time = 1.01,
    # time_to_wait = 0.1.
    # Wait should call _GetCurrentTime once to calculate time to wait.
    # Wait should not call sleep since too much time has elapsed.
    current_time = 1.12
    self.requester._GetCurrentTime = MockMethod('_GetCurrentTime', [()],
                                                [current_time])
    time.sleep = MockMethod('sleep', [], [])
    self.requester._Wait()
    self.assertEqual(0, time.sleep._call_count)
    self.assertEqual(1, self.requester._GetCurrentTime._call_count)
def testWaitsBetweenRequestInvocations(self):
    """Tests that Requester waits between requests.

    Start() is driven with mocks that allow exactly two requests; _Wait
    must have been called once for each of them.
    """
    requests_to_send = 2
    generator = MockGenerator()
    self.requester = requester.Requester(generator, log.Logger(), None, 0.1,
                                         seconds=10)

    # Allow two requests, then tell the send loop to stop on the third query.
    self.requester._ShouldSendMoreRequests = MockMethod(
        '_ShouldSendMoreRequests', [(), (), ()], [True, True, False])

    # _Wait is expected once per request, always without arguments.
    self.requester._Wait = MockMethod(
        '_Wait', [()] * requests_to_send, [None] * requests_to_send)

    # The sender mock expects the serialized generator request on each call.
    payload = generator.request.SerializeToString()
    self.requester._sender = MockMethod(
        '_sender',
        [(payload,)] * requests_to_send,
        [(1, 'return value')] * requests_to_send)

    self.requester.Start()

    self.assertEqual(2, self.requester._Wait._call_count)
print(name) time.sleep(randint(3, 10)) # print(url) resp = req(url) if resp: with open('images/' + name, 'wb') as outfile: outfile.write(resp.content) else: print('BADBADBADBAD') data = pandas.read_excel('Pots.xlsx') data.reset_index(inplace=True, drop=True) req = requester.Requester() bad = [] allList = [] for i, row in data.iterrows(): pics = row['Pics'].replace('[', '').replace(']', '').replace("'", '').split(', ') pot, *_ = row['Text'].split(',') for pic in pics: *_, picName, _ = pic.split('/') picName = '%s__%s.jpg' % (pot, picName) allList.append([picName, pic]) dSet = {x.replace('.jpg', '') for x in os.listdir('images')}
"headers" : [], "cache_dir" : "/var/mod_dsrc_cache", "azure_containers" : [ { "account_name" : "", "account_key" : "", "container" : "", "compression" : True, "decompression" : False , } ], } ################################################## queue = Queue.Queue() _obj_watcher = watcher.Watcher(_config_watcher, True) _obj_request = requester.Requester(_config_request, True) while True: _obj_watcher.set_test_loop(1) _obj_watcher.worker_init(queue) _obj_request.set_test_loop(7) _obj_request.worker_init(queue) queue.join()