async def respond(self, job: JobRequest, response: JobResponse):
    """Log a scan result and push it onto the job's Redis results list.

    The list key is ``{self.queue}_{job.guid}_{job.chain}_results`` and the
    stored value is the JSON encoding of ``response.asdict()``.

    Args:
        job: The request the result belongs to; supplies the key parts.
        response: The scan outcome to publish.
    """
    logger.info('Scan results for job %s', job.key, extra={'extra': response.asdict()})
    results_key = f'{self.queue}_{job.guid}_{job.chain}_results'
    # NOTE(review): `with await self.redis` presumably borrows a connection
    # from an aioredis pool for the duration of the block -- confirm.
    with await self.redis as connection:
        serialized = json.dumps(response.asdict())
        await connection.rpush(results_key, serialized)
async def respond(self, job: JobRequest, response: JobResponse):
    """Log a scan result and push it onto the job's Redis results list.

    The list key is ``{self.queue}_{job.guid}_{job.chain}_results`` and the
    stored value is the JSON encoding of ``response.asdict()``.

    Args:
        job: The request the result belongs to; supplies the key parts.
        response: The scan outcome to publish.

    Raises:
        OSError: Re-raised after being logged when the push fails.
    """
    try:
        logger.info('Scan results for job %s', job.key, extra={'extra': response.asdict()})
        results_key = f'{self.queue}_{job.guid}_{job.chain}_results'
        serialized = json.dumps(response.asdict())
        await self.redis.rpush(results_key, serialized)
    except OSError:
        # Record which job lost its result, then let the caller decide.
        logger.exception('Error pushing results for %s', job.guid)
        raise
async def test_results_after_timeout(redis_client, job_processor):
    """A response already sitting in Redis is picked up once the job is registered."""
    results_key = 'test_results_after_timeout'

    # Add response before registering
    early_response = JobResponse(index=0, bit=True, verdict=False, confidence=.5, metadata='')
    await redis_client.rpush(results_key, json.dumps(early_response.asdict()))

    # A single job at index 0 with a duration of 1.
    jobs = [
        JobRequest('polyswarmd-addr', 'guid', 0, 'uri', ArtifactType.FILE.value, 1, None, 'side',
                   int(time.time())),
    ]
    pending = Future()
    await job_processor.register_jobs('guid', results_key, jobs, pending)

    scan_results = await pending
    assert len(scan_results) == 1
    first = scan_results[0]
    assert first.bit
async def test_results_after_complete(redis_client, job_processor):
    """A response pushed after registration resolves the future with its exact values."""
    results_key = 'test_results_after_complete'
    request = JobRequest('polyswarmd-addr', 'guid', 0, 'uri', ArtifactType.FILE.value, 10, None,
                         'side', int(time.time()))
    pending = Future()
    await job_processor.register_jobs('guid', results_key, [request], pending)

    # Add response before waiting on the future
    pushed = JobResponse(index=0, bit=True, verdict=False, confidence=.5, metadata='')
    await redis_client.rpush(results_key, json.dumps(pushed.asdict()))

    scan_results = await pending
    assert scan_results
    first = scan_results[0]
    assert first.bit
    assert not first.verdict
    assert first.confidence == .5
async def process_job(self, job: JobRequest, session: ClientSession):
    """Download, scan and answer one job, logging known failures instead of raising.

    While the job is registered with the liveness recorder, the artifact is
    downloaded and scanned, each bounded by the job's remaining time. The
    aggregate rate limit is consulted before the download (peek only) and
    again just before scanning; if either check fails a blank response is
    sent via ``rate_limit_respond``. A successful scan result is published
    from a separately scheduled task.

    Args:
        job: The job to process.
        session: HTTP client session handed to ``self.download``.

    The listed exception types are caught and logged here; they do not
    propagate to the caller. The running-task counter is always decremented
    and one waiter on ``task_count_lock`` is notified, whatever the outcome.
    """
    remaining_time = 0
    loop = asyncio.get_event_loop()
    try:
        async with self.liveness_recorder.waiting_task(job.key, round(time.time())):
            remaining_time = self.get_remaining_time(job)

            # Don't waste bandwidth and resources downloading files if we hit the rate limit
            if await self.rate_limit_aggregate.use(peek=True):
                content = await asyncio.wait_for(self.download(job, session),
                                                 timeout=remaining_time)
                # The download used part of the budget; refresh it for the scan.
                remaining_time = self.get_remaining_time(job)

                # Double check there are still scans remaining, and also increment
                # the account just before scanning
                if await self.rate_limit_aggregate.use():
                    scan_result = await asyncio.wait_for(self.scan(job, content),
                                                         timeout=remaining_time)
                    response = JobResponse(job.index, scan_result.bit, scan_result.verdict,
                                           scan_result.confidence, scan_result.metadata)
                    # Publish in the background so this worker slot is not held
                    # while the result is written.
                    loop.create_task(self.respond(job, response))
                else:
                    self.rate_limit_respond(job)
            else:
                self.rate_limit_respond(job)

        self.tries = 0
    # Handler order matters: the built-in TimeoutError is a subclass of OSError,
    # and from Python 3.11 asyncio.TimeoutError is the very same class. Listing
    # the specific timeouts before OSError keeps each handler reachable.
    except asyncio.TimeoutError:
        logger.exception('Timeout processing artifact after %s seconds', remaining_time,
                         extra={'extra': job.asdict()})
    except TimeoutError:
        # Only distinct from the handler above on Python < 3.11.
        logger.exception('Redis request timed out')
    except OSError:
        logger.exception('Redis connection down')
    except aioredis.errors.ReplyError:
        logger.exception('Redis out of memory')
    except ExpiredException:
        logger.exception('Received expired job', extra={'extra': job.asdict()})
    except aiohttp.ClientResponseError:
        logger.exception('Error fetching artifact', extra={'extra': job.asdict()})
    except DecodeError:
        logger.exception('Error Decoding artifact', extra={'extra': job.asdict()})
    except ApiKeyException:
        logger.exception('Refusing to send API key over insecure transport')
    except asyncio.CancelledError:
        # NOTE(review): cancellation is logged and swallowed here, as in the
        # original; confirm that the caller does not rely on it propagating.
        logger.exception('Worker shutdown while processing job', extra={'extra': job.asdict()})
    finally:
        async with self.task_count_lock:
            self.current_task_count -= 1
            self.task_count_lock.notify()
async def test_result_values_in_redis(redis_client, job_processor):
    """Completing one job records completion-time, ratio and result counters in Redis."""
    results_key = 'test_result_values_in_redis'
    request = JobRequest('polyswarmd-addr', 'guid', 0, 'uri', ArtifactType.FILE.value, 10, None,
                         'side', int(time.time()))
    pending = Future()
    await job_processor.register_jobs('guid', results_key, [request], pending)

    # Add response before waiting on the future
    pushed = JobResponse(index=0, bit=True, verdict=False, confidence=.5, metadata='')
    await asyncio.sleep(1)
    await redis_client.rpush(results_key, json.dumps(pushed.asdict()))
    await pending

    # wait for the loop to finish in job_processor
    await asyncio.sleep(1)

    time_accum = float(await redis_client.get('QUEUE_job_completion_time_accum'))
    assert time_accum > 0

    times_count = int(await redis_client.get('QUEUE_job_completion_times_count'))
    assert times_count == 1

    ratios_accum = float(await redis_client.get('QUEUE_job_completion_time_ratios_accum'))
    assert 0 < ratios_accum < 1

    ratios_count = int(await redis_client.get('QUEUE_job_completion_time_ratios_count'))
    assert ratios_count == 1

    result_counter = int(await redis_client.get('QUEUE_scan_result_counter'))
    assert result_counter == 1
async def test_results_thousands_pending_jobs(redis_client, job_processor):
    """Register 10,000 single-job bounties, answer each, and check every future resolves."""
    total = 10_000
    pending = [Future() for _ in range(total)]

    def results_key(i):
        return f'test_results_thousands_pending_jobs:{i}'

    def make_job(i):
        return JobRequest('polyswarmd-addr', f'guid:{i}', 0, 'uri', ArtifactType.FILE.value, 19,
                          None, 'side', int(time.time()))

    # Register every job concurrently, one future per guid.
    registrations = [
        job_processor.register_jobs(f'guid:{i}', results_key(i), [make_job(i)], pending[i])
        for i in range(total)
    ]
    await asyncio.gather(*registrations)

    # The same response body is pushed to every per-job results list.
    answer = JobResponse(index=0, bit=True, verdict=False, confidence=.5, metadata='')
    payload = json.dumps(answer.asdict())
    await asyncio.gather(*(redis_client.rpush(results_key(i), payload) for i in range(total)))

    aggregated_scan_results = await asyncio.gather(*pending)
    assert len(aggregated_scan_results) == total
    assert all(results[0].bit for results in aggregated_scan_results)
async def process_job(self, job: JobRequest, session: ClientSession):
    """Download, scan and answer one job, logging known failures instead of raising.

    The job is registered with the liveness recorder, the artifact is
    downloaded and scanned (each bounded by the job's remaining time), and
    the result is published with ``self.respond``.

    Args:
        job: The job to process.
        session: HTTP client session handed to ``self.download``.

    The listed exception types are caught and logged here; they do not
    propagate to the caller. On every path the job is removed from the
    liveness recorder, the running-task counter is decremented and one waiter
    on ``task_count_lock`` is notified.
    """
    remaining_time = 0
    try:
        await self.liveness_recorder.add_waiting_task(job.key, round(time.time()))
        remaining_time = self.get_remaining_time(job)
        # Bound the download by the job's deadline so a stalled transfer cannot
        # hold this worker slot indefinitely.
        content = await asyncio.wait_for(self.download(job, session), timeout=remaining_time)
        # The download used part of the budget; refresh it so the scan timeout
        # is not stale.
        remaining_time = self.get_remaining_time(job)
        scan_result = await asyncio.wait_for(self.scan(job, content), timeout=remaining_time)
        response = JobResponse(job.index, scan_result.bit, scan_result.verdict,
                               scan_result.confidence, scan_result.metadata)
        await self.respond(job, response)
        self.tries = 0
    # From Python 3.11 asyncio.TimeoutError is the built-in TimeoutError, which is
    # a subclass of OSError; handle it first so timeouts are not reported as a
    # Redis outage.
    except asyncio.TimeoutError:
        logger.exception('Timeout processing artifact after %s seconds', remaining_time,
                         extra={'extra': job.asdict()})
    except OSError:
        logger.exception('Redis connection down')
    except aioredis.errors.ReplyError:
        logger.exception('Redis out of memory')
    except ExpiredException:
        logger.exception('Received expired job', extra={'extra': job.asdict()})
    except aiohttp.ClientResponseError:
        logger.exception('Error fetching artifact', extra={'extra': job.asdict()})
    except DecodeError:
        logger.exception('Error Decoding artifact', extra={'extra': job.asdict()})
    except ApiKeyException:
        logger.exception('Refusing to send API key over insecure transport')
    except asyncio.CancelledError:
        # NOTE(review): cancellation is logged and swallowed here, as in the
        # original; confirm that the caller does not rely on it propagating.
        logger.exception('Worker shutdown while processing job', extra={'extra': job.asdict()})
    finally:
        try:
            await self.liveness_recorder.remove_waiting_task(job.key)
        finally:
            # Release the task slot even if the liveness cleanup itself fails;
            # otherwise the counter would never come back down.
            async with self.task_count_lock:
                self.current_task_count -= 1
                self.task_count_lock.notify()
def rate_limit_respond(self, job: JobRequest):
    """Schedule a blank response for ``job`` on the current event loop.

    The response carries the field values of a default-constructed
    ``ScanResult`` and is sent by ``self.respond`` in a new task; this method
    does not wait for it.

    Args:
        job: The job to answer; only its ``index`` is read here.
    """
    event_loop = asyncio.get_event_loop()
    empty = ScanResult()
    placeholder = JobResponse(
        job.index,
        empty.bit,
        empty.verdict,
        empty.confidence,
        empty.metadata,
    )
    event_loop.create_task(self.respond(job, placeholder))