async def process_job(self, job: JobRequest, session: ClientSession):
    remaining_time = 0
    loop = asyncio.get_event_loop()
    try:
        async with self.liveness_recorder.waiting_task(job.key, round(time.time())):
            remaining_time = self.get_remaining_time(job)
            # Don't waste bandwidth and resources downloading files if we hit the rate limit
            if await self.rate_limit_aggregate.use(peek=True):
                content = await asyncio.wait_for(self.download(job, session), timeout=remaining_time)
                remaining_time = self.get_remaining_time(job)
                # Double check there are still scans remaining, and also increment the account just before scanning
                if await self.rate_limit_aggregate.use():
                    scan_result = await asyncio.wait_for(self.scan(job, content), timeout=remaining_time)
                    response = JobResponse(job.index, scan_result.bit, scan_result.verdict,
                                           scan_result.confidence, scan_result.metadata)
                    loop.create_task(self.respond(job, response))
                else:
                    self.rate_limit_respond(job)
            else:
                self.rate_limit_respond(job)
        self.tries = 0
    except OSError:
        logger.exception('Redis connection down')
    except TimeoutError:
        logger.exception('Redis request timed out')
    except aioredis.errors.ReplyError:
        logger.exception('Redis out of memory')
    except ExpiredException:
        logger.exception('Received expired job', extra={'extra': job.asdict()})
    except aiohttp.ClientResponseError:
        logger.exception('Error fetching artifact', extra={'extra': job.asdict()})
    except DecodeError:
        logger.exception('Error decoding artifact', extra={'extra': job.asdict()})
    except ApiKeyException:
        logger.exception('Refusing to send API key over insecure transport')
    except asyncio.TimeoutError:
        logger.exception('Timeout processing artifact after %s seconds', remaining_time,
                         extra={'extra': job.asdict()})
    except asyncio.CancelledError:
        logger.exception('Worker shutdown while processing job', extra={'extra': job.asdict()})
    finally:
        async with self.task_count_lock:
            self.current_task_count -= 1
            self.task_count_lock.notify()
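# A minimal sketch (not polyswarm-client's implementation) of the rate-limit contract
# process_job relies on above: use(peek=True) reports whether quota remains without
# consuming it, while use() atomically consumes one unit. The class and attribute
# names here are hypothetical illustrations.
class RateLimitSketch:
    def __init__(self, limit: int):
        self.limit = limit
        self.used = 0
        self.lock = asyncio.Lock()

    async def use(self, peek: bool = False) -> bool:
        async with self.lock:
            if self.used >= self.limit:
                return False
            if not peek:
                self.used += 1
            return True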
async def get_jobs(self) -> AsyncGenerator[JobRequest, None]:
    while not self.finished:
        try:
            with await self.redis as redis:
                async with self.task_count_lock:
                    # Wait for task to complete, if at maximum
                    if 0 < self.max_task_count <= self.current_task_count:
                        await self.task_count_lock.wait()

                    job = await redis.rpop(self.queue)
                    if not job:
                        continue

                    job = json.loads(job.decode('utf-8'))
                    logger.info('Received job', extra={'extra': job})
                    self.current_task_count += 1

                yield JobRequest(**job)
        except OSError:
            logger.exception('Redis connection down')
        except TimeoutError:
            logger.exception('Redis request timed out')
        except aioredis.errors.ReplyError:
            logger.exception('Redis out of memory')
        except (TypeError, KeyError):
            logger.exception('Invalid job received, ignoring')
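# Hypothetical driver showing how get_jobs() and process_job() are assumed to fit
# together: the generator throttles via task_count_lock, and each job is scanned in
# its own task so one slow artifact never blocks the queue. The run() name and the
# session setup are illustrative, not part of the original worker.
async def run(self):
    async with aiohttp.ClientSession() as session:
        async for job in self.get_jobs():
            asyncio.get_event_loop().create_task(self.process_job(job, session))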
async def process_job(self, job: JobRequest, session: ClientSession):
    remaining_time = 0
    try:
        await self.liveness_recorder.add_waiting_task(job.key, round(time.time()))
        remaining_time = self.get_remaining_time(job)
        content = await self.download(job, session)
        scan_result = await asyncio.wait_for(self.scan(job, content), timeout=remaining_time)
        response = JobResponse(job.index, scan_result.bit, scan_result.verdict,
                               scan_result.confidence, scan_result.metadata)
        await self.respond(job, response)
        self.tries = 0
    except OSError:
        logger.exception('Redis connection down')
    except aioredis.errors.ReplyError:
        logger.exception('Redis out of memory')
    except ExpiredException:
        logger.exception('Received expired job', extra={'extra': job.asdict()})
    except aiohttp.ClientResponseError:
        logger.exception('Error fetching artifact', extra={'extra': job.asdict()})
    except DecodeError:
        logger.exception('Error decoding artifact', extra={'extra': job.asdict()})
    except ApiKeyException:
        logger.exception('Refusing to send API key over insecure transport')
    except asyncio.TimeoutError:
        logger.exception('Timeout processing artifact after %s seconds', remaining_time,
                         extra={'extra': job.asdict()})
    except asyncio.CancelledError:
        logger.exception('Worker shutdown while processing job', extra={'extra': job.asdict()})
    finally:
        await self.liveness_recorder.remove_waiting_task(job.key)
        async with self.task_count_lock:
            self.current_task_count -= 1
            self.task_count_lock.notify()
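# Sketch of the get_remaining_time() helper both process_job variants call, assuming
# JobRequest carries the submission timestamp and duration the tests construct it with.
# The field names (ts, duration) and the choice to raise ExpiredException here are
# assumptions consistent with the handlers above, not the library's actual code.
def get_remaining_time(self, job: JobRequest) -> int:
    remaining = job.ts + job.duration - round(time.time())
    if remaining <= 0:
        raise ExpiredException()
    return remaining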
async def test_redis_error_recovery(job_processor):
    reset_redis = asynctest.CoroutineMock()
    job_processor.reset_callback = reset_redis

    job = JobRequest('polyswarmd-addr', 'guid', 0, 'uri', ArtifactType.FILE.value, 1, None, 'side',
                     int(time.time()))
    future = Future()

    job_processor.stop()
    job_processor.redis.close()
    await job_processor.redis.wait_closed()

    await job_processor.register_jobs('guid', 'test_redis_error_recovery', [job], future)
    await job_processor.fetch_results()

    reset_redis.assert_called()
async def test_results_after_timeout(redis_client, job_processor):
    # Add response before registering
    job_response = JobResponse(index=0, bit=True, verdict=False, confidence=.5, metadata='')
    await redis_client.rpush('test_results_after_timeout', json.dumps(job_response.asdict()))

    jobs = [JobRequest('polyswarmd-addr', 'guid', i, 'uri', ArtifactType.FILE.value, 1, None, 'side',
                       int(time.time()))
            for i in range(1)]
    future = Future()
    await job_processor.register_jobs('guid', 'test_results_after_timeout', jobs, future)

    scan_results = await future
    assert len(scan_results) == 1
    assert scan_results[0].bit
async def test_results_after_complete(redis_client, job_processor):
    job = JobRequest('polyswarmd-addr', 'guid', 0, 'uri', ArtifactType.FILE.value, 10, None, 'side',
                     int(time.time()))
    future = Future()
    await job_processor.register_jobs('guid', 'test_results_after_complete', [job], future)

    # Add response before waiting on the future
    job_response = JobResponse(index=0, bit=True, verdict=False, confidence=.5, metadata='')
    await redis_client.rpush('test_results_after_complete', json.dumps(job_response.asdict()))

    scan_results = await future
    assert scan_results
    assert scan_results[0].bit
    assert not scan_results[0].verdict
    assert scan_results[0].confidence == .5
async def test_result_values_in_redis(redis_client, job_processor):
    job = JobRequest('polyswarmd-addr', 'guid', 0, 'uri', ArtifactType.FILE.value, 10, None, 'side',
                     int(time.time()))
    future = Future()
    await job_processor.register_jobs('guid', 'test_result_values_in_redis', [job], future)

    # Add response before waiting on the future
    job_response = JobResponse(index=0, bit=True, verdict=False, confidence=.5, metadata='')
    await asyncio.sleep(1)
    await redis_client.rpush('test_result_values_in_redis', json.dumps(job_response.asdict()))
    await future

    # Wait for the loop to finish in job_processor
    await asyncio.sleep(1)

    assert float(await redis_client.get('QUEUE_job_completion_time_accum')) > 0
    assert int(await redis_client.get('QUEUE_job_completion_times_count')) == 1
    assert 0 < float(await redis_client.get('QUEUE_job_completion_time_ratios_accum')) < 1
    assert int(await redis_client.get('QUEUE_job_completion_time_ratios_count')) == 1
    assert int(await redis_client.get('QUEUE_scan_result_counter')) == 1
async def scan(self, job: JobRequest, content: bytes) -> ScanResult:
    artifact_type = job.get_artifact_type()
    async with self.scan_semaphore:
        return await self.scanner.scan(job.guid, artifact_type, artifact_type.decode_content(content),
                                       job.metadata, job.chain)
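# Self-contained illustration of the bounded-concurrency pattern scan() uses: even if
# the caller spawns many tasks, at most `limit` bodies execute inside the semaphore at
# once. This demo is hypothetical and separate from the worker class; asyncio.sleep()
# stands in for self.scanner.scan(...).
async def demo_scan_semaphore(limit: int = 2, tasks: int = 8):
    semaphore = asyncio.Semaphore(limit)

    async def scan_once(i: int):
        async with semaphore:
            await asyncio.sleep(0.1)  # stand-in for a real scanner call
            return i

    return await asyncio.gather(*(scan_once(i) for i in range(tasks)))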
def create_job(i):
    return JobRequest('polyswarmd-addr', f'guid:{i}', 0, 'uri', ArtifactType.FILE.value, 19, None, 'side',
                      int(time.time()))
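# Hypothetical test showing create_job feeding register_jobs, mirroring the fixtures
# used in the tests above; the queue name and batch size are illustrative.
async def test_create_job_batch(job_processor):
    jobs = [create_job(i) for i in range(3)]
    future = Future()
    await job_processor.register_jobs('guid', 'test_create_job_batch', jobs, future)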