Ejemplo n.º 1
0
    async def process_job(self, job: JobRequest, session: ClientSession):
        """Download, scan and answer one job, honouring its time budget and the rate limit.

        The artifact is only downloaded when a rate-limit peek succeeds, and
        only scanned when the rate limit can actually be consumed. Otherwise
        ``rate_limit_respond`` is called for the job. Every handled failure is
        logged and swallowed, so this coroutine does not raise for them.

        Args:
            job: The job to process.
            session: Session handed through to ``self.download``.
        """
        remaining_time = 0
        loop = asyncio.get_event_loop()
        try:
            async with self.liveness_recorder.waiting_task(
                    job.key, round(time.time())):
                remaining_time = self.get_remaining_time(job)

                # Don't waste bandwidth and resources downloading files if we hit the rate limit
                if await self.rate_limit_aggregate.use(peek=True):
                    content = await asyncio.wait_for(
                        self.download(job, session), timeout=remaining_time)
                    # The download consumed part of the budget; recompute before scanning
                    remaining_time = self.get_remaining_time(job)

                    # Double check there are still scans remaining, and also increment the account just before scanning
                    if await self.rate_limit_aggregate.use():
                        scan_result = await asyncio.wait_for(
                            self.scan(job, content), timeout=remaining_time)
                        response = JobResponse(job.index, scan_result.bit,
                                               scan_result.verdict,
                                               scan_result.confidence,
                                               scan_result.metadata)
                        # The response is sent from a separate task, so this
                        # coroutine does not wait for delivery.
                        # NOTE(review): the task handle is not retained anywhere.
                        loop.create_task(self.respond(job, response))
                    else:
                        self.rate_limit_respond(job)
                else:
                    self.rate_limit_respond(job)

                self.tries = 0
        # Handler order matters: builtin TimeoutError is a subclass of OSError,
        # and on Python >= 3.11 asyncio.TimeoutError *is* the builtin
        # TimeoutError. The most specific handlers therefore come first,
        # otherwise `except OSError` would swallow every timeout.
        except asyncio.TimeoutError:
            logger.exception('Timeout processing artifact after %s seconds',
                             remaining_time,
                             extra={'extra': job.asdict()})
        except TimeoutError:
            logger.exception('Redis request timed out')
        except OSError:
            logger.exception('Redis connection down')
        except aioredis.errors.ReplyError:
            logger.exception('Redis out of memory')
        except ExpiredException:
            logger.exception('Received expired job',
                             extra={'extra': job.asdict()})
        except aiohttp.ClientResponseError:
            logger.exception('Error fetching artifact',
                             extra={'extra': job.asdict()})
        except DecodeError:
            logger.exception('Error Decoding artifact',
                             extra={'extra': job.asdict()})
        except ApiKeyException:
            logger.exception(
                'Refusing to send API key over insecure transport')
        except asyncio.CancelledError:
            # NOTE(review): cancellation is logged and not re-raised here.
            logger.exception('Worker shutdown while processing job',
                             extra={'extra': job.asdict()})
        finally:
            # Always give the task slot back and wake one waiter on the condition.
            async with self.task_count_lock:
                self.current_task_count -= 1
                self.task_count_lock.notify()
Ejemplo n.º 2
0
    async def get_jobs(self) -> AsyncGenerator[JobRequest, None]:
        """Yield jobs popped from the Redis queue until ``self.finished`` is set.

        When ``max_task_count`` is positive and already reached, waits on
        ``task_count_lock`` before popping. Each yielded job has been counted
        in ``current_task_count``.

        Redis failures and malformed jobs are logged and the loop continues.

        Yields:
            A ``JobRequest`` built from the JSON payload popped from ``self.queue``.
        """
        while not self.finished:
            try:
                with await self.redis as redis:
                    async with self.task_count_lock:
                        # Wait for task to complete, if at maximum
                        if 0 < self.max_task_count <= self.current_task_count:
                            await self.task_count_lock.wait()

                        raw_job = await redis.rpop(self.queue)
                        if not raw_job:
                            continue

                        job = json.loads(raw_job.decode('utf-8'))
                        logger.info('Received job', extra={'extra': job})
                        # Build the request BEFORE claiming a slot: if the
                        # payload is invalid, no consumer will ever release
                        # the slot, so it must not be counted.
                        request = JobRequest(**job)
                        self.current_task_count += 1
                    yield request
            # builtin TimeoutError subclasses OSError, so it must be handled
            # first or this branch can never run.
            except TimeoutError:
                logger.exception('Redis request timed out')
            except OSError:
                logger.exception('Redis connection down')
            except aioredis.errors.ReplyError:
                logger.exception('Redis out of memory')
            except (TypeError, KeyError, ValueError):
                # ValueError covers undecodable bytes and malformed JSON
                # (UnicodeDecodeError / json.JSONDecodeError).
                logger.exception('Invalid job received, ignoring')
Ejemplo n.º 3
0
 async def process_job(self, job: JobRequest, session: ClientSession):
     """Download, scan and answer one job within its remaining time budget.

     The job is registered with the liveness recorder while it is being
     worked on and always unregistered afterwards. Every handled failure is
     logged and swallowed.

     Args:
         job: The job to process.
         session: Session handed through to ``self.download``.
     """
     remaining_time = 0
     try:
         await self.liveness_recorder.add_waiting_task(
             job.key, round(time.time()))
         remaining_time = self.get_remaining_time(job)
         # Bound the download by the job's budget too, then recompute so the
         # scan is not granted time the download already consumed.
         content = await asyncio.wait_for(self.download(job, session),
                                          timeout=remaining_time)
         remaining_time = self.get_remaining_time(job)
         scan_result = await asyncio.wait_for(self.scan(job, content),
                                              timeout=remaining_time)
         response = JobResponse(job.index, scan_result.bit,
                                scan_result.verdict, scan_result.confidence,
                                scan_result.metadata)
         await self.respond(job, response)
         self.tries = 0
     # On Python >= 3.11 asyncio.TimeoutError is the builtin TimeoutError,
     # an OSError subclass, so it must be handled before `except OSError`.
     except asyncio.TimeoutError:
         logger.exception('Timeout processing artifact after %s seconds',
                          remaining_time,
                          extra={'extra': job.asdict()})
     except OSError:
         logger.exception('Redis connection down')
     except aioredis.errors.ReplyError:
         logger.exception('Redis out of memory')
     except ExpiredException:
         logger.exception('Received expired job',
                          extra={'extra': job.asdict()})
     except aiohttp.ClientResponseError:
         logger.exception('Error fetching artifact',
                          extra={'extra': job.asdict()})
     except DecodeError:
         logger.exception('Error Decoding artifact',
                          extra={'extra': job.asdict()})
     except ApiKeyException:
         logger.exception(
             'Refusing to send API key over insecure transport')
     except asyncio.CancelledError:
         # NOTE(review): cancellation is logged and not re-raised here.
         logger.exception('Worker shutdown while processing job',
                          extra={'extra': job.asdict()})
     finally:
         try:
             await self.liveness_recorder.remove_waiting_task(job.key)
         finally:
             # Release the slot even if unregistering failed, otherwise the
             # task count would never drop again.
             async with self.task_count_lock:
                 self.current_task_count -= 1
                 self.task_count_lock.notify()
async def test_redis_error_recovery(job_processor):
    """The reset callback must be invoked when results are fetched over a closed Redis connection."""
    reset_callback = asynctest.CoroutineMock()
    job_processor.reset_callback = reset_callback
    request = JobRequest(
        'polyswarmd-addr', 'guid', 0, 'uri', ArtifactType.FILE.value, 1,
        None, 'side', int(time.time()),
    )
    pending = Future()

    # Stop the processor and close its Redis connection before using it.
    job_processor.stop()
    job_processor.redis.close()
    await job_processor.redis.wait_closed()

    await job_processor.register_jobs(
        'guid', 'test_redis_error_recovery', [request], pending)
    await job_processor.fetch_results()

    reset_callback.assert_called()
async def test_results_after_timeout(redis_client, job_processor):
    """A response pushed to Redis before registration still resolves the future."""
    queue_name = 'test_results_after_timeout'

    # Push the response onto the queue ahead of registering the job
    canned_response = JobResponse(
        index=0, bit=True, verdict=False, confidence=.5, metadata='')
    await redis_client.rpush(queue_name, json.dumps(canned_response.asdict()))

    requests = [
        JobRequest('polyswarmd-addr', 'guid', index, 'uri',
                   ArtifactType.FILE.value, 1, None, 'side', int(time.time()))
        for index in range(1)
    ]

    pending = Future()
    await job_processor.register_jobs('guid', queue_name, requests, pending)

    results = await pending

    assert len(results) == 1
    assert results[0].bit
async def test_results_after_complete(redis_client, job_processor):
    """A response pushed after registration is delivered with its field values intact."""
    queue_name = 'test_results_after_complete'
    request = JobRequest(
        'polyswarmd-addr', 'guid', 0, 'uri', ArtifactType.FILE.value, 10,
        None, 'side', int(time.time()),
    )
    pending = Future()
    await job_processor.register_jobs('guid', queue_name, [request], pending)

    # Queue the response first, then wait on the future
    canned_response = JobResponse(
        index=0, bit=True, verdict=False, confidence=.5, metadata='')

    await redis_client.rpush(queue_name, json.dumps(canned_response.asdict()))
    results = await pending

    assert results
    first = results[0]
    assert first.bit
    assert not first.verdict
    assert first.confidence == .5
async def test_result_values_in_redis(redis_client, job_processor):
    """Completing one job must leave the expected ``QUEUE_*`` counters in Redis."""
    queue_name = 'test_result_values_in_redis'
    request = JobRequest(
        'polyswarmd-addr', 'guid', 0, 'uri', ArtifactType.FILE.value, 10,
        None, 'side', int(time.time()),
    )
    pending = Future()
    await job_processor.register_jobs('guid', queue_name, [request], pending)

    # Prepare the response, then push it after a short delay and wait on the future
    canned_response = JobResponse(
        index=0, bit=True, verdict=False, confidence=.5, metadata='')

    await asyncio.sleep(1)
    await redis_client.rpush(queue_name, json.dumps(canned_response.asdict()))
    await pending

    # Give the job_processor loop time to finish
    await asyncio.sleep(1)

    time_accum = await redis_client.get('QUEUE_job_completion_time_accum')
    assert float(time_accum) > 0
    time_count = await redis_client.get('QUEUE_job_completion_times_count')
    assert int(time_count) == 1
    ratio_accum = await redis_client.get('QUEUE_job_completion_time_ratios_accum')
    assert 0 < float(ratio_accum) < 1
    ratio_count = await redis_client.get('QUEUE_job_completion_time_ratios_count')
    assert int(ratio_count) == 1
    result_counter = await redis_client.get('QUEUE_scan_result_counter')
    assert int(result_counter) == 1
Ejemplo n.º 8
0
 async def scan(self, job: JobRequest, content: bytes) -> ScanResult:
     """Decode ``content`` for the job's artifact type and pass it to ``self.scanner``.

     Decoding and the scanner call both happen while ``self.scan_semaphore``
     is held.

     Args:
         job: Job supplying the guid, artifact type, metadata and chain.
         content: Raw artifact bytes to decode and scan.

     Returns:
         Whatever ``self.scanner.scan`` returns for the decoded content.
     """
     kind = job.get_artifact_type()
     async with self.scan_semaphore:
         result = await self.scanner.scan(
             job.guid,
             kind,
             kind.decode_content(content),
             job.metadata,
             job.chain,
         )
     return result
 def create_job(i):
     """Build a ``JobRequest`` fixture whose second field is the string ``guid:<i>``."""
     guid = f'guid:{i}'
     now = int(time.time())
     return JobRequest(
         'polyswarmd-addr', guid, 0, 'uri', ArtifactType.FILE.value, 19,
         None, 'side', now,
     )