class FileDispatchServer(ServerBase):
    """Long-running service that consumes the dispatcher's file queue.

    Pops serialized FileTask messages off ``Dispatcher.file_queue`` and hands
    them to ``Dispatcher.dispatch_file``, optionally reporting each message as
    a transaction to an Elastic APM server when one is configured.
    """

    def __init__(self, datastore=None, redis=None, redis_persist=None, logger=None):
        super().__init__('assemblyline.dispatcher.file', logger)
        config: Config = forge.get_config()
        # Fall back to the forge-provided datastore when none is injected (tests inject one)
        datastore: AssemblylineDatastore = datastore or forge.get_datastore(config)
        self.dispatcher = Dispatcher(redis=redis, redis_persist=redis_persist,
                                     datastore=datastore, logger=self.log)

        if config.core.metrics.apm_server.server_url is not None:
            self.log.info(f"Exporting application metrics to: {config.core.metrics.apm_server.server_url}")
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(server_url=config.core.metrics.apm_server.server_url,
                                                service_name="dispatcher")
        else:
            self.apm_client = None

    def close(self):
        """Tear down APM instrumentation if it was enabled."""
        if self.apm_client:
            elasticapm.uninstrument()

    def try_run(self):
        """Main loop: pop file messages and dispatch them until stopped."""
        queue = self.dispatcher.file_queue
        cpu_mark = time.process_time()
        time_mark = time.time()

        while self.running:
            try:
                self.heartbeat()
                # Time spent outside queue.pop counts as busy/cpu time
                self.dispatcher.counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
                self.dispatcher.counter.increment_execution_time('busy_seconds', time.time() - time_mark)

                message = queue.pop(timeout=1)

                cpu_mark = time.process_time()
                time_mark = time.time()

                # None/empty message: timeout expired or the stop() sentinel
                if not message:
                    continue

                # Start of process dispatcher transaction
                if self.apm_client:
                    self.apm_client.begin_transaction('Process dispatcher message')

                if 'service_timeout' in message:
                    # BUG FIX: this early `continue` previously leaked the APM
                    # transaction opened just above; close it before skipping.
                    if self.apm_client:
                        self.apm_client.end_transaction('file_message', 'ignored')
                    continue

                message = FileTask(message)
                if self.apm_client:
                    elasticapm.tag(sid=message.sid)
                    elasticapm.tag(sha256=message.file_info.sha256)

                self.dispatcher.dispatch_file(message)

                # End of process dispatcher transaction (success)
                if self.apm_client:
                    self.apm_client.end_transaction('file_message', 'success')

            except Exception as error:
                self.log.exception(error)
                # End of process dispatcher transaction (exception)
                # BUG FIX: comment previously mislabelled this branch "(success)".
                if self.apm_client:
                    self.apm_client.end_transaction('file_message', 'exception')

    def stop(self):
        """Unblock the queue.pop in try_run with a sentinel, then stop."""
        self.dispatcher.file_queue.push(None)
        super().stop()
def test_simple(clean_redis, clean_datastore):
    """End-to-end walk of a submission through the dispatcher.

    Drives a single-file submission through the full service schedule
    (extract/wrench -> av-a/av-b/frankenstrings -> xerox), checking service
    queue contents and task bookkeeping after each round. NOTE: the call
    order (client action, then pull_service_results, then service_worker)
    is deliberate and part of what is being tested — do not reorder.
    """
    ds = clean_datastore
    redis = clean_redis

    def service_queue(name):
        # Shortcut to the named service's work queue on the test redis
        return get_service_queue(name, redis)

    # Store a file of unknown type for the submission to reference
    file = random_model_obj(File)
    file_hash = file.sha256
    file.type = 'unknown'
    ds.file.save(file_hash, file)

    sub: Submission = random_model_obj(models.submission.Submission)
    sub.sid = sid = 'first-submission'
    sub.params.ignore_cache = False
    sub.params.max_extracted = 5
    sub.params.classification = get_classification().UNRESTRICTED
    # initial_data is surfaced to services as temporary_submission_data below
    sub.params.initial_data = json.dumps({'cats': 'big'})
    sub.files = [dict(sha256=file_hash, name='file')]

    disp = Dispatcher(ds, redis, redis)
    disp.running = ToggleTrue()
    client = DispatchClient(ds, redis, redis)
    client.dispatcher_data_age = time.time()
    client.dispatcher_data.append(disp.instance_id)

    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    logger.info('==== first dispatch')
    # task = SubmissionTask(sub.as_primitives(), 'some-completion-queue')
    client.dispatch_submission(sub)
    disp.pull_submissions()
    disp.service_worker(disp.process_queue_index(sid))

    task = disp.tasks.get(sid)
    assert task.queue_keys[(file_hash, 'extract')] is not None
    assert task.queue_keys[(file_hash, 'wrench')] is not None
    assert service_queue('extract').length() == 1
    assert service_queue('wrench').length() == 1

    # Making the same call again will queue it up again
    logger.info('==== second dispatch')
    disp.dispatch_file(task, file_hash)

    assert task.queue_keys[(file_hash, 'extract')] is not None
    assert task.queue_keys[(file_hash, 'wrench')] is not None
    assert service_queue('extract').length() == 1  # the queue doesn't pile up
    assert service_queue('wrench').length() == 1

    logger.info('==== third dispatch')
    job = client.request_work('0', 'extract', '0')
    # initial_data from the submission params must reach the service task
    assert job.temporary_submission_data == [{'name': 'cats', 'value': 'big'}]
    client.service_failed(sid, 'abc123', make_error(file_hash, 'extract'))
    # Deliberately do in the wrong order to make sure that works
    disp.pull_service_results()
    disp.service_worker(disp.process_queue_index(sid))

    # A recoverable failure re-queues extract rather than finishing it
    assert task.queue_keys[(file_hash, 'extract')] is not None
    assert task.queue_keys[(file_hash, 'wrench')] is not None
    assert service_queue('extract').length() == 1

    # Mark extract as finished, wrench as failed
    logger.info('==== fourth dispatch')
    client.request_work('0', 'extract', '0')
    client.request_work('0', 'wrench', '0')
    client.service_finished(sid, 'extract-result', make_result(file_hash, 'extract'))
    client.service_failed(sid, 'wrench-error', make_error(file_hash, 'wrench', False))
    # One pull/worker round per outstanding service response
    for _ in range(2):
        disp.pull_service_results()
        disp.service_worker(disp.process_queue_index(sid))

    assert wait_error(task, file_hash, 'wrench')
    assert wait_result(task, file_hash, 'extract')
    # Finishing the first stage schedules the next stage of services
    assert service_queue('av-a').length() == 1
    assert service_queue('av-b').length() == 1
    assert service_queue('frankenstrings').length() == 1

    # Have the AVs fail, frankenstrings finishes
    logger.info('==== fifth dispatch')
    client.request_work('0', 'av-a', '0')
    client.request_work('0', 'av-b', '0')
    client.request_work('0', 'frankenstrings', '0')
    client.service_failed(sid, 'av-a-error', make_error(file_hash, 'av-a', False))
    client.service_failed(sid, 'av-b-error', make_error(file_hash, 'av-b', False))
    client.service_finished(sid, 'f-result', make_result(file_hash, 'frankenstrings'))
    for _ in range(3):
        disp.pull_service_results()
        disp.service_worker(disp.process_queue_index(sid))

    assert wait_result(task, file_hash, 'frankenstrings')
    assert wait_error(task, file_hash, 'av-a')
    assert wait_error(task, file_hash, 'av-b')
    assert service_queue('xerox').length() == 1

    # Finish the xerox service and check if the submission completion got checked
    logger.info('==== sixth dispatch')
    client.request_work('0', 'xerox', '0')
    client.service_finished(sid, 'xerox-result-key', make_result(file_hash, 'xerox'))

    disp.pull_service_results()
    disp.service_worker(disp.process_queue_index(sid))
    disp.save_submission()

    assert wait_result(task, file_hash, 'xerox')
    # The completed submission must be removed from the active task table
    assert disp.tasks.get(sid) is None
def test_dispatch_file(clean_redis):
    """Drive Dispatcher.dispatch_file directly through a full service schedule.

    Uses a MockDatastore and a DispatchHash to simulate service progress
    (extract/wrench -> av-a/av-b/frankenstrings -> xerox) and checks that
    each call to dispatch_file queues exactly the expected service work.
    """
    # PEP 8 (E731): define a function instead of assigning a lambda
    def service_queue(name):
        return get_service_queue(name, clean_redis)

    def clear_service_queues(disp):
        # Side-effect loop (was a list comprehension used only for effects)
        for name in disp.scheduler.services:
            service_queue(name).delete()

    ds = MockDatastore(collections=['submission', 'result', 'service', 'error', 'file', 'filescore'])
    file_hash = get_random_hash(64)
    sub = random_model_obj(models.submission.Submission)
    sub.sid = sid = 'first-submission'
    sub.params.ignore_cache = False

    disp = Dispatcher(ds, clean_redis, clean_redis, logging)
    disp.active_submissions.add(sid, SubmissionTask(dict(submission=sub)).as_primitives())
    dh = DispatchHash(sid=sid, client=clean_redis)

    print('==== first dispatch')
    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    file_task = FileTask({
        'sid': 'first-submission',
        'min_classification': get_classification().UNRESTRICTED,
        'file_info': dict(sha256=file_hash, type='unknown', magic='a', md5=get_random_hash(32),
                          mime='a', sha1=get_random_hash(40), size=10),
        'depth': 0,
        'max_files': 5
    })
    disp.dispatch_file(file_task)
    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1
    assert service_queue('wrench').length() == 1

    # Making the same call again will queue it up again
    print('==== second dispatch')
    disp.dispatch_file(file_task)
    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 2
    assert service_queue('wrench').length() == 2

    # Push back the timestamp in the dispatch hash to simulate a timeout,
    # make sure it gets pushed into that service queue again
    print('==== third dispatch')
    clear_service_queues(disp)
    dh.fail_recoverable(file_hash, 'extract')
    disp.dispatch_file(file_task)
    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1

    # Mark extract as finished, wrench as failed
    print('==== fourth dispatch')
    clear_service_queues(disp)
    dh.finish(file_hash, 'extract', 'result-key', 0, 'U')
    dh.fail_nonrecoverable(file_hash, 'wrench', 'error-key')
    disp.dispatch_file(file_task)
    assert dh.finished(file_hash, 'extract')
    assert dh.finished(file_hash, 'wrench')
    # First stage done: the second stage of services is scheduled
    assert service_queue('av-a').length() == 1
    assert service_queue('av-b').length() == 1
    assert service_queue('frankenstrings').length() == 1

    # Have the AVs fail, frankenstrings finishes
    print('==== fifth dispatch')
    clear_service_queues(disp)
    dh.fail_nonrecoverable(file_hash, 'av-a', 'error-a')
    dh.fail_nonrecoverable(file_hash, 'av-b', 'error-b')
    dh.finish(file_hash, 'frankenstrings', 'result-key', 0, 'U')
    disp.dispatch_file(file_task)
    assert dh.finished(file_hash, 'av-a')
    assert dh.finished(file_hash, 'av-b')
    assert dh.finished(file_hash, 'frankenstrings')
    assert service_queue('xerox').length() == 1

    # Finish the xerox service and check if the submission completion got checked
    print('==== sixth dispatch')
    clear_service_queues(disp)
    dh.finish(file_hash, 'xerox', 'result-key', 0, 'U')
    disp.dispatch_file(file_task)
    assert dh.finished(file_hash, 'xerox')
    # All services done: submission is pushed back for completion checking
    assert len(disp.submission_queue) == 1