def is_dispatcher(self, dispatcher_id) -> bool:
    if dispatcher_id in self.dead_dispatchers:
        return False

    # Refresh the instance snapshot when it is stale or the id is unknown
    if time.time() - self.dispatcher_data_age > 120 or dispatcher_id not in self.dispatcher_data:
        self.dispatcher_data = Dispatcher.all_instances(self.redis_persist)
        self.dispatcher_data_age = time.time()

    if dispatcher_id in self.dispatcher_data:
        return True

    self.dead_dispatchers.append(dispatcher_id)
    return False
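The liveness check above is essentially a small TTL cache over the dispatcher instance registry: a snapshot is refreshed at most every two minutes, and an id confirmed dead is never re-checked. A minimal standalone sketch of the same pattern; `fetch_instances` and `InstanceCache` are illustrative names, not part of the Assemblyline API:

import time

# Hedged sketch of the refresh-on-expiry pattern used by is_dispatcher().
# `fetch_instances` stands in for Dispatcher.all_instances(); it is an
# assumption, not a real Assemblyline call.
class InstanceCache:
    def __init__(self, fetch_instances, ttl=120):
        self.fetch_instances = fetch_instances
        self.ttl = ttl
        self.instances = set()
        self.age = 0.0
        self.dead = set()  # ids already confirmed dead are never re-checked

    def is_alive(self, instance_id) -> bool:
        if instance_id in self.dead:
            return False
        # Refresh the snapshot when it is stale or the id is unknown
        if time.time() - self.age > self.ttl or instance_id not in self.instances:
            self.instances = set(self.fetch_instances())
            self.age = time.time()
        if instance_id in self.instances:
            return True
        self.dead.add(instance_id)
        return False

The one-way dead set mirrors `self.dead_dispatchers`: once a caller has observed an instance as gone, later registry refreshes cannot resurrect it.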
def test_dispatch_submission(clean_redis):
    ds = MockDatastore(collections=['submission', 'result', 'service', 'error', 'file'])
    file_hash = get_random_hash(64)
    ds.file.save(file_hash, random_model_obj(models.file.File))
    ds.file.get(file_hash).sha256 = file_hash

    submission = random_model_obj(models.submission.Submission)
    submission.files.clear()
    submission.files.append(dict(name='./file', sha256=file_hash))
    submission.sid = 'first-submission'

    disp = Dispatcher(ds, logger=logging, redis=clean_redis, redis_persist=clean_redis)

    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    task = SubmissionTask(dict(submission=submission))
    disp.dispatch_submission(task)

    file_task = FileTask(disp.file_queue.pop())
    assert file_task.sid == submission.sid
    assert file_task.file_info.sha256 == file_hash
    assert file_task.depth == 0
    assert file_task.file_info.type == ds.file.get(file_hash).type

    # Fail every service, then re-dispatch to drive the submission to completion
    dh = DispatchHash(submission.sid, clean_redis)
    for service_name in disp.scheduler.services.keys():
        dh.fail_nonrecoverable(file_hash, service_name, 'error-code')

    disp.dispatch_submission(task)
    assert ds.submission.get(submission.sid).state == 'completed'
    assert ds.submission.get(submission.sid).errors == ['error-code'] * len(disp.scheduler.services)
def send_heartbeat(self, m_type, m_name, m_data, instances):
    if m_type == "dispatcher":
        try:
            instances = sorted(Dispatcher.all_instances(self.redis_persist))
            inflight = {_i: Dispatcher.instance_assignment_size(self.redis_persist, _i) for _i in instances}
            queues = {_i: Dispatcher.all_queue_lengths(self.redis, _i) for _i in instances}

            msg = {
                "sender": self.sender,
                "msg": {
                    "inflight": {
                        "max": self.config.core.dispatcher.max_inflight,
                        "outstanding": self.dispatch_active_hash.length(),
                        "per_instance": [inflight[_i] for _i in instances]
                    },
                    "instances": len(instances),
                    "metrics": m_data,
                    "queues": {
                        "ingest": self.dispatcher_submission_queue.length(),
                        "start": [queues[_i]['start'] for _i in instances],
                        "result": [queues[_i]['result'] for _i in instances],
                        "command": [queues[_i]['command'] for _i in instances]
                    },
                    "component": m_name,
                }
            }
            self.status_queue.publish(DispatcherMessage(msg).as_primitives())
            self.log.info(f"Sent dispatcher heartbeat: {msg['msg']}")
        except Exception:
            self.log.exception("An exception occurred while generating DispatcherMessage")

    elif m_type == "ingester":
        try:
            c_q_len = self.ingest_unique_queue.count(*self.c_rng)
            h_q_len = self.ingest_unique_queue.count(*self.h_rng)
            m_q_len = self.ingest_unique_queue.count(*self.m_rng)
            l_q_len = self.ingest_unique_queue.count(*self.l_rng)

            msg = {
                "sender": self.sender,
                "msg": {
                    "instances": instances,
                    "metrics": m_data,
                    "processing": {
                        "inflight": self.ingest_scanning.length()
                    },
                    "processing_chance": {
                        "critical": 1 - drop_chance(c_q_len, self.c_s_at),
                        "high": 1 - drop_chance(h_q_len, self.h_s_at),
                        "low": 1 - drop_chance(l_q_len, self.l_s_at),
                        "medium": 1 - drop_chance(m_q_len, self.m_s_at)
                    },
                    "queues": {
                        "critical": c_q_len,
                        "high": h_q_len,
                        "ingest": self.ingest_queue.length(),
                        "complete": self.ingest_complete_queue.length(),
                        "low": l_q_len,
                        "medium": m_q_len
                    }
                }
            }
            self.status_queue.publish(IngestMessage(msg).as_primitives())
            self.log.info(f"Sent ingester heartbeat: {msg['msg']}")
        except Exception:
            self.log.exception("An exception occurred while generating IngestMessage")

    elif m_type == "alerter":
        try:
            msg = {
                "sender": self.sender,
                "msg": {
                    "instances": instances,
                    "metrics": m_data,
                    "queues": {
                        "alert": self.alert_queue.length()
                    }
                }
            }
            self.status_queue.publish(AlerterMessage(msg).as_primitives())
            self.log.info(f"Sent alerter heartbeat: {msg['msg']}")
        except Exception:
            self.log.exception("An exception occurred while generating AlerterMessage")

    elif m_type == "expiry":
        try:
            msg = {
                "sender": self.sender,
                "msg": {
                    "instances": instances,
                    "metrics": m_data,
                    "queues": self.to_expire
                }
            }
            self.status_queue.publish(ExpiryMessage(msg).as_primitives())
            self.log.info(f"Sent expiry heartbeat: {msg['msg']}")
        except Exception:
            self.log.exception("An exception occurred while generating ExpiryMessage")

    elif m_type == "archive":
        try:
            msg = {
                "sender": self.sender,
                "msg": {
                    "instances": instances,
                    "metrics": m_data
                }
            }
            self.status_queue.publish(ArchiveMessage(msg).as_primitives())
            self.log.info(f"Sent archive heartbeat: {msg['msg']}")
        except Exception:
            self.log.exception("An exception occurred while generating ArchiveMessage")

    elif m_type == "scaler":
        try:
            msg = {
                "sender": self.sender,
                "msg": {
                    "instances": instances,
                    "metrics": m_data,
                }
            }
            self.status_queue.publish(ScalerMessage(msg).as_primitives())
            self.log.info(f"Sent scaler heartbeat: {msg['msg']}")
        except Exception:
            self.log.exception("An exception occurred while generating ScalerMessage")

    elif m_type == "scaler_status":
        try:
            msg = {
                "sender": self.sender,
                "msg": {
                    "service_name": m_name,
                    "metrics": m_data,
                }
            }
            self.status_queue.publish(ScalerStatusMessage(msg).as_primitives())
            self.log.info(f"Sent scaler status heartbeat: {msg['msg']}")
        except Exception:
            self.log.exception("An exception occurred while generating ScalerStatusMessage")

    elif m_type == "service":
        try:
            busy, idle = get_working_and_idle(self.redis, m_name)
            msg = {
                "sender": self.sender,
                "msg": {
                    "instances": len(busy) + len(idle),
                    "metrics": m_data,
                    "activity": {
                        'busy': len(busy),
                        'idle': len(idle)
                    },
                    "queue": get_service_queue(m_name, self.redis).length(),
                    "service_name": m_name
                }
            }
            self.status_queue.publish(ServiceMessage(msg).as_primitives())
            self.log.info(f"Sent service heartbeat: {msg['msg']}")
        except Exception:
            self.log.exception("An exception occurred while generating ServiceMessage")

    else:
        self.log.warning(f"Skipping unknown counter: {m_name} [{m_type}] ==> {m_data}")
def test_simple(clean_redis, clean_datastore):
    ds = clean_datastore
    redis = clean_redis

    def service_queue(name):
        return get_service_queue(name, redis)

    file = random_model_obj(File)
    file_hash = file.sha256
    file.type = 'unknown'
    ds.file.save(file_hash, file)

    sub: Submission = random_model_obj(models.submission.Submission)
    sub.sid = sid = 'first-submission'
    sub.params.ignore_cache = False
    sub.params.max_extracted = 5
    sub.params.classification = get_classification().UNRESTRICTED
    sub.params.initial_data = json.dumps({'cats': 'big'})
    sub.files = [dict(sha256=file_hash, name='file')]

    disp = Dispatcher(ds, redis, redis)
    disp.running = ToggleTrue()
    client = DispatchClient(ds, redis, redis)
    client.dispatcher_data_age = time.time()
    client.dispatcher_data.append(disp.instance_id)

    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    logger.info('==== first dispatch')
    client.dispatch_submission(sub)
    disp.pull_submissions()
    disp.service_worker(disp.process_queue_index(sid))

    task = disp.tasks.get(sid)
    assert task.queue_keys[(file_hash, 'extract')] is not None
    assert task.queue_keys[(file_hash, 'wrench')] is not None
    assert service_queue('extract').length() == 1
    assert service_queue('wrench').length() == 1

    # Making the same call again should not re-queue the file
    logger.info('==== second dispatch')
    disp.dispatch_file(task, file_hash)
    assert task.queue_keys[(file_hash, 'extract')] is not None
    assert task.queue_keys[(file_hash, 'wrench')] is not None
    assert service_queue('extract').length() == 1  # the queue doesn't pile up
    assert service_queue('wrench').length() == 1

    logger.info('==== third dispatch')
    job = client.request_work('0', 'extract', '0')
    assert job.temporary_submission_data == [{'name': 'cats', 'value': 'big'}]
    client.service_failed(sid, 'abc123', make_error(file_hash, 'extract'))
    # Deliberately do in the wrong order to make sure that works
    disp.pull_service_results()
    disp.service_worker(disp.process_queue_index(sid))
    assert task.queue_keys[(file_hash, 'extract')] is not None
    assert task.queue_keys[(file_hash, 'wrench')] is not None
    assert service_queue('extract').length() == 1

    # Mark extract as finished, wrench as failed
    logger.info('==== fourth dispatch')
    client.request_work('0', 'extract', '0')
    client.request_work('0', 'wrench', '0')
    client.service_finished(sid, 'extract-result', make_result(file_hash, 'extract'))
    client.service_failed(sid, 'wrench-error', make_error(file_hash, 'wrench', False))
    for _ in range(2):
        disp.pull_service_results()
        disp.service_worker(disp.process_queue_index(sid))
    assert wait_error(task, file_hash, 'wrench')
    assert wait_result(task, file_hash, 'extract')
    assert service_queue('av-a').length() == 1
    assert service_queue('av-b').length() == 1
    assert service_queue('frankenstrings').length() == 1

    # Have the AVs fail, frankenstrings finishes
    logger.info('==== fifth dispatch')
    client.request_work('0', 'av-a', '0')
    client.request_work('0', 'av-b', '0')
    client.request_work('0', 'frankenstrings', '0')
    client.service_failed(sid, 'av-a-error', make_error(file_hash, 'av-a', False))
    client.service_failed(sid, 'av-b-error', make_error(file_hash, 'av-b', False))
    client.service_finished(sid, 'f-result', make_result(file_hash, 'frankenstrings'))
    for _ in range(3):
        disp.pull_service_results()
        disp.service_worker(disp.process_queue_index(sid))
    assert wait_result(task, file_hash, 'frankenstrings')
    assert wait_error(task, file_hash, 'av-a')
    assert wait_error(task, file_hash, 'av-b')
    assert service_queue('xerox').length() == 1

    # Finish the xerox service and check if the submission completion got checked
    logger.info('==== sixth dispatch')
    client.request_work('0', 'xerox', '0')
    client.service_finished(sid, 'xerox-result-key', make_result(file_hash, 'xerox'))
    disp.pull_service_results()
    disp.service_worker(disp.process_queue_index(sid))
    disp.save_submission()

    assert wait_result(task, file_hash, 'xerox')
    assert disp.tasks.get(sid) is None
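`make_result` and `make_error` are test helpers not shown in this section. Based on how `test_dispatch_extracted` below builds a result by hand, they plausibly look like the following sketch; the exact fields set (and the `recoverable` flag mapping onto the error status) are assumptions:

# Hedged sketch of the helpers used above; the real implementations may differ.
def make_result(file_hash, service):
    new_result: Result = random_minimal_obj(Result)
    new_result.sha256 = file_hash
    new_result.response.service_name = service
    return new_result

def make_error(file_hash, service, recoverable=True):
    new_error: Error = random_minimal_obj(Error)
    new_error.sha256 = file_hash
    new_error.response.service_name = service
    # Assumed mapping: the third positional argument in the calls above
    # selects between recoverable and non-recoverable failures
    new_error.response.status = 'FAIL_RECOVERABLE' if recoverable else 'FAIL_NONRECOVERABLE'
    return new_error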
def test_dispatch_extracted(clean_redis, clean_datastore):
    redis = clean_redis
    ds = clean_datastore

    # Setup the fake datastore
    file_hash = get_random_hash(64)
    second_file_hash = get_random_hash(64)

    for fh in [file_hash, second_file_hash]:
        obj = random_model_obj(models.file.File)
        obj.sha256 = fh
        ds.file.save(fh, obj)

    # Inject the fake submission
    submission = random_model_obj(models.submission.Submission)
    submission.files = [dict(name='./file', sha256=file_hash)]
    sid = submission.sid = 'first-submission'

    disp = Dispatcher(ds, redis, redis)
    disp.running = ToggleTrue()
    client = DispatchClient(ds, redis, redis)
    client.dispatcher_data_age = time.time()
    client.dispatcher_data.append(disp.instance_id)

    # Launch the submission
    client.dispatch_submission(submission)
    disp.pull_submissions()
    disp.service_worker(disp.process_queue_index(sid))

    # Finish one service extracting a file
    job = client.request_work('0', 'extract', '0')
    assert job.fileinfo.sha256 == file_hash
    assert job.filename == './file'
    new_result: Result = random_minimal_obj(Result)
    new_result.sha256 = file_hash
    new_result.response.service_name = 'extract'
    new_result.response.extracted = [dict(sha256=second_file_hash, name='second-*',
                                          description='abc', classification='U')]
    client.service_finished(sid, 'extracted-done', new_result)

    # Process the result
    disp.pull_service_results()
    disp.service_worker(disp.process_queue_index(sid))
    disp.service_worker(disp.process_queue_index(sid))

    # The extracted file should now have been dispatched to the extract service
    job = client.request_work('0', 'extract', '0')
    assert job.fileinfo.sha256 == second_file_hash
    assert job.filename == 'second-*'
from assemblyline_core.dispatching.dispatcher import Dispatcher

with Dispatcher() as server:
    server.serve_forever()
class SubmissionDispatchServer(ServerBase):
    def __init__(self, datastore=None, redis=None, redis_persist=None, logger=None):
        super().__init__('assemblyline.dispatcher.submissions', logger)

        config = forge.get_config()
        datastore = datastore or forge.get_datastore(config)
        self.dispatcher = Dispatcher(logger=self.log, redis=redis,
                                     redis_persist=redis_persist, datastore=datastore)

        if config.core.metrics.apm_server.server_url is not None:
            self.log.info(f"Exporting application metrics to: {config.core.metrics.apm_server.server_url}")
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(server_url=config.core.metrics.apm_server.server_url,
                                                service_name="dispatcher")
        else:
            self.apm_client = None

    def close(self):
        if self.apm_client:
            elasticapm.uninstrument()

    def try_run(self):
        queue = self.dispatcher.submission_queue
        cpu_mark = time.process_time()
        time_mark = time.time()

        while self.running:
            try:
                self.heartbeat()
                self.dispatcher.counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
                self.dispatcher.counter.increment_execution_time('busy_seconds', time.time() - time_mark)

                message = queue.pop(timeout=1)

                cpu_mark = time.process_time()
                time_mark = time.time()

                if not message:
                    continue

                # Start of process dispatcher transaction
                if self.apm_client:
                    self.apm_client.begin_transaction('Process dispatcher message')

                # This is probably a complete task
                if 'submission' in message:
                    task = SubmissionTask(message)
                    if self.apm_client:
                        elasticapm.tag(sid=task.submission.sid)

                # This is just a sid nudge, this submission should already be running
                elif 'sid' in message:
                    active_task = self.dispatcher.active_submissions.get(message['sid'])
                    if self.apm_client:
                        elasticapm.tag(sid=message['sid'])

                    if active_task is None:
                        self.log.warning(f"[{message['sid']}] Dispatcher was nudged for inactive submission.")
                        # End of process dispatcher transaction (inactive)
                        if self.apm_client:
                            self.apm_client.end_transaction('submission_message', 'inactive')
                        continue

                    task = SubmissionTask(active_task)
                else:
                    self.log.error(f'Corrupted submission message in dispatcher {message}')
                    # End of process dispatcher transaction (corrupted)
                    if self.apm_client:
                        self.apm_client.end_transaction('submission_message', 'corrupted')
                    continue

                self.dispatcher.dispatch_submission(task)

                # End of process dispatcher transaction (success)
                if self.apm_client:
                    self.apm_client.end_transaction('submission_message', 'success')

            except Exception as error:
                self.log.exception(error)
                # End of process dispatcher transaction (exception)
                if self.apm_client:
                    self.apm_client.end_transaction('submission_message', 'exception')

    def stop(self):
        self.dispatcher.submission_queue.push(None)
        super().stop()
class FileDispatchServer(ServerBase):
    def __init__(self, datastore=None, redis=None, redis_persist=None, logger=None):
        super().__init__('assemblyline.dispatcher.file', logger)

        config: Config = forge.get_config()
        datastore: AssemblylineDatastore = datastore or forge.get_datastore(config)
        self.dispatcher = Dispatcher(redis=redis, redis_persist=redis_persist,
                                     datastore=datastore, logger=self.log)

        if config.core.metrics.apm_server.server_url is not None:
            self.log.info(f"Exporting application metrics to: {config.core.metrics.apm_server.server_url}")
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(server_url=config.core.metrics.apm_server.server_url,
                                                service_name="dispatcher")
        else:
            self.apm_client = None

    def close(self):
        if self.apm_client:
            elasticapm.uninstrument()

    def try_run(self):
        queue = self.dispatcher.file_queue
        cpu_mark = time.process_time()
        time_mark = time.time()

        while self.running:
            try:
                self.heartbeat()
                self.dispatcher.counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
                self.dispatcher.counter.increment_execution_time('busy_seconds', time.time() - time_mark)

                message = queue.pop(timeout=1)

                cpu_mark = time.process_time()
                time_mark = time.time()

                if not message:
                    continue

                # Start of process dispatcher transaction
                if self.apm_client:
                    self.apm_client.begin_transaction('Process dispatcher message')

                # Service timeout notices are dropped without further processing
                if 'service_timeout' in message:
                    continue

                message = FileTask(message)
                if self.apm_client:
                    elasticapm.tag(sid=message.sid)
                    elasticapm.tag(sha256=message.file_info.sha256)

                self.dispatcher.dispatch_file(message)

                # End of process dispatcher transaction (success)
                if self.apm_client:
                    self.apm_client.end_transaction('file_message', 'success')

            except Exception as error:
                self.log.exception(error)
                # End of process dispatcher transaction (exception)
                if self.apm_client:
                    self.apm_client.end_transaction('file_message', 'exception')

    def stop(self):
        self.dispatcher.file_queue.push(None)
        super().stop()
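The two servers split dispatch work between submission-level and file-level messages, and both are thread-like `ServerBase` instances (the `core` fixture below starts its components with `start()`). A minimal combined entry-point sketch under that assumption; real deployments run each server in its own process or container:

# Hedged sketch: run both halves of the dispatcher side by side.
# Assumes ServerBase exposes thread-style start()/join(), as suggested by
# how the core fixture below manages its threads.
if __name__ == '__main__':
    servers = [SubmissionDispatchServer(), FileDispatchServer()]
    for server in servers:
        server.start()
    for server in servers:
        server.join()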
def test_dispatch_file(clean_redis):
    def service_queue(name):
        return get_service_queue(name, clean_redis)

    ds = MockDatastore(collections=['submission', 'result', 'service', 'error', 'file', 'filescore'])
    file_hash = get_random_hash(64)
    sub = random_model_obj(models.submission.Submission)
    sub.sid = sid = 'first-submission'
    sub.params.ignore_cache = False

    disp = Dispatcher(ds, clean_redis, clean_redis, logging)
    disp.active_submissions.add(sid, SubmissionTask(dict(submission=sub)).as_primitives())
    dh = DispatchHash(sid=sid, client=clean_redis)

    print('==== first dispatch')
    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    file_task = FileTask({
        'sid': 'first-submission',
        'min_classification': get_classification().UNRESTRICTED,
        'file_info': dict(sha256=file_hash, type='unknown', magic='a', md5=get_random_hash(32),
                          mime='a', sha1=get_random_hash(40), size=10),
        'depth': 0,
        'max_files': 5
    })
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1
    assert service_queue('wrench').length() == 1

    # Making the same call again will queue it up again
    print('==== second dispatch')
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 2
    assert service_queue('wrench').length() == 2

    # Push back the timestamp in the dispatch hash to simulate a timeout,
    # make sure it gets pushed into that service queue again
    print('==== third dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.fail_recoverable(file_hash, 'extract')
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1

    # Mark extract as finished, wrench as failed
    print('==== fourth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.finish(file_hash, 'extract', 'result-key', 0, 'U')
    dh.fail_nonrecoverable(file_hash, 'wrench', 'error-key')
    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'extract')
    assert dh.finished(file_hash, 'wrench')
    assert service_queue('av-a').length() == 1
    assert service_queue('av-b').length() == 1
    assert service_queue('frankenstrings').length() == 1

    # Have the AVs fail, frankenstrings finishes
    print('==== fifth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.fail_nonrecoverable(file_hash, 'av-a', 'error-a')
    dh.fail_nonrecoverable(file_hash, 'av-b', 'error-b')
    dh.finish(file_hash, 'frankenstrings', 'result-key', 0, 'U')
    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'av-a')
    assert dh.finished(file_hash, 'av-b')
    assert dh.finished(file_hash, 'frankenstrings')
    assert service_queue('xerox').length() == 1

    # Finish the xerox service and check if the submission completion got checked
    print('==== sixth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.finish(file_hash, 'xerox', 'result-key', 0, 'U')
    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'xerox')
    assert len(disp.submission_queue) == 1
def test_dispatch_extracted(clean_redis):
    # Setup the fake datastore
    ds = MockDatastore(collections=['submission', 'result', 'service', 'error', 'file'])
    file_hash = get_random_hash(64)
    second_file_hash = get_random_hash(64)

    for fh in [file_hash, second_file_hash]:
        ds.file.save(fh, random_model_obj(models.file.File))
        ds.file.get(fh).sha256 = fh

    # Inject the fake submission
    submission = random_model_obj(models.submission.Submission)
    submission.files.clear()
    submission.files.append(dict(name='./file', sha256=file_hash))
    submission.sid = 'first-submission'

    # Launch the dispatcher
    disp = Dispatcher(ds, logger=logging, redis=clean_redis, redis_persist=clean_redis)

    # Launch the submission
    task = SubmissionTask(dict(submission=submission))
    disp.dispatch_submission(task)

    # Check that the right values were sent to the file queue
    file_task = FileTask(disp.file_queue.pop(timeout=1))
    assert file_task.sid == submission.sid
    assert file_task.file_info.sha256 == file_hash
    assert file_task.depth == 0
    assert file_task.file_info.type == ds.file.get(file_hash).type

    # Finish the services
    dh = DispatchHash(submission.sid, clean_redis)
    for service_name in disp.scheduler.services.keys():
        dh.finish(file_hash, service_name, 'error-code', 0, 'U')

    # But one of the services extracted a file
    dh.add_file(second_file_hash, 10, file_hash)

    # But meanwhile, dispatch_submission has been recalled on the submission
    disp.dispatch_submission(task)

    # It should see the missing file, and we should get a new file dispatch message for it.
    # To make sure it is getting processed properly, this should be at depth 1, the first
    # layer of extracted files
    file_task = disp.file_queue.pop(timeout=1)
    assert file_task is not None
    file_task = FileTask(file_task)
    assert file_task.sid == submission.sid
    assert file_task.file_info.sha256 == second_file_hash
    assert file_task.depth == 1
    assert file_task.file_info.type == ds.file.get(second_file_hash).type

    # Finish the second file
    for service_name in disp.scheduler.services.keys():
        dh.finish(second_file_hash, service_name, 'error-code', 0, 'U')

    # And now we should get the finished submission
    disp.dispatch_submission(task)
    submission = ds.submission.get(submission.sid)
    assert submission.state == 'completed'
    assert submission.errors == []
    assert len(submission.results) == 2 * len(disp.scheduler.services)
def core(request, redis, filestore, config, clean_datastore: AssemblylineDatastore):
    # Block logs from being initialized, it breaks under pytest if you create new stream handlers
    from assemblyline.common import log as al_log
    al_log.init_logging = lambda *args: None
    dispatcher.TIMEOUT_EXTRA_TIME = 1
    dispatcher.TIMEOUT_TEST_INTERVAL = 3
    ds = clean_datastore

    # Register services
    stages = get_service_stage_hash(redis)
    services = []
    for svc, stage in [('pre', 'EXTRACT'), ('core-a', 'CORE'), ('core-b', 'CORE'), ('finish', 'POST')]:
        ds.service.save(f'{svc}_0', dummy_service(svc, stage, docid=f'{svc}_0'))
        ds.service_delta.save(svc, ServiceDelta({
            'name': svc,
            'version': '0',
            'enabled': True
        }))
        stages.set(svc, ServiceStage.Running)
        services.append(MockService(svc, ds, redis, filestore))
    ds.service.commit()
    ds.service_delta.commit()

    listed_services = ds.list_all_services(full=True)
    assert len(listed_services) == 4

    ingester = Ingester(datastore=ds, redis=redis, persistent_redis=redis, config=config)

    fields = CoreSession(config, ingester)
    fields.redis = redis
    fields.ds = ds
    fields.config = config
    forge.config_cache[None] = fields.config
    fields.filestore = filestore
    fields.pre_service = services[0]

    threads: list[ServerBase] = [
        # Start the ingester components
        ingester,

        # Start the dispatcher
        Dispatcher(datastore=ds, redis=redis, redis_persist=redis, config=config),

        # Start plumber
        Plumber(datastore=ds, redis=redis, redis_persist=redis, delay=0.5, config=config),
    ]
    threads = threads + services

    for t in threads:
        t.daemon = True
        t.start()

    def stop_core():
        [tr.stop() for tr in threads]
        [tr.raising_join() for tr in threads]
    request.addfinalizer(stop_core)

    return fields
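A test built on this fixture only needs to accept it as an argument; pytest wires the rest. A minimal sketch, with illustrative assertions grounded in what the fixture sets up:

# Hedged sketch of a test consuming the core fixture above.
def test_core_fixture_wiring(core):
    # The fixture exposes the shared components it assembled
    assert core.redis is not None
    assert core.ds is not None
    # Four dummy services were registered: pre, core-a, core-b, finish
    assert len(core.ds.list_all_services(full=True)) == 4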