def test_submit_binary(datastore, login_session):
    _, session, host = login_session
    sq.delete()

    byte_str = get_random_phrase(wmin=30, wmax=75).encode()
    fd, temp_path = tempfile.mkstemp()
    try:
        with os.fdopen(fd, 'wb') as fh:
            fh.write(byte_str)

        with open(temp_path, 'rb') as fh:
            sha256 = hashlib.sha256(byte_str).hexdigest()
            json_data = {
                'name': 'text.txt',
                'metadata': {'test': 'test_submit_binary'}
            }
            data = {'json': json.dumps(json_data)}
            resp = get_api_data(session, f"{host}/api/v4/submit/", method="POST",
                                data=data, files={'bin': fh}, headers={})
            assert isinstance(resp['sid'], str)
            for f in resp['files']:
                assert f['sha256'] == sha256
                assert f['name'] == json_data['name']

        msg = SubmissionTask(sq.pop(blocking=False))
        assert msg.submission.sid == resp['sid']
    finally:
        # noinspection PyBroadException
        try:
            os.unlink(temp_path)
        except Exception:
            pass

def test_resubmit(datastore, login_session):
    _, session, host = login_session
    sq.delete()

    submission_files = [f.sha256 for f in submission.files]
    resp = get_api_data(session, f"{host}/api/v4/submit/resubmit/{submission.sid}/")
    assert resp['params']['description'].startswith('Resubmit')
    assert resp['sid'] != submission.sid
    for f in resp['files']:
        assert f['sha256'] in submission_files

    msg = SubmissionTask(sq.pop(blocking=False))
    assert msg.submission.sid == resp['sid']

def test_resubmit_dynamic(datastore, login_session):
    _, session, host = login_session
    sq.delete()

    sha256 = random.choice(submission.results)[:64]
    resp = get_api_data(session, f"{host}/api/v4/submit/dynamic/{sha256}/")
    assert resp['params']['description'].startswith('Resubmit')
    assert resp['params']['description'].endswith('Dynamic Analysis')
    assert resp['sid'] != submission.sid
    for f in resp['files']:
        assert f['sha256'] == sha256
    assert 'Dynamic Analysis' in resp['params']['services']['selected']

    msg = SubmissionTask(sq.pop(blocking=False))
    assert msg.submission.sid == resp['sid']

def test_submit_url(datastore, login_session):
    _, session, host = login_session
    sq.delete()

    data = {
        'url': 'https://www.cyber.gc.ca/en/theme-gcwu-fegc/assets/wmms.svg',
        'name': 'wmms.svg',
        'metadata': {'test': 'test_submit_url'}
    }
    resp = get_api_data(session, f"{host}/api/v4/submit/", method="POST", data=json.dumps(data))
    assert isinstance(resp['sid'], str)
    for f in resp['files']:
        assert f['name'] == data['name']

    msg = SubmissionTask(sq.pop(blocking=False))
    assert msg.submission.sid == resp['sid']

def test_submit_hash(datastore, login_session):
    _, session, host = login_session
    sq.delete()

    data = {
        'sha256': random.choice(submission.results)[:64],
        'name': 'random_hash.txt',
        'metadata': {'test': 'test_submit_hash'}
    }
    resp = get_api_data(session, f"{host}/api/v4/submit/", method="POST", data=json.dumps(data))
    assert isinstance(resp['sid'], str)
    for f in resp['files']:
        assert f['sha256'] == data['sha256']
        assert f['name'] == data['name']

    msg = SubmissionTask(sq.pop(blocking=False))
    assert msg.submission.sid == resp['sid']

def dispatch_submission(self, submission: Submission, completed_queue: str = None):
    """Insert a submission into the dispatching system.

    Note:
        You probably actually want to use the SubmissionTool.

    Prerequisites:
        - The submission should already be saved in the datastore.
        - The files should already be in the datastore and filestore.
    """
    self.submission_queue.push(SubmissionTask(dict(
        submission=submission,
        completed_queue=completed_queue,
    )).as_primitives())

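# A minimal usage sketch (an assumption, not from the source) of meeting the
# prerequisites listed in the docstring above before calling dispatch_submission.
# `client` is assumed to be an object exposing that method, and `ds` a datastore
# with the same .save() interface used by the tests below; the function name,
# parameters, and `file_records` shape are all illustrative.
def example_dispatch_usage(client, ds, submission, file_records, completed_queue=None):
    # Prerequisite: the submission is already saved in the datastore.
    ds.submission.save(submission.sid, submission)

    # Prerequisite: the file records are already saved as well (and, in a real
    # deployment, the raw file content would already be in the filestore).
    for sha256, record in file_records.items():
        ds.file.save(sha256, record)

    # With both prerequisites met, the submission can be handed to the dispatching
    # system; an optional completion queue name may be supplied to be notified
    # when the submission finishes.
    client.dispatch_submission(submission, completed_queue=completed_queue)
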
def test_dispatch_submission(clean_redis):
    ds = MockDatastore(collections=['submission', 'result', 'service', 'error', 'file'])
    file_hash = get_random_hash(64)
    ds.file.save(file_hash, random_model_obj(models.file.File))
    ds.file.get(file_hash).sha256 = file_hash

    submission = random_model_obj(models.submission.Submission)
    submission.files.clear()
    submission.files.append(dict(name='./file', sha256=file_hash))
    submission.sid = 'first-submission'

    disp = Dispatcher(ds, logger=logging, redis=clean_redis, redis_persist=clean_redis)

    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    task = SubmissionTask(dict(submission=submission))
    disp.dispatch_submission(task)

    file_task = FileTask(disp.file_queue.pop())
    assert file_task.sid == submission.sid
    assert file_task.file_info.sha256 == file_hash
    assert file_task.depth == 0
    assert file_task.file_info.type == ds.file.get(file_hash).type

    dh = DispatchHash(submission.sid, clean_redis)
    for service_name in disp.scheduler.services.keys():
        dh.fail_nonrecoverable(file_hash, service_name, 'error-code')

    disp.dispatch_submission(task)
    assert ds.submission.get(submission.sid).state == 'completed'
    assert ds.submission.get(submission.sid).errors == ['error-code'] * len(disp.scheduler.services)

def try_run(self):
    queue = self.dispatcher.submission_queue
    cpu_mark = time.process_time()
    time_mark = time.time()

    while self.running:
        try:
            self.heartbeat()
            self.dispatcher.counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
            self.dispatcher.counter.increment_execution_time('busy_seconds', time.time() - time_mark)

            message = queue.pop(timeout=1)

            cpu_mark = time.process_time()
            time_mark = time.time()

            if not message:
                continue

            # Start of process dispatcher transaction
            if self.apm_client:
                self.apm_client.begin_transaction('Process dispatcher message')

            # This is probably a complete task
            if 'submission' in message:
                task = SubmissionTask(message)
                if self.apm_client:
                    elasticapm.tag(sid=task.submission.sid)

            # This is just a sid nudge; this submission should already be running
            elif 'sid' in message:
                active_task = self.dispatcher.active_submissions.get(message['sid'])
                if self.apm_client:
                    elasticapm.tag(sid=message['sid'])

                if active_task is None:
                    self.log.warning(f"[{message['sid']}] Dispatcher was nudged for inactive submission.")
                    # End of process dispatcher transaction (inactive submission)
                    if self.apm_client:
                        self.apm_client.end_transaction('submission_message', 'inactive')
                    continue

                task = SubmissionTask(active_task)
            else:
                self.log.error(f'Corrupted submission message in dispatcher {message}')
                # End of process dispatcher transaction (corrupted message)
                if self.apm_client:
                    self.apm_client.end_transaction('submission_message', 'corrupted')
                continue

            self.dispatcher.dispatch_submission(task)

            # End of process dispatcher transaction (success)
            if self.apm_client:
                self.apm_client.end_transaction('submission_message', 'success')

        except Exception as error:
            self.log.exception(error)
            # End of process dispatcher transaction (exception)
            if self.apm_client:
                self.apm_client.end_transaction('submission_message', 'exception')

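# For reference, a sketch (inferred from the branches above, not from the source) of
# the two well-formed message shapes the loop accepts on the submission queue: a
# complete serialized SubmissionTask, as pushed by dispatch_submission, or a bare
# sid "nudge" for a submission already tracked in active_submissions. The function
# name is illustrative.
def example_queue_messages(queue, submission):
    # A complete task: carries the whole submission and is handled directly.
    queue.push(SubmissionTask(dict(submission=submission)).as_primitives())

    # A sid nudge: the loop looks the task up in active_submissions instead,
    # and logs a warning if the submission is no longer active.
    queue.push({'sid': submission.sid})
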
def test_dispatch_file(clean_redis):
    service_queue = lambda name: get_service_queue(name, clean_redis)

    ds = MockDatastore(collections=['submission', 'result', 'service', 'error', 'file', 'filescore'])
    file_hash = get_random_hash(64)
    sub = random_model_obj(models.submission.Submission)
    sub.sid = sid = 'first-submission'
    sub.params.ignore_cache = False

    disp = Dispatcher(ds, clean_redis, clean_redis, logging)
    disp.active_submissions.add(sid, SubmissionTask(dict(submission=sub)).as_primitives())
    dh = DispatchHash(sid=sid, client=clean_redis)

    print('==== first dispatch')
    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    file_task = FileTask({
        'sid': 'first-submission',
        'min_classification': get_classification().UNRESTRICTED,
        'file_info': dict(sha256=file_hash, type='unknown', magic='a', md5=get_random_hash(32),
                          mime='a', sha1=get_random_hash(40), size=10),
        'depth': 0,
        'max_files': 5
    })
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1
    assert service_queue('wrench').length() == 1

    # Making the same call again will queue it up again
    print('==== second dispatch')
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 2
    assert service_queue('wrench').length() == 2
    # assert len(mq) == 4

    # Push back the timestamp in the dispatch hash to simulate a timeout,
    # make sure it gets pushed into that service queue again
    print('==== third dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.fail_recoverable(file_hash, 'extract')
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1
    # assert len(mq) == 1

    # Mark extract as finished, wrench as failed
    print('==== fourth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.finish(file_hash, 'extract', 'result-key', 0, 'U')
    dh.fail_nonrecoverable(file_hash, 'wrench', 'error-key')
    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'extract')
    assert dh.finished(file_hash, 'wrench')
    assert service_queue('av-a').length() == 1
    assert service_queue('av-b').length() == 1
    assert service_queue('frankenstrings').length() == 1

    # Have the AVs fail, frankenstrings finishes
    print('==== fifth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.fail_nonrecoverable(file_hash, 'av-a', 'error-a')
    dh.fail_nonrecoverable(file_hash, 'av-b', 'error-b')
    dh.finish(file_hash, 'frankenstrings', 'result-key', 0, 'U')
    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'av-a')
    assert dh.finished(file_hash, 'av-b')
    assert dh.finished(file_hash, 'frankenstrings')
    assert service_queue('xerox').length() == 1

    # Finish the xerox service and check if the submission completion got checked
    print('==== sixth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.finish(file_hash, 'xerox', 'result-key', 0, 'U')
    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'xerox')
    assert len(disp.submission_queue) == 1

def test_dispatch_extracted(clean_redis):
    # Setup the fake datastore
    ds = MockDatastore(collections=['submission', 'result', 'service', 'error', 'file'])
    file_hash = get_random_hash(64)
    second_file_hash = get_random_hash(64)

    for fh in [file_hash, second_file_hash]:
        ds.file.save(fh, random_model_obj(models.file.File))
        ds.file.get(fh).sha256 = fh

    # Inject the fake submission
    submission = random_model_obj(models.submission.Submission)
    submission.files.clear()
    submission.files.append(dict(name='./file', sha256=file_hash))
    submission.sid = 'first-submission'

    # Launch the dispatcher
    disp = Dispatcher(ds, logger=logging, redis=clean_redis, redis_persist=clean_redis)

    # Launch the submission
    task = SubmissionTask(dict(submission=submission))
    disp.dispatch_submission(task)

    # Check that the right values were sent to the file queue
    file_task = FileTask(disp.file_queue.pop(timeout=1))
    assert file_task.sid == submission.sid
    assert file_task.file_info.sha256 == file_hash
    assert file_task.depth == 0
    assert file_task.file_info.type == ds.file.get(file_hash).type

    # Finish the services
    dh = DispatchHash(submission.sid, clean_redis)
    for service_name in disp.scheduler.services.keys():
        dh.finish(file_hash, service_name, 'error-code', 0, 'U')

    # But one of the services extracted a file
    dh.add_file(second_file_hash, 10, file_hash)

    # Meanwhile, dispatch_submission has been called again on the submission
    disp.dispatch_submission(task)

    # It should see the missing file, and we should get a new file dispatch message for it.
    # To make sure it is getting processed properly, this should be at depth 1, the first
    # layer of extracted files.
    file_task = disp.file_queue.pop(timeout=1)
    assert file_task is not None
    file_task = FileTask(file_task)
    assert file_task.sid == submission.sid
    assert file_task.file_info.sha256 == second_file_hash
    assert file_task.depth == 1
    assert file_task.file_info.type == ds.file.get(second_file_hash).type

    # Finish the second file
    for service_name in disp.scheduler.services.keys():
        dh.finish(second_file_hash, service_name, 'error-code', 0, 'U')

    # And now we should get the finished submission
    disp.dispatch_submission(task)
    submission = ds.submission.get(submission.sid)
    assert submission.state == 'completed'
    assert submission.errors == []
    assert len(submission.results) == 2 * len(disp.scheduler.services)