def test_finish_missing_file(client, dispatch_client, heuristics):
    """Finishing a task whose extracted/supplementary files are absent from the
    filestore must report success=False and list the missing hashes.
    """
    heuristics.get.return_value = None
    task = random_minimal_obj(Task)
    fs = forge.get_filestore()
    # Keep regenerating until the result actually has extracted files.
    result: Result = random_minimal_obj(Result)
    while not result.response.extracted:
        result: Result = random_model_obj(Result)

    # Keep only extracted files that are NOT in the filestore...
    result.response.extracted = [
        x for x in result.response.extracted if not fs.exists(x.sha256)
    ]
    # ...so every remaining extracted file is missing by construction; no need
    # to re-check fs.exists() here. Supplementary files still need the check.
    missing = {x.sha256 for x in result.response.extracted}
    missing |= {
        x.sha256
        for x in result.response.supplementary
        if not fs.exists(x.sha256)
    }

    message = {
        'task': task.as_primitives(),
        'result': result.as_primitives(),
        'freshen': True
    }
    resp = client.post('/api/v1/task/', headers=headers, json=message)
    assert resp.status_code == 200
    assert resp.json['api_response']['success'] is False
    assert set(resp.json['api_response']['missing_files']) == missing
def test_register_new_heuristics(client, storage):
    """New heuristics supplied at registration are bulk-created and reported."""
    expected_config = 'A CONFIG OBJECT FOR SURE'
    storage.get_service_with_delta.return_value = expected_config
    storage.heuristic.get_if_exists.return_value = None

    service = random_minimal_obj(Service).as_primitives()
    service['heuristics'] = [random_minimal_obj(Heuristic).as_primitives()]
    heur_id = service['heuristics'][0]['heur_id']
    storage.heuristic.bulk.return_value = {
        "items": [{"update": {"result": "created", "_id": heur_id}}]
    }

    headers['Service-Name'] = service['name']
    headers['Service-Version'] = service['version']
    resp = client.post("/api/v1/service/register/", headers=headers, json=service)
    assert resp.status_code == 200
    assert storage.heuristic.bulk.call_count == 1

    body = resp.json['api_response']
    assert body['keep_alive'] is True
    assert len(body['new_heuristics']) == 1
    assert heur_id in body['new_heuristics'][0]
    assert body['service_config'] == expected_config
def test_finish_error(client, dispatch_client):
    """Posting an error for a task forwards it to service_failed unchanged."""
    task = random_minimal_obj(Task)
    error = random_minimal_obj(Error)
    resp = client.post('/api/v1/task/', headers=headers, json={
        'task': task.as_primitives(),
        'error': error.as_primitives(),
    })
    assert resp.status_code == 200
    assert dispatch_client.service_failed.call_count == 1

    failed_args = dispatch_client.service_failed.call_args[0]
    assert failed_args[0] == task.sid
    # Timestamps are stamped server-side; copy them over before comparing the
    # rest of the error body.
    sent = failed_args[2]
    error.archive_ts = sent.archive_ts
    error.expiry_ts = sent.expiry_ts
    error.created = sent.created
    assert sent == error
def test_register_bad_heuristics(client, storage):
    """A heuristic with a null description fails validation with a 400."""
    service = random_minimal_obj(Service).as_primitives()
    broken = random_minimal_obj(Heuristic).as_primitives()
    broken['description'] = None
    service['heuristics'] = [broken]

    headers['Service-Name'] = service['name']
    headers['Service-Version'] = service['version']
    resp = client.post("/api/v1/service/register/", headers=headers, json=service)
    assert resp.status_code == 400
def test_finish_heuristic(client, dispatch_client, heuristics):
    """When heuristic lookups miss, section scores are recomputed to zero."""
    heuristics.get.return_value = None
    task = random_minimal_obj(Task)
    # Regenerate until at least one section carries a heuristic.
    result: Result = random_model_obj(Result)
    while not any(sec.heuristic for sec in result.result.sections):
        result: Result = random_model_obj(Result)
    heuristics_count = sum(
        1 for sec in result.result.sections if sec.heuristic is not None)

    result.result.score = 99999
    result.response.extracted = []
    result.response.supplementary = []
    payload = {
        'task': task.as_primitives(),
        'result': result.as_primitives(),
        'freshen': True
    }
    resp = client.post('/api/v1/task/', headers=headers, json=payload)
    assert resp.status_code == 200
    assert dispatch_client.service_finished.call_count == 1

    finished_args = dispatch_client.service_finished.call_args[0]
    assert finished_args[0] == task.sid
    # Mock objects are always one on conversion to int, being changed to this, means that it looked at the
    # mocked out heuristics to load the score.
    assert finished_args[2].result.score == 0
    assert heuristics.get.call_count == heuristics_count
def test_ingest_size_error(ingest_harness):
    """Files over the configured max size are dropped with a notification."""
    datastore, ingester, in_queue = ingest_harness

    # Build a submission whose file is one byte over the size limit.
    oversized = make_message(
        files={'size': ingester.config.submission.max_file_size + 1},
        params={'ignore_size': False, 'never_drop': False})
    file_obj = random_minimal_obj(File)
    file_obj.sha256 = oversized['files'][0]['sha256']
    datastore.file.save(oversized['files'][0]['sha256'], file_obj)
    oversized['notification'] = {'queue': 'drop_test'}

    in_queue.push(oversized)
    ingester.handle_ingest()

    # Nothing made it into the internal buffers.
    assert ingester.unique_queue.length() == 0
    assert ingester.ingest_queue.length() == 0

    # The drop was reported on the notification queue.
    queue_name = _notification_queue_prefix + oversized['notification']['queue']
    dropped = ingester.notification_queues[queue_name].pop()
    assert dropped is not None
def test_finish_minimal(client, dispatch_client):
    """A minimal result round-trips through the finish endpoint unchanged."""
    task = random_minimal_obj(Task)
    result = random_minimal_obj(Result)
    resp = client.post('/api/v1/task/', headers=headers, json={
        'task': task.as_primitives(),
        'result': result.as_primitives(),
        'freshen': False,
    })
    assert resp.status_code == 200
    assert dispatch_client.service_finished.call_count == 1

    finished_args = dispatch_client.service_finished.call_args[0]
    assert finished_args[0] == task.sid
    # Timestamps are stamped server-side; copy them before comparing the rest.
    sent = finished_args[2]
    result.archive_ts = sent.archive_ts
    result.expiry_ts = sent.expiry_ts
    result.created = sent.created
    assert sent == result
def test_dispatch_extracted(clean_redis, clean_datastore):
    """End-to-end dispatcher test: a finished service that reports an extracted
    file causes that new file to be dispatched as a follow-up job.
    """
    redis = clean_redis
    ds = clean_datastore

    # Setup the fake datastore: two file records, the second will be "extracted"
    # from the first during the test.
    file_hash = get_random_hash(64)
    second_file_hash = get_random_hash(64)

    for fh in [file_hash, second_file_hash]:
        obj = random_model_obj(models.file.File)
        obj.sha256 = fh
        ds.file.save(fh, obj)

    # Inject the fake submission pointing at the first file only.
    submission = random_model_obj(models.submission.Submission)
    submission.files = [dict(name='./file', sha256=file_hash)]
    sid = submission.sid = 'first-submission'

    disp = Dispatcher(ds, redis, redis)
    # ToggleTrue lets the dispatcher loops run a bounded number of iterations.
    disp.running = ToggleTrue()
    client = DispatchClient(ds, redis, redis)
    client.dispatcher_data_age = time.time()
    client.dispatcher_data.append(disp.instance_id)

    # Launch the submission and let the dispatcher process it once.
    client.dispatch_submission(submission)
    disp.pull_submissions()
    disp.service_worker(disp.process_queue_index(sid))

    # Finish one service extracting a file: the first job must be for the
    # original submission file.
    job = client.request_work('0', 'extract', '0')
    assert job.fileinfo.sha256 == file_hash
    assert job.filename == './file'
    new_result: Result = random_minimal_obj(Result)
    new_result.sha256 = file_hash
    new_result.response.service_name = 'extract'
    new_result.response.extracted = [
        dict(sha256=second_file_hash, name='second-*',
             description='abc', classification='U')
    ]
    client.service_finished(sid, 'extracted-done', new_result)

    # Process the result; two worker passes are needed so the extracted file
    # gets scheduled after the result is absorbed.
    disp.pull_service_results()
    disp.service_worker(disp.process_queue_index(sid))
    disp.service_worker(disp.process_queue_index(sid))

    # The follow-up job must be for the extracted file, with the name the
    # service reported.
    job = client.request_work('0', 'extract', '0')
    assert job.fileinfo.sha256 == second_file_hash
    assert job.filename == 'second-*'
def test_register_existing_heuristics(client, storage):
    """Registering a service whose heuristics already exist creates nothing."""
    expected_config = 'A CONFIG OBJECT FOR SURE'
    storage.get_service_with_delta.return_value = expected_config

    service = random_minimal_obj(Service).as_primitives()
    service['heuristics'] = [random_minimal_obj(Heuristic).as_primitives()]
    headers['Service-Name'] = service['name']
    headers['Service-Version'] = service['version']

    resp = client.post("/api/v1/service/register/", headers=headers, json=service)
    assert resp.status_code == 200
    assert storage.heuristic.save.call_count == 0

    body = resp.json['api_response']
    assert body['keep_alive'] is True
    assert len(body['new_heuristics']) == 0
    assert body['service_config'] == expected_config
def test_register_bad_service(client, storage):
    """A registration payload missing its 'name' field is rejected with 400."""
    service = random_minimal_obj(Service).as_primitives()
    headers['Service-Name'] = service['name']
    headers['Service-Version'] = service['version']
    # Strip the mandatory field after the headers are built.
    service.pop('name')
    resp = client.post("/api/v1/service/register/", headers=headers, json=service)
    assert resp.status_code == 400
def test_task_ignored_then_timeout(client, dispatch_client, storage):
    """A cached task is finished server-side, then the poll times out empty."""
    # Put a task "in the queue"; the second request_work call signals timeout.
    queued = random_minimal_obj(Task)
    queued.ignore_cache = False
    dispatch_client.request_work.side_effect = [queued, None]
    dispatch_client.service_data[service_name].timeout = 100
    dispatch_client.service_data[service_name].disable_cache = False

    resp = client.get('/api/v1/task/', headers=headers)
    assert resp.status_code == 200
    assert dispatch_client.service_finished.call_count == 1
    assert not resp.json['api_response']['task']
def test_ingest_simple(ingest_harness):
    """Garbled submissions are dropped while valid ones pass through with
    oversized metadata fields stripped and user groups resolved.
    """
    datastore, ingester, in_queue = ingest_harness

    # Create the submitting user so group resolution can find them.
    user = random_minimal_obj(User)
    user.name = 'user'
    custom_user_groups = ['users', 'the_user']
    user.groups = list(custom_user_groups)
    datastore.user.save('user', user)

    # Let the ingest loop run an extra time because we send two messages
    ingester.running.counter += 1

    # Send a message with a garbled sha, this should be dropped
    in_queue.push(make_message(files={'sha256': '1' * 10}))

    with pytest.raises(ValueError):
        # Process garbled message; volatile=True re-raises internal errors.
        ingester.try_run(volatile=True)

    # Send a message that is fine, but has an illegal metadata field
    in_queue.push(
        make_message(dict(
            metadata={
                # One byte past the allowed metadata length — must be stripped.
                'tobig': 'a' * (
                    ingester.ingester.config.submission.max_metadata_length + 2),
                'small': '100'
            }),
            params={
                'submitter': 'user',
                'groups': []
            }))

    # Process the valid message.
    ingester.try_run(volatile=True)

    mm = ingester.ingester
    # The only task that makes it through fits these parameters.
    task = mm.unique_queue.pop()
    assert task
    task = IngestTask(task)
    assert task.submission.files[
        0].sha256 == '0' * 64  # Only the valid sha passed through
    assert 'tobig' not in task.submission.metadata  # The bad metadata was stripped
    assert task.submission.metadata[
        'small'] == '100'  # The valid metadata is unchanged
    assert task.submission.params.submitter == 'user'
    # Empty groups in params fall back to the user's stored groups.
    assert task.submission.params.groups == custom_user_groups

    # None of the other tasks should reach the end
    assert mm.unique_queue.length() == 0
    assert mm.ingest_queue.length() == 0
def test_task_dispatch(client, dispatch_client, storage):
    """With no cached result available, the queued task reaches the service."""
    # Put a task "in the queue".
    queued = random_minimal_obj(Task)
    queued.ignore_cache = False
    # No stored (or empty) result exists, so the task cannot be short-circuited.
    storage.result.get_if_exists.return_value = None
    storage.emptyresult.get_if_exists.return_value = None
    dispatch_client.request_work.return_value = queued
    dispatch_client.service_data[service_name].timeout = 100
    dispatch_client.service_data[service_name].disable_cache = False

    resp = client.get('/api/v1/task/', headers=headers)
    assert resp.status_code == 200
    assert resp.json['api_response']['task'] == queued.as_primitives()
def test_register_service_auth_fail(client, storage):
    """A registration request with a bad API key is rejected with a 401."""
    storage.get_service_with_delta.return_value = 'A CONFIG OBJECT FOR SURE'
    service = random_minimal_obj(Service)
    bad_headers = dict(headers)
    bad_headers['X-APIKEY'] = '10'
    bad_headers['Service-Name'] = service.name
    bad_headers['Service-Version'] = service.version
    resp = client.post("/api/v1/service/register/", headers=bad_headers,
                       json=service.as_primitives())
    assert resp.status_code == 401
def test_ingest_groups_custom(ingest_harness):
    """Groups supplied explicitly in params override the user's stored groups."""
    datastore, ingester, in_queue = ingest_harness

    user = random_minimal_obj(User)
    user.name = 'user'
    user.groups = ['users', 'the_user']
    datastore.user.save('user', user)

    in_queue.push(make_message(params={'submitter': 'user', 'groups': ['group_b']}))
    ingester.handle_ingest()

    raw_task = ingester.unique_queue.pop()
    assert raw_task
    task = IngestTask(raw_task)
    assert task.submission.params.submitter == 'user'
    assert task.submission.params.groups == ['group_b']
("file", random_model_obj(File)), ("filescore", random_model_obj(FileScore)), ("heuristic", random_model_obj(Heuristic)), ("result", random_model_obj(Result)), ("service", random_model_obj(Service)), ("service_delta", random_model_obj(ServiceDelta)), ("signature", random_model_obj(Signature)), ("submission", random_model_obj(Submission)), ("submission_summary", random_model_obj(SubmissionSummary)), ("submission_tree", random_model_obj(SubmissionTree)), ("user", random_model_obj(User)), ("user_favorites", random_model_obj(UserFavorites)), ("user_settings", random_model_obj(UserSettings)), ("vm", random_model_obj(VM)), ("workflow", random_model_obj(Workflow)), ("alert_min", random_minimal_obj(Alert)), ("cached_file_min", random_minimal_obj(CachedFile)), ("emptyresult_min", random_minimal_obj(EmptyResult)), ("error_min", random_minimal_obj(Error)), ("file_min", random_minimal_obj(File)), ("filescore_min", random_minimal_obj(FileScore)), ("heuristic_min", random_minimal_obj(Heuristic)), ("result_min", random_minimal_obj(Result)), ("service_min", random_minimal_obj(Service)), ("service_delta_min", random_minimal_obj(ServiceDelta)), ("signature_min", random_minimal_obj(Signature)), ("submission_min", random_minimal_obj(Submission)), ("submission_summary_min", random_minimal_obj(SubmissionSummary)), ("submission_tree_min", random_minimal_obj(SubmissionTree)), ("user_min", random_minimal_obj(User)), ("user_favorites_min", random_minimal_obj(UserFavorites)),
def _create_results_for_file(ds, fs, f, possible_childs=None, log=None):
    """Create 2-5 random service results for file *f* and save them to the
    datastore, occasionally producing empty results. Returns the result keys.

    assumes `possible_childs` is a list of sha256 strings usable as extracted
    file references — TODO confirm against callers.
    """
    r_list = []
    services_done = []
    section_body_format = ["TEXT", "MEMORY_DUMP", "GRAPH_DATA", "URL", "JSON",
                           "KEY_VALUE"]
    section_depth_list = [[1, 1, 2, 3, 1], [1, 2, 1], [1, 2, 3, 1], [1, 2]]
    section_depth = random.choice(section_depth_list)
    for _ in range(random.randint(2, 5)):
        r = random_model_obj(Result)

        # Only one result per service per file
        while r.response.service_name in services_done:
            r.response.service_name = random.choice(list(SERVICES.keys()))

        for depth_id, section in enumerate(r.result.sections):
            # Cycle through the chosen depth pattern for nested sections.
            section.depth = section_depth[depth_id % len(section_depth)]
            section.body_format = random.choice(section_body_format)
            section.heuristic.heur_id = random.choice(
                [f"{r.response.service_name.upper()}.{x+1}" for x in range(5)])
            # Generate a body matching the chosen body format.
            if section.body_format == "GRAPH_DATA":
                cmap_min = 0
                cmap_max = random.choice([5, 10, 20])
                color_map_data = {
                    'type': 'colormap',
                    'data': {
                        'domain': [cmap_min, cmap_max],
                        'values': [random.random() * cmap_max for _ in range(50)]
                    }
                }
                section.body = json.dumps(color_map_data)
            elif section.body_format == "URL":
                data = [{"url": get_random_uri()}
                        for _ in range(random.randint(1, 4))]
                section.body = json.dumps(data)
            elif section.body_format in ["JSON", "KEY_VALUE"]:
                data = {get_random_word(): get_random_id()
                        for _ in range(random.randint(3, 9))}
                section.body = json.dumps(data)

        services_done.append(r.response.service_name)

        # Set the sha256
        r.sha256 = f

        if random.randint(1, 10) > 8:
            # Generate an empty result (~20% of the time)
            r_key = f"{r.build_key()}.e"
            ds.emptyresult.save(r_key, random_model_obj(EmptyResult))
        else:
            r_key = r.build_key()

            # Set random extracted files that are not top level
            if not possible_childs:
                r.response.extracted = []
            else:
                for e in r.response.extracted:
                    e.sha256 = random.choice(possible_childs)

            # Set random supplementary files that are not top level
            if r.response.supplementary:
                # Edit the first file to be an ontology file
                s = r.response.supplementary[0]

                # Create a random ontology
                onto = random_minimal_obj(ResultOntology).as_primitives(
                    strip_null=True)
                onto['header']['sha256'] = f
                onto['header']['service_name'] = r.response.service_name
                onto['header']['service_version'] = r.response.service_version
                onto['header']['service_tool_version'] = \
                    r.response.service_tool_version

                # Create its file record and store its content
                supp_file = random_model_obj(File)
                byte_str = json.dumps(onto).encode('utf-8')
                sha256 = hashlib.sha256(byte_str).hexdigest()
                supp_file.sha256 = sha256
                ds.file.save(sha256, supp_file)
                fs.put(sha256, byte_str)

                # Keep only the ontology file as supplementary
                s.sha256 = sha256
                s.name = "random.ontology"
                s.description = f"Random Ontology file for: {f}"
                r.response.supplementary = [s]

            ds.result.save(r_key, r)

        if log:
            log.info(f"\t\t\t{r_key}")
        r_list.append(r_key)

    return r_list
def make_result(file_hash, service):
    """Build a minimal Result attributed to *service* for *file_hash*."""
    result: Result = random_minimal_obj(Result)
    result.sha256 = file_hash
    result.response.service_name = service
    return result
def run(self):
    """Main loop of the random service result simulator: pull tasks from the
    service queues and answer each with either a random result (~90%) or a
    random error (~10%), updating service status and metrics as it goes.
    """
    self.log.info("Random service result generator ready!")
    self.log.info("Monitoring queues:")
    for q in self.queues:
        self.log.info(f"\t{q.name}")
    self.log.info("Waiting for messages...")
    while self.running:
        # Reset Idle flags for every enabled simulated service; the extra
        # 30 + 5 seconds acts as the status entry's validity window.
        for s in self.service_info:
            if s['enabled']:
                self.service_state_hash.set(
                    f"{self.client_id}_{s['name']}",
                    (s['name'], ServiceStatus.Idle, time.time() + 30 + 5))

        # Block up to 1s waiting for a task on any monitored queue.
        message = select(*self.queues, timeout=1)
        if not message:
            continue

        archive_ts = now_as_iso(
            self.config.datastore.ilm.days_until_archive * 24 * 60 * 60)
        if self.config.submission.dtl:
            expiry_ts = now_as_iso(self.config.submission.dtl * 24 * 60 * 60)
        else:
            expiry_ts = None

        queue, msg = message
        task = ServiceTask(msg)

        # Skip tasks already being worked on by another instance.
        if not self.dispatch_client.running_tasks.add(
                task.key(), task.as_primitives()):
            continue

        # Set service busy flag
        self.service_state_hash.set(
            f"{self.client_id}_{task.service_name}",
            (task.service_name, ServiceStatus.Running, time.time() + 30 + 5))

        # METRICS
        self.counters[task.service_name].increment('execute')
        # METRICS (not caching here so always miss)
        self.counters[task.service_name].increment('cache_miss')

        self.log.info(
            f"\tQueue {queue} received a new task for sid {task.sid}.")
        action = random.randint(1, 10)
        if action >= 2:
            # Success path: minimal result ~20% of successes, full otherwise.
            if action > 8:
                result = random_minimal_obj(Result)
            else:
                result = random_model_obj(Result)
            result.sha256 = task.fileinfo.sha256
            result.response.service_name = task.service_name
            result.archive_ts = archive_ts
            result.expiry_ts = expiry_ts
            # Trim child files as depth grows so deep tasks extract fewer files.
            result.response.extracted = result.response.extracted[task.
                                                                  depth + 2:]
            result.response.supplementary = result.response.supplementary[
                task.depth + 2:]
            result_key = Result.help_build_key(
                sha256=task.fileinfo.sha256,
                service_name=task.service_name,
                service_version='0',
                is_empty=result.is_empty())

            self.log.info(
                f"\t\tA result was generated for this task: {result_key}")

            # Ensure every referenced child file exists in datastore/filestore.
            new_files = result.response.extracted + result.response.supplementary
            for f in new_files:
                if not self.datastore.file.get(f.sha256):
                    random_file = random_model_obj(File)
                    random_file.archive_ts = archive_ts
                    random_file.expiry_ts = expiry_ts
                    random_file.sha256 = f.sha256
                    self.datastore.file.save(f.sha256, random_file)
                if not self.filestore.exists(f.sha256):
                    # Store the hash string itself as the file's content.
                    self.filestore.put(f.sha256, f.sha256)

            # Simulate service processing time.
            time.sleep(random.randint(0, 2))

            self.dispatch_client.service_finished(task.sid, result_key, result)

            # METRICS
            if result.result.score > 0:
                self.counters[task.service_name].increment('scored')
            else:
                self.counters[task.service_name].increment('not_scored')
        else:
            # Failure path: fabricate a random error for the task.
            error = random_model_obj(Error)
            error.archive_ts = archive_ts
            error.expiry_ts = expiry_ts
            error.sha256 = task.fileinfo.sha256
            error.response.service_name = task.service_name
            error.type = random.choice(
                ["EXCEPTION", "SERVICE DOWN", "SERVICE BUSY"])

            error_key = error.build_key('0')

            self.log.info(
                f"\t\tA {error.response.status}:{error.type} "
                f"error was generated for this task: {error_key}")

            self.dispatch_client.service_failed(task.sid, error_key, error)

            # METRICS
            if error.response.status == "FAIL_RECOVERABLE":
                self.counters[task.service_name].increment(
                    'fail_recoverable')
            else:
                self.counters[task.service_name].increment(
                    'fail_nonrecoverable')