def delete_submission(sid, **kwargs):
    """
    Delete a submission as well as all related files, results and errors

    Variables:
    sid         => Submission ID to be deleted

    Arguments:
    None

    Data Block:
    None

    Result example:
    {success: true}
    """
    user = kwargs['user']
    submission = STORAGE.submission.get(sid, as_obj=False)

    if not submission:
        return make_api_response("", f"There is no submission with sid: {sid}", 404)

    if Classification.is_accessible(user['classification'], submission['classification']) \
            and (submission['params']['submitter'] == user['uname'] or 'admin' in user['type']):
        STORAGE.delete_submission_tree_bulk(sid, Classification, transport=FILESTORE)
        STORAGE.submission.commit()
        return make_api_response({"success": True})
    else:
        return make_api_response("", "You are not allowed to delete this submission.", 403)
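# Hedged usage sketch (not part of this module): how a client could call the
# delete endpoint above with `requests`. The route `/api/v4/submission/<sid>/`,
# the base URL and the pre-authenticated session are assumptions; only the
# success/403/404 behaviour is taken from delete_submission() itself.
import requests

def example_delete_submission(session: requests.Session, base_url: str, sid: str) -> bool:
    resp = session.delete(f"{base_url}/api/v4/submission/{sid}/")
    resp.raise_for_status()  # raises on the 403/404 error responses seen above
    return resp.json().get("api_response", {}).get("success", False)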
def restore(**_):
    """
    Restore an old backup of the system configuration

    Variables:
    None

    Arguments:
    None

    Data Block:
    <SERVICE BACKUP>

    Result example:
    {'success': true}
    """
    data = request.data

    try:
        backup = yaml.safe_load(data)
        if "type" not in backup or "server" not in backup or "data" not in backup:
            return make_api_response("", err="Invalid service configuration backup.", status_code=400)

        if backup["server"] != config.ui.fqdn:
            return make_api_response(
                "", err="This backup was not created on this server, restore operation cancelled.",
                status_code=400)

        for service_name, service in backup['data'].items():
            # Grab the old value for a service
            old_service = STORAGE.get_service_with_delta(service_name, as_obj=False)

            # Restore the service
            for v_id, v_data in service['versions'].items():
                STORAGE.service.save(v_id, v_data)
            STORAGE.service_delta.save(service_name, service['config'])

            # Grab the new value for the service
            new_service = STORAGE.get_service_with_delta(service_name, as_obj=False)

            # Synchronize the sources if needed
            if old_service and old_service.get("update_config", {}).get("sources", None) is not None:
                synchronize_sources(
                    service_name,
                    old_service.get("update_config", {}).get("sources", []),
                    new_service.get("update_config", {}).get("sources", []))

        return make_api_response({"success": True})
    except ValueError as e:
        return make_api_response("", err=str(e), status_code=400)
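# A minimal backup document that would pass the validation in restore() above.
# Only the top-level keys ("type", "server", "data") and the per-service
# "versions"/"config" layout come from the code; all values are illustrative.
import yaml

EXAMPLE_BACKUP = yaml.safe_dump({
    "type": "backup",
    "server": "assemblyline-ui.example.com",  # must equal config.ui.fqdn on the target system
    "data": {
        "Extract": {
            "versions": {  # each entry is saved as a full service document
                "Extract_4.0.0": {"name": "Extract", "version": "4.0.0"},
            },
            "config": {"enabled": True},  # saved as the service delta
        },
    },
})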
def get_heuristic(heuristic_id, **kwargs):
    """
    Get a specific heuristic's detail from the system

    Variables:
    heuristic_id  => ID of the heuristic

    Arguments:
    None

    Data Block:
    None

    Result example:
    {"id": "AL_HEUR_001",          # Heuristic ID
     "filetype": ".*",             # Target file type
     "name": "HEURISTICS_NAME",    # Heuristic name
     "description": ""}            # Heuristic description
    """
    user = kwargs['user']

    h = STORAGE.heuristic.get(heuristic_id, as_obj=False)

    if not h:
        return make_api_response("", "Heuristic not found", 404)

    if user and Classification.is_accessible(user['classification'], h['classification']):
        # Always refresh stats when someone gets a heuristic
        h.update({'stats': STORAGE.get_stat_for_heuristic(heuristic_id)})
        return make_api_response(h)
    else:
        return make_api_response("", "You are not allowed to see this heuristic...", 403)
def get_default_service_list(srv_list=None, default_selection=None):
    if not default_selection:
        default_selection = DEFAULT_SRV_SEL
    if not srv_list:
        srv_list = STORAGE.list_all_services(as_obj=False, full=True)

    services = {}
    for item in srv_list:
        grp = item['category']

        if grp not in services:
            services[grp] = []

        services[grp].append({
            "name": item["name"],
            "category": grp,
            "selected": (grp in default_selection or item['name'] in default_selection),
            "is_external": item["is_external"]
        })

    return [{"name": k, "selected": k in default_selection, "services": v}
            for k, v in services.items()]
def request_replay(index, doc_id, **kwargs):
    """
    Request an alert or a submission to be transferred to another system

    Variables:
    index   => Type of document to be transferred (alert or submission)
    doc_id  => ID of the document to transfer

    Arguments:
    None

    Data Block:
    None

    Result example:
    {"success": true}
    """
    user = kwargs['user']

    if index not in ['alert', 'submission']:
        return make_api_response("", f"{index.upper()} is not a valid index for this API.", 400)

    index_ds = STORAGE.get_collection(index)
    doc = index_ds.get_if_exists(doc_id, as_obj=False)
    if not doc or not Classification.is_accessible(user['classification'], doc['classification']):
        return make_api_response(
            "", f"You are not allowed to modify the {index} with the following ID: {doc_id}", 403)

    operations = [(index_ds.UPDATE_SET, 'metadata.replay', REPLAY_REQUESTED)]

    return make_api_response({'success': index_ds.update(doc_id, operations)})
def load_user_settings(user):
    default_settings = get_default_user_settings(user)

    settings = STORAGE.user_settings.get(user['uname'], as_obj=False)
    srv_list = [x for x in STORAGE.list_all_services(as_obj=False, full=True) if x['enabled']]
    if not settings:
        def_srv_list = None
        settings = default_settings
    else:
        # Make sure all defaults are there
        for key, item in default_settings.items():
            if key not in settings:
                settings[key] = item

        # Remove all obsolete keys
        for key in list(settings.keys()):
            if key not in default_settings:
                del settings[key]

        def_srv_list = settings.get('services', {}).get('selected', None)

    settings['service_spec'] = get_default_service_spec(srv_list)
    settings['services'] = get_default_service_list(srv_list, def_srv_list)

    # Normalize the user's classification
    settings['classification'] = Classification.normalize_classification(settings['classification'])

    return settings
def get_results(keys):
    out = {}
    res = {}
    retry = 0
    while keys and retry < max_retry:
        if retry:
            time.sleep(2 ** (retry - 7))
        res.update(STORAGE.get_multiple_results(keys, Classification, as_obj=False))
        keys = [x for x in keys if x not in res]
        retry += 1

    results = {}
    for k, v in res.items():
        file_info = data['file_infos'].get(k[:64], None)
        if file_info:
            v = format_result(user['classification'], v, file_info['classification'])
            if v:
                results[k] = v

    out["results"] = results
    out["missing_result_keys"] = keys

    return out
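# Note: get_results() above is the nested helper from get_full_results(); it
# relies on `data`, `user` and `max_retry` from the enclosing scope. A worked
# example of its backoff: with max_retry = 10, sleeps happen for retry = 1..9.
backoff_schedule = [2 ** (retry - 7) for retry in range(1, 10)]
# -> [0.015625, 0.03125, 0.0625, 0.125, 0.25, 0.5, 1, 2, 4], roughly 8 seconds total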
def get_default_service_spec(srv_list=None):
    if not srv_list:
        srv_list = STORAGE.list_all_services(as_obj=False, full=True)

    return [{"name": x['name'], "params": x["submission_params"]}
            for x in srv_list if x["submission_params"]]
def get_systems_constants(**_):
    """
    Return the current system configuration constants, which include:
      * Priorities
      * File types
      * Service tag types
      * Service tag contexts

    Variables:
    None

    Arguments:
    None

    Data Block:
    None

    Result example:
    {
      "priorities": {},
      "file_types": [],
      "tag_types": [],
      "tag_contexts": []
    }
    """
    accepts_map = {}
    rejects_map = {}
    default_list = []

    for srv in STORAGE.list_all_services(as_obj=False):
        name = srv.get('name', None)
        if name:
            accept = srv.get('accepts', DEFAULT_SERVICE_ACCEPTS)
            reject = srv.get('rejects', DEFAULT_SERVICE_REJECTS)
            if accept == DEFAULT_SERVICE_ACCEPTS and reject == DEFAULT_SERVICE_REJECTS:
                default_list.append(name)
            else:
                accepts_map[name] = re.compile(accept)
                rejects_map[name] = re.compile(reject)

    out = {
        "max_priority": constants.MAX_PRIORITY,
        "priorities": constants.PRIORITIES,
        "file_types": [[t, sorted([x for x in accepts_map.keys()
                                   if re.match(accepts_map[x], t) and not re.match(rejects_map[x], t)])]
                       for t in sorted(constants.RECOGNIZED_TYPES.keys())],
        "tag_types": sorted(list(Tagging.flat_fields().keys()))
    }
    out['file_types'].insert(0, ["*", default_list])

    return make_api_response(out)
def get_or_create_summary(sid, results, user_classification, completed):
    user_classification = CLASSIFICATION.normalize_classification(user_classification, long_format=False)
    cache_key = f"{sid}_{user_classification}_with_sections"

    for illegal_char in [" ", ":", "/"]:
        cache_key = cache_key.replace(illegal_char, "")

    summary_cache = STORAGE.submission_summary.get_if_exists(cache_key, as_obj=False)

    if not summary_cache:
        summary = STORAGE.get_summary_from_keys(
            results, cl_engine=CLASSIFICATION, user_classification=user_classification,
            keep_heuristic_sections=True)

        expiry = now_as_iso(config.datastore.ilm.days_until_archive * 24 * 60 * 60)
        partial = not completed or "missing_results" in summary or "missing_files" in summary

        # Do not cache partial summary
        if not partial:
            summary_cache = {
                "attack_matrix": json.dumps(summary['attack_matrix']),
                "tags": json.dumps(summary['tags']),
                "expiry_ts": expiry,
                "heuristics": json.dumps(summary['heuristics']),
                "classification": summary['classification'],
                "filtered": summary["filtered"],
                "heuristic_sections": json.dumps(summary['heuristic_sections']),
                "heuristic_name_map": json.dumps(summary['heuristic_name_map'])
            }
            STORAGE.submission_summary.save(cache_key, summary_cache)

        return {
            "attack_matrix": summary['attack_matrix'],
            "tags": summary['tags'],
            "expiry_ts": expiry,
            "heuristics": summary['heuristics'],
            "classification": summary['classification'],
            "filtered": summary["filtered"],
            "partial": partial,
            "heuristic_sections": summary['heuristic_sections'],
            "heuristic_name_map": summary['heuristic_name_map']
        }

    return {
        "attack_matrix": json.loads(summary_cache['attack_matrix']),
        "tags": json.loads(summary_cache['tags']),
        "expiry_ts": summary_cache["expiry_ts"],
        "heuristics": json.loads(summary_cache['heuristics']),
        "classification": summary_cache['classification'],
        "filtered": summary_cache["filtered"],
        "partial": False,
        "heuristic_sections": json.loads(summary_cache['heuristic_sections']),
        "heuristic_name_map": json.loads(summary_cache['heuristic_name_map'])
    }
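# Worked example of the cache-key normalization above (sid and classification
# values are illustrative):
sid_example = "4nxrpBePUTj3c8FLJOBR5v"
norm_example = "TLP:W"  # assumed short-format classification
key_example = f"{sid_example}_{norm_example}_with_sections"
for illegal_char in [" ", ":", "/"]:
    key_example = key_example.replace(illegal_char, "")
# key_example == "4nxrpBePUTj3c8FLJOBR5v_TLPW_with_sections"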
def get_service(servicename, **_):
    """
    Load the configuration for a given service

    Variables:
    servicename => Name of the service to get the info

    Arguments:
    version => Specific version of the service to get

    Data Block:
    None

    Result example:
    {'accepts': '(archive|executable|java|android)/.*',
     'category': 'Extraction',
     'classpath': 'al_services.alsvc_extract.Extract',
     'config': {'DEFAULT_PW_LIST': ['password', 'infected']},
     'cpu_cores': 0.1,
     'description': "Extracts some stuff",
     'enabled': True,
     'name': 'Extract',
     'ram_mb': 256,
     'rejects': 'empty|metadata/.*',
     'stage': 'EXTRACT',
     'submission_params': [{'default': u'',
                            'name': 'password',
                            'type': 'str',
                            'value': u''},
                           {'default': False,
                            'name': 'extract_pe_sections',
                            'type': 'bool',
                            'value': False},
                           {'default': False,
                            'name': 'continue_after_extract',
                            'type': 'bool',
                            'value': False}],
     'timeout': 60}
    """
    version = request.args.get('version', None)

    service = STORAGE.get_service_with_delta(servicename, version=version, as_obj=False)
    if service:
        return make_api_response(service)
    else:
        return make_api_response("", err=f"{servicename} service does not exist", status_code=404)
def get_or_create_summary(sid, results, user_classification):
    cache_key = f"{sid}_{CLASSIFICATION.normalize_classification(user_classification, long_format=False)}"

    for illegal_char in [" ", ":", "/"]:
        cache_key = cache_key.replace(illegal_char, "")

    summary_cache = STORAGE.submission_summary.get_if_exists(cache_key, as_obj=False)

    if not summary_cache:
        summary = STORAGE.get_summary_from_keys(
            results, cl_engine=CLASSIFICATION, user_classification=user_classification)

        expiry = now_as_iso(config.datastore.ilm.days_until_archive * 24 * 60 * 60)

        # Do not cache summaries that have errors...
        if "missing_results" not in summary and "missing_files" not in summary:
            summary_cache = {
                "attack_matrix": json.dumps(summary['attack_matrix']),
                "tags": json.dumps(summary['tags']),
                "expiry_ts": expiry,
                "heuristics": json.dumps(summary['heuristics']),
                "classification": summary['classification'],
                "filtered": summary["filtered"]
            }
            STORAGE.submission_summary.save(cache_key, summary_cache)

        return {
            "attack_matrix": summary['attack_matrix'],
            "tags": summary['tags'],
            "expiry_ts": expiry,
            "heuristics": summary['heuristics'],
            "classification": summary['classification'],
            "filtered": summary["filtered"],
            "partial": "missing_results" in summary or "missing_files" in summary
        }

    return {
        "attack_matrix": json.loads(summary_cache['attack_matrix']),
        "tags": json.loads(summary_cache['tags']),
        "expiry_ts": summary_cache["expiry_ts"],
        "heuristics": json.loads(summary_cache['heuristics']),
        "classification": summary_cache['classification'],
        "filtered": summary_cache["filtered"],
        "partial": False
    }
def set_bulk_replay_pending(**kwargs):
    """
    Set the replay pending state on alerts or submissions matching the queries

    Variables:
    None

    Arguments:
    None

    Data Block:
    {
      "index": "alert",      # Target index (alert or submission)
      "query": "*:*",        # Main query
      "filter_queries": [],  # List of filter queries
      "max_docs": 100        # Maximum number of documents to change
    }

    Result example:
    {"success": true}
    """
    user = kwargs['user']
    data = request.json
    index = data.get('index', None)
    query = data.get('query', None)
    fqs = data.get('filter_queries', None)
    max_docs = data.get('max_docs', None)

    if index is None or query is None or fqs is None or max_docs is None:
        return make_api_response("", "Invalid data block.", 400)

    if index not in ['alert', 'submission']:
        return make_api_response("", f"{index.upper()} is not a valid index for this API.", 400)

    index_ds = STORAGE.get_collection(index)
    operations = [(index_ds.UPDATE_SET, 'metadata.replay', REPLAY_PENDING)]

    return make_api_response({
        'success': True,
        "count": index_ds.update_by_query(query, operations, filters=fqs, max_docs=max_docs,
                                          access_control=user['access_control'])
    })
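# Illustrative data block for set_bulk_replay_pending() above; the four keys
# are the ones required by its validation, the values here are examples only.
EXAMPLE_BULK_REPLAY_BODY = {
    "index": "alert",          # must be "alert" or "submission"
    "query": "*:*",            # main query
    "filter_queries": [],      # optional narrowing queries (empty list is valid)
    "max_docs": 100,           # cap on documents updated in one call
}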
def get_file_tree(sid, **kwargs):
    """
    Get the file hierarchy of a given Submission ID. This is
    an N-deep recursive process but is limited to the max depth
    set in the system settings.

    Variables:
    sid => Submission ID to get the tree for

    Arguments:
    None

    Data Block:
    None

    API call example:
    /api/v4/submission/tree/12345678-1234-1234-1234-1234567890AB/

    Result example:
    {                              # Dictionary of file blocks
     "1f...11": {                  # File sha256 (sha256)
       "score": 923,               # Score for the file
       "name": ["file.exe", ...],  # List of possible names for the file
       "children": {...}           # Dictionary of children file blocks
       }, ...
    }
    """
    user = kwargs['user']

    data = STORAGE.submission.get(sid, as_obj=False)
    if data is None:
        return make_api_response("", "Submission ID %s does not exist." % sid, 404)

    if data and user and Classification.is_accessible(user['classification'], data['classification']):
        return make_api_response(STORAGE.get_or_create_file_tree(
            data, config.submission.max_extraction_depth, cl_engine=Classification,
            user_classification=user['classification']))
    else:
        return make_api_response("", "You are not allowed to view the data of this submission", 403)
def check_for_service_updates(**_):
    """
    Check for potential updates for the given services.

    Variables:
    None

    Arguments:
    None

    Data Block:
    None

    Result example:
    {
      'ResultSample': {
        'latest_tag': 'v4.0.0dev163',
        'update_available': true
      }, ...
    }
    """
    output = {}

    for service in STORAGE.list_all_services(full=True, as_obj=False):
        update_info = latest_service_tags.get(service['name']) or {}
        if update_info:
            latest_tag = update_info.get(service['update_channel'], None)
            output[service['name']] = {
                "auth": update_info['auth'],
                "image": f"{update_info['image']}:{latest_tag or 'latest'}",
                "latest_tag": latest_tag,
                "update_available": latest_tag is not None
                                    and latest_tag.replace('stable', '') != service['version'],
                "updating": service_update.exists(service['name'])
            }

    return make_api_response(output)
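# Worked example of the update check above: the "stable" prefix is stripped
# from the latest tag before comparing it to the running version, so a tag of
# "stable4.2.0" against a service version of "4.2.0" reports no update, while
# "stable4.3.0" against "4.2.0" does (tag values are illustrative).
assert "stable4.2.0".replace("stable", "") == "4.2.0"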
def list_all_services(**_):
    """
    List all service configurations of the system.

    Variables:
    None

    Arguments:
    None

    Data Block:
    None

    Result example:
    [{'accepts': ".*",
      'category': 'Extraction',
      'classpath': 'al_services.alsvc_extract.Extract',
      'description': "Extracts some stuff",
      'enabled': True,
      'name': 'Extract',
      'rejects': 'empty',
      'stage': 'CORE'
      },
     ...]
    """
    resp = [{'accepts': x.get('accepts', None),
             'category': x.get('category', None),
             'description': x.get('description', None),
             'enabled': x.get('enabled', False),
             'name': x.get('name', None),
             'privileged': x.get('privileged', False),
             'rejects': x.get('rejects', None),
             'stage': x.get('stage', None),
             'version': x.get('version', None)}
            for x in STORAGE.list_all_services(full=True, as_obj=False)]

    return make_api_response(resp)
def get_signature_sources(**_):
    """
    Get all signature sources

    Variables:
    None

    Arguments:
    None

    Data Block:
    None

    Result example:
    {
     'Yara': [
       {
        "uri": "http://somesite/file_to_get",   # URI to fetch for parsing the rules
        "name": "signature_file.yar",           # Name of the file we will parse the rules as
        "username": null,                       # Username used to get to the URI
        "password": null,                       # Password used to get to the URI
        "header": {                             # Header sent during the request to the URI
          "X_TOKEN": "SOME RANDOM TOKEN"        # Example of header
        },
        "private_key": null,                    # Private key used to get to the URI
        "pattern": "^*.yar$"                    # Regex pattern used to get appropriate files from the URI
       },
       ...
     ],
     ...
    }
    """
    services = STORAGE.list_all_services(full=True, as_obj=False)

    out = {}
    for service in services:
        if service.get("update_config", {}).get("generates_signatures", False):
            out[service['name']] = service['update_config']['sources']

    return make_api_response(out)
def update_available(**_):
    """
    Check if updated signatures are available.

    Variables:
    None

    Arguments:
    last_update => ISO time of the last update
    type        => Signature type to check

    Data Block:
    None

    Result example:
    {"update_available": true}  # If updated rules are available
    """
    sig_type = request.args.get('type', '*')
    last_update = iso_to_epoch(request.args.get('last_update', '1970-01-01T00:00:00.000000Z'))
    last_modified = iso_to_epoch(STORAGE.get_signature_last_modified(sig_type))

    return make_api_response({"update_available": last_modified > last_update})
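# Hedged client sketch for the check above; the route
# `/api/v4/signature/update_available/` and the pre-authenticated session are
# assumptions, while the two query parameters are the ones read by
# update_available().
import requests

def example_update_available(session: requests.Session, base_url: str,
                             last_update_iso: str, sig_type: str = "yara") -> bool:
    resp = session.get(f"{base_url}/api/v4/signature/update_available/",
                       params={"last_update": last_update_iso, "type": sig_type})
    resp.raise_for_status()
    return resp.json()["api_response"]["update_available"]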
def get_signature(signature_id, **kwargs):
    """
    Get the details of a signature based on its ID and revision

    Variables:
    signature_id => Signature ID

    Arguments:
    None

    Data Block:
    None

    Result example:
    {}
    """
    user = kwargs['user']
    data = STORAGE.signature.get(signature_id, as_obj=False)

    if data:
        if not Classification.is_accessible(
                user['classification'], data.get('classification', Classification.UNRESTRICTED)):
            return make_api_response("", "You are not allowed to view this signature.", 403)

        # Always refresh stats when someone gets a signature
        data.update({'stats': STORAGE.get_stat_for_signature(
            signature_id, data['source'], data['name'], data['type'])})

        return make_api_response(data)
    else:
        return make_api_response("", f"Signature not found. ({signature_id})", 404)
def update_signature_source(service, name, **_):
    """
    Update a signature source by name for a given service

    Variables:
    service => Service for which we want to update the source
    name    => Name of the source you want to update

    Arguments:
    None

    Data Block:
    {
      "uri": "http://somesite/file_to_get",   # URI to fetch for parsing the rules
      "name": "signature_file.yar",           # Name of the file we will parse the rules as
      "username": null,                       # Username used to get to the URI
      "password": null,                       # Password used to get to the URI
      "header": {                             # Header sent during the request to the URI
        "X_TOKEN": "SOME RANDOM TOKEN"        # Example of header
      },
      "private_key": null,                    # Private key used to get to the URI
      "pattern": "^*.yar$"                    # Regex pattern used to get appropriate files from the URI
    }

    Result example:
    {"success": True/False}   # Whether the operation succeeded or not
    """
    data = request.json
    service_data = STORAGE.get_service_with_delta(service, as_obj=False)
    current_sources = service_data.get('update_config', {}).get('sources', [])

    # Ensure private_key (if any) ends with a \n
    if data.get('private_key', None) and not data['private_key'].endswith("\n"):
        data['private_key'] += "\n"

    if name != data['name']:
        return make_api_response(
            {"success": False},
            err="You are not allowed to change the source name.",
            status_code=400)

    if not service_data.get('update_config', {}).get('generates_signatures', False):
        return make_api_response(
            {"success": False},
            err="This service does not generate signatures, therefore you cannot update its sources.",
            status_code=400)

    if len(current_sources) == 0:
        return make_api_response(
            {"success": False},
            err="This service does not have any sources, therefore you cannot update any source.",
            status_code=400)

    new_sources = []
    found = False
    classification_changed = False
    for source in current_sources:
        if data['name'] == source['name']:
            new_sources.append(data)
            found = True
            classification_changed = data['default_classification'] != source['default_classification']
        else:
            new_sources.append(source)

    if not found:
        return make_api_response(
            {"success": False},
            err=f"Could not find source '{data['name']}' in service {service}.",
            status_code=404)

    service_delta = STORAGE.service_delta.get(service, as_obj=False)
    if service_delta.get('update_config') is None:
        service_delta['update_config'] = {"sources": new_sources}
    else:
        service_delta['update_config']['sources'] = new_sources

    # Has the classification changed?
    if classification_changed:
        class_norm = Classification.normalize_classification(data['default_classification'])
        STORAGE.signature.update_by_query(
            query=f'source:"{data["name"]}"',
            operations=[("SET", "classification", class_norm)])

    _reset_service_updates(service)

    # Save the updated service delta
    return make_api_response({"success": STORAGE.service_delta.save(service, service_delta)})
def download_signatures(**kwargs):
    """
    Download signatures from the system.

    Variables:
    None

    Arguments:
    query => Query used to filter the signatures
             Default: All deployed signatures

    Data Block:
    None

    Result example:
    <A zip file containing all signature files from the different sources>
    """
    user = kwargs['user']
    query = request.args.get('query', 'status:DEPLOYED')

    access = user['access_control']
    last_modified = STORAGE.get_signature_last_modified()

    query_hash = sha256(f'{query}.{access}.{last_modified}'.encode('utf-8')).hexdigest()

    with forge.get_cachestore('al_ui.signature') as signature_cache:
        response = _get_cached_signatures(signature_cache, query_hash)
        if response:
            return response

        with Lock(f"al_signatures_{query_hash[:7]}.zip", 30):
            response = _get_cached_signatures(signature_cache, query_hash)
            if response:
                return response

            output_files = {}

            keys = [k['id'] for k in STORAGE.signature.stream_search(
                query, fl="id", access_control=access, as_obj=False)]
            signature_list = sorted(
                STORAGE.signature.multiget(keys, as_dictionary=False, as_obj=False),
                key=lambda x: x['order'])

            for sig in signature_list:
                out_fname = f"{sig['type']}/{sig['source']}"
                output_files.setdefault(out_fname, [])
                output_files[out_fname].append(sig['data'])

            output_zip = InMemoryZip()
            for fname, data in output_files.items():
                output_zip.append(fname, "\n\n".join(data))

            rule_file_bin = output_zip.read()

            signature_cache.save(query_hash, rule_file_bin, ttl=DEFAULT_CACHE_TTL)

            return make_file_response(
                rule_file_bin, f"al_signatures_{query_hash[:7]}.zip",
                len(rule_file_bin), content_type="application/zip")
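# Worked example of the cache-key derivation above: the zip is cached under a
# digest of everything that can change its content (the query, the user's
# access control and the signatures' last modification time); values below
# are illustrative.
from hashlib import sha256 as _sha256

_query, _access, _last_modified = "status:DEPLOYED", "TLP:W", "2024-01-01T00:00:00.000000Z"
_query_hash = _sha256(f"{_query}.{_access}.{_last_modified}".encode("utf-8")).hexdigest()
# Any change to the query, access level or signature set yields a new key,
# so a stale zip is never served from the cache.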
def delete_signature_source(service, name, **_):
    """
    Delete a signature source by name for a given service

    Variables:
    service => Service from which we want to delete the source
    name    => Name of the source you want to remove

    Arguments:
    None

    Data Block:
    None

    Result example:
    {
     "source": True,      # Whether deleting the source succeeded or not
     "signatures": False  # Whether deleting the associated signatures succeeded or not
    }
    """
    service_data = STORAGE.get_service_with_delta(service, as_obj=False)
    current_sources = service_data.get('update_config', {}).get('sources', [])

    if not service_data.get('update_config', {}).get('generates_signatures', False):
        return make_api_response(
            {"success": False},
            err="This service does not generate signatures, therefore "
                "you cannot delete one of its sources.",
            status_code=400)

    new_sources = []
    found = False
    for source in current_sources:
        if name == source['name']:
            found = True
        else:
            new_sources.append(source)

    if not found:
        return make_api_response(
            {"success": False},
            err=f"Could not find source '{name}' in service {service}.",
            status_code=404)

    service_delta = STORAGE.service_delta.get(service, as_obj=False)
    if service_delta.get('update_config') is None:
        service_delta['update_config'] = {"sources": new_sources}
    else:
        service_delta['update_config']['sources'] = new_sources

    # Save the new sources
    success = STORAGE.service_delta.save(service, service_delta)

    if success:
        # Remove old source signatures
        STORAGE.signature.delete_matching(f'type:"{service.lower()}" AND source:"{name}"')

    _reset_service_updates(service)

    return make_api_response({"success": success})
def add_signature_source(service, **_):
    """
    Add a signature source for a given service

    Variables:
    service => Service to which we want to add the source

    Arguments:
    None

    Data Block:
    {
      "uri": "http://somesite/file_to_get",   # URI to fetch for parsing the rules
      "name": "signature_file.yar",           # Name of the file we will parse the rules as
      "username": null,                       # Username used to get to the URI
      "password": null,                       # Password used to get to the URI
      "header": {                             # Header sent during the request to the URI
        "X_TOKEN": "SOME RANDOM TOKEN"        # Example of header
      },
      "private_key": null,                    # Private key used to get to the URI
      "pattern": "^*.yar$"                    # Regex pattern used to get appropriate files from the URI
    }

    Result example:
    {"success": True/False}   # Whether the operation succeeded or not
    """
    try:
        data = request.json
    except (ValueError, KeyError):
        return make_api_response({"success": False}, err="Invalid source object data", status_code=400)

    # Ensure the data source name doesn't have spaces
    data['name'] = data['name'].replace(" ", "_")

    # Ensure private_key (if any) ends with a \n
    if data.get('private_key', None) and not data['private_key'].endswith("\n"):
        data['private_key'] += "\n"

    service_data = STORAGE.get_service_with_delta(service, as_obj=False)
    if not service_data.get('update_config', {}).get('generates_signatures', False):
        return make_api_response(
            {"success": False},
            err="This service does not generate signatures, therefore "
                "you cannot add a source to get signatures from.",
            status_code=400)

    current_sources = service_data.get('update_config', {}).get('sources', [])
    for source in current_sources:
        if source['name'] == data['name']:
            return make_api_response(
                {"success": False},
                err=f"Update source name already exists: {data['name']}",
                status_code=400)

    current_sources.append(data)
    service_delta = STORAGE.service_delta.get(service, as_obj=False)
    if service_delta.get('update_config') is None:
        service_delta['update_config'] = {"sources": current_sources}
    else:
        service_delta['update_config']['sources'] = current_sources

    _reset_service_updates(service)

    # Save the updated service delta
    return make_api_response({"success": STORAGE.service_delta.save(service, service_delta)})
def get_file_submission_results(sid, sha256, **kwargs):
    """
    Get all the results and errors of a specific file
    for a specific Submission ID

    Variables:
    sid     => Submission ID to get the result for
    sha256  => Resource locator to get the result for

    Arguments (POST only):
    extra_result_keys => List of extra result keys to get
    extra_error_keys  => List of extra error keys to get

    Data Block:
    None

    Result example:
    {"errors": [],     # List of error blocks
     "file_info": {},  # File information block (md5, ...)
     "results": [],    # List of result blocks
     "tags": []}       # List of generated tags
    """
    user = kwargs['user']

    # Check if the submission exists
    data = STORAGE.submission.get(sid, as_obj=False)
    if data is None:
        return make_api_response("", "Submission ID %s does not exist." % sid, 404)

    if data and user and Classification.is_accessible(user['classification'], data['classification']):
        # Prepare output
        output = {
            "file_info": {},
            "results": [],
            "tags": {},
            "errors": [],
            "attack_matrix": {},
            'heuristics': {},
            "signatures": set()
        }

        # Extra keys - This is a live mode optimisation
        res_keys = data.get("results", [])
        err_keys = data.get("errors", [])

        if request.method == "POST" and request.json is not None and data['state'] != "completed":
            extra_rkeys = request.json.get("extra_result_keys", [])
            extra_ekeys = request.json.get("extra_error_keys", [])

            # Load keys
            res_keys.extend(extra_rkeys)
            err_keys.extend(extra_ekeys)

        res_keys = list(set(res_keys))
        err_keys = list(set(err_keys))

        # Get the file, results and errors
        temp_file = STORAGE.file.get(sha256, as_obj=False)
        if not temp_file:
            output['file_info']['sha256'] = sha256
            output['signatures'] = list(output['signatures'])
            output['missing'] = True
            return make_api_response(
                output, "The file you are trying to view is missing from the system", 404)

        if not Classification.is_accessible(user['classification'], temp_file['classification']):
            return make_api_response("", "You are not allowed to view the data of this file", 403)

        output['file_info'] = temp_file
        max_c12n = output['file_info']['classification']

        temp_results = list(STORAGE.get_multiple_results(
            [x for x in res_keys if x.startswith(sha256)],
            cl_engine=Classification, as_obj=False).values())
        results = []
        for r in temp_results:
            r = format_result(user['classification'], r, temp_file['classification'],
                              build_hierarchy=True)
            if r:
                max_c12n = Classification.max_classification(max_c12n, r['classification'])
                results.append(r)
        output['results'] = results

        try:
            output['errors'] = STORAGE.error.multiget(
                [x for x in err_keys if x.startswith(sha256)],
                as_obj=False, as_dictionary=False)
        except MultiKeyError as e:
            LOGGER.warning(f"Trying to get multiple errors but some are missing: {str(e.keys)}")
            output['errors'] = e.partial_output

        output['metadata'] = STORAGE.get_file_submission_meta(
            sha256, config.ui.statistics.submission, user["access_control"])

        for res in output['results']:
            for sec in res['result']['sections']:
                h_type = "info"

                if sec.get('heuristic', False):
                    # Get the heuristics data
                    if sec['heuristic']['score'] < 100:
                        h_type = "info"
                    elif sec['heuristic']['score'] < 1000:
                        h_type = "suspicious"
                    else:
                        h_type = "malicious"

                    item = (sec['heuristic']['heur_id'], sec['heuristic']['name'])
                    output['heuristics'].setdefault(h_type, [])
                    if item not in output['heuristics'][h_type]:
                        output['heuristics'][h_type].append(item)

                    # Process the ATT&CK matrix
                    for attack in sec['heuristic'].get('attack', []):
                        attack_id = attack['attack_id']
                        for cat in attack['categories']:
                            output['attack_matrix'].setdefault(cat, [])
                            item = (attack_id, attack['pattern'], h_type)
                            if item not in output['attack_matrix'][cat]:
                                output['attack_matrix'][cat].append(item)

                    # Process signatures
                    for signature in sec['heuristic'].get('signature', []):
                        sig = (signature['name'], h_type)
                        if sig not in output['signatures']:
                            output['signatures'].add(sig)

                # Process tags
                for t in sec['tags']:
                    output["tags"].setdefault(t['type'], {})
                    current_htype = output["tags"][t['type']].get(t['value'], None)
                    if not current_htype:
                        output["tags"][t['type']][t['value']] = h_type
                    else:
                        if current_htype == 'malicious' or h_type == 'malicious':
                            output["tags"][t['type']][t['value']] = 'malicious'
                        elif current_htype == 'suspicious' or h_type == 'suspicious':
                            output["tags"][t['type']][t['value']] = 'suspicious'
                        else:
                            output["tags"][t['type']][t['value']] = 'info'

        for t_type in output["tags"]:
            output["tags"][t_type] = [(k, v) for k, v in output['tags'][t_type].items()]

        output['signatures'] = list(output['signatures'])

        output['file_info']['classification'] = max_c12n
        return make_api_response(output)
    else:
        return make_api_response("", "You are not allowed to view the data of this submission", 403)
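# The tag verdict merge at the end of get_file_submission_results() never
# downgrades a tag; a standalone sketch of the same precedence
# (malicious > suspicious > info), not used by the code above:
def merge_verdict(current: str, new: str) -> str:
    order = {"info": 0, "suspicious": 1, "malicious": 2}
    return current if order[current] >= order[new] else new

assert merge_verdict("suspicious", "info") == "suspicious"
assert merge_verdict("suspicious", "malicious") == "malicious"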
def set_service(servicename, **_):
    """
    Calculate the delta between the original service config and
    the posted service config, then save that delta as the current
    service delta.

    Variables:
    servicename => Name of the service to save

    Arguments:
    None

    Data Block:
    {'accepts': '(archive|executable|java|android)/.*',
     'category': 'Extraction',
     'classpath': 'al_services.alsvc_extract.Extract',
     'config': {'DEFAULT_PW_LIST': ['password', 'infected']},
     'cpu_cores': 0.1,
     'description': "Extract some stuff",
     'enabled': True,
     'name': 'Extract',
     'ram_mb': 256,
     'rejects': 'empty|metadata/.*',
     'stage': 'EXTRACT',
     'submission_params': [{'default': u'',
                            'name': 'password',
                            'type': 'str',
                            'value': u''},
                           {'default': False,
                            'name': 'extract_pe_sections',
                            'type': 'bool',
                            'value': False},
                           {'default': False,
                            'name': 'continue_after_extract',
                            'type': 'bool',
                            'value': False}],
     'timeout': 60}

    Result example:
    {"success": true}    # Saving the service info succeeded
    """
    data = request.json
    version = data.get('version', None)
    if not version:
        return make_api_response({"success": False},
                                 "The service you are trying to modify does not exist", 404)

    current_default = STORAGE.service.get(f"{servicename}_{version}", as_obj=False)
    current_service = STORAGE.get_service_with_delta(servicename, as_obj=False)

    if not current_default:
        return make_api_response({"success": False},
                                 "The service you are trying to modify does not exist", 404)

    if 'name' in data and servicename != data['name']:
        return make_api_response({"success": False}, "You cannot change the service name", 400)

    if current_service['version'] != version:
        # On version change, reset all container versions
        data['docker_config']['image'] = current_default['docker_config']['image']
        for k, v in data['dependencies'].items():
            if k in current_default['dependencies']:
                v['container']['image'] = current_default['dependencies'][k]['container']['image']

    delta = get_recursive_delta(current_default, data, stop_keys=['config'])
    delta['version'] = version

    removed_sources = {}
    # Check sources, especially to remove old sources
    if delta.get("update_config", {}).get("sources", None) is not None:
        delta["update_config"]["sources"] = preprocess_sources(delta["update_config"]["sources"])

        c_srcs = current_service.get('update_config', {}).get('sources', [])
        removed_sources = synchronize_sources(servicename, c_srcs, delta["update_config"]["sources"])

    # Save the new service delta
    success = STORAGE.service_delta.save(servicename, delta)

    if success:
        # Notify components watching for service config changes
        event_sender.send(servicename, {'operation': Operation.Modified, 'name': servicename})

    return make_api_response({"success": success, "removed_sources": removed_sources})
def ingest_single_file(**kwargs):
    """
    Ingest a single file, sha256 or URL in the system

    Note 1:
        If you are submitting a sha256 or a URL, you must use the application/json
        encoding, and one of the sha256 or url parameters must be included in the
        data block.

    Note 2:
        If you are submitting a file directly, you have to use multipart/form-data
        encoding; this was done to reduce the memory footprint and speed up file
        transfers.
        ** Read the documentation of the MIME multipart standard if your library
           does not support it **

        The multipart/form-data for sending binary has two parts:
            - The first part contains a JSON dump of the optional params and uses
              the name 'json'
            - The last part contains the file binary, uses the name 'bin' and
              includes a filename

    Note 3:
        The ingest API uses the user's default settings to submit files to the
        system unless these settings are overridden in the 'params' field. There
        are, however, exceptions to that rule: the deep_scan, ignore_filtering and
        ignore_cache fields are reset to False because they lead to dangerous
        behavior in the system.

    Variables:
    None

    Arguments:
    None

    Data Block (SHA256 or URL):
    {
      //REQUIRED VALUES: One of the following
      "sha256": "1234...CDEF"          # SHA256 hash of the file
      "url": "http://...",             # Url to fetch the file from

      //OPTIONAL VALUES
      "name": "file.exe",              # Name of the file

      "metadata": {                    # Submission metadata
        "key": val,                    # Key/Value pair for metadata parameters
      },

      "params": {                      # Submission parameters
        "key": val,                    # Key/Value pair for params that differ from the user's defaults
      },                               # DEFAULT: /api/v3/user/submission_params/<user>/

      "generate_alert": False,         # Generate an alert in our alerting system or not
      "notification_queue": None,      # Name of the notification queue
      "notification_threshold": None,  # Threshold for notification
    }

    Data Block (Binary):

    --0b34a3c50d3c02dd804a172329a0b2aa              <-- Randomly generated boundary for this http request
    Content-Disposition: form-data; name="json"     <-- JSON data blob part (only previous optional values valid)

    {"params": {"ignore_cache": true}, "generate_alert": true}
    --0b34a3c50d3c02dd804a172329a0b2aa              <-- Switch to next part, file part
    Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin"

    <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODED>

    --0b34a3c50d3c02dd804a172329a0b2aa--            <-- End of HTTP transmission

    Result example:
    {"ingest_id": <ID OF THE INGESTED FILE>}
    """
    user = kwargs['user']
    out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id())
    extracted_path = original_file = None

    try:
        # Get data block and binary blob
        if 'multipart/form-data' in request.content_type:
            if 'json' in request.values:
                data = json.loads(request.values['json'])
            else:
                data = {}
            binary = request.files['bin']
            name = data.get("name", binary.filename)
            sha256 = None
            url = None
        elif 'application/json' in request.content_type:
            data = request.json
            binary = None
            sha256 = data.get('sha256', None)
            url = data.get('url', None)
            name = data.get("name", None) or sha256 or os.path.basename(url) or None
        else:
            return make_api_response({}, "Invalid content type", 400)

        if not data:
            return make_api_response({}, "Missing data block", 400)

        # Get notification queue parameters
        notification_queue = data.get('notification_queue', None)
        notification_threshold = data.get('notification_threshold', None)
        if not isinstance(notification_threshold, int) and notification_threshold:
            return make_api_response({}, "notification_threshold should be an int", 400)

        # Get the file name
        if not name:
            return make_api_response({}, "Filename missing", 400)

        name = safe_str(os.path.basename(name))
        if not name:
            return make_api_response({}, "Invalid filename", 400)

        try:
            os.makedirs(out_dir)
        except Exception:
            pass
        original_file = out_file = os.path.join(out_dir, name)

        # Prepare variables
        extra_meta = {}
        fileinfo = None
        do_upload = True
        al_meta = {}

        # Load default user params
        s_params = ui_to_submission_params(load_user_settings(user))

        # Reset dangerous user settings to safe values
        s_params.update({
            'deep_scan': False,
            "priority": 150,
            "ignore_cache": False,
            "ignore_dynamic_recursion_prevention": False,
            "ignore_filtering": False,
            "type": "INGEST"
        })

        # Apply provided params
        s_params.update(data.get("params", {}))

        # Load the file
        if not binary:
            if sha256:
                fileinfo = STORAGE.file.get_if_exists(
                    sha256, as_obj=False, archive_access=config.datastore.ilm.update_archive)
                if FILESTORE.exists(sha256):
                    if fileinfo:
                        if not Classification.is_accessible(user['classification'],
                                                            fileinfo['classification']):
                            return make_api_response({}, "SHA256 does not exist in our datastore", 404)
                        else:
                            # File's classification must be applied at a minimum
                            s_params['classification'] = Classification.max_classification(
                                s_params['classification'], fileinfo['classification'])
                    else:
                        # File is in storage and the DB, no need to upload anymore
                        do_upload = False

                    # File exists in the filestore and the user has appropriate file access
                    FILESTORE.download(sha256, out_file)
                else:
                    return make_api_response({}, "SHA256 does not exist in our datastore", 404)
            else:
                if url:
                    if not config.ui.allow_url_submissions:
                        return make_api_response({}, "URL submissions are disabled in this system", 400)

                    try:
                        safe_download(url, out_file)
                        extra_meta['submitted_url'] = url
                    except FileTooBigException:
                        return make_api_response({}, "File too big to be scanned.", 400)
                    except InvalidUrlException:
                        return make_api_response({}, "Url provided is invalid.", 400)
                    except ForbiddenLocation:
                        return make_api_response({}, "Hostname in this URL cannot be resolved.", 400)
                else:
                    return make_api_response(
                        {}, "Missing file to scan. No binary, sha256 or url provided.", 400)
        else:
            binary.save(out_file)

        if do_upload and os.path.getsize(out_file) == 0:
            return make_api_response({}, err="File empty. Ingestion failed", status_code=400)

        # Apply group params if not specified
        if 'groups' not in s_params:
            s_params['groups'] = user['groups']

        # Get the generate_alert parameter
        generate_alert = data.get('generate_alert', s_params.get('generate_alert', False))
        if not isinstance(generate_alert, bool):
            return make_api_response({}, "generate_alert should be a boolean", 400)

        # Override final parameters
        s_params.update({
            'generate_alert': generate_alert,
            'max_extracted': config.core.ingester.default_max_extracted,
            'max_supplementary': config.core.ingester.default_max_supplementary,
            'priority': min(s_params.get("priority", 150), config.ui.ingest_max_priority),
            'submitter': user['uname']
        })

        # Enforce maximum DTL
        if config.submission.max_dtl > 0:
            s_params['ttl'] = min(int(s_params['ttl']), config.submission.max_dtl) \
                if int(s_params['ttl']) else config.submission.max_dtl

        # No need to re-calculate fileinfo if we already have it
        if not fileinfo:
            # Calculate the file digests
            fileinfo = IDENTIFY.fileinfo(out_file)

            # Validate file size
            if fileinfo['size'] > MAX_SIZE and not s_params.get('ignore_size', False):
                msg = f"File too large ({fileinfo['size']} > {MAX_SIZE}). Ingestion failed"
                return make_api_response({}, err=msg, status_code=413)
            elif fileinfo['size'] == 0:
                return make_api_response({}, err="File empty. Ingestion failed", status_code=400)

            # Decode cart if needed
            extracted_path, fileinfo, al_meta = decode_file(out_file, fileinfo, IDENTIFY)
            if extracted_path:
                out_file = extracted_path

        # Alter the filename and classification based on the CaRT output
        meta_classification = al_meta.pop('classification', s_params['classification'])
        if meta_classification != s_params['classification']:
            try:
                s_params['classification'] = Classification.max_classification(
                    meta_classification, s_params['classification'])
            except InvalidClassification as ic:
                return make_api_response(
                    {}, "The classification found inside the cart file cannot be merged with "
                        f"the classification the file was submitted as: {str(ic)}", 400)
        name = al_meta.pop('name', name)

        # Validate the ingest classification
        if not Classification.is_accessible(user['classification'], s_params['classification']):
            return make_api_response({}, "You cannot start a submission with higher "
                                         "classification than you're allowed to see", 400)

        # Freshen the file object
        expiry = now_as_iso(s_params['ttl'] * 24 * 60 * 60) if s_params.get('ttl', None) else None
        STORAGE.save_or_freshen_file(fileinfo['sha256'], fileinfo, expiry, s_params['classification'])

        # Save the file to the filestore if needs be; there is no need to test for
        # existence before uploading because the filestore already does that
        if do_upload:
            FILESTORE.upload(out_file, fileinfo['sha256'], location='far')

        # Setup the notification queue if needed
        if notification_queue:
            notification_params = {
                "queue": notification_queue,
                "threshold": notification_threshold
            }
        else:
            notification_params = {}

        # Load metadata, set some default values if they are missing and append the cart metadata
        ingest_id = get_random_id()
        metadata = flatten(data.get("metadata", {}))
        metadata['ingest_id'] = ingest_id
        metadata['type'] = s_params['type']
        metadata.update(al_meta)
        if 'ts' not in metadata:
            metadata['ts'] = now_as_iso()
        metadata.update(extra_meta)

        # Set the description if it does not exist
        s_params['description'] = s_params['description'] or \
            f"[{s_params['type']}] Inspection of file: {name}"

        # Create the submission object
        try:
            submission_obj = Submission({
                "sid": ingest_id,
                "files": [{
                    'name': name,
                    'sha256': fileinfo['sha256'],
                    'size': fileinfo['size']
                }],
                "notification": notification_params,
                "metadata": metadata,
                "params": s_params
            })
        except (ValueError, KeyError) as e:
            return make_api_response({}, err=str(e), status_code=400)

        # Send the submission object for processing
        ingest.push(submission_obj.as_primitives())
        submission_received(submission_obj)

        return make_api_response({"ingest_id": ingest_id})

    finally:
        # Cleanup files on disk
        try:
            if original_file and os.path.exists(original_file):
                os.unlink(original_file)
        except Exception:
            pass

        try:
            if extracted_path and os.path.exists(extracted_path):
                os.unlink(extracted_path)
        except Exception:
            pass

        try:
            if os.path.exists(out_dir):
                shutil.rmtree(out_dir, ignore_errors=True)
        except Exception:
            pass
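# Hedged client sketch for the binary ingestion path above, mirroring the
# multipart layout in the docstring (a 'json' form field plus a 'bin' file
# part). The route `/api/v4/ingest/` and the pre-authenticated session are
# assumptions; the params shown are examples only.
import json
import os
import requests

def example_ingest_file(session: requests.Session, base_url: str, path: str) -> str:
    with open(path, "rb") as fh:
        resp = session.post(
            f"{base_url}/api/v4/ingest/",
            data={"json": json.dumps({"params": {"priority": 100}, "generate_alert": True})},
            files={"bin": (os.path.basename(path), fh)},
        )
    resp.raise_for_status()
    return resp.json()["api_response"]["ingest_id"]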
def get_system_configuration(**_):
    """
    Return the current system configuration:
      * Max file size
      * Max number of embedded files
      * Extraction's max depth
      * and many others...

    Variables:
    None

    Arguments:
    None

    Data Block:
    None

    Result example:
    {
      "<CONFIGURATION_ITEM>": <CONFIGURATION_VALUE>
    }
    """
    def get_config_item(parent, cur_item):
        if "." in cur_item:
            key, remainder = cur_item.split(".", 1)
            return get_config_item(parent.get(key, {}), remainder)
        else:
            return parent.get(cur_item, None)

    cat_map = {}
    stg_map = {}

    for srv in STORAGE.list_all_services(as_obj=False):
        name = srv.get('name', None)
        cat = srv.get('category', None)
        if cat and name:
            temp_cat = cat_map.get(cat, [])
            temp_cat.append(name)
            cat_map[cat] = temp_cat

        stg = srv.get('stage', None)
        if stg and name:
            temp_stg = stg_map.get(stg, [])
            temp_stg.append(name)
            stg_map[stg] = temp_stg

    shareable_config_items = [
        "core.ingester.default_max_extracted",
        "core.ingester.default_max_supplementary",
        "services.categories",
        "services.min_service_workers",
        "services.preferred_update_channel",
        "services.stages",
        "submission.default_max_extracted",
        "submission.default_max_supplementary",
        "submission.dtl",
        "submission.max_extraction_depth",
        "submission.max_file_size",
        "submission.max_metadata_length",
        "submission.tag_types.attribution",
        "submission.tag_types.behavior",
        "submission.tag_types.ioc",
        "ui.allow_raw_downloads",
        "ui.audit",
        "ui.download_encoding",
        "ui.enforce_quota",
        "ui.ingest_max_priority"
    ]

    out = {}
    config_dict = config.as_primitives()

    for item in shareable_config_items:
        out[item] = get_config_item(config_dict, item)

    out["services.categories"] = [[x, cat_map.get(x, [])]
                                  for x in out.get("services.categories", None)]
    out["services.stages"] = [[x, stg_map.get(x, [])]
                              for x in out.get("services.stages", None)]

    return make_api_response(out)
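# Worked example of the dotted-path lookup implemented by get_config_item()
# above (data is illustrative):
_cfg = {"submission": {"max_file_size": 104857600}}
# get_config_item(_cfg, "submission.max_file_size")  -> 104857600
# get_config_item(_cfg, "submission.does_not_exist") -> None
# get_config_item(_cfg, "missing.max_file_size")     -> None (missing keys fall back to {})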
def get_systems_constants(**_):
    """
    Return the current system configuration constants, which include:
      * Priorities
      * File types
      * Service tag types
      * Service tag contexts

    Variables:
    None

    Arguments:
    None

    Data Block:
    None

    Result example:
    {
      "priorities": {},
      "file_types": [],
      "tag_types": [],
      "tag_contexts": []
    }
    """
    accepts_map = {}
    rejects_map = {}
    default_list = []

    recognized_types = set(IDENTIFY.trusted_mimes.values())
    recognized_types = recognized_types.union(set([x['al_type'] for x in IDENTIFY.magic_patterns]))
    with open(IDENTIFY.magic_file.split(":")[0]) as fh:
        for values in magic_custom.findall(fh.read()):
            recognized_types.add(values)
    with open(IDENTIFY.yara_file) as fh:
        for values in yara_custom.findall(fh.read()):
            recognized_types.add(values)

    for srv in STORAGE.list_all_services(as_obj=False):
        name = srv.get('name', None)
        if name:
            accept = srv.get('accepts', DEFAULT_SERVICE_ACCEPTS)
            reject = srv.get('rejects', DEFAULT_SERVICE_REJECTS)
            if accept == DEFAULT_SERVICE_ACCEPTS and reject == DEFAULT_SERVICE_REJECTS:
                default_list.append(name)
            else:
                accepts_map[name] = re.compile(accept)
                rejects_map[name] = re.compile(reject)

    out = {
        "max_priority": constants.MAX_PRIORITY,
        "priorities": constants.PRIORITIES,
        "file_types": [[t, sorted([x for x in accepts_map.keys()
                                   if re.match(accepts_map[x], t) and not re.match(rejects_map[x], t)])]
                       for t in sorted(list(recognized_types))],
        "tag_types": sorted(list(Tagging.flat_fields().keys()))
    }
    out['file_types'].insert(0, ["*", default_list])

    return make_api_response(out)
def get_report(submission_id, **kwargs):
    """
    Create a report for a submission based on its ID.

    Variables:
    submission_id => ID of the submission to create the report for

    Arguments:
    None

    Data Block:
    None

    Result example:
    {<THE REPORT>}
    """
    user = kwargs['user']
    submission = STORAGE.submission.get(submission_id, as_obj=False)
    if submission is None:
        return make_api_response("", "Submission ID %s does not exist." % submission_id, 404)

    submission['important_files'] = set()
    submission['report_filtered'] = False

    if user and Classification.is_accessible(user['classification'], submission['classification']):
        if submission['state'] != 'completed':
            return make_api_response("", f"It is too early to generate the report. "
                                         f"Submission ID {submission_id} is incomplete.", 425)

        tree = STORAGE.get_or_create_file_tree(submission, config.submission.max_extraction_depth,
                                               cl_engine=Classification,
                                               user_classification=user['classification'])
        submission['file_tree'] = tree['tree']
        submission['classification'] = Classification.max_classification(
            submission['classification'], tree['classification'])
        if tree['filtered']:
            submission['report_filtered'] = True

        errors = submission.pop('errors', None)
        submission['params']['services']['errors'] = list(set([x.split('.')[1] for x in errors]))

        def recurse_get_names(data):
            output = {}
            for key, val in data.items():
                output.setdefault(key, [])

                for res_name in val['name']:
                    output[key].append(res_name)

                children = recurse_get_names(val['children'])
                for c_key, c_names in children.items():
                    output.setdefault(c_key, [])
                    output[c_key].extend(c_names)

            return output

        name_map = recurse_get_names(tree['tree'])
        summary = get_or_create_summary(submission_id, submission.pop('results', []),
                                        user['classification'])
        tags = summary['tags']
        attack_matrix = summary['attack_matrix']
        heuristics = summary['heuristics']
        submission['classification'] = Classification.max_classification(
            submission['classification'], summary['classification'])
        if summary['filtered']:
            submission['report_filtered'] = True

        if summary['partial']:
            submission['report_partial'] = True

        submission['attack_matrix'] = {}
        submission['heuristics'] = {}
        submission['tags'] = {}

        # Process the ATT&CK matrix
        for item in attack_matrix:
            sha256 = item['key'][:64]

            for cat in item['categories']:
                submission['attack_matrix'].setdefault(cat, {})
                submission['attack_matrix'][cat].setdefault(
                    item['name'], {'h_type': item['h_type'], 'files': []})
                for name in name_map.get(sha256, [sha256]):
                    if (name, sha256) not in submission['attack_matrix'][cat][item['name']]['files']:
                        submission['attack_matrix'][cat][item['name']]['files'].append((name, sha256))
                        submission['important_files'].add(sha256)

        # Process the heuristics
        for h_type, items in heuristics.items():
            submission['heuristics'].setdefault(h_type, {})
            for item in items:
                sha256 = item['key'][:64]
                submission['heuristics'][h_type].setdefault(item['name'], [])
                for name in name_map.get(sha256, [sha256]):
                    if (name, sha256) not in submission['heuristics'][h_type][item['name']]:
                        submission['heuristics'][h_type][item['name']].append((name, sha256))
                        submission['important_files'].add(sha256)

        # Process the tags
        for t in tags:
            summary_type = None

            if t["type"] in config.submission.tag_types.behavior:
                summary_type = 'behaviors'
            elif t["type"] in config.submission.tag_types.attribution:
                summary_type = 'attributions'
            elif t["type"] in config.submission.tag_types.ioc:
                summary_type = 'indicators_of_compromise'

            if t['value'] == "" or summary_type is None:
                continue

            sha256 = t["key"][:64]

            # Tags
            submission['tags'].setdefault(summary_type, {})
            submission['tags'][summary_type].setdefault(t['type'], {})
            submission['tags'][summary_type][t['type']].setdefault(
                t['value'], {'h_type': t['h_type'], 'files': []})
            for name in name_map.get(sha256, [sha256]):
                if (name, sha256) not in submission['tags'][summary_type][t['type']][t['value']]['files']:
                    submission['tags'][summary_type][t['type']][t['value']]['files'].append((name, sha256))
                    submission['important_files'].add(sha256)

        submitted_sha256 = submission['files'][0]['sha256']
        submission["file_info"] = STORAGE.file.get(submitted_sha256, as_obj=False)
        if submitted_sha256 in submission['important_files']:
            submission['important_files'].remove(submitted_sha256)

        submission['important_files'] = list(submission['important_files'])

        return make_api_response(submission)
    else:
        return make_api_response("", "You are not allowed to view the data of this submission", 403)
def get_full_results(sid, **kwargs):
    """
    Get the full results for a given Submission ID. The difference
    between this and the get results API is that this one gets the
    actual values of the result and error keys instead of listing
    the keys.

    Variables:
    sid => Submission ID to get the full results for

    Arguments:
    None

    Data Block:
    None

    Result example:
    {"classification": "UNRESTRICTED",  # Access control for the submission
     "error_count": 0,                  # Number of errors in this submission
     "errors": [],                      # List of error blocks (see Get Service Error)
     "file_count": 4,                   # Number of files in this submission
     "files": [                         # List of submitted files
       ["FNAME", "sha256"], ...],       # Each file = List of name/sha256
     "file_infos": {                    # Dictionary of file info blocks
       "234...235": <<FILE_INFO>>,      # File info block
       ...},                            # Keyed by file's sha256
     "file_tree": {                     # File tree of the submission
       "333...7a3": {                   # File tree item
         "children": {},                # Recursive children of file tree item
         "name": ["file.exe", ...],     # List of possible names for the file
         "score": 0                     # Score of the file
         },
       ...},                            # Keyed by file's sha256
     "missing_error_keys": [],          # Errors that could not be fetched from the datastore
     "missing_result_keys": [],         # Results that could not be fetched from the datastore
     "results": [],                     # List of Result Blocks (see Get Service Result)
     "services": {                      # Service Block
       "selected": ["mcafee"],          # List of selected services
       "params": {},                    # Service specific parameters
       "excluded": []                   # List of excluded services
       },
     "state": "completed",              # State of the submission
     "submission": {                    # Submission Block
       "profile": true,                 # Should keep stats about execution?
       "description": "",               # Submission description
       "ttl": 30,                       # Submission days to live
       "ignore_filtering": false,       # Ignore filtering services?
       "priority": 1000,                # Submission priority, higher = faster
       "ignore_cache": true,            # Force reprocessing even if results exist?
       "groups": ["group", ...],        # List of groups with access
       "sid": "ab9...956",              # Submission ID
       "submitter": "user",             # Uname of the submitter
       "max_score": 1422},              # Score of the highest scoring file
     "times": {                         # Timing block
       "completed": "2014-...",         # Completed time
       "submitted": "2014-..."          # Submitted time
       }
    }
    """
    max_retry = 10

    def get_results(keys):
        out = {}
        res = {}
        retry = 0
        while keys and retry < max_retry:
            if retry:
                time.sleep(2 ** (retry - 7))
            res.update(STORAGE.get_multiple_results(keys, Classification, as_obj=False))
            keys = [x for x in keys if x not in res]
            retry += 1

        results = {}
        for k, v in res.items():
            file_info = data['file_infos'].get(k[:64], None)
            if file_info:
                v = format_result(user['classification'], v, file_info['classification'])
                if v:
                    results[k] = v

        out["results"] = results
        out["missing_result_keys"] = keys

        return out

    def get_errors(keys):
        out = {}
        err = {}
        missing = []
        retry = 0
        while keys and retry < max_retry:
            if retry:
                time.sleep(2 ** (retry - 7))
            try:
                err.update(STORAGE.error.multiget(keys, as_obj=False))
            except MultiKeyError as e:
                LOGGER.warning(f"Trying to get multiple errors but some are missing: {str(e.keys)}")
                err.update(e.partial_output)
                missing.extend(e.keys)
            keys = [x for x in keys if x not in err and x not in missing]
            retry += 1

        out["errors"] = err
        out["missing_error_keys"] = keys + missing

        return out

    def get_file_infos(keys):
        infos = {}
        missing = []
        retry = 0
        while keys and retry < max_retry:
            if retry:
                time.sleep(2 ** (retry - 7))
            try:
                infos.update(STORAGE.file.multiget(keys, as_obj=False))
            except MultiKeyError as e:
                LOGGER.warning(f"Trying to get multiple files but some are missing: {str(e.keys)}")
                infos.update(e.partial_output)
                missing.extend(e.keys)
            keys = [x for x in keys if x not in infos and x not in missing]
            retry += 1

        return infos, missing

    def recursive_flatten_tree(tree):
        sha256s = []

        for key, val in tree.items():
            sha256s.extend(recursive_flatten_tree(val.get('children', {})))
            if key not in sha256s:
                sha256s.append(key)

        return list(set(sha256s))

    user = kwargs['user']
    data = STORAGE.submission.get(sid, as_obj=False)
    if data is None:
        return make_api_response("", "Submission ID %s does not exist." % sid, 404)

    if data and user and Classification.is_accessible(user['classification'], data['classification']):
        res_keys = data.get("results", [])
        err_keys = data.get("errors", [])

        data['file_tree'] = STORAGE.get_or_create_file_tree(
            data, config.submission.max_extraction_depth, cl_engine=Classification,
            user_classification=user['classification'])['tree']
        data['file_infos'], data['missing_file_keys'] = get_file_infos(
            recursive_flatten_tree(data['file_tree']))
        data.update(get_results(res_keys))
        data.update(get_errors(err_keys))

        for r in data['results'].values():
            data['classification'] = Classification.max_classification(
                data['classification'], r['classification'])

        for f in data['file_infos'].values():
            data['classification'] = Classification.max_classification(
                data['classification'], f['classification'])

        return make_api_response(data)
    else:
        return make_api_response("", "You are not allowed to view the data of this submission", 403)
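# Worked example of recursive_flatten_tree() above (keys shortened for
# readability; real keys are 64-character sha256 digests):
_example_tree = {"aa..11": {"children": {"bb..22": {"children": {}}}}}
# recursive_flatten_tree(_example_tree) -> ["bb..22", "aa..11"]
# (children are collected before their parent, but the final list(set(...))
#  makes the returned order unspecified)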