def get_results(keys):
    """
    Fetch and format the results matching the given result keys.

    Keys that are not returned by the datastore are retried (up to
    `max_retry` attempts) with an exponentially growing pause between
    attempts. Results whose file is not listed in `data['file_infos']`, or
    that `format_result` filters out, are left out of the output.

    NOTE: `data`, `user` and `max_retry` are read from a scope outside of
          this function (not visible from here).

    Returns:
    {"results": {},               # Formatted results indexed by result key
     "missing_result_keys": []}   # Keys still missing after all attempts
    """
    fetched = {}
    attempt = 0
    while keys and attempt < max_retry:
        # No pause before the first attempt, then 2^-6s, 2^-5s, ...
        if attempt > 0:
            time.sleep(2 ** (attempt - 7))
        fetched.update(STORAGE.get_multiple_results(keys, Classification, as_obj=False))
        keys = [key for key in keys if key not in fetched]
        attempt += 1

    formatted = {}
    for key, raw_result in fetched.items():
        # The first 64 characters of a result key are the file's SHA256
        file_info = data['file_infos'].get(key[:64], None)
        if not file_info:
            continue

        visible = format_result(user['classification'], raw_result, file_info['classification'])
        if visible:
            formatted[key] = visible

    return {"results": formatted, "missing_result_keys": keys}
def get_file_results_for_service(sha256, service, **kwargs):
    """
    Get all the file results of a specific file
    for a specific service.

    Variables:
    sha256         => A resource locator for the file (SHA256)
    service        => Name of the service to get the results for

    Arguments:
    all         => if all argument is present, it will return all versions
                    NOTE: Max to 100 results...

    Data Block:
    None

    API call example:
    /api/v4/file/result/123456...654321/service_name/

    Result example:
    {"file_info": {},            # File info Block
     "results": {}}              # Full result list for the service
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response([], "This file does not exists", 404)

    if not (user and Classification.is_accessible(user['classification'], file_obj['classification'])):
        return make_api_response([], "You are not allowed to view this file", 403)

    # Only the most recent result is returned unless "all" is requested
    max_rows = 100 if "all" in request.args else 1
    res = STORAGE.result.search(f"id:{sha256}.{service}*", sort="created desc", fl="id",
                                rows=max_rows, access_control=user["access_control"], as_obj=False)
    keys = [k["id"] for k in res['items']]

    results = []
    for r in STORAGE.result.multiget(keys, as_dictionary=False, as_obj=False):
        # format_result may return a falsy value, in which case the result is skipped
        result = format_result(user['classification'], r, file_obj['classification'])
        if result:
            results.append(result)

    return make_api_response({"file_info": file_obj, "results": results})
def get_file_results(sha256, **kwargs):
    """
    Get all the file results of a specific file.

    Variables:
    sha256         => A resource locator for the file (SHA256)

    Arguments:
    None

    Data Block:
    None

    API call example:
    /api/v4/file/result/123456...654321/

    Result example:
    {"file_info": {},            # File info Block
     "results": {},              # Full result list
     "alternates": {},           # Alternate results, grouped by service name
     "errors": {},               # Full error list
     "parents": {},              # List of possible parents
     "childrens": {},            # List of children files
     "tags": {},                 # List tags generated
     "attack_matrix": {},        # Attack patterns, grouped by category
     "heuristics": {},           # Heuristics, grouped by score level
     "signatures": [],           # List of signatures raised
     "metadata": {},             # Metadata facets results
     "file_viewer_only": True }  # UI switch to disable features
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "This file does not exists", 404)

    if not (user and Classification.is_accessible(user['classification'], file_obj['classification'])):
        return make_api_response({}, "You are not allowed to view this file", 403)

    def heuristic_score(section):
        # Sections without a heuristic (missing key or empty value) sort as score 0
        heuristic = section.get('heuristic')
        return heuristic['score'] if heuristic else 0

    max_c12n = file_obj['classification']
    output = {
        "file_info": file_obj,
        "results": [],
        "tags": {},
        "attack_matrix": {},
        'heuristics': {},
        "signatures": set()
    }

    # Run the independent datastore lookups concurrently
    with concurrent.futures.ThreadPoolExecutor(4) as executor:
        res_ac = executor.submit(list_file_active_keys, sha256, user["access_control"])
        res_parents = executor.submit(list_file_parents, sha256, user["access_control"])
        res_children = executor.submit(list_file_childrens, sha256, user["access_control"])
        res_meta = executor.submit(STORAGE.get_file_submission_meta, sha256,
                                   config.ui.statistics.submission, user["access_control"])

    active_keys, alternates = res_ac.result()
    output['parents'] = res_parents.result()
    output['childrens'] = res_children.result()
    output['metadata'] = res_meta.result()

    output['alternates'] = {}
    for raw_result in STORAGE.result.multiget(active_keys, as_dictionary=False, as_obj=False):
        formatted = format_result(user['classification'], raw_result, file_obj['classification'],
                                  build_hierarchy=True)
        if formatted:
            # The file is reported at the highest classification of its visible results
            max_c12n = Classification.max_classification(max_c12n, formatted['classification'])
            output['results'].append(formatted)

    for alt in alternates:
        # The service version is the third dot-separated part of the result ID,
        # stored with "_" in place of "."
        alt['response']['service_version'] = alt['id'].split(".", 3)[2].replace("_", ".")
        output["alternates"].setdefault(alt['response']['service_name'], []).append(alt)

    output['errors'] = []
    output['file_viewer_only'] = True

    done_heuristics = set()
    for res in output['results']:
        # Highest scoring sections first so that each heuristic is reported
        # under the level of its highest score
        sorted_sections = sorted(res.get('result', {}).get('sections', []),
                                 key=heuristic_score, reverse=True)
        for sec in sorted_sections:
            h_type = "info"
            heuristic = sec.get('heuristic')

            if heuristic:
                # Get the heuristics data
                score = heuristic['score']
                if score < 0:
                    h_type = "safe"
                elif score < 300:
                    h_type = "info"
                elif score < 1000:
                    h_type = "suspicious"
                else:
                    h_type = "malicious"

                if heuristic['heur_id'] not in done_heuristics:
                    item = (heuristic['heur_id'], heuristic['name'])
                    output['heuristics'].setdefault(h_type, []).append(item)
                    done_heuristics.add(heuristic['heur_id'])

                # Process Attack matrix
                for attack in heuristic.get('attack', []):
                    attack_id = attack['attack_id']
                    for cat in attack['categories']:
                        cat_items = output['attack_matrix'].setdefault(cat, [])
                        item = (attack_id, attack['pattern'], h_type)
                        if item not in cat_items:
                            cat_items.append(item)

                # Process Signatures
                for signature in heuristic.get('signature', []):
                    output['signatures'].add((signature['name'], h_type, signature.get('safe', False)))

            # Process tags
            for t in sec['tags']:
                type_items = output["tags"].setdefault(t['type'], [])
                t_item = (t['value'], h_type, t['safelisted'])
                if t_item not in type_items:
                    type_items.append(t_item)

    # Sets are not JSON serializable
    output['signatures'] = list(output['signatures'])

    output['file_info']['classification'] = max_c12n
    return make_api_response(output)
def get_file_submission_results(sid, sha256, **kwargs):
    """
    Get all the results and errors of a specific file
    for a specific Submission ID

    Variables:
    sid         => Submission ID to get the result for
    sha256      => Resource locator to get the result for

    Arguments (POST only):
    extra_result_keys   =>  List of extra result keys to get
    extra_error_keys    =>  List of extra error keys to get

    Data Block:
    None

    Result example:
    {"errors": [],    # List of error blocks
     "file_info": {}, # File information block (md5, ...)
     "results": [],   # List of result blocks
     "tags": [] }     # List of generated tags
    """
    user = kwargs['user']

    # Make sure the submission exists and that the user may see it
    data = STORAGE.submission.get(sid, as_obj=False)
    if data is None:
        return make_api_response("", "Submission ID %s does not exists." % sid, 404)

    if not (data and user and Classification.is_accessible(user['classification'], data['classification'])):
        return make_api_response("", "You are not allowed to view the data of this submission", 403)

    # Skeleton of the response
    output = {
        "file_info": {},
        "results": [],
        "tags": {},
        "errors": [],
        "attack_matrix": {},
        'heuristics': {},
        "signatures": set()
    }

    # Extra keys - This is a live mode optimisation
    res_keys = data.get("results", [])
    err_keys = data.get("errors", [])

    submission_is_live = data['state'] != "completed" if (
        request.method == "POST" and request.json is not None) else False
    if submission_is_live:
        res_keys.extend(request.json.get("extra_result_keys", []))
        err_keys.extend(request.json.get("extra_error_keys", []))

    # Remove duplicated keys
    res_keys = list(set(res_keys))
    err_keys = list(set(err_keys))

    # Load the file and validate access to it
    temp_file = STORAGE.file.get(sha256, as_obj=False)
    if not temp_file:
        output['file_info']['sha256'] = sha256
        output['signatures'] = list(output['signatures'])
        output['missing'] = True
        return make_api_response(output, "The file you are trying to view is missing from the system", 404)

    if not Classification.is_accessible(user['classification'], temp_file['classification']):
        return make_api_response("", "You are not allowed to view the data of this file", 403)

    output['file_info'] = temp_file
    max_c12n = output['file_info']['classification']

    # Load the results that belong to this file
    file_result_keys = [key for key in res_keys if key.startswith(sha256)]
    raw_results = list(STORAGE.get_multiple_results(file_result_keys, cl_engine=Classification,
                                                    as_obj=False).values())
    visible_results = []
    for raw in raw_results:
        formatted = format_result(user['classification'], raw, temp_file['classification'],
                                  build_hierarchy=True)
        if not formatted:
            continue
        max_c12n = Classification.max_classification(max_c12n, formatted['classification'])
        visible_results.append(formatted)
    output['results'] = visible_results

    # Load the errors that belong to this file, keeping whatever could be found
    file_error_keys = [key for key in err_keys if key.startswith(sha256)]
    try:
        output['errors'] = STORAGE.error.multiget(file_error_keys, as_obj=False, as_dictionary=False)
    except MultiKeyError as e:
        LOGGER.warning(f"Trying to get multiple errors but some are missing: {str(e.keys)}")
        output['errors'] = e.partial_output

    output['metadata'] = STORAGE.get_file_submission_meta(sha256, config.ui.statistics.submission,
                                                          user["access_control"])

    # Rank of each level, used to keep the most severe level seen for a tag
    severity = {'info': 0, 'suspicious': 1, 'malicious': 2}

    for res in output['results']:
        for sec in res['result']['sections']:
            h_type = "info"

            if sec.get('heuristic', False):
                heuristic = sec['heuristic']

                # Level of the heuristic based on its score
                score = heuristic['score']
                if score >= 1000:
                    h_type = "malicious"
                elif score >= 100:
                    h_type = "suspicious"
                else:
                    h_type = "info"

                heur_item = (heuristic['heur_id'], heuristic['name'])
                level_items = output['heuristics'].setdefault(h_type, [])
                if heur_item not in level_items:
                    level_items.append(heur_item)

                # Attack matrix entries, grouped by category
                for attack in heuristic.get('attack', []):
                    attack_id = attack['attack_id']
                    for cat in attack['categories']:
                        cat_items = output['attack_matrix'].setdefault(cat, [])
                        attack_item = (attack_id, attack['pattern'], h_type)
                        if attack_item not in cat_items:
                            cat_items.append(attack_item)

                # Signatures
                for signature in heuristic.get('signature', []):
                    output['signatures'].add((signature['name'], h_type))

            # Tags, each value keeps the most severe level it was seen with
            for t in sec['tags']:
                values = output["tags"].setdefault(t['type'], {})
                known_level = values.get(t['value'], None)
                if known_level:
                    values[t['value']] = max(known_level, h_type, key=severity.__getitem__)
                else:
                    values[t['value']] = h_type

    # Flatten each {value: level} mapping into a list of (value, level) pairs
    for t_type in output["tags"]:
        output["tags"][t_type] = list(output['tags'][t_type].items())

    output['signatures'] = list(output['signatures'])

    output['file_info']['classification'] = max_c12n
    return make_api_response(output)
def get_multiple_service_results(**kwargs):
    """
    Get multiple result and error keys at the same time

    Variables:
    None

    Arguments:
    None

    Data Block:
    {"error": [],      #List of error keys to lookup
     "result": []      #List of result keys to lookup
    }

    Result example:
    {"error": {},      #Dictionary of error object matching the keys
     "result": {}      #Dictionary of result object matching the keys
    }
    """
    user = kwargs['user']
    # A request without a JSON body is handled as a request for no keys
    data = request.json or {}

    try:
        errors = STORAGE.error.multiget(data.get('error', []), as_dictionary=True, as_obj=False)
    except MultiKeyError as e:
        LOGGER.warning(f"Trying to get multiple errors but some are missing: {str(e.keys)}")
        errors = e.partial_output

    results = STORAGE.get_multiple_results(data.get('result', []), CLASSIFICATION, as_obj=False)

    # The first 64 characters of a result key are the SHA256 of its file
    try:
        file_infos = STORAGE.file.multiget(list({r_key[:64] for r_key in results}),
                                           as_dictionary=True, as_obj=False)
    except MultiKeyError as e:
        LOGGER.warning(f"Trying to get multiple files but some are missing: {str(e.keys)}")
        file_infos = e.partial_output

    visible_results = {}
    for r_key, r_raw in results.items():
        # Results whose file is unknown are formatted against the lowest classification
        file_c12n = file_infos.get(r_key[:64], {}).get('classification', CLASSIFICATION.UNRESTRICTED)
        r_value = format_result(user['classification'], r_raw, file_c12n, build_hierarchy=True)
        # format_result may return a falsy value, in which case the result is dropped
        if r_value:
            visible_results[r_key] = r_value

    out = {"error": errors, "result": visible_results}
    return make_api_response(out)
def get_service_result(cache_key, **kwargs):
    """
    Get the result for a given service cache key.

    Variables:
    cache_key         => Service result cache key
                         as SHA256.ServiceName.ServiceVersion.Configuration

    Arguments:
    None

    Data Block:
    None

    Result example:
    {"response": {                        # Service Response
       "milestones": {},                    # Timing object
       "supplementary": [],                 # Supplementary files
       "service_name": "Mcafee",            # Service Name
       "message": "",                       # Service error message
       "extracted": [],                     # Extracted files
       "service_version": "v0"},            # Service Version
     "result": {                          # Result objects
       "score": 1302,                       # Total score for the file
       "sections": [{                       # Result sections
         "body": "Text goes here",            # Body of the section (TEXT)
         "classification": "",                # Classification
         "links": [],                         # Links inside the section
         "title_text": "Title",               # Title of the section
         "depth": 0,                          # Depth (for Display purposes)
         "score": 500,                        # Section's score
         "body_format": null,                 # Body format
         "subsections": []                    # List of sub-sections
         }, ... ],
       "classification": "",                # Maximum classification for service
       "tags": [{                           # Generated Tags
         "usage": "IDENTIFICATION",           # Tag usage
         "value": "Tag Value",                # Tag value
         "type": "Tag Type",                  # Tag type
         "weight": 50,                        # Tag Weight
         "classification": ""                 # Tag Classification
         }, ...]
       }
    }
    """
    user = kwargs['user']
    data = STORAGE.get_single_result(cache_key, CLASSIFICATION, as_obj=False)

    if data is None:
        return make_api_response("", "Cache key %s does not exists." % cache_key, 404)

    # The first 64 characters of the cache key are the SHA256 of the file.
    # The file lookup can come back empty even though the result exists; in
    # that case the result is formatted against the lowest classification,
    # the same fallback used when looking up multiple results at once.
    cur_file = STORAGE.file.get(cache_key[:64], as_obj=False)
    file_c12n = cur_file['classification'] if cur_file else CLASSIFICATION.UNRESTRICTED

    data = format_result(user['classification'], data, file_c12n, build_hierarchy=True)
    if not data:
        return make_api_response("", "You are not allowed to view the results for this key", 403)

    return make_api_response(data)