def _merge_safe_hashes(new, old):
    try:
        # Check if the hash types match
        if new['type'] != old['type']:
            raise InvalidSafehash(f"Safe hash type mismatch: {new['type']} != {old['type']}")

        # Use the max classification
        old['classification'] = CLASSIFICATION.max_classification(
            old['classification'], new['classification'])

        # Update the updated time
        old['updated'] = now_as_iso()

        # Update the hashes
        old['hashes'].update(new['hashes'])

        # Update type specific info
        if old['type'] == 'file':
            old.setdefault('file', {})
            new_names = new.get('file', {}).pop('name', [])
            if 'name' in old['file']:
                for name in new_names:
                    if name not in old['file']['name']:
                        old['file']['name'].append(name)
            elif new_names:
                old['file']['name'] = new_names
            old['file'].update(new.get('file', {}))
        elif old['type'] == 'tag':
            old['tag'] = new['tag']

        # Merge the sources
        src_map = {x['name']: x for x in new['sources']}
        if not src_map:
            raise InvalidSafehash("No valid source found")

        old_src_map = {x['name']: x for x in old['sources']}
        for name, src in src_map.items():
            src_cl = src.get('classification', None)
            if src_cl:
                old['classification'] = CLASSIFICATION.max_classification(
                    old['classification'], src_cl)

            if name not in old_src_map:
                old_src_map[name] = src
            else:
                old_src = old_src_map[name]
                if old_src['type'] != src['type']:
                    raise InvalidSafehash(
                        f"Source {name} has a type conflict: {old_src['type']} != {src['type']}")

                for reason in src['reason']:
                    if reason not in old_src['reason']:
                        old_src['reason'].append(reason)

        # Store the merged sources as a list so the record stays serializable
        old['sources'] = list(old_src_map.values())
        return old
    except Exception as e:
        raise InvalidSafehash(f"Invalid data provided: {str(e)}")
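# Hedged usage sketch for _merge_safe_hashes() above. The records, hash digests and "TLP:W"
# markings are illustrative only, and the sketch assumes the module's CLASSIFICATION engine
# and now_as_iso() helper are available at call time.
_EXAMPLE_OLD_SAFEHASH = {
    'type': 'file',
    'classification': 'TLP:W',
    'updated': '2023-01-01T00:00:00.000000Z',
    'hashes': {'sha256': '0' * 64},
    'file': {'name': ['install.txt']},
    'sources': [{'name': 'NSRL', 'type': 'external',
                 'reason': ['Found as install.txt on default windows XP CD']}],
}
_EXAMPLE_NEW_SAFEHASH = {
    'type': 'file',
    'classification': 'TLP:W',
    'hashes': {'md5': '0' * 32, 'sha256': '0' * 64},
    'file': {'name': ['test.txt'], 'size': 12345},
    'sources': [{'name': 'NSRL', 'type': 'external',
                 'reason': ['Found as test.txt on default windows 10 CD']}],
}
# Per the merge logic above, _merge_safe_hashes(_EXAMPLE_NEW_SAFEHASH, _EXAMPLE_OLD_SAFEHASH)
# would add the md5 to 'hashes', keep both file names while gaining the size, and leave a
# single 'NSRL' source carrying both reasons; a type mismatch or an empty source list raises
# InvalidSafehash instead.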
def filter_sections(sections, user_classification, min_classification):
    max_classification = min_classification

    # Filter out sections the user does not have access to
    temp_sections = [s for s in sections
                     if CLASSIFICATION.is_accessible(user_classification, s['classification'])]
    final_sections = []
    for section in temp_sections:
        # TODO: Depth analysis should be done before returning sections
        try:
            # Recalculate the max classification using the currently accessible sections
            section['classification'] = CLASSIFICATION.max_classification(
                section['classification'], min_classification)
            max_classification = CLASSIFICATION.max_classification(
                section['classification'], max_classification)
        except InvalidClassification:
            continue

        if section['body_format'] in ["GRAPH_DATA", "URL", "JSON", "KEY_VALUE"] \
                and isinstance(section['body'], str):
            # Load JSON formatted sections
            try:
                section['body'] = json.loads(section['body'])
            except ValueError:
                pass

        # Change tags to a list
        section['tags'] = tag_dict_to_list(section['tags'])
        final_sections.append(section)

    # Tell the user a section was hidden
    if len(sections) != len(final_sections):
        hidden_section = dict(
            body="One of the sections produced by the service has been removed because you do not have "
                 "enough privileges to see its results. \n\nContact system administrators for more "
                 "information.",
            title_text="WARNING: Service sections have been sanitized",
            depth=0,
            classification=CLASSIFICATION.UNRESTRICTED,
            tags={},
            heuristic=None,
            body_format="TEXT")
        final_sections.insert(0, hidden_section)

    return max_classification, final_sections
def format_result(user_classification, r, min_classification, build_hierarchy=False):
    if not CLASSIFICATION.is_accessible(user_classification, min_classification):
        return None

    # Drop sections the user does not have access to and raise the others to at least min_classification
    max_classification, r['result']['sections'] = filter_sections(
        r['result']['sections'], user_classification, min_classification)

    # Drop supplementary and extracted files that the user does not have access to
    for ftype in ['supplementary', 'extracted']:
        r['response'][ftype] = [
            x for x in r['response'][ftype]
            if CLASSIFICATION.is_accessible(user_classification, x['classification'])]

    # Set the result classification to at least min_classification but no more than the
    # viewable result classification
    r['classification'] = CLASSIFICATION.max_classification(max_classification, min_classification)

    if build_hierarchy:
        try:
            section_hierarchy, _ = build_heirarchy_rec(r['result']['sections'])
            r['section_hierarchy'] = section_hierarchy['children']
        except InvalidSectionList:
            LOGGER.warning(f"Could not generate the section hierarchy for the "
                           f"{r['response']['service_name']} service. "
                           f"The old display method will be used.")

    return r
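# Hedged sketch of the minimal result shape format_result() works on. Real service results
# carry many more fields; the "TLP:W" markings stand in for whatever the deployed
# CLASSIFICATION engine uses and 'ExampleService' is a made-up service name.
def _example_format_result(user_classification):
    minimal_result = {
        'classification': 'TLP:W',
        'result': {
            'sections': [{
                'classification': 'TLP:W',
                'title_text': 'Example section',
                'body': '{"key": "value"}',
                'body_format': 'KEY_VALUE',
                'depth': 0,
                'heuristic': None,
                'tags': {},
            }],
        },
        'response': {
            'service_name': 'ExampleService',
            'supplementary': [],
            'extracted': [],
        },
    }
    # Sections above the user's clearance are dropped (filter_sections prepends a
    # sanitization warning when that happens) and KEY_VALUE bodies are JSON-decoded.
    return format_result(user_classification, minimal_result, 'TLP:W', build_hierarchy=False)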
def filter_sections(sections, user_classification, min_classification):
    max_classification = min_classification

    # Filter out sections the user does not have access to
    temp_sections = [s for s in sections
                     if CLASSIFICATION.is_accessible(user_classification, s['classification'])]
    final_sections = []
    for section in temp_sections:
        try:
            # Recalculate the max classification using the currently accessible sections
            section['classification'] = CLASSIFICATION.max_classification(
                section['classification'], min_classification)
            max_classification = CLASSIFICATION.max_classification(
                section['classification'], max_classification)
        except InvalidClassification:
            continue

        final_sections.append(fix_section_data(section))

    # Tell the user a section was hidden
    if len(sections) != len(final_sections):
        hidden_section = dict(
            body="One of the sections produced by the service has been removed because you do not have "
                 "enough privileges to see its results. \n\nContact system administrators for more "
                 "information.",
            title_text="WARNING: Service sections have been sanitized",
            depth=0,
            classification=CLASSIFICATION.UNRESTRICTED,
            tags=[],
            heuristic=None,
            body_format="TEXT")
        final_sections.insert(0, hidden_section)

    return max_classification, final_sections
def get_user_classification(self, group_dn_list):
    """
    Extend the user's classification information with the configured group information.

    NB: This is not fully implemented at this point

    :param group_dn_list: List of DNs the user is a member of
    :return: The user's resulting classification
    """
    ret = CLASSIFICATION.UNRESTRICTED

    for group_dn in group_dn_list:
        if group_dn in self.classification_mappings:
            ret = CLASSIFICATION.max_classification(ret, self.classification_mappings[group_dn])

    return ret
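# Hedged illustration of get_user_classification(). The LDAP group DN and the "TLP:A"
# mapping are hypothetical, and SimpleNamespace stands in for the real authenticator object
# that carries classification_mappings.
from types import SimpleNamespace

_example_ldap_authenticator = SimpleNamespace(classification_mappings={
    "CN=malware-analysts,OU=Groups,DC=example,DC=com": "TLP:A",
})
# get_user_classification(_example_ldap_authenticator,
#                         ["CN=malware-analysts,OU=Groups,DC=example,DC=com",
#                          "CN=unmapped-group,OU=Groups,DC=example,DC=com"])
# would return the max of UNRESTRICTED and "TLP:A"; DNs without a mapping are ignored.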
def get_file_results(sha256, **kwargs):
    """
    Get all the file results for a specific file.

    Variables:
    sha256         => A resource locator for the file (SHA256)

    Arguments:
    None

    Data Block:
    None

    API call example:
    /api/v4/file/result/123456...654321/

    Result example:
    {"file_info": {},            # File info block
     "results": {},              # Full result list
     "errors": {},               # Full error list
     "parents": {},              # List of possible parents
     "childrens": {},            # List of children files
     "tags": {},                 # List of generated tags
     "metadata": {},             # Metadata facets results
     "file_viewer_only": True }  # UI switch to disable features
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "This file does not exist", 404)

    if user and Classification.is_accessible(user['classification'], file_obj['classification']):
        max_c12n = file_obj['classification']
        output = {"file_info": file_obj, "results": [], "tags": {}, "attack_matrix": {},
                  'heuristics': {}, "signatures": set()}

        with concurrent.futures.ThreadPoolExecutor(4) as executor:
            res_ac = executor.submit(list_file_active_keys, sha256, user["access_control"])
            res_parents = executor.submit(list_file_parents, sha256, user["access_control"])
            res_children = executor.submit(list_file_childrens, sha256, user["access_control"])
            res_meta = executor.submit(STORAGE.get_file_submission_meta, sha256,
                                       config.ui.statistics.submission, user["access_control"])

        active_keys, alternates = res_ac.result()
        output['parents'] = res_parents.result()
        output['childrens'] = res_children.result()
        output['metadata'] = res_meta.result()
        output['alternates'] = {}

        raw_results = STORAGE.result.multiget(active_keys, as_dictionary=False, as_obj=False)
        for r in raw_results:
            formatted = format_result(user['classification'], r, file_obj['classification'],
                                      build_hierarchy=True)
            if formatted:
                max_c12n = Classification.max_classification(max_c12n, formatted['classification'])
                output['results'].append(formatted)

        for i in alternates:
            if i['response']['service_name'] not in output["alternates"]:
                output["alternates"][i['response']['service_name']] = []
            i['response']['service_version'] = i['id'].split(".", 3)[2].replace("_", ".")
            output["alternates"][i['response']['service_name']].append(i)

        output['errors'] = []
        output['file_viewer_only'] = True

        done_heuristics = set()
        for res in output['results']:
            sorted_sections = sorted(
                res.get('result', {}).get('sections', []),
                key=lambda i: i['heuristic']['score'] if i['heuristic'] is not None else 0,
                reverse=True)
            for sec in sorted_sections:
                h_type = "info"

                if sec.get('heuristic', False):
                    # Get the heuristic data
                    if sec['heuristic']['score'] < 0:
                        h_type = "safe"
                    elif sec['heuristic']['score'] < 300:
                        h_type = "info"
                    elif sec['heuristic']['score'] < 1000:
                        h_type = "suspicious"
                    else:
                        h_type = "malicious"

                    if sec['heuristic']['heur_id'] not in done_heuristics:
                        item = (sec['heuristic']['heur_id'], sec['heuristic']['name'])
                        output['heuristics'].setdefault(h_type, [])
                        output['heuristics'][h_type].append(item)
                        done_heuristics.add(sec['heuristic']['heur_id'])

                    # Process the ATT&CK matrix
                    for attack in sec['heuristic'].get('attack', []):
                        attack_id = attack['attack_id']
                        for cat in attack['categories']:
                            output['attack_matrix'].setdefault(cat, [])
                            item = (attack_id, attack['pattern'], h_type)
                            if item not in output['attack_matrix'][cat]:
                                output['attack_matrix'][cat].append(item)

                    # Process signatures
                    for signature in sec['heuristic'].get('signature', []):
                        sig = (signature['name'], h_type, signature.get('safe', False))
                        if sig not in output['signatures']:
                            output['signatures'].add(sig)

                # Process tags
                for t in sec['tags']:
                    output["tags"].setdefault(t['type'], [])
                    t_item = (t['value'], h_type, t['safelisted'])
                    if t_item not in output["tags"][t['type']]:
                        output["tags"][t['type']].append(t_item)

        output['signatures'] = list(output['signatures'])
        output['file_info']['classification'] = max_c12n
        return make_api_response(output)
    else:
        return make_api_response({}, "You are not allowed to view this file", 403)
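# Hedged client-side sketch for the file results API above. The endpoint path and the shape
# of the response come from the docstring; BASE_URL and the x-apikey header are assumptions
# about a particular deployment and authentication setup.
import requests

BASE_URL = "https://assemblyline.example.com"   # hypothetical server
HEADERS = {"x-apikey": "<user>:<api_key>"}      # assumption: API-key header auth

def fetch_file_results(sha256: str) -> dict:
    resp = requests.get(f"{BASE_URL}/api/v4/file/result/{sha256}/", headers=HEADERS)
    resp.raise_for_status()
    # Depending on the server's response wrapper, the payload documented above
    # (results, tags, heuristics, ...) may sit under an 'api_response' key.
    return resp.json()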
def download_file(sha256, **kwargs):
    """
    Download the file using the default encoding method.
    This API will force the browser into download mode.

    Variables:
    sha256       => A resource locator for the file (SHA256)

    Arguments (optional):
    encoding     => Type of encoding used for the resulting file
    name         => Name of the file to download
    sid          => Submission ID where the file is from
    password     => Password to protect the archive with (zip encoding only)

    Data Block:
    None

    API call example:
    /api/v4/file/download/123456...654321/

    Result example:
    <THE FILE BINARY ENCODED IN SPECIFIED FORMAT>
    """
    user = kwargs['user']
    file_obj = STORAGE.file.get(sha256, as_obj=False)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if user and Classification.is_accessible(user['classification'], file_obj['classification']):
        params = load_user_settings(user)

        name = request.args.get('name', sha256) or sha256
        name = os.path.basename(name)
        name = safe_str(name)

        sid = request.args.get('sid', None) or None
        submission = {}
        file_metadata = {}
        if sid is not None:
            submission = STORAGE.submission.get(sid, as_obj=False)
            if submission is None:
                submission = {}
            elif Classification.is_accessible(user['classification'], submission['classification']):
                file_metadata.update(unflatten(submission['metadata']))

        if Classification.enforce:
            submission_classification = submission.get('classification', file_obj['classification'])
            file_metadata['classification'] = Classification.max_classification(
                submission_classification, file_obj['classification'])

        encoding = request.args.get('encoding', params['download_encoding'])
        password = request.args.get('password', params['default_zip_password'])

        if encoding not in FILE_DOWNLOAD_ENCODINGS:
            return make_api_response(
                {}, f"{encoding.upper()} is not in the valid encoding types: {FILE_DOWNLOAD_ENCODINGS}", 403)
        if encoding == "raw" and not ALLOW_RAW_DOWNLOADS:
            return make_api_response(
                {}, "RAW file download has been disabled by administrators.", 403)
        if encoding == "zip":
            if not ALLOW_ZIP_DOWNLOADS:
                return make_api_response(
                    {}, "PROTECTED file download has been disabled by administrators.", 403)
            elif not password:
                return make_api_response(
                    {}, "No password given or retrieved from the user's settings.", 403)

        download_dir = None
        target_path = None

        # Create a temporary download location
        if encoding == 'zip':
            download_dir = tempfile.mkdtemp()
            download_path = os.path.join(download_dir, name)
        else:
            _, download_path = tempfile.mkstemp()

        try:
            downloaded_from = FILESTORE.download(sha256, download_path)
            if not downloaded_from:
                return make_api_response({}, "The file was not found in the system.", 404)

            # Encode the file
            if encoding == 'raw':
                target_path = download_path
            elif encoding == 'zip':
                name += '.zip'
                target_path = os.path.join(download_dir, name)
                subprocess.run(['zip', '-j', '--password', password, target_path, download_path],
                               capture_output=True)
            else:
                target_path, name = encode_file(download_path, name, file_metadata)

            return stream_file_response(open(target_path, 'rb'), name, os.path.getsize(target_path))
        finally:
            # Cleanup
            if target_path:
                if os.path.exists(target_path):
                    os.unlink(target_path)
            if download_path:
                if os.path.exists(download_path):
                    os.unlink(download_path)
            if download_dir:
                if os.path.exists(download_dir):
                    os.rmdir(download_dir)
    else:
        return make_api_response({}, "You are not allowed to download this file.", 403)
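# Hedged client-side sketch for the download API above: the encoding/name/sid/password query
# arguments are the ones the endpoint reads. The server URL and the x-apikey header are
# deployment assumptions.
import requests

def download_as_zip(sha256: str, password: str, dest: str = "sample.zip",
                    base_url: str = "https://assemblyline.example.com",
                    api_key: str = "<user>:<api_key>") -> None:
    params = {"encoding": "zip", "password": password, "name": "sample.bin"}
    with requests.get(f"{base_url}/api/v4/file/download/{sha256}/",
                      headers={"x-apikey": api_key}, params=params, stream=True) as resp:
        resp.raise_for_status()
        with open(dest, "wb") as out:
            for chunk in resp.iter_content(chunk_size=1 << 16):
                out.write(chunk)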
def ingest_single_file(**kwargs): """ Ingest a single file, sha256 or URL in the system Note 1: If you are submitting a sha256 or a URL, you must use the application/json encoding and one of sha256 or url parameters must be included in the data block. Note 2: If you are submitting a file directly, you have to use multipart/form-data encoding this was done to reduce the memory footprint and speedup file transfers ** Read documentation of mime multipart standard if your library does not support it** The multipart/form-data for sending binary has two parts: - The first part contains a JSON dump of the optional params and uses the name 'json' - The last part conatins the file binary, uses the name 'bin' and includes a filename Note 3: The ingest API uses the user's default settings to submit files to the system unless these settings are overridden in the 'params' field. Although, there are exceptions to that rule. Fields deep_scan, ignore_filtering, ignore_cache are resetted to False because the lead to dangerous behavior in the system. Variables: None Arguments: None Data Block (SHA256 or URL): { //REQUIRED VALUES: One of the following "sha256": "1234...CDEF" # SHA256 hash of the file "url": "http://...", # Url to fetch the file from //OPTIONAL VALUES "name": "file.exe", # Name of the file "metadata": { # Submission Metadata "key": val, # Key/Value pair for metadata parameters }, "params": { # Submission parameters "key": val, # Key/Value pair for params that differ from the user's defaults }, # DEFAULT: /api/v3/user/submission_params/<user>/ "generate_alert": False, # Generate an alert in our alerting system or not "notification_queue": None, # Name of the notification queue "notification_threshold": None, # Threshold for notification } Data Block (Binary): --0b34a3c50d3c02dd804a172329a0b2aa <-- Randomly generated boundary for this http request Content-Disposition: form-data; name="json" <-- JSON data blob part (only previous optional values valid) {"params": {"ignore_cache": true}, "generate_alert": true} --0b34a3c50d3c02dd804a172329a0b2aa <-- Switch to next part, file part Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin" <BINARY DATA OF THE FILE TO SCAN... 
DOES NOT NEED TO BE ENCODDED> --0b34a3c50d3c02dd804a172329a0b2aa-- <-- End of HTTP transmission Result example: { "ingest_id": <ID OF THE INGESTED FILE> } """ user = kwargs['user'] out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id()) extracted_path = original_file = None try: # Get data block and binary blob if 'multipart/form-data' in request.content_type: if 'json' in request.values: data = json.loads(request.values['json']) else: data = {} binary = request.files['bin'] name = data.get("name", binary.filename) sha256 = None url = None elif 'application/json' in request.content_type: data = request.json binary = None sha256 = data.get('sha256', None) url = data.get('url', None) name = data.get("name", None) or sha256 or os.path.basename(url) or None else: return make_api_response({}, "Invalid content type", 400) if not data: return make_api_response({}, "Missing data block", 400) # Get notification queue parameters notification_queue = data.get('notification_queue', None) notification_threshold = data.get('notification_threshold', None) if not isinstance(notification_threshold, int) and notification_threshold: return make_api_response( {}, "notification_threshold should be and int", 400) # Get file name if not name: return make_api_response({}, "Filename missing", 400) name = safe_str(os.path.basename(name)) if not name: return make_api_response({}, "Invalid filename", 400) try: os.makedirs(out_dir) except Exception: pass original_file = out_file = os.path.join(out_dir, name) # Prepare variables extra_meta = {} fileinfo = None do_upload = True al_meta = {} # Load default user params s_params = ui_to_submission_params(load_user_settings(user)) # Reset dangerous user settings to safe values s_params.update({ 'deep_scan': False, "priority": 150, "ignore_cache": False, "ignore_dynamic_recursion_prevention": False, "ignore_filtering": False, "type": "INGEST" }) # Apply provided params s_params.update(data.get("params", {})) # Load file if not binary: if sha256: fileinfo = STORAGE.file.get_if_exists( sha256, as_obj=False, archive_access=config.datastore.ilm.update_archive) if FILESTORE.exists(sha256): if fileinfo: if not Classification.is_accessible( user['classification'], fileinfo['classification']): return make_api_response( {}, "SHA256 does not exist in our datastore", 404) else: # File's classification must be applied at a minimum s_params[ 'classification'] = Classification.max_classification( s_params['classification'], fileinfo['classification']) else: # File is in storage and the DB no need to upload anymore do_upload = False # File exists in the filestore and the user has appropriate file access FILESTORE.download(sha256, out_file) else: return make_api_response( {}, "SHA256 does not exist in our datastore", 404) else: if url: if not config.ui.allow_url_submissions: return make_api_response( {}, "URL submissions are disabled in this system", 400) try: safe_download(url, out_file) extra_meta['submitted_url'] = url except FileTooBigException: return make_api_response({}, "File too big to be scanned.", 400) except InvalidUrlException: return make_api_response({}, "Url provided is invalid.", 400) except ForbiddenLocation: return make_api_response( {}, "Hostname in this URL cannot be resolved.", 400) else: return make_api_response( {}, "Missing file to scan. No binary, sha256 or url provided.", 400) else: binary.save(out_file) if do_upload and os.path.getsize(out_file) == 0: return make_api_response({}, err="File empty. 
Ingestion failed", status_code=400) # Apply group params if not specified if 'groups' not in s_params: s_params['groups'] = user['groups'] # Get generate alert parameter generate_alert = data.get('generate_alert', s_params.get('generate_alert', False)) if not isinstance(generate_alert, bool): return make_api_response({}, "generate_alert should be a boolean", 400) # Override final parameters s_params.update({ 'generate_alert': generate_alert, 'max_extracted': config.core.ingester.default_max_extracted, 'max_supplementary': config.core.ingester.default_max_supplementary, 'priority': min(s_params.get("priority", 150), config.ui.ingest_max_priority), 'submitter': user['uname'] }) # Enforce maximum DTL if config.submission.max_dtl > 0: s_params['ttl'] = min(int( s_params['ttl']), config.submission.max_dtl) if int( s_params['ttl']) else config.submission.max_dtl # No need to re-calculate fileinfo if we have it already if not fileinfo: # Calculate file digest fileinfo = IDENTIFY.fileinfo(out_file) # Validate file size if fileinfo['size'] > MAX_SIZE and not s_params.get( 'ignore_size', False): msg = f"File too large ({fileinfo['size']} > {MAX_SIZE}). Ingestion failed" return make_api_response({}, err=msg, status_code=413) elif fileinfo['size'] == 0: return make_api_response({}, err="File empty. Ingestion failed", status_code=400) # Decode cart if needed extracted_path, fileinfo, al_meta = decode_file( out_file, fileinfo, IDENTIFY) if extracted_path: out_file = extracted_path # Alter filename and classification based on CaRT output meta_classification = al_meta.pop('classification', s_params['classification']) if meta_classification != s_params['classification']: try: s_params['classification'] = Classification.max_classification( meta_classification, s_params['classification']) except InvalidClassification as ic: return make_api_response( {}, "The classification found inside the cart file cannot be merged with " f"the classification the file was submitted as: {str(ic)}", 400) name = al_meta.pop('name', name) # Validate ingest classification if not Classification.is_accessible(user['classification'], s_params['classification']): return make_api_response( {}, "You cannot start a submission with higher " "classification then you're allowed to see", 400) # Freshen file object expiry = now_as_iso(s_params['ttl'] * 24 * 60 * 60) if s_params.get('ttl', None) else None STORAGE.save_or_freshen_file(fileinfo['sha256'], fileinfo, expiry, s_params['classification']) # Save the file to the filestore if needs be # also no need to test if exist before upload because it already does that if do_upload: FILESTORE.upload(out_file, fileinfo['sha256'], location='far') # Setup notification queue if needed if notification_queue: notification_params = { "queue": notification_queue, "threshold": notification_threshold } else: notification_params = {} # Load metadata, setup some default values if they are missing and append the cart metadata ingest_id = get_random_id() metadata = flatten(data.get("metadata", {})) metadata['ingest_id'] = ingest_id metadata['type'] = s_params['type'] metadata.update(al_meta) if 'ts' not in metadata: metadata['ts'] = now_as_iso() metadata.update(extra_meta) # Set description if it does not exists s_params['description'] = s_params[ 'description'] or f"[{s_params['type']}] Inspection of file: {name}" # Create submission object try: submission_obj = Submission({ "sid": ingest_id, "files": [{ 'name': name, 'sha256': fileinfo['sha256'], 'size': fileinfo['size'] }], "notification": 
notification_params, "metadata": metadata, "params": s_params }) except (ValueError, KeyError) as e: return make_api_response({}, err=str(e), status_code=400) # Send submission object for processing ingest.push(submission_obj.as_primitives()) submission_received(submission_obj) return make_api_response({"ingest_id": ingest_id}) finally: # Cleanup files on disk try: if original_file and os.path.exists(original_file): os.unlink(original_file) except Exception: pass try: if extracted_path and os.path.exists(extracted_path): os.unlink(extracted_path) except Exception: pass try: if os.path.exists(out_dir): shutil.rmtree(out_dir, ignore_errors=True) except Exception: pass
def submit(**kwargs): """ Submit a single file, sha256 or url for analysis Note 1: If you are submitting a sh256 or a URL, you must use the application/json encoding and one of sha256 or url parameters must be included in the data block. Note 2: If you are submitting a file directly, you have to use multipart/form-data encoding this was done to reduce the memory footprint and speedup file transfers ** Read documentation of mime multipart standard if your library does not support it** The multipart/form-data for sending binary has two parts: - The first part contains a JSON dump of the optional params and uses the name 'json' - The last part conatins the file binary, uses the name 'bin' and includes a filename Variables: None Arguments: None Data Block (SHA256 or URL): { // REQUIRED: One of the two following "sha256": "123...DEF", # SHA256 hash of the file already in the datastore "url": "http://...", # Url to fetch the file from // OPTIONAL VALUES "name": "file.exe", # Name of the file to scan otherwise the sha256 or base file of the url "metadata": { # Submission metadata "key": val, # Key/Value pair metadata values }, "params": { # Submission parameters "key": val, # Key/Value pair for params that different then defaults }, # Default params can be fetch at /api/v3/user/submission_params/<user>/ } Data Block (Binary): --0b34a3c50d3c02dd804a172329a0b2aa <-- Randomly generated boundary for this http request Content-Disposition: form-data; name="json" <-- JSON data blob part (only previous optional values valid) {"metadata": {"hello": "world"}} --0b34a3c50d3c02dd804a172329a0b2aa <-- Switch to next part, file part Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin" <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODDED> --0b34a3c50d3c02dd804a172329a0b2aa-- <-- End of HTTP transmission Result example: <Submission message object as a json dictionary> """ user = kwargs['user'] out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id()) quota_error = check_submission_quota(user) if quota_error: return make_api_response("", quota_error, 503) submit_result = None try: # Get data block and binary blob if 'multipart/form-data' in request.content_type: if 'json' in request.values: data = json.loads(request.values['json']) else: data = {} binary = request.files['bin'] name = data.get("name", binary.filename) sha256 = None url = None elif 'application/json' in request.content_type: data = request.json binary = None sha256 = data.get('sha256', None) url = data.get('url', None) name = data.get("name", None) or sha256 or os.path.basename(url) or None else: return make_api_response({}, "Invalid content type", 400) if data is None: return make_api_response({}, "Missing data block", 400) if not name: return make_api_response({}, "Filename missing", 400) name = safe_str(os.path.basename(name)) if not name: return make_api_response({}, "Invalid filename", 400) # Create task object if "ui_params" in data: s_params = ui_to_submission_params(data['ui_params']) else: s_params = ui_to_submission_params(load_user_settings(user)) s_params.update(data.get("params", {})) if 'groups' not in s_params: s_params['groups'] = user['groups'] s_params['quota_item'] = True s_params['submitter'] = user['uname'] if not s_params['description']: s_params['description'] = "Inspection of file: %s" % name # Enforce maximum DTL if config.submission.max_dtl > 0: s_params['ttl'] = min(int( s_params['ttl']), config.submission.max_dtl) if int( s_params['ttl']) else config.submission.max_dtl if not 
Classification.is_accessible(user['classification'], s_params['classification']): return make_api_response( {}, "You cannot start a scan with higher " "classification then you're allowed to see", 400) # Prepare the output directory try: os.makedirs(out_dir) except Exception: pass out_file = os.path.join(out_dir, name) # Get the output file extra_meta = {} if not binary: if sha256: fileinfo = STORAGE.file.get_if_exists( sha256, as_obj=False, archive_access=config.datastore.ilm.update_archive) if FILESTORE.exists(sha256): if fileinfo: if not Classification.is_accessible( user['classification'], fileinfo['classification']): return make_api_response( {}, "SHA256 does not exist in our datastore", 404) else: # File's classification must be applied at a minimum s_params[ 'classification'] = Classification.max_classification( s_params['classification'], fileinfo['classification']) # File exists in the filestore and the user has appropriate file access FILESTORE.download(sha256, out_file) else: return make_api_response( {}, "SHA256 does not exist in our datastore", 404) else: if url: if not config.ui.allow_url_submissions: return make_api_response( {}, "URL submissions are disabled in this system", 400) try: safe_download(url, out_file) extra_meta['submitted_url'] = url except FileTooBigException: return make_api_response({}, "File too big to be scanned.", 400) except InvalidUrlException: return make_api_response({}, "Url provided is invalid.", 400) except ForbiddenLocation: return make_api_response( {}, "Hostname in this URL cannot be resolved.", 400) else: return make_api_response( {}, "Missing file to scan. No binary, sha256 or url provided.", 400) else: with open(out_file, "wb") as my_file: my_file.write(binary.read()) try: metadata = flatten(data.get('metadata', {})) metadata.update(extra_meta) submission_obj = Submission({ "files": [], "metadata": metadata, "params": s_params }) except (ValueError, KeyError) as e: return make_api_response("", err=str(e), status_code=400) # Submit the task to the system try: submit_result = SubmissionClient(datastore=STORAGE, filestore=FILESTORE, config=config, identify=IDENTIFY)\ .submit(submission_obj, local_files=[out_file]) submission_received(submission_obj) except SubmissionException as e: return make_api_response("", err=str(e), status_code=400) return make_api_response(submit_result.as_primitives()) finally: if submit_result is None: decrement_submission_quota(user) try: # noinspection PyUnboundLocalVariable os.unlink(out_file) except Exception: pass try: shutil.rmtree(out_dir, ignore_errors=True) except Exception: pass
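# Hedged client-side sketch for the submit API above, using the application/json encoding
# with a sha256 that is already in the datastore. The /api/v4/submit/ path, server URL and
# auth header are assumptions; the body fields follow the docstring.
import requests

def submit_existing_file(sha256: str,
                         base_url: str = "https://assemblyline.example.com",
                         api_key: str = "<user>:<api_key>") -> dict:
    body = {
        "sha256": sha256,
        "name": "resubmitted.bin",
        "metadata": {"hello": "world"},
        "params": {"deep_scan": True},   # overrides the user's default submission params
    }
    resp = requests.post(f"{base_url}/api/v4/submit/",
                         headers={"x-apikey": api_key}, json=body)
    resp.raise_for_status()
    return resp.json()   # submission message object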
def get_report(submission_id, **kwargs): """ Create a report for a submission based on its ID. Variables: submission_id -> ID of the submission to create the report for Arguments: None Data Block: None Result example: { <THE REPORT> } """ user = kwargs['user'] submission = STORAGE.submission.get(submission_id, as_obj=False) if submission is None: return make_api_response("", "Submission ID %s does not exists." % submission_id, 404) submission['important_files'] = set() submission['report_filtered'] = False if user and Classification.is_accessible(user['classification'], submission['classification']): if submission['state'] != 'completed': return make_api_response("", f"It is too early to generate the report. " f"Submission ID {submission_id} is incomplete.", 425) tree = STORAGE.get_or_create_file_tree(submission, config.submission.max_extraction_depth, cl_engine=Classification, user_classification=user['classification']) submission['file_tree'] = tree['tree'] submission['classification'] = Classification.max_classification(submission['classification'], tree['classification']) if tree['filtered']: submission['report_filtered'] = True errors = submission.pop('errors', None) submission['params']['services']['errors'] = list(set([x.split('.')[1] for x in errors])) def recurse_get_names(data): output = {} for key, val in data.items(): output.setdefault(key, []) for res_name in val['name']: output[key].append(res_name) children = recurse_get_names(val['children']) for c_key, c_names in children.items(): output.setdefault(c_key, []) output[c_key].extend(c_names) return output name_map = recurse_get_names(tree['tree']) summary = get_or_create_summary(submission_id, submission.pop('results', []), user['classification'], submission['state'] == "completed") tags = [t for t in summary['tags'] if not t['safelisted']] attack_matrix = summary['attack_matrix'] heuristics = summary['heuristics'] submission['classification'] = Classification.max_classification(submission['classification'], summary['classification']) if summary['filtered']: submission['report_filtered'] = True if summary['partial']: submission['report_partial'] = True submission['heuristic_sections'] = cleanup_heuristic_sections(summary['heuristic_sections']) submission['heuristic_name_map'] = summary['heuristic_name_map'] submission['attack_matrix'] = {} submission['heuristics'] = {} submission['tags'] = {} # Process attack matrix for item in attack_matrix: sha256 = item['key'][:64] for cat in item['categories']: submission['attack_matrix'].setdefault(cat, {}) submission['attack_matrix'][cat].setdefault(item['name'], {'h_type': item['h_type'], 'files': []}) for name in name_map.get(sha256, [sha256]): if (name, sha256) not in submission['attack_matrix'][cat][item['name']]['files']: submission['attack_matrix'][cat][item['name']]['files'].append((name, sha256)) submission['important_files'].add(sha256) # Process heuristics for h_type, items in heuristics.items(): submission['heuristics'].setdefault(h_type, {}) for item in items: sha256 = item['key'][:64] submission['heuristics'][h_type].setdefault(item['name'], []) for name in name_map.get(sha256, [sha256]): if (name, sha256) not in submission['heuristics'][h_type][item['name']]: submission['heuristics'][h_type][item['name']].append((name, sha256)) submission['important_files'].add(sha256) # Process tags for t in tags: summary_type = None if t["type"] in config.submission.tag_types.behavior: summary_type = 'behaviors' elif t["type"] in config.submission.tag_types.attribution: summary_type = 
'attributions' elif t["type"] in config.submission.tag_types.ioc: summary_type = 'indicators_of_compromise' if t['value'] == "" or summary_type is None: continue sha256 = t["key"][:64] # Tags submission['tags'].setdefault(summary_type, {}) submission['tags'][summary_type].setdefault(t['type'], {}) submission['tags'][summary_type][t['type']].setdefault(t['value'], {'h_type': t['h_type'], 'files': []}) if HEUR_RANK_MAP[submission['tags'][summary_type][t['type']][t['value']]['h_type']] < \ HEUR_RANK_MAP[t['h_type']]: submission['tags'][summary_type][t['type']][t['value']]['h_type'] = t['h_type'] for name in name_map.get(sha256, [sha256]): if (name, sha256) not in submission['tags'][summary_type][t['type']][t['value']]['files']: submission['tags'][summary_type][t['type']][t['value']]['files'].append((name, sha256)) submission['important_files'].add(sha256) submitted_sha256 = submission['files'][0]['sha256'] submission["file_info"] = STORAGE.file.get(submitted_sha256, as_obj=False) if submitted_sha256 in submission['important_files']: submission['important_files'].remove(submitted_sha256) submission['important_files'] = list(submission['important_files']) return make_api_response(submission) else: return make_api_response("", "You are not allowed to view the data of this submission", 403)
def get_file_submission_results(sid, sha256, **kwargs): """ Get the all the results and errors of a specific file for a specific Submission ID Variables: sid => Submission ID to get the result for sha256 => Resource locator to get the result for Arguments (POST only): extra_result_keys => List of extra result keys to get extra_error_keys => List of extra error keys to get Data Block: None Result example: {"errors": [], # List of error blocks "file_info": {}, # File information block (md5, ...) "results": [], # List of result blocks "tags": [] } # List of generated tags """ user = kwargs['user'] # Check if submission exist data = STORAGE.submission.get(sid, as_obj=False) if data is None: return make_api_response("", "Submission ID %s does not exists." % sid, 404) if data and user and Classification.is_accessible(user['classification'], data['classification']): # Prepare output output = { "file_info": {}, "results": [], "tags": {}, "errors": [], "attack_matrix": {}, 'heuristics': {}, "signatures": set() } # Extra keys - This is a live mode optimisation res_keys = data.get("results", []) err_keys = data.get("errors", []) if request.method == "POST" and data['state'] != "completed": try: req_data = request.json extra_rkeys = req_data.get("extra_result_keys", []) extra_ekeys = req_data.get("extra_error_keys", []) # Load keys res_keys.extend(extra_rkeys) err_keys.extend(extra_ekeys) except BadRequest: pass res_keys = list(set(res_keys)) err_keys = list(set(err_keys)) # Get File, results and errors temp_file = STORAGE.file.get(sha256, as_obj=False) if not temp_file: output['file_info']['sha256'] = sha256 output['signatures'] = list(output['signatures']) output['missing'] = True return make_api_response(output, "The file you are trying to view is missing from the system", 404) if not Classification.is_accessible(user['classification'], temp_file['classification']): return make_api_response("", "You are not allowed to view the data of this file", 403) output['file_info'] = temp_file max_c12n = output['file_info']['classification'] temp_results = list(STORAGE.get_multiple_results([x for x in res_keys if x.startswith(sha256)], cl_engine=Classification, as_obj=False).values()) results = [] for r in temp_results: r = format_result(user['classification'], r, temp_file['classification'], build_hierarchy=True) if r: max_c12n = Classification.max_classification(max_c12n, r['classification']) results.append(r) output['results'] = results try: output['errors'] = STORAGE.error.multiget([x for x in err_keys if x.startswith(sha256)], as_obj=False, as_dictionary=False) except MultiKeyError as e: LOGGER.warning(f"Trying to get multiple errors but some are missing: {str(e.keys)}") output['errors'] = e.partial_output output['metadata'] = STORAGE.get_file_submission_meta(sha256, config.ui.statistics.submission, user["access_control"]) done_heuristics = set() for res in output['results']: sorted_sections = sorted(res.get('result', {}).get('sections', []), key=lambda i: i['heuristic']['score'] if i['heuristic'] is not None else 0, reverse=True) for sec in sorted_sections: h_type = "info" if sec.get('heuristic', False): # Get the heuristics data if sec['heuristic']['score'] < 0: h_type = "safe" elif sec['heuristic']['score'] < 300: h_type = "info" elif sec['heuristic']['score'] < 1000: h_type = "suspicious" else: h_type = "malicious" if sec['heuristic']['heur_id'] not in done_heuristics: item = (sec['heuristic']['heur_id'], sec['heuristic']['name']) output['heuristics'].setdefault(h_type, []) 
output['heuristics'][h_type].append(item) done_heuristics.add(sec['heuristic']['heur_id']) # Process Attack matrix for attack in sec['heuristic'].get('attack', []): attack_id = attack['attack_id'] for cat in attack['categories']: output['attack_matrix'].setdefault(cat, []) item = (attack_id, attack['pattern'], h_type) if item not in output['attack_matrix'][cat]: output['attack_matrix'][cat].append(item) # Process Signatures for signature in sec['heuristic'].get('signature', []): sig = (signature['name'], h_type, signature.get('safe', False)) if sig not in output['signatures']: output['signatures'].add(sig) # Process tags for t in sec['tags']: output["tags"].setdefault(t['type'], {}) current_htype = output["tags"][t['type']].get(t['value'], None) if not current_htype: output["tags"][t['type']][t['value']] = (h_type, t['safelisted']) else: if current_htype == 'malicious' or h_type == 'malicious': output["tags"][t['type']][t['value']] = ('malicious', t['safelisted']) elif current_htype == 'suspicious' or h_type == 'suspicious': output["tags"][t['type']][t['value']] = ('suspicious', t['safelisted']) else: output["tags"][t['type']][t['value']] = ('info', t['safelisted']) for t_type in output["tags"]: output["tags"][t_type] = [(k, v[0], v[1]) for k, v in output['tags'][t_type].items()] output['signatures'] = list(output['signatures']) output['file_info']['classification'] = max_c12n return make_api_response(output) else: return make_api_response("", "You are not allowed to view the data of this submission", 403)
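# Small helper capturing the score thresholds used in the section loops above; both the file
# results and the file submission results endpoints bucket heuristics with these bounds.
def verdict_from_score(score: int) -> str:
    if score < 0:
        return "safe"
    if score < 300:
        return "info"
    if score < 1000:
        return "suspicious"
    return "malicious"

# e.g. verdict_from_score(-10) -> "safe", verdict_from_score(500) -> "suspicious"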
def get_full_results(sid, **kwargs): """ Get the full results for a given Submission ID. The difference between this and the get results API is that this one gets the actual values of the result and error keys instead of listing the keys. Variables: sid => Submission ID to get the full results for Arguments: None Data Block: None Result example: {"classification": "UNRESTRICTIED" # Access control for the submission "error_count": 0, # Number of errors in this submission "errors": [], # List of error blocks (see Get Service Error) "file_count": 4, # Number of files in this submission "files": [ # List of submitted files ["FNAME", "sha256"], ...], # Each file = List of name/sha256 "file_infos": { # Dictionary of fil info blocks "234...235": <<FILE_INFO>>, # File in block ...}, # Keyed by file's sha256 "file_tree": { # File tree of the submission "333...7a3": { # File tree item "children": {}, # Recursive children of file tree item "name": ["file.exe",...] # List of possible names for the file "score": 0 # Score of the file },, ...}, # Keyed by file's sha256 "missing_error_keys": [], # Errors that could not be fetched from the datastore "missing_result_keys": [], # Results that could not be fetched from the datastore "results": [], # List of Results Blocks (see Get Service Result) "services": { # Service Block "selected": ["mcafee"], # List of selected services "params": {}, # Service specific parameters "excluded": [] # List of excluded services }, "state": "completed", # State of the submission "submission": { # Submission Block "profile": true, # Should keep stats about execution? "description": "", # Submission description "ttl": 30, # Submission days to live "ignore_filtering": false, # Ignore filtering services? "priority": 1000, # Submission priority, higher = faster "ignore_cache": true, # Force reprocess even is result exist? "groups": ["group", ...], # List of groups with access "sid": "ab9...956", # Submission ID "submitter": "user", # Uname of the submitter "max_score": 1422, }, # Score of highest scoring file "times": { # Timing block "completed": "2014-...", # Completed time "submitted": "2014-..." 
# Submitted time } } """ max_retry = 10 def get_results(keys): out = {} res = {} retry = 0 while keys and retry < max_retry: if retry: time.sleep(2 ** (retry - 7)) res.update(STORAGE.get_multiple_results(keys, Classification, as_obj=False)) keys = [x for x in keys if x not in res] retry += 1 results = {} for k, v in res.items(): file_info = data['file_infos'].get(k[:64], None) if file_info: v = format_result(user['classification'], v, file_info['classification']) if v: results[k] = v out["results"] = results out["missing_result_keys"] = keys return out def get_errors(keys): out = {} err = {} missing = [] retry = 0 while keys and retry < max_retry: if retry: time.sleep(2 ** (retry - 7)) try: err.update(STORAGE.error.multiget(keys, as_obj=False)) except MultiKeyError as e: LOGGER.warning(f"Trying to get multiple errors but some are missing: {str(e.keys)}") err.update(e.partial_output) missing.extend(e.keys) keys = [x for x in keys if x not in err and x not in missing] retry += 1 out["errors"] = err out["missing_error_keys"] = keys + missing return out def get_file_infos(keys): infos = {} missing = [] retry = 0 while keys and retry < max_retry: if retry: time.sleep(2 ** (retry - 7)) try: infos.update(STORAGE.file.multiget(keys, as_obj=False)) except MultiKeyError as e: LOGGER.warning(f"Trying to get multiple files but some are missing: {str(e.keys)}") infos.update(e.partial_output) missing.extend(e.keys) keys = [x for x in keys if x not in infos and x not in missing] retry += 1 return infos, missing def recursive_flatten_tree(tree): sha256s = [] for key, val in tree.items(): sha256s.extend(recursive_flatten_tree(val.get('children', {}))) if key not in sha256s: sha256s.append(key) return list(set(sha256s)) user = kwargs['user'] data = STORAGE.submission.get(sid, as_obj=False) if data is None: return make_api_response("", "Submission ID %s does not exists." % sid, 404) if data and user and Classification.is_accessible(user['classification'], data['classification']): res_keys = data.get("results", []) err_keys = data.get("errors", []) data['file_tree'] = STORAGE.get_or_create_file_tree(data, config.submission.max_extraction_depth, cl_engine=Classification, user_classification=user['classification'])['tree'] data['file_infos'], data['missing_file_keys'] = get_file_infos(recursive_flatten_tree(data['file_tree'])) data.update(get_results(res_keys)) data.update(get_errors(err_keys)) for r in data['results'].values(): data['classification'] = Classification.max_classification(data['classification'], r['classification']) for f in data['file_infos'].values(): data['classification'] = Classification.max_classification(data['classification'], f['classification']) return make_api_response(data) else: return make_api_response("", "You are not allowed to view the data of this submission", 403)
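# Hedged sketch of the bounded retry pattern used by get_results/get_errors/get_file_infos
# above: keep re-fetching only the still-missing keys, up to max_retry passes, sleeping
# 2 ** (retry - 7) seconds between passes (sub-second waits for the first few retries).
import time

def fetch_with_retries(keys, fetch_many, max_retry=10):
    """fetch_many(keys) returns a dict of key -> value; keys it cannot serve are absent."""
    found = {}
    retry = 0
    while keys and retry < max_retry:
        if retry:
            time.sleep(2 ** (retry - 7))
        found.update(fetch_many(keys))
        keys = [k for k in keys if k not in found]
        retry += 1
    return found, keys   # resolved values and the keys that never showed up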
def add_or_update_hash(**kwargs): """ Add a hash in the safelist if it does not exist or update its list of sources if it does Arguments: None Data Block: { "classification": "TLP:W", # Classification of the safe hash (Computed for the mix of sources) - Optional "enabled": true, # Is the safe hash enabled or not "file": { # Information about the file - Only used in file mode "name": ["file.txt"] # Possible names for the file "size": 12345, # Size of the file "type": "document/text"}, # Type of the file }, "hashes": { # Information about the safe hash - At least one hash required "md5": "123...321", # MD5 hash of the safe hash "sha1": "1234...4321", # SHA1 hash of the safe hash "sha256": "12345....54321", # SHA256 of the safe hash "sources": [ # List of sources for why the file is safelisted, dedupped on name - Required {"classification": "TLP:W", # Classification of the source (default: TLP:W) - Optional "name": "NSRL", # Name of external source or user who safelisted it - Required "reason": [ # List of reasons why the source is safelisted - Required "Found as test.txt on default windows 10 CD", "Found as install.txt on default windows XP CD" ], "type": "external"}, # Type or source (external or user) - Required {"classification": "TLP:W", "name": "admin", "reason": ["We've seen this file many times and it leads to False positives"], "type": "user"} ], "signature": { # Signature information - Only used in signature mode "name": "Avira.Eicar", # Name of signature }, "tag": { # Tag information - Only used in tag mode "type": "network.url", # Type of tag "value": "google.ca" # Value of the tag }, "type": "tag" # Type of safelist hash (tag or file) } Result example: { "success": true, # Was the hash successfully added "op": "add" # Was it added to the system or updated } """ # Load data data = request.json user = kwargs['user'] # Set defaults data.setdefault('classification', CLASSIFICATION.UNRESTRICTED) data.setdefault('hashes', {}) if data['type'] == 'tag': tag_data = data.get('tag', None) if tag_data is None or 'type' not in tag_data or 'value' not in tag_data: return make_api_response(None, "Tag data not found", 400) hashed_value = f"{tag_data['type']}: {tag_data['value']}".encode( 'utf8') data['hashes']['md5'] = hashlib.md5(hashed_value).hexdigest() data['hashes']['sha1'] = hashlib.sha1(hashed_value).hexdigest() data['hashes']['sha256'] = hashlib.sha256(hashed_value).hexdigest() data.pop('file', None) data.pop('signature', None) elif data['type'] == 'signature': sig_data = data.get('signature', None) if sig_data is None or 'name' not in sig_data: return make_api_response(None, "Signature data not found", 400) hashed_value = f"signature: {sig_data['name']}".encode('utf8') data['hashes']['md5'] = hashlib.md5(hashed_value).hexdigest() data['hashes']['sha1'] = hashlib.sha1(hashed_value).hexdigest() data['hashes']['sha256'] = hashlib.sha256(hashed_value).hexdigest() data.pop('tag', None) data.pop('file', None) elif data['type'] == 'file': data.pop('tag', None) data.pop('signature', None) data.setdefault('file', {}) data['added'] = data['updated'] = now_as_iso() # Find the best hash to use for the key qhash = data['hashes'].get( 'sha256', data['hashes'].get('sha1', data['hashes'].get('md5', None))) # Validate hash length if not qhash: return make_api_response(None, "No valid hash found", 400) # Validate sources src_map = {} for src in data['sources']: if src['type'] == 'user': if src['name'] != user['uname']: return make_api_response( {}, f"You cannot add a source for another user. 
{src['name']} != {user['uname']}", 400) else: if 'signature_importer' not in user['type']: return make_api_response( {}, "You do not have sufficient priviledges to add an external source.", 403) src_cl = src.get('classification', None) if src_cl: data['classification'] = CLASSIFICATION.max_classification( data['classification'], src_cl) src_map[src['name']] = src with Lock(f'add_or_update-safelist-{qhash}', 30): old = STORAGE.safelist.get_if_exists(qhash, as_obj=False) if old: try: # Save data to the DB STORAGE.safelist.save(qhash, _merge_safe_hashes(data, old)) return make_api_response({'success': True, "op": "update"}) except InvalidSafehash as e: return make_api_response({}, str(e), 400) else: try: data['sources'] = src_map.values() STORAGE.safelist.save(qhash, data) return make_api_response({'success': True, "op": "add"}) except Exception as e: return make_api_response({}, f"Invalid data provided: {str(e)}", 400)
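# Sketch of how the safelist key is derived for tag entries in the endpoint above: hashes
# are computed over "<tag type>: <value>" (or "signature: <name>" for signatures) and the
# record key is the best available hash (sha256, then sha1, then md5).
import hashlib

def safelist_key_for_tag(tag_type: str, value: str) -> str:
    hashed_value = f"{tag_type}: {value}".encode('utf8')
    return hashlib.sha256(hashed_value).hexdigest()

# e.g. safelist_key_for_tag("network.url", "google.ca") is the sha256 key the API would
# store that tag under.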