def decode_file(original_path, fileinfo):
    """Unpack a CaRT-wrapped file and re-identify its payload.

    The first 256 bytes of ``original_path`` are handed to ``is_cart``. When
    they are recognised, the whole stream is unpacked into a fresh temporary
    file and ``fileinfo`` is recomputed from that payload.

    Args:
        original_path: Path of the file to inspect.
        fileinfo: Identification dict for ``original_path``. If unpacking
            fails, its ``'type'`` key is overwritten with
            ``'corrupted/cart'``.

    Returns:
        A ``(extracted_path, fileinfo, hdr)`` tuple. ``extracted_path`` is the
        temporary file holding the payload (the caller must delete it) or
        ``None`` when nothing was extracted. ``hdr`` is the header returned by
        ``unpack_stream`` or ``{}``.
    """
    extracted_path = None
    hdr = {}

    with open(original_path, 'rb') as original_file:
        if not is_cart(original_file.read(256)):
            return extracted_path, fileinfo, hdr

        # Rewind: unpack_stream needs the stream from its very first byte.
        original_file.seek(0)
        extracted_fd, temp_path = tempfile.mkstemp()
        try:
            with os.fdopen(extracted_fd, 'wb') as extracted_file:
                hdr, _ = unpack_stream(original_file, extracted_file)
        except Exception:
            hdr = {}
            fileinfo['type'] = 'corrupted/cart'
            # The caller is never told about this path, so the partially
            # written temp file must be removed here or it is orphaned.
            try:
                os.unlink(temp_path)
            except OSError:
                pass
        else:
            extracted_path = temp_path
            fileinfo = identify.fileinfo(extracted_path)

    return extracted_path, fileinfo, hdr
def main():
    """Run one service against one file from the command line.

    Expects exactly two command-line arguments: the service name and the
    input file. A name containing a dot is resolved with ``class_by_name``,
    any other name with ``service_by_name``.

    Exits with status 1 on a wrong argument count (after calling ``usage``)
    and with status 3 when the input is not an existing file.
    """
    if len(sys.argv) != 3:
        usage()
        sys.exit(1)

    name = sys.argv[1]
    svc_class = class_by_name(name) if '.' in name else service_by_name(name)

    filename = sys.argv[2]
    if not os.path.isfile(filename):
        print('Invalid input file: %s' % filename)
        sys.exit(3)

    fi = fileinfo(filename)
    sha256 = fi['sha256']

    # The transport expects the filename to be the sha256.
    # Create a symlink (a copy on Windows) if required.
    created_link = False
    if filename != sha256:
        try:
            if platform.system() == 'Windows':
                import shutil
                shutil.copyfile(filename, sha256)
            else:
                os.symlink(filename, sha256)
            # Only remember the link once it really exists, so the cleanup
            # below never tries to unlink something this run did not create.
            created_link = True
        except Exception as ex:  # pylint: disable=W0703
            print('exception trying to link file: %s' % str(ex))

    try:
        scan_file(svc_class, **fi)
    finally:
        # Remove the link/copy even when the scan raised.
        if created_link:
            os.unlink(sha256)
def create_service_task(sample):
    """Build the ``ServiceTask`` describing the file ``/tmp/<sample>``.

    The file is identified with ``fileinfo`` and only the magic, md5, mime,
    sha1, sha256, size and type entries are forwarded to the task. All other
    task fields are fixed values.
    """
    wanted = ("magic", "md5", "mime", "sha1", "sha256", "size", "type")
    identified = fileinfo(f"/tmp/{sample}")
    trimmed_info = {key: value for key, value in identified.items() if key in wanted}

    task_spec = {
        "sid": 1,
        "metadata": {},
        "deep_scan": False,
        "service_name": "Not Important",
        "service_config": {
            "extract_body_text": False,
            "save_emlparser_output": False,
        },
        "fileinfo": trimmed_info,
        "filename": sample,
        "min_classification": "TLP:WHITE",
        "max_files": 501,
        "ttl": 3600,
    }
    return ServiceTask(task_spec)
def find_scripts_and_exes(apktool_out_dir: str, result: Result):
    """Report shell scripts, Linux executables and nested APKs in an apktool output tree.

    Every file below ``apktool_out_dir`` except ``*.smali`` files is
    identified with ``fileinfo``. Matches are grouped by type and, for each
    non-empty group, a ``ResultSection`` is attached to ``result`` listing at
    most 20 paths (sorted, with ``apktool_out_dir`` stripped) followed by an
    "and N more..." line when the group is larger.

    Args:
        apktool_out_dir: Root directory of the apktool output.
        result: Parent given to each created ``ResultSection``.
    """
    scripts = []
    executables = []
    apks = []

    # We are gonna do the full apktool output dir here but in case we want to do less,
    # you can edit the test_path list
    test_paths = [apktool_out_dir]
    for path in test_paths:
        for root, _, files in os.walk(path):
            for f in files:
                if f.endswith(".smali"):
                    continue
                cur_file = os.path.join(root, f)
                file_type = fileinfo(cur_file)['type']
                shown_name = cur_file.replace(apktool_out_dir, '')

                if "code/sh" in file_type:
                    scripts.append(shown_name)
                elif "executable/linux" in file_type:
                    executables.append(shown_name)
                elif "android/apk" in file_type:
                    # Nested APKs get their own section (heuristic 19); they
                    # used to be filed under executables, which left the APK
                    # section unreachable.
                    apks.append(shown_name)

    max_listed = 20
    report_plan = (
        ("Shell script(s) found inside APK", 1, scripts),
        ("Executable(s) found inside APK", 2, executables),
        ("Other APKs where found inside the APK", 19, apks),
    )
    for title, heur_id, found in report_plan:
        if not found:
            continue
        section = ResultSection(title, parent=result, heuristic=Heuristic(heur_id))
        for entry in sorted(found)[:max_listed]:
            section.add_line(entry)
        if len(found) > max_listed:
            section.add_line(f"and {len(found) - max_listed} more...")
def _ready_file(self, local_path: str, expiry, classification, cleanup,
                sha256=None, upload=False) -> Tuple[str, int, dict]:
    """Take a file from local storage and prepare it for submission.

    After this method finished the file will ONLY exist on the filestore, not locally
    (the original is removed only when ``cleanup`` is true; an extracted
    payload is always removed).

    Args:
        local_path: Path of the file to prepare.
        expiry: Expiry passed to ``datastore.save_or_freshen_file``.
        classification: Used when the decoded metadata carries no
            ``'classification'`` of its own.
        cleanup: When true, delete ``local_path`` before returning.
        sha256: Optional expected digest of ``local_path``.
        upload: When true, upload ``local_path`` even if nothing was extracted.

    Returns:
        ``(sha256, size, al_meta)`` of the file that was registered.

    Raises:
        SubmissionException: The file is empty.
        CorruptedFileStoreException: ``sha256`` was given and does not match.
    """
    extracted_path = None
    try:
        # Analyze the file and make sure the file table is up to date
        fileinfo = identify.fileinfo(local_path)
        if fileinfo['size'] == 0:
            raise SubmissionException("File empty. Submission failed")
        if sha256 is not None and fileinfo['sha256'] != sha256:
            raise CorruptedFileStoreException(f"SHA256 mismatch between received and calculated "
                                              f"sha256. {sha256} != {fileinfo['sha256']}")

        # Check if there is an integrated decode process for this file
        # eg. files that are packaged, and the contained file (not the package
        # that local_path points to) should be passed into the system.
        extracted_path, fileinfo, al_meta = decode_file(local_path, fileinfo)
        al_meta['classification'] = al_meta.get('classification', classification)

        # local_path is deliberately NOT rebound to the extracted file: the
        # cleanup below still needs the original path to delete the package.
        if extracted_path:
            self.filestore.upload(extracted_path, fileinfo['sha256'])
        elif upload:
            self.filestore.upload(local_path, fileinfo['sha256'])

        self.datastore.save_or_freshen_file(fileinfo['sha256'], fileinfo, expiry,
                                            al_meta['classification'], redis=self.redis)
        return fileinfo['sha256'], fileinfo['size'], al_meta

    finally:
        # If we extracted anything delete it
        if extracted_path and os.path.exists(extracted_path):
            os.unlink(extracted_path)

        # Delete the caller's file as well when asked to
        if local_path and cleanup and os.path.exists(local_path):
            os.unlink(local_path)
def identify(cls, transport, storage, sha256, **kw):
    """Identify a file and record the result in ``storage``.

    The file is read from ``transport.local_path(sha256)`` when available,
    else from ``kw['path']`` if that path exists, else it is downloaded from
    ``transport`` into a temporary file that is removed afterwards.

    Args:
        transport: File transport queried for and, if needed, downloaded from.
        storage: Receives ``save_or_freshen_file`` with the computed fileinfo.
        sha256: Digest naming the file; validated with ``assert_valid_sha256``.
        **kw: Must contain ``'classification'``; ``'path'`` is optional.

    Returns:
        The fileinfo computed by ``identify.fileinfo``, or ``None`` (after a
        warning is logged) when ``transport`` does not have the file.
    """
    assert_valid_sha256(sha256)

    classification = kw['classification']

    kw['ttl'] = ttl = effective_ttl(kw)
    kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

    # By the time identify is called, either the file was in our cache
    # and we freshed its ttl or the client has successfully transfered
    # the file to us.
    local_path = transport.local_path(sha256)
    if not local_path:
        path = kw.get("path", None)
        if path and os.path.exists(path):
            local_path = path

    if not transport.exists(sha256):
        log.warning('File specified is not on server: %s %s.',
                    sha256, str(transport))
        return None

    temporary_path = fileinfo = None
    try:
        if not local_path:
            # mkstemp creates the file atomically; mktemp only returns a
            # name, which another process could claim before we use it.
            temp_fd, temporary_path = tempfile.mkstemp(prefix="submission.identify")
            os.close(temp_fd)
            transport.download(sha256, temporary_path)
            local_path = temporary_path

        fileinfo = identify.fileinfo(local_path)

        storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)
    finally:
        if temporary_path:
            try:
                os.unlink(temporary_path)
            except OSError:
                # Best effort: the scratch file may already be gone.
                pass

    return fileinfo
def ready_body(core, body=None):
    """Store a salted JSON body in the filestore and register it in the datastore.

    A dict holding a random ``'salt'`` is merged with ``body`` (when given),
    serialised to JSON bytes and stored under its SHA256. The bytes are also
    written to a temporary file so they can be identified, and the resulting
    fileinfo is saved through ``core.ds``.

    Returns:
        ``(sha256_hexdigest, byte_length)`` of the stored body.
    """
    payload = {'salt': get_random_id()}
    if body:
        payload.update(body)
    raw = json.dumps(payload).encode()

    digest = hashlib.sha256(raw).hexdigest()
    core.filestore.put(digest, raw)

    with NamedTemporaryFile() as scratch:
        scratch.write(raw)
        # Flush so identification reads the complete content from disk.
        scratch.flush()
        info = identify.fileinfo(scratch.name)
        core.ds.save_or_freshen_file(digest, info, now_as_iso(500), 'U', redis=core.redis)

    return digest, len(raw)
def decode_file(original_path, fileinfo):
    """Unpack a neutered (CaRT) file and re-identify its payload.

    Nothing is done unless ``fileinfo['tag']`` is in ``NEUTERED_FORMAT``.
    Otherwise the file is unpacked into a new temporary file, ``fileinfo`` is
    recomputed from the payload and its ``'path'`` is set to the original
    name when one is known.

    Args:
        original_path: Path of the possibly packaged file.
        fileinfo: Identification dict for ``original_path``.

    Returns:
        ``(extracted_path, original_name, fileinfo, al_meta)``.
        ``extracted_path`` is the temporary payload file (the caller must
        delete it) or ``None``. ``original_name`` is the header's ``'name'``,
        falling back to the basename of ``original_path``, or ``None`` when
        nothing was unpacked. ``al_meta`` is the header's ``al.meta`` dict or
        ``{}``.

    Raises:
        Whatever ``unpack_stream`` or ``identify.fileinfo`` raise; the
        temporary file is removed first in that case.
    """
    extracted_path = None
    original_name = None
    al_meta = {}

    if fileinfo['tag'] in NEUTERED_FORMAT:
        from cart import unpack_stream

        extracted_fd, extracted_path = tempfile.mkstemp()
        try:
            with os.fdopen(extracted_fd, 'wb') as extracted_file:
                # Binary mode: the packaged stream is not text.
                with open(original_path, 'rb') as original_file:
                    hdr, _ = unpack_stream(original_file, extracted_file)

            original_name = hdr.get('name', os.path.basename(original_path))
            al_meta = hdr.get("al", {}).get("meta", {})
            fileinfo = identify.fileinfo(extracted_path)
        except Exception:
            # The caller never learns this path when we raise, so remove the
            # temp file here instead of leaking it.
            try:
                os.unlink(extracted_path)
            except OSError:
                pass
            raise

        if original_name:
            fileinfo['path'] = original_name

    return extracted_path, original_name, fileinfo, al_meta
def test_identify():
    """Check that a CaRT file is identified as ``archive/cart`` and hashed correctly.

    A 10000-byte payload is packed into a CaRT file. The test then verifies
    that the CaRT metadata carries the payload's SHA256, that ``fileinfo``
    reports the type ``archive/cart``, and that the SHA256 reported by
    ``fileinfo`` is the digest of the CaRT file itself.
    """
    # Setup test data
    payload = b"A" * 10000
    sha256 = hashlib.sha256(payload).hexdigest()

    # Prep temp file
    input_fd, input_path = tempfile.mkstemp()
    output_path = f"{input_path}.cart"

    try:
        # Write temp file through the descriptor mkstemp handed back, so that
        # descriptor is closed instead of leaked.
        with os.fdopen(input_fd, 'wb') as oh:
            oh.write(payload)

        # Create a cart file
        with open(output_path, 'wb') as oh:
            with open(input_path, 'rb') as ih:
                pack_stream(ih, oh, {'name': 'test_identify.a'})

        # Validate the cart file created
        meta = get_metadata_only(output_path)
        assert meta.get("sha256", None) == sha256

        # Validate identify file detection
        info = fileinfo(output_path)
        assert info.get("type", None) == "archive/cart"

        # Validate identify hashing against an independent digest of the file.
        # hashlib avoids depending on an external `sha256sum` binary.
        with open(output_path, 'rb') as fh:
            output_sha256 = hashlib.sha256(fh.read()).hexdigest()
        assert info.get("sha256", None) == output_sha256
    finally:
        # Cleanup output file
        if os.path.exists(output_path):
            os.unlink(output_path)

        # Cleanup input file
        if os.path.exists(input_path):
            os.unlink(input_path)
def try_run(self):
    """Run the service once against ``FILE_PATH`` and collect its output.

    The input is staged in the system temp directory under its SHA256
    (un-CaRTed first when it identifies as ``archive/cart``), a
    ``ServiceTask`` is handed to ``self.service``, and everything the service
    left in ``<tmp>/working_directory`` is moved to
    ``<input dir>/<input name>_<service name>/working_directory``. The result
    JSON is then validated (problems are logged, not raised) and moved to
    ``result.json`` in that output directory.

    Returns early, after logging, when ``FILE_PATH`` is not a file.

    Raises:
        Exception: Re-raised from ``load_module_by_path``; also raised when
            the service produced no result JSON.
    """
    try:
        self.service_class = load_module_by_path(SERVICE_PATH)
    except ValueError:
        raise
    except Exception:
        LOG.error("Could not find service in path. Check your environment variables.")
        raise

    self.load_service_manifest()

    if not os.path.isfile(FILE_PATH):
        LOG.info(f"File not found: {FILE_PATH}")
        return

    self.file_dir = os.path.dirname(FILE_PATH)

    # Get filename and working dir
    file_name = os.path.basename(FILE_PATH)
    working_dir = os.path.join(self.file_dir, f'{file_name}_{SERVICE_NAME.lower()}')

    # Start service
    self.service.start_service()

    # Identify the file
    file_info = identify.fileinfo(FILE_PATH)
    if file_info['type'] == "archive/cart":
        # This is a CART file, uncart it and recreate the file info object
        original_temp = os.path.join(tempfile.gettempdir(), file_info['sha256'])
        with open(FILE_PATH, 'rb') as ifile, open(original_temp, 'wb') as ofile:
            unpack_stream(ifile, ofile)

        # The payload has a different digest than the archive, so rename the
        # staged file after the payload's own SHA256.
        file_info = identify.fileinfo(original_temp)
        target_file = os.path.join(tempfile.gettempdir(), file_info['sha256'])
        shutil.move(original_temp, target_file)
        LOG.info(f"File was a CaRT archive, it was un-CaRTed to {target_file} for processing")
    else:
        # It not a cart, move the file to the right place to be processed
        target_file = os.path.join(tempfile.gettempdir(), file_info['sha256'])
        shutil.copyfile(FILE_PATH, target_file)

    # Create service processing task
    service_task = ServiceTask(dict(
        sid=get_random_id(),
        metadata={},
        service_name=SERVICE_NAME,
        service_config=self.submission_params,
        fileinfo=dict(
            magic=file_info['magic'],
            md5=file_info['md5'],
            mime=file_info['mime'],
            sha1=file_info['sha1'],
            sha256=file_info['sha256'],
            size=file_info['size'],
            type=file_info['type'],
        ),
        filename=file_name,
        min_classification=forge.get_classification().UNRESTRICTED,
        max_files=501,  # TODO: get the actual value
        ttl=3600,
    ))

    LOG.info(f"Starting task with SID: {service_task.sid}")

    # Set the working directory to a directory with same parent as input file
    if os.path.isdir(working_dir):
        shutil.rmtree(working_dir)
    if not os.path.isdir(working_dir):
        os.makedirs(os.path.join(working_dir, 'working_directory'))

    self.service.handle_task(service_task)

    # Move the result.json and extracted/supplementary files to the working directory
    source = os.path.join(tempfile.gettempdir(), 'working_directory')
    if not os.path.exists(source):
        os.makedirs(source)

    for produced_name in os.listdir(source):
        shutil.move(os.path.join(source, produced_name), os.path.join(working_dir, 'working_directory'))

    # Cleanup files from the original directory created by the service base
    shutil.rmtree(source)

    result_json = os.path.join(tempfile.gettempdir(),
                               f'{service_task.sid}_{service_task.fileinfo.sha256}_result.json')

    if not os.path.exists(result_json):
        raise Exception("A service error occured and no result json was found.")

    # Validate the generated result
    with open(result_json, 'r') as fh:
        try:
            result = json.load(fh)
            result.pop('temp_submission_data', None)
            for produced in result['response']['extracted'] + result['response']['supplementary']:
                produced.pop('path', None)

            # Load heuristics
            heuristics = get_heuristics()

            # Transform heuristics and calculate score
            total_score = 0
            for section in result['result']['sections']:
                if section['heuristic']:
                    heur_id = section['heuristic']['heur_id']

                    try:
                        section['heuristic'] = service_heuristic_to_result_heuristic(section['heuristic'],
                                                                                     heuristics)
                        total_score += section['heuristic']['score']
                    except InvalidHeuristicException:
                        section['heuristic'] = None
                    else:
                        # Only a successfully converted heuristic can be named;
                        # subscripting the None set above raised TypeError and
                        # aborted the whole validation.
                        section['heuristic']['name'] = heuristics[heur_id]['name']
            result['result']['score'] = total_score

            # Add timestamps for creation, archive and expiry
            result['created'] = now_as_iso()
            result['archive_ts'] = now_as_iso(1 * 24 * 60 * 60)
            result['expiry_ts'] = now_as_iso(service_task.ttl * 24 * 60 * 60)

            result = Result(result)

            # Print the result on console if in debug mode
            if args.debug:
                # The banner used to be built and discarded; log it.
                LOG.debug(f"{SERVICE_NAME.upper()}-RESULT".center(60, '-'))
                for line in pprint.pformat(result.result.as_primitives()).split('\n'):
                    LOG.debug(line)
        except Exception as e:
            LOG.error(f"Invalid result created: {str(e)}")

    LOG.info(f"Cleaning up file used for temporary processing: {target_file}")
    os.unlink(target_file)

    LOG.info(f"Moving {result_json} to the working directory: {working_dir}/result.json")
    shutil.move(result_json, os.path.join(working_dir, 'result.json'))

    LOG.info(f"Successfully completed task. Output directory: {working_dir}")
def ingest_single_file(**kwargs):
    """
    Ingest a single file, sha256 or URL in the system

        Note 1:
            If you are submitting a sha256 or a URL, you must use the application/json encoding and one of
            sha256 or url parameters must be included in the data block.

        Note 2:
            If you are submitting a file directly, you have to use multipart/form-data encoding this
            was done to reduce the memory footprint and speedup file transfers
             ** Read documentation of mime multipart standard if your library does not support it**

            The multipart/form-data for sending binary has two parts:
                - The first part contains a JSON dump of the optional params and uses the name 'json'
                  (it may be left out when no optional value is needed)
                - The last part contains the file binary, uses the name 'bin' and includes a filename

        Note 3:
            The ingest API uses the user's default settings to submit files to the system
            unless these settings are overridden in the 'params' field. Although, there are
            exceptions to that rule. Fields deep_scan, ignore_filtering, ignore_cache are
            reset to False because they lead to dangerous behavior in the system.

    Variables:
    None

    Arguments:
    None

    Data Block (SHA256 or URL):
    {
      //REQUIRED VALUES: One of the following
      "sha256": "1234...CDEF"         # SHA256 hash of the file
      "url": "http://...",            # Url to fetch the file from

      //OPTIONAL VALUES
      "name": "file.exe",             # Name of the file

      "metadata": {                   # Submission Metadata
        "key": val,                   # Key/Value pair for metadata parameters
        },

      "params": {                     # Submission parameters
        "key": val,                   # Key/Value pair for params that differ from the user's defaults
        },                            # DEFAULT: /api/v3/user/submission_params/<user>/

      "generate_alert": False,        # Generate an alert in our alerting system or not
      "notification_queue": None,     # Name of the notification queue
      "notification_threshold": None, # Threshold for notification
    }

    Data Block (Binary):

    --0b34a3c50d3c02dd804a172329a0b2aa        <-- Randomly generated boundary for this http request
    Content-Disposition: form-data; name="json"      <-- JSON data blob part (only previous optional values valid)

    {"params": {"ignore_cache": true}, "generate_alert": true}
    --0b34a3c50d3c02dd804a172329a0b2aa        <-- Switch to next part, file part
    Content-Disposition: form-data; name="bin"; filename="name_of_the_file_to_scan.bin"

    <BINARY DATA OF THE FILE TO SCAN... DOES NOT NEED TO BE ENCODED>

    --0b34a3c50d3c02dd804a172329a0b2aa--      <-- End of HTTP transmission

    Result example:
    { "ingest_id": <ID OF THE INGESTED FILE> }
    """
    user = kwargs['user']
    out_dir = os.path.join(TEMP_SUBMIT_DIR, get_random_id())
    extracted_path = original_file = None
    with forge.get_filestore() as f_transport:
        try:
            # Get data block and binary blob
            # A request without a Content-Type header must end in the 400
            # below rather than a TypeError on the `in` test.
            content_type = request.content_type or ""
            if 'multipart/form-data' in content_type:
                # Every value in the 'json' part is optional, so an absent or
                # empty part is accepted and treated as "no overrides".
                if 'json' in request.values:
                    data = json.loads(request.values['json']) or {}
                else:
                    data = {}
                binary = request.files['bin']
                name = data.get("name", binary.filename)
                sha256 = None
                url = None
            elif 'application/json' in content_type:
                data = request.json
                # Check before the first data.get(): a missing body used to
                # crash here instead of producing this response.
                if not data:
                    return make_api_response({}, "Missing data block", 400)
                binary = None
                sha256 = data.get('sha256', None)
                url = data.get('url', None)
                # `url or ""` keeps basename() from raising when no url was
                # sent; the empty result falls through to None.
                name = data.get("name", None) or sha256 or os.path.basename(url or "") or None
            else:
                return make_api_response({}, "Invalid content type", 400)

            # Get notification queue parameters
            notification_queue = data.get('notification_queue', None)
            notification_threshold = data.get('notification_threshold', None)
            if not isinstance(notification_threshold, int) and notification_threshold:
                return make_api_response({}, "notification_threshold should be and int", 400)

            # Get generate alert parameter
            generate_alert = data.get('generate_alert', False)
            if not isinstance(generate_alert, bool):
                return make_api_response({}, "generate_alert should be a boolean", 400)

            # Get file name
            if not name:
                return make_api_response({}, "Filename missing", 400)

            # Keep only the last path component of the client-supplied name.
            name = os.path.basename(name)
            if not name:
                return make_api_response({}, "Invalid filename", 400)

            try:
                os.makedirs(out_dir)
            except Exception:
                pass
            original_file = out_file = os.path.join(out_dir, name)

            # Load file
            extra_meta = {}
            if binary:
                with open(out_file, "wb") as my_file:
                    my_file.write(binary.read())
            elif sha256:
                if f_transport.exists(sha256):
                    f_transport.download(sha256, out_file)
                else:
                    return make_api_response({}, "SHA256 does not exist in our datastore", 404)
            elif url:
                if not config.ui.allow_url_submissions:
                    return make_api_response({}, "URL submissions are disabled in this system", 400)

                try:
                    safe_download(url, out_file)
                    extra_meta['submitted_url'] = url
                except FileTooBigException:
                    return make_api_response({}, "File too big to be scanned.", 400)
                except InvalidUrlException:
                    return make_api_response({}, "Url provided is invalid.", 400)
                except ForbiddenLocation:
                    return make_api_response({}, "Hostname in this URL cannot be resolved.", 400)
            else:
                return make_api_response({}, "Missing file to scan. No binary, sha256 or url provided.", 400)

            # Load default user params
            s_params = ui_to_submission_params(STORAGE.user_settings.get(user['uname'], as_obj=False))
            if not s_params:
                s_params = get_default_user_settings(user)

            # Reset dangerous user settings to safe values
            s_params.update({
                'deep_scan': False,
                "priority": 150,
                "ignore_cache": False,
                "ignore_dynamic_recursion_prevention": False,
                "ignore_filtering": False,
                "type": "INGEST"
            })

            # Apply provided params
            s_params.update(data.get("params", {}))

            # Override final parameters
            s_params.update({
                'generate_alert': generate_alert,
                'max_extracted': config.core.ingester.default_max_extracted,
                'max_supplementary': config.core.ingester.default_max_supplementary,
                'priority': min(s_params.get("priority", 150), config.ui.ingest_max_priority),
                'submitter': user['uname']
            })

            # Calculate file digest
            fileinfo = identify.fileinfo(out_file)

            # Validate file size
            if fileinfo['size'] > MAX_SIZE and not s_params.get('ignore_size', False):
                msg = f"File too large ({fileinfo['size']} > {MAX_SIZE}). Ingestion failed"
                return make_api_response("", err=msg, status_code=400)
            elif fileinfo['size'] == 0:
                return make_api_response("", err="File empty. Ingestion failed", status_code=400)

            # Decode cart if needed
            extracted_path, fileinfo, al_meta = decode_file(out_file, fileinfo)
            if extracted_path:
                out_file = extracted_path

            # Save the file to the filestore if needs be
            sha256 = fileinfo['sha256']
            if not f_transport.exists(sha256):
                f_transport.upload(out_file, sha256, location='far')

            # Freshen file object
            expiry = now_as_iso(s_params['ttl'] * 24 * 60 * 60) if s_params.get('ttl', None) else None
            STORAGE.save_or_freshen_file(fileinfo['sha256'], fileinfo, expiry, s_params['classification'])

            # Setup notification queue if needed
            if notification_queue:
                notification_params = {
                    "queue": notification_queue,
                    "threshold": notification_threshold
                }
            else:
                notification_params = {}

            # Load metadata, setup some default values if they are missing and append the cart metadata
            ingest_id = get_random_id()
            metadata = flatten(data.get("metadata", {}))
            metadata['ingest_id'] = ingest_id
            metadata['type'] = s_params['type']

            # A name stored in the decoded metadata wins over the request's.
            name = al_meta.pop('name', name)
            metadata.update(al_meta)
            if 'ts' not in metadata:
                metadata['ts'] = now_as_iso()
            metadata.update(extra_meta)

            # Set description if it does not exists
            s_params['description'] = s_params['description'] or f"[{s_params['type']}] Inspection of file: {name}"

            # Create submission object
            try:
                submission_obj = Submission({
                    "sid": ingest_id,
                    "files": [{'name': name, 'sha256': sha256, 'size': fileinfo['size']}],
                    "notification": notification_params,
                    "metadata": metadata,
                    "params": s_params
                })
            except (ValueError, KeyError) as e:
                return make_api_response("", err=str(e), status_code=400)

            # Send submission object for processing
            ingest.push(submission_obj.as_primitives())
            return make_api_response({"ingest_id": ingest_id})

        finally:
            # Cleanup files on disk (best effort; never mask the response)
            try:
                if original_file and os.path.exists(original_file):
                    os.unlink(original_file)
            except Exception:
                pass

            try:
                if extracted_path and os.path.exists(extracted_path):
                    os.unlink(extracted_path)
            except Exception:
                pass

            try:
                if os.path.exists(out_dir):
                    shutil.rmtree(out_dir, ignore_errors=True)
            except Exception:
                pass
def submit_multi(cls, storage, transport, files, **kw):
    """ Submit all files into one submission

        submit_multi can be used when all the files are already present in the
        file storage.

        files is an array of (name, sha256) tuples

        Any kw are passed to the Task created to dispatch this submission.

        Every task shares one freshly generated sid. Returns a copy of the
        first task's raw dict.

        Raises SubmissionException when a file is not on the transport, when
        a file exceeds the configured maximum size (unless kw['ignore_size']
        is set) or when files is empty.
    """
    sid = str(uuid.uuid4())
    classification = kw['classification']

    kw['max_extracted'] = max_extracted(kw)
    kw['max_supplementary'] = max_supplementary(kw)
    kw['ttl'] = ttl = effective_ttl(kw)
    kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

    submissions = []
    dispatch_request = None
    # Generate static fileinfo data for each file.
    for name, sha256 in files:
        # Scratch paths belong to this file only; resetting them stops the
        # cleanup below from touching a previous iteration's path.
        temporary_path = massaged_path = None
        local_path = transport.local_path(sha256)

        if not transport.exists(sha256):
            raise SubmissionException('File specified is not on server: %s %s.' %
                                      (sha256, str(transport)))

        try:
            if not local_path:
                # mkstemp creates the file atomically, unlike mktemp.
                temp_fd, temporary_path = tempfile.mkstemp(prefix="submission.submit_multi")
                os.close(temp_fd)
                transport.download(sha256, temporary_path)
                local_path = temporary_path

            fileinfo = identify.fileinfo(local_path)
            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            decode_file = forge.get_decode_file()
            massaged_path, new_name, fileinfo, al_meta = \
                decode_file(local_path, fileinfo)

            if massaged_path:
                name = new_name
                local_path = massaged_path
                sha256 = fileinfo['sha256']

                if not transport.exists(sha256):
                    transport.put(local_path, sha256)
                storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            ignore_size = kw.get('ignore_size', False)
            max_size = config.submissions.max.size
            if fileinfo['size'] > max_size and not ignore_size:
                msg = "File too large (%d > %d). Submission failed" % (fileinfo['size'], max_size)
                raise SubmissionException(msg)

            # We'll just merge the mandatory arguments, fileinfo, and any
            # optional kw and pass those all on to the dispatch callback.
            task_args = fileinfo
            task_args['priority'] = 0  # Just a default.
            task_args.update(kw)
            task_args['srl'] = sha256
            task_args['original_filename'] = name
            task_args['sid'] = sid
            task_args['path'] = name

            # NOTE(review): when kw carries 'metadata', this updates that one
            # dict for every file, so decoded metadata accumulates across
            # files. Left unchanged; confirm whether that is intended.
            if 'metadata' in task_args:
                task_args['metadata'].update(al_meta)
            else:
                task_args['metadata'] = al_meta

            dispatch_request = Task.create(**task_args)
            submissions.append(dispatch_request)
        finally:
            # Remove both the downloaded copy and the decoded payload; the
            # latter used to be left behind in the temp directory.
            for scratch_path in (massaged_path, temporary_path):
                if scratch_path:
                    try:
                        os.unlink(scratch_path)
                    except OSError:
                        pass

    if dispatch_request is None:
        # Fail clearly instead of with an AttributeError on None below.
        raise SubmissionException('No files were provided for submission.')

    storage.create_submission(
        dispatch_request.sid,
        dispatch_request.as_submission_record(),
        files)

    dispatch_queue = forge.get_dispatch_queue()
    for submission in submissions:
        dispatch_queue.submit(submission)

    log.debug("Submission complete. Dispatched: %s", dispatch_request)
    return submissions[0].raw.copy()
def submit_inline(cls, storage, transport, file_paths, **kw):
    """ Submit local samples to the submission service.

        submit_inline can be used when the sample to submit is already
        local to the submission service. It does the presumit, filestore
        upload and submit.

        Any kw are passed to the Task created to dispatch this submission.

        Returns a copy of the first task's raw dict whose 'fileinfo' entry is
        replaced by the list of every task's fileinfo, each tagged with the
        basename of its input path as 'original_filename' and 'path'.

        Raises SubmissionException when a file exceeds the configured maximum
        size (unless kw['ignore_size'] is set) or when file_paths is empty.
    """
    classification = kw['classification']

    kw['max_extracted'] = max_extracted(kw)
    kw['max_supplementary'] = max_supplementary(kw)
    kw['ttl'] = ttl = effective_ttl(kw)
    kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

    submissions = []
    file_tuples = []
    dispatch_request = None
    # Generate static fileinfo data for each file.
    for file_path in file_paths:

        file_name = os.path.basename(file_path)
        fileinfo = identify.fileinfo(file_path)

        ignore_size = kw.get('ignore_size', False)
        max_size = config.submissions.max.size
        if fileinfo['size'] > max_size and not ignore_size:
            msg = "File too large (%d > %d). Submission Failed" % \
                  (fileinfo['size'], max_size)
            raise SubmissionException(msg)

        decode_file = forge.get_decode_file()
        temp_path, original_name, fileinfo, al_meta = \
            decode_file(file_path, fileinfo)

        try:
            if temp_path:
                file_path = temp_path
                if not original_name:
                    original_name = os.path.splitext(file_name)[0]
                file_name = original_name

            sha256 = fileinfo['sha256']

            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            file_tuples.append((file_name, sha256))

            if not transport.exists(sha256):
                log.debug('File not on remote filestore. Uploading %s', sha256)
                transport.put(file_path, sha256, location='near')
        finally:
            # Remove the decoded payload even when storing or uploading failed.
            if temp_path:
                os.remove(temp_path)

        # We'll just merge the mandatory arguments, fileinfo, and any
        # optional kw and pass those all on to the dispatch callback.
        task_args = fileinfo
        task_args['priority'] = 0  # Just a default.
        task_args.update(kw)
        task_args['srl'] = sha256
        task_args['original_filename'] = file_name
        task_args['path'] = file_name

        if 'metadata' in task_args:
            task_args['metadata'].update(al_meta)
        else:
            task_args['metadata'] = al_meta

        dispatch_request = Task.create(**task_args)
        submissions.append(dispatch_request)

    if dispatch_request is None:
        # Fail clearly instead of with an AttributeError on None below.
        raise SubmissionException('No files were provided for submission.')

    storage.create_submission(
        dispatch_request.sid,
        dispatch_request.as_submission_record(),
        file_tuples)

    dispatch_queue = forge.get_dispatch_queue()
    for submission in submissions:
        dispatch_queue.submit(submission)

    log.debug("Submission complete. Dispatched: %s", dispatch_request)

    # Ugly - fighting with task to give UI something that makes sense.
    result = submissions[0].raw.copy()
    fileinfos = []
    # The loop variable must not be called `result`: that rebinding made the
    # function mutate and return the last task's raw dict instead of the copy
    # prepared above.
    for filename, raw_task in zip(file_paths, [task.raw for task in submissions]):
        finfo = raw_task['fileinfo']
        finfo['original_filename'] = os.path.basename(filename)
        finfo['path'] = finfo['original_filename']
        fileinfos.append(finfo)
    result['fileinfo'] = fileinfos

    return result
def submit(cls, transport, storage, sha256, path, priority, submitter, **kw):
    """ Execute a submit.

        Any kw are passed along in the dispatched request.

        The file named by sha256 must already be on the transport. It is
        identified, decoded when the configured decode_file extracts a
        payload (which is then uploaded under its own sha256), and a Task is
        created and sent to the dispatch queue.

        Returns the raw dict of the created task.

        Raises SubmissionException when the file is not on the transport or
        is too large (unless kw['ignore_size'] is set), and
        CorruptedFileStoreException when the computed digest differs from
        sha256.
    """
    assert_valid_sha256(sha256)
    queue = forge.get_dispatch_queue()

    classification = kw['classification']

    kw['max_extracted'] = max_extracted(kw)
    kw['max_supplementary'] = max_supplementary(kw)
    kw['ttl'] = ttl = effective_ttl(kw)
    kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

    # By the time submit is called, either the file was in our cache
    # and we freshed its ttl or the client has successfully transfered
    # the file to us.
    local_path = transport.local_path(sha256)

    if not transport.exists(sha256):
        raise SubmissionException('File specified is not on server: %s %s.' %
                                  (sha256, str(transport)))

    # sha256 may be replaced by the decoded payload's digest below.
    root_sha256 = sha256
    temporary_path = massaged_path = None
    try:
        if not local_path:
            # mkstemp creates the file atomically, unlike mktemp.
            temp_fd, temporary_path = tempfile.mkstemp(prefix="submission.submit")
            os.close(temp_fd)
            transport.download(sha256, temporary_path)
            local_path = temporary_path

        fileinfo = identify.fileinfo(local_path)
        if fileinfo['sha256'] != sha256:
            raise CorruptedFileStoreException('SHA256 mismatch between received '
                                              'and calculated sha256. %s != %s' % (sha256, fileinfo['sha256']))
        storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

        decode_file = forge.get_decode_file()
        massaged_path, _, fileinfo, al_meta = decode_file(local_path, fileinfo)

        if massaged_path:
            local_path = massaged_path
            sha256 = fileinfo['sha256']

            transport.put(local_path, sha256)
            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

        ignore_size = kw.get('ignore_size', False)
        max_size = config.submissions.max.size
        if fileinfo['size'] > max_size and not ignore_size:
            msg = "File too large (%d > %d). Submission failed" % (fileinfo['size'], max_size)
            raise SubmissionException(msg)

        # We'll just merge the mandatory arguments, fileinfo, and any
        # optional kw and pass those all on to the dispatch callback.
        task_args = fileinfo
        task_args.update(kw)
        task_args.update({
            'original_selected': kw.get('selected', []),
            'root_sha256': root_sha256,
            'srl': sha256,
            'sha256': sha256,
            'priority': priority,
            'submitter': submitter,
            'path': safe_str(path)})

        if 'metadata' in task_args:
            task_args['metadata'].update(al_meta)
        else:
            task_args['metadata'] = al_meta

        submit_task = Task.create(**task_args)
        if submit_task.is_initial():
            storage.create_submission(
                submit_task.sid,
                submit_task.as_submission_record(),
                [(os.path.basename(path), submit_task.srl)])
        log.debug("Submission complete. Dispatching: %s", submit_task)

        queue.send(submit_task, shards=SHARDS)

        return submit_task.raw
    finally:
        # Best-effort removal of the decoded payload and the downloaded copy.
        for scratch_path in (massaged_path, temporary_path):
            if scratch_path:
                try:
                    os.unlink(scratch_path)
                except OSError:
                    pass
def sample(request):
    """Stage a sample under ``/tmp`` named by its SHA256, yield that digest, then remove it.

    The sample is ``tests/samples/<request.param>``; its digest is taken from
    ``fileinfo``. The statement after the ``yield`` deletes the staged copy
    once the generator is resumed.
    """
    source = os.path.join("tests", "samples", request.param)
    digest = fileinfo(source)["sha256"]
    staged = os.path.join("/tmp", digest)

    shutil.copy(source, staged)
    yield digest
    os.remove(staged)