def scan_file(svc_class, sha256, **kwargs):
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

    # Don't use srl normalization for filenames (i.e. 1/2/3/4/1234mysha256)

    # We use mocks for the dispatcher, result store etc. that will inject the
    # results into these lists.
    dispatch_result_collector = mocks.MockDispatchCollector()
    result_store_good = {}
    result_store_bad = {}
    children = []
    supplementary = []

    cfg = forge.get_datastore().get_service(svc_class.SERVICE_NAME).get(
        "config", {})

    # Monkey-patch the forge factories so the service runs entirely against
    # local mocks instead of the production infrastructure.
    import functools
    forge.get_filestore = functools.partial(mocks.get_local_transport, '.')
    forge.get_submit_client = functools.partial(
        mocks.get_mock_submit_client, children, supplementary)
    forge.get_dispatch_queue = lambda: dispatch_result_collector
    forge.get_datastore = functools.partial(
        mocks.get_mock_result_store, result_store_good, result_store_bad)

    service = svc_class(cfg)
    service.start_service()

    # Run all inputs through the service. Children will end up in the children
    # list, results will end up in the results list. Actual fleshed out service
    # results will be in riak.
    task = Task.create(srl=sha256,
                       ignore_cache=True,
                       submitter='local_soak_test',
                       **kwargs)
    start = time.time()
    if service.BATCH_SERVICE:
        service._handle_task_batch([task])
    else:
        service._handle_task(task)
    end = time.time()
    duration = end - start
    print 'Duration: %s' % duration

    (serviced_ok,
     serviced_fail_recover,
     serviced_fail_nonrecover) = dispatch_result_collector.get_serviced_results()

    for response in chain(serviced_ok, serviced_fail_recover,
                          serviced_fail_nonrecover):
        # TODO: we should be able to find it by key in our result_store_good
        if 'response' in response and 'cache_key' in response['response']:
            if response['response']['cache_key'] not in result_store_good:
                print "Appear to be missing result in result store"
                pprint.pprint(response)

    for (_key, full_result) in result_store_good.items():
        if full_result and 'result' in full_result:
            pprint.pprint(full_result)
            # Make sure the full result round-trips through JSON serialization.
            json.dumps(full_result, ensure_ascii=True).encode('utf-8')

    service.stop_service()
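# Hedged usage sketch (not part of the original module): scan_file drives a
# single sample through one service entirely against local mocks. The service
# import and the sha256 below are hypothetical placeholders; any service class
# exposing SERVICE_NAME/BATCH_SERVICE and any sample already staged in the
# local transport directory should work.
def _example_scan_file():
    from al_services.alsvc_example.service import ExampleService  # hypothetical
    scan_file(ExampleService,
              '275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f',
              deep_scan=False)  # extra kwargs flow straight into Task.create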
def submit_inline(cls, storage, transport, file_paths, **kw):
    """ Submit local samples to the submission service.

        submit_inline can be used when the sample to submit is already
        local to the submission service. It does the presubmit, filestore
        upload and submit.

        Any kw are passed to the Task created to dispatch this submission.
    """
    classification = kw['classification']

    kw['max_extracted'] = max_extracted(kw)
    kw['max_supplementary'] = max_supplementary(kw)
    kw['ttl'] = ttl = effective_ttl(kw)
    kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

    submissions = []
    file_tuples = []
    dispatch_request = None

    # Generate static fileinfo data for each file.
    for file_path in file_paths:
        file_name = os.path.basename(file_path)
        fileinfo = identify.fileinfo(file_path)

        ignore_size = kw.get('ignore_size', False)
        max_size = config.submissions.max.size
        if fileinfo['size'] > max_size and not ignore_size:
            msg = "File too large (%d > %d). Submission failed" % \
                (fileinfo['size'], max_size)
            raise SubmissionException(msg)

        decode_file = forge.get_decode_file()
        temp_path, original_name, fileinfo, al_meta = \
            decode_file(file_path, fileinfo)

        if temp_path:
            file_path = temp_path
            if not original_name:
                original_name = os.path.splitext(file_name)[0]
            file_name = original_name

        sha256 = fileinfo['sha256']

        storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

        file_tuples.append((file_name, sha256))
        if not transport.exists(sha256):
            log.debug('File not on remote filestore. Uploading %s', sha256)
            transport.put(file_path, sha256, location='near')

        if temp_path:
            os.remove(temp_path)

        # We'll just merge the mandatory arguments, fileinfo, and any
        # optional kw and pass those all on to the dispatch callback.
        task_args = fileinfo
        task_args['priority'] = 0  # Just a default.
        task_args.update(kw)
        task_args['srl'] = sha256
        task_args['original_filename'] = file_name
        task_args['path'] = file_name

        if 'metadata' in task_args:
            task_args['metadata'].update(al_meta)
        else:
            task_args['metadata'] = al_meta

        dispatch_request = Task.create(**task_args)
        submissions.append(dispatch_request)

    storage.create_submission(
        dispatch_request.sid,
        dispatch_request.as_submission_record(),
        file_tuples)

    dispatch_queue = forge.get_dispatch_queue()
    for submission in submissions:
        dispatch_queue.submit(submission)

    log.debug("Submission complete. Dispatched: %s", dispatch_request)

    # Ugly - fighting with task to give the UI something that makes sense.
    file_result_tuples = zip(
        file_paths, [submission.raw for submission in submissions])
    result = submissions[0].raw.copy()
    fileinfos = []
    # Use a distinct loop variable ('raw') so the 'result' we return is not
    # clobbered by the loop.
    for filename, raw in file_result_tuples:
        finfo = raw['fileinfo']
        finfo['original_filename'] = os.path.basename(filename)
        finfo['path'] = finfo['original_filename']
        fileinfos.append(finfo)

    result['fileinfo'] = fileinfos
    return result
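# Hedged usage sketch (not part of the original module). submit_inline takes
# local file paths and handles the filestore upload itself. The owning class
# name (SubmissionWrapper) and all literal values below are assumptions for
# illustration; the only kw the code above requires is 'classification', and
# every other kw rides along on the dispatched Task.
def _example_submit_inline():
    storage = forge.get_datastore()
    transport = forge.get_filestore()
    result = SubmissionWrapper.submit_inline(  # hypothetical class name
        storage, transport,
        ['/tmp/sample.bin'],                   # local path(s) to submit
        classification='UNRESTRICTED',         # mandatory: read via kw['classification']
        submitter='local_user',
        metadata={'source': 'manual_drop'})
    return result['fileinfo']                  # per-file info, reshaped above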
def submit_multi(cls, storage, transport, files, **kw):
    """ Submit all files into one submission.

        submit_multi can be used when all the files are already present in
        the file storage.

        files is an array of (name, sha256) tuples.

        Any kw are passed to the Task created to dispatch this submission.
    """
    sid = str(uuid.uuid4())
    classification = kw['classification']

    kw['max_extracted'] = max_extracted(kw)
    kw['max_supplementary'] = max_supplementary(kw)
    kw['ttl'] = ttl = effective_ttl(kw)
    kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

    submissions = []
    temporary_path = None
    dispatch_request = None

    # Generate static fileinfo data for each file.
    for name, sha256 in files:
        local_path = transport.local_path(sha256)

        if not transport.exists(sha256):
            raise SubmissionException(
                'File specified is not on server: %s %s.' % (sha256, str(transport)))

        try:
            if not local_path:
                temporary_path = tempfile.mktemp(prefix="submission.submit_multi")
                transport.download(sha256, temporary_path)
                local_path = temporary_path

            fileinfo = identify.fileinfo(local_path)
            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            decode_file = forge.get_decode_file()
            massaged_path, new_name, fileinfo, al_meta = \
                decode_file(local_path, fileinfo)

            if massaged_path:
                name = new_name
                local_path = massaged_path
                sha256 = fileinfo['sha256']

                if not transport.exists(sha256):
                    transport.put(local_path, sha256)
                storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            ignore_size = kw.get('ignore_size', False)
            max_size = config.submissions.max.size
            if fileinfo['size'] > max_size and not ignore_size:
                msg = "File too large (%d > %d). Submission failed" % \
                    (fileinfo['size'], max_size)
                raise SubmissionException(msg)

            # We'll just merge the mandatory arguments, fileinfo, and any
            # optional kw and pass those all on to the dispatch callback.
            task_args = fileinfo
            task_args['priority'] = 0  # Just a default.
            task_args.update(kw)
            task_args['srl'] = sha256
            task_args['original_filename'] = name
            task_args['sid'] = sid
            task_args['path'] = name

            if 'metadata' in task_args:
                task_args['metadata'].update(al_meta)
            else:
                task_args['metadata'] = al_meta

            dispatch_request = Task.create(**task_args)
            submissions.append(dispatch_request)
        finally:
            if temporary_path:
                try:
                    os.unlink(temporary_path)
                except:  # pylint: disable=W0702
                    pass

    storage.create_submission(
        dispatch_request.sid,
        dispatch_request.as_submission_record(),
        files)

    dispatch_queue = forge.get_dispatch_queue()
    for submission in submissions:
        dispatch_queue.submit(submission)

    log.debug("Submission complete. Dispatched: %s", dispatch_request)

    return submissions[0].raw.copy()
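# Hedged usage sketch (not part of the original module). submit_multi groups
# several already-uploaded files, addressed as (name, sha256) tuples, into a
# single submission under one sid. The class name and all values below are
# illustrative assumptions; the sha256 strings are placeholders.
def _example_submit_multi():
    storage = forge.get_datastore()
    transport = forge.get_filestore()
    files = [
        ('dropper.exe', 'a' * 64),  # placeholder sha256, already on the filestore
        ('payload.dll', 'b' * 64),  # placeholder sha256, already on the filestore
    ]
    return SubmissionWrapper.submit_multi(  # hypothetical class name
        storage, transport, files,
        classification='UNRESTRICTED',
        submitter='local_user')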
def submit(cls, transport, storage, sha256, path, priority, submitter, **kw):
    """ Execute a submit.

        Any kw are passed along in the dispatched request.
    """
    assert_valid_sha256(sha256)
    queue = forge.get_dispatch_queue()

    classification = kw['classification']

    kw['max_extracted'] = max_extracted(kw)
    kw['max_supplementary'] = max_supplementary(kw)
    kw['ttl'] = ttl = effective_ttl(kw)
    kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

    # By the time submit is called, either the file was in our cache and we
    # freshened its ttl, or the client has successfully transferred the file
    # to us.
    local_path = transport.local_path(sha256)

    if not transport.exists(sha256):
        raise SubmissionException(
            'File specified is not on server: %s %s.' % (sha256, str(transport)))

    root_sha256 = sha256
    temporary_path = massaged_path = None
    try:
        if not local_path:
            temporary_path = tempfile.mktemp(prefix="submission.submit")
            transport.download(sha256, temporary_path)
            local_path = temporary_path

        fileinfo = identify.fileinfo(local_path)
        if fileinfo['sha256'] != sha256:
            raise CorruptedFileStoreException(
                'SHA256 mismatch between received and calculated '
                'sha256. %s != %s' % (sha256, fileinfo['sha256']))
        storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

        decode_file = forge.get_decode_file()
        massaged_path, _, fileinfo, al_meta = decode_file(local_path, fileinfo)

        if massaged_path:
            local_path = massaged_path
            sha256 = fileinfo['sha256']

            transport.put(local_path, sha256)
            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

        ignore_size = kw.get('ignore_size', False)
        max_size = config.submissions.max.size
        if fileinfo['size'] > max_size and not ignore_size:
            msg = "File too large (%d > %d). Submission failed" % \
                (fileinfo['size'], max_size)
            raise SubmissionException(msg)

        # We'll just merge the mandatory arguments, fileinfo, and any
        # optional kw and pass those all on to the dispatch callback.
        task_args = fileinfo
        task_args.update(kw)
        task_args.update({
            'original_selected': kw.get('selected', []),
            'root_sha256': root_sha256,
            'srl': sha256,
            'sha256': sha256,
            'priority': priority,
            'submitter': submitter,
            'path': safe_str(path)})

        if 'metadata' in task_args:
            task_args['metadata'].update(al_meta)
        else:
            task_args['metadata'] = al_meta

        submit_task = Task.create(**task_args)
        if submit_task.is_initial():
            storage.create_submission(
                submit_task.sid,
                submit_task.as_submission_record(),
                [(os.path.basename(path), submit_task.srl)])
        log.debug("Submission complete. Dispatching: %s", submit_task)

        queue.send(submit_task, shards=SHARDS)
        return submit_task.raw
    finally:
        if massaged_path:
            try:
                os.unlink(massaged_path)
            except:  # pylint:disable=W0702
                pass
        if temporary_path:
            try:
                os.unlink(temporary_path)
            except:  # pylint:disable=W0702
                pass
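# Hedged usage sketch (not part of the original module). submit handles a
# single file the client has already pushed to the filestore. Note the
# argument order here is (transport, storage), the reverse of submit_inline
# and submit_multi. The class name and all values are illustrative
# assumptions; the sha256 is a placeholder.
def _example_submit():
    storage = forge.get_datastore()
    transport = forge.get_filestore()
    return SubmissionWrapper.submit(  # hypothetical class name
        transport, storage,
        sha256='a' * 64,              # placeholder: file already on the filestore
        path='sample.bin',
        priority=1000,
        submitter='local_user',
        classification='UNRESTRICTED')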