Example #1
# NOTE: this excerpt assumes the surrounding Assemblyline test harness;
# `mocks`, `forge`, and `Task` are project modules, not stdlib.
import functools
import json
import logging
import pprint
import sys
import time
from itertools import chain


def scan_file(svc_class, sha256, **kwargs):
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

    # Don't use SRL path normalization for filenames (e.g. 1/2/3/4/1234mysha256).

    # We use mocks for the dispatcher, result store, etc. that inject the
    # results into these lists.

    dispatch_result_collector = mocks.MockDispatchCollector()
    result_store_good = {}
    result_store_bad = {}
    children = []
    supplementary = []

    # Read the real service config before forge.get_datastore is mocked out below.
    cfg = forge.get_datastore().get_service(svc_class.SERVICE_NAME).get(
        "config", {})

    # Monkey-patch the forge factories so the service runs against local mocks.
    forge.get_filestore = functools.partial(mocks.get_local_transport, '.')
    forge.get_submit_client = functools.partial(mocks.get_mock_submit_client,
                                                children, supplementary)
    forge.get_dispatch_queue = lambda: dispatch_result_collector
    forge.get_datastore = functools.partial(mocks.get_mock_result_store,
                                            result_store_good,
                                            result_store_bad)

    service = svc_class(cfg)
    service.start_service()

    # Run the input through the service. Children end up in the children list
    # and results in the mock result stores; the actual fleshed-out service
    # results would normally land in Riak.
    task = Task.create(srl=sha256,
                       ignore_cache=True,
                       submitter='local_soak_test',
                       **kwargs)
    start = time.time()
    if service.BATCH_SERVICE:
        service._handle_task_batch([
            task,
        ])
    else:
        service._handle_task(task)
    end = time.time()
    duration = end - start
    print 'Duration: %s' % duration

    (serviced_ok, serviced_fail_recover, serviced_fail_nonrecover
     ) = dispatch_result_collector.get_serviced_results()

    for response in chain(serviced_ok, serviced_fail_recover,
                          serviced_fail_nonrecover):
        # TODO: we should be able to find it by key in our result_store_good
        if 'response' in response and 'cache_key' in response['response']:
            if response['response']['cache_key'] not in result_store_good:
                print "Appear to be missing result in result store"
        pprint.pprint(response)

    for (_key, full_result) in result_store_good.items():
        if full_result and 'result' in full_result:
            pprint.pprint(full_result)
            # Smoke-test that the full result serializes cleanly to JSON.
            json.dumps(full_result, ensure_ascii=True).encode('utf-8')

    service.stop_service()
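
A minimal driver sketch for the harness above. MyService and the hash are
hypothetical placeholders; substitute a real Assemblyline service class and
the sha256 of a file present in the local transport:

    # Hypothetical usage: push one sample through a service under the mocks above.
    from my_services.my_service import MyService  # illustrative import

    scan_file(MyService, '0' * 64)  # placeholder sha256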
Example #2
    def submit_inline(cls, storage, transport, file_paths, **kw):
        """ Submit local samples to the submission service.

            submit_inline can be used when the sample to submit is already
            local to the submission service. It does the presubmit, filestore
            upload, and submit steps.

            Any kw are passed to the Task created to dispatch this submission.
        """
        classification = kw['classification']

        kw['max_extracted'] = max_extracted(kw)
        kw['max_supplementary'] = max_supplementary(kw)
        kw['ttl'] = ttl = effective_ttl(kw)
        kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

        submissions = []
        file_tuples = []
        dispatch_request = None
        # Generate static fileinfo data for each file.
        for file_path in file_paths:

            file_name = os.path.basename(file_path)
            fileinfo = identify.fileinfo(file_path)

            ignore_size = kw.get('ignore_size', False)
            max_size = config.submissions.max.size
            if fileinfo['size'] > max_size and not ignore_size:
                msg = "File too large (%d > %d). Submission Failed" % \
                      (fileinfo['size'], max_size)
                raise SubmissionException(msg)

            decode_file = forge.get_decode_file()
            temp_path, original_name, fileinfo, al_meta = \
                decode_file(file_path, fileinfo)

            if temp_path:
                file_path = temp_path
                if not original_name:
                    original_name = os.path.splitext(file_name)[0]
                file_name = original_name

            sha256 = fileinfo['sha256']

            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            file_tuples.append((file_name, sha256))

            if not transport.exists(sha256):
                log.debug('File not on remote filestore. Uploading %s', sha256)
                transport.put(file_path, sha256, location='near')

            if temp_path:
                os.remove(temp_path)

            # We'll just merge the mandatory arguments, fileinfo, and any
            # optional kw and pass those all on to the dispatch callback.
            # Note: task_args aliases fileinfo, so fileinfo is mutated below.
            task_args = fileinfo
            task_args['priority'] = 0  # Just a default.
            task_args.update(kw)
            task_args['srl'] = sha256
            task_args['original_filename'] = file_name
            task_args['path'] = file_name

            if 'metadata' in task_args:
                task_args['metadata'].update(al_meta)
            else:
                task_args['metadata'] = al_meta

            dispatch_request = Task.create(**task_args)
            submissions.append(dispatch_request)

        storage.create_submission(
            dispatch_request.sid,
            dispatch_request.as_submission_record(),
            file_tuples)

        dispatch_queue = forge.get_dispatch_queue()
        for submission in submissions:
            dispatch_queue.submit(submission)

        log.debug("Submission complete. Dispatched: %s", dispatch_request)

        # Ugly - fighting with task to give the UI something that makes sense.
        file_result_tuples = zip(
            file_paths, [submission.raw for submission in submissions])
        result = submissions[0].raw.copy()
        fileinfos = []
        # Don't shadow `result` with the loop variable here, or the copy
        # above gets clobbered and the last submission's raw record mutated.
        for filename, raw in file_result_tuples:
            finfo = raw['fileinfo']
            finfo['original_filename'] = os.path.basename(filename)
            finfo['path'] = finfo['original_filename']
            fileinfos.append(finfo)
        result['fileinfo'] = fileinfos
        return result
        return result
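
A hedged usage sketch for submit_inline, assuming these classmethods hang off
the codebase's SubmissionWrapper class (as the cls argument suggests); the
paths and classification are illustrative placeholders:

    # Hypothetical caller: submit two local files as a single submission.
    storage = forge.get_datastore()    # factory used elsewhere in this codebase
    transport = forge.get_filestore()  # factory used elsewhere in this codebase
    result = SubmissionWrapper.submit_inline(
        storage, transport,
        ['/tmp/sample1.bin', '/tmp/sample2.bin'],
        classification='UNRESTRICTED',  # placeholder label
        submitter='example_user')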
Example #3
    def submit_multi(cls, storage, transport, files, **kw):
        """ Submit all files into one submission

            submit_multi can be used when all the files are already present in the
            file storage.

            files is an array of (name, sha256) tuples

            Any kw are passed to the Task created to dispatch this submission.
        """
        sid = str(uuid.uuid4())
        classification = kw['classification']

        kw['max_extracted'] = max_extracted(kw)
        kw['max_supplementary'] = max_supplementary(kw)
        kw['ttl'] = ttl = effective_ttl(kw)
        kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

        submissions = []
        temporary_path = None
        dispatch_request = None
        # Generate static fileinfo data for each file.
        for name, sha256 in files:
            local_path = transport.local_path(sha256)

            if not transport.exists(sha256):
                raise SubmissionException('File specified is not on server: %s %s.' % (sha256, str(transport)))

            try:
                if not local_path:
                    temporary_path = tempfile.mktemp(prefix="submission.submit_multi")
                    transport.download(sha256, temporary_path)
                    local_path = temporary_path

                fileinfo = identify.fileinfo(local_path)
                storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

                decode_file = forge.get_decode_file()
                massaged_path, new_name, fileinfo, al_meta = \
                    decode_file(local_path, fileinfo)

                if massaged_path:
                    name = new_name
                    local_path = massaged_path
                    sha256 = fileinfo['sha256']

                    if not transport.exists(sha256):
                        transport.put(local_path, sha256)
                    storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

                ignore_size = kw.get('ignore_size', False)
                max_size = config.submissions.max.size
                if fileinfo['size'] > max_size and not ignore_size:
                    msg = "File too large (%d > %d). Submission failed" % (fileinfo['size'], max_size)
                    raise SubmissionException(msg)

                # We'll just merge the mandatory arguments, fileinfo, and any
                # optional kw and pass those all on to the dispatch callback.
                task_args = fileinfo
                task_args['priority'] = 0  # Just a default.
                task_args.update(kw)
                task_args['srl'] = sha256
                task_args['original_filename'] = name
                task_args['sid'] = sid
                task_args['path'] = name

                if 'metadata' in task_args:
                    task_args['metadata'].update(al_meta)
                else:
                    task_args['metadata'] = al_meta

                dispatch_request = Task.create(**task_args)
                submissions.append(dispatch_request)
            finally:
                if temporary_path:
                    try:
                        os.unlink(temporary_path)
                    except:  # pylint: disable=W0702
                        pass
                    # Reset so later iterations don't try to unlink it again.
                    temporary_path = None

        storage.create_submission(
            dispatch_request.sid,
            dispatch_request.as_submission_record(),
            files)

        dispatch_queue = forge.get_dispatch_queue()
        for submission in submissions:
            dispatch_queue.submit(submission)

        log.debug("Submission complete. Dispatched: %s", dispatch_request)
        return submissions[0].raw.copy()
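
The same pattern for submit_multi; the (name, sha256) tuples stand in for
files already present in the filestore:

    # Hypothetical caller: group two already-stored files into one submission.
    files = [('report.pdf', '0' * 64),    # placeholder sha256 values
             ('dropper.exe', '1' * 64)]
    raw = SubmissionWrapper.submit_multi(
        storage, transport, files,
        classification='UNRESTRICTED',
        submitter='example_user')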
Example #4
    def submit(cls, transport, storage, sha256, path, priority, submitter, **kw):
        """ Execute a submit.

        Any kw are passed along in the dispatched request.

        """
        assert_valid_sha256(sha256)
        queue = forge.get_dispatch_queue()

        classification = kw['classification']

        kw['max_extracted'] = max_extracted(kw)
        kw['max_supplementary'] = max_supplementary(kw)
        kw['ttl'] = ttl = effective_ttl(kw)
        kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

        # By the time submit is called, either the file was in our cache
        # and we refreshed its ttl or the client has successfully transferred
        # the file to us.
        local_path = transport.local_path(sha256)

        if not transport.exists(sha256):
            raise SubmissionException('File specified is not on server: %s %s.' % (sha256, str(transport)))

        root_sha256 = sha256
        temporary_path = massaged_path = None
        try:
            if not local_path:
                temporary_path = tempfile.mktemp(prefix="submission.submit")
                transport.download(sha256, temporary_path)
                local_path = temporary_path

            fileinfo = identify.fileinfo(local_path)
            if fileinfo['sha256'] != sha256:
                raise CorruptedFileStoreException('SHA256 mismatch between received '
                                                  'and calculated sha256. %s != %s' % (sha256, fileinfo['sha256']))
            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            decode_file = forge.get_decode_file()
            massaged_path, _, fileinfo, al_meta = decode_file(local_path, fileinfo)

            if massaged_path:
                local_path = massaged_path
                sha256 = fileinfo['sha256']

                transport.put(local_path, sha256)
                storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            ignore_size = kw.get('ignore_size', False)
            max_size = config.submissions.max.size
            if fileinfo['size'] > max_size and not ignore_size:
                msg = "File too large (%d > %d). Submission failed" % (fileinfo['size'], max_size)
                raise SubmissionException(msg)

            # We'll just merge the mandatory arguments, fileinfo, and any
            # optional kw and pass those all on to the dispatch callback.
            task_args = fileinfo
            task_args.update(kw)
            task_args.update({
                'original_selected': kw.get('selected', []),
                'root_sha256': root_sha256,
                'srl': sha256,
                'sha256': sha256,
                'priority': priority,
                'submitter': submitter,
                'path': safe_str(path)})

            if 'metadata' in task_args:
                task_args['metadata'].update(al_meta)
            else:
                task_args['metadata'] = al_meta

            submit_task = Task.create(**task_args)
            if submit_task.is_initial():
                storage.create_submission(
                    submit_task.sid,
                    submit_task.as_submission_record(),
                    [(os.path.basename(path), submit_task.srl)])
            log.debug("Submission complete. Dispatching: %s", submit_task)

            queue.send(submit_task, shards=SHARDS)

            return submit_task.raw
        finally:
            if massaged_path:
                try:
                    os.unlink(massaged_path)
                except:  # pylint:disable=W0702
                    pass

            if temporary_path:
                try:
                    os.unlink(temporary_path)
                except:  # pylint:disable=W0702
                    pass
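
And a matching sketch for the single-file submit path. Note that the argument
order differs from submit_inline (transport before storage); all values are
placeholders:

    # Hypothetical caller: dispatch one file already uploaded to the filestore.
    raw = SubmissionWrapper.submit(
        transport, storage,
        sha256='0' * 64,  # placeholder; must satisfy assert_valid_sha256
        path='/incoming/sample.bin',
        priority=100,
        submitter='example_user',
        classification='UNRESTRICTED')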