Esempio n. 1
0
    def __init__(self, service_manager,  # pylint: disable=R0913
                 control_queue=None,
                 debug=False,
                 high=(config.core.dispatcher.max.inflight /
                       config.core.dispatcher.shards),
                 pop=forge.get_dispatch_queue().pop,
                 shard='0'):
        """Initialise the state held by one dispatcher shard.

        service_manager -- kept on the instance; its ``services`` attribute
                           is listed in the start-up log message.
        control_queue   -- queue to use; when falsy one is requested from
                           forge under the name 'control-queue-<shard>'.
        debug           -- when true ``self.debug`` is ``log.info``,
                           otherwise it is a callable that does nothing.
        high            -- stored as ``self.high``. The default is computed
                           once, when the function is defined, from config.
        pop             -- callable stored as ``self.pop``. The default is
                           bound once, when the function is defined.
        shard           -- string suffix used to build the queue names.
        """
        # Debug messages go to the logger only when asked for.
        self.debug = log.info if debug else (lambda *msg: None)

        # NOTE(review): the first key is literally 'ip:' (trailing colon
        # included) -- confirm that readers of hostinfo expect that spelling.
        host_details = {}
        host_details['ip:'] = get_hostip()
        host_details['mac_address'] = get_mac_address()
        host_details['host'] = get_hostname()
        self.hostinfo = host_details

        # Values handed in by the caller.
        self.service_manager = service_manager
        self.shard = shard
        self.high = high
        self.pop = pop

        # Flags and counters.
        self.drain = False
        self.running = False
        self.last_check = 0

        # Lookup tables, all empty to begin with.
        self.ack_timeout = {}
        self.child_timeout = {}
        self.completed = {}
        self.entries = {}
        self.errors = {}
        self.queue_size = {}
        self.results = {}
        self.score = {}
        self.service_timeout = {}
        self.watchers = {}

        # Queues and synchronisation.
        if not control_queue:
            control_queue = forge.get_control_queue('control-queue-' + shard)
        self.control_queue = control_queue
        self.ingest_queue = 'ingest-queue-' + shard
        self.lock = threading.Lock()
        # Response queues are named: <hostname>-<pid>-<seconds>-<shard>.
        name_parts = (
            socket.gethostname(),
            str(os.getpid()),
            str(int(time.time())),
            shard,
        )
        self.response_queue = '-'.join(name_parts)
        self.storage_queue = LocalQueue()

        service_names = list(service_manager.services)
        log.info('Dispatcher started. Dispatching to services:{0}'.format(
            service_names))
Esempio n. 2
0
    def _drain(self):

        with self._current_work_items_lock:
            if not self._current_work_items:
                self.log.info('EXIT_DRAIN:0')
                return

            result_store = forge.get_datastore()
            dispatch_queue = forge.get_dispatch_queue()
            self.log.info('EXIT_DRAIN:%s', len(self._current_work_items))
            for item in self._current_work_items:
                work = Task(item)
                task = Task({})
                task.sid = work.sid
                task.srl = work.srl
                task.dispatch_queue = work.dispatch_queue
                task.classification = work.classification
                self.log.info("DRAIN: %s/%s", task.sid, task.srl)
                task.watermark(self.service_cls.SERVICE_NAME, None)
                task.recoverable_failure(
                    'Task was pre-empted (shutdown, vm revert or cull)')
                task.cache_key = result_store.save_error(
                    self.service_cls.SERVICE_NAME, None, None, task)
                dispatch_queue.send_raw(task.as_dispatcher_response())
Esempio n. 3
0
def resubmit(submission):
    """Send *submission* to the dispatch queue again.

    The 'times' entry is deleted from the given submission in place before
    it is sent as a raw message; a plain dict without that entry raises
    KeyError.
    """
    del submission['times']
    queue = forge.get_dispatch_queue()
    queue.send_raw(submission)
Esempio n. 4
0
    def submit_multi(cls, storage, transport, files, **kw):
        """ Submit all files into one submission

            submit_multi can be used when all the files are already present in the
            file storage.

            files is an array of (name, sha256) tuples

            Any kw are passed to the Task created to dispatch this submission.
            kw must contain 'classification'; 'ignore_size' (default False)
            disables the maximum size check.

            Returns a copy of the raw form of the first task created.

            Raises SubmissionException when no files are given, when a file
            is not present on the transport, or when a file exceeds the
            configured maximum size and ignore_size is not set.
        """
        sid = str(uuid.uuid4())
        classification = kw['classification']

        kw['max_extracted'] = max_extracted(kw)
        kw['max_supplementary'] = max_supplementary(kw)
        kw['ttl'] = ttl = effective_ttl(kw)
        kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

        # These do not change from one file to the next.
        ignore_size = kw.get('ignore_size', False)
        max_size = config.submissions.max.size
        decode_file = forge.get_decode_file()

        submissions = []
        # Generate static fileinfo data for each file.
        for name, sha256 in files:
            # Track the temporary files of THIS file only, so the cleanup
            # below never acts on a path left over from an earlier file.
            temporary_path = massaged_path = None
            local_path = transport.local_path(sha256)

            if not transport.exists(sha256):
                raise SubmissionException('File specified is not on server: %s %s.' % (sha256, str(transport)))

            try:
                if not local_path:
                    # NOTE(review): tempfile.mktemp is race-prone; switching
                    # to mkstemp needs transport.download to accept an
                    # already existing destination -- confirm before changing.
                    temporary_path = tempfile.mktemp(prefix="submission.submit_multi")
                    transport.download(sha256, temporary_path)
                    local_path = temporary_path

                fileinfo = identify.fileinfo(local_path)
                storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

                massaged_path, new_name, fileinfo, al_meta = \
                    decode_file(local_path, fileinfo)

                if massaged_path:
                    # The decoded file replaces the original from here on.
                    name = new_name
                    local_path = massaged_path
                    sha256 = fileinfo['sha256']

                    if not transport.exists(sha256):
                        transport.put(local_path, sha256)
                    storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

                if fileinfo['size'] > max_size and not ignore_size:
                    msg = "File too large (%d > %d). Submission failed" % (fileinfo['size'], max_size)
                    raise SubmissionException(msg)

                # We'll just merge the mandatory arguments, fileinfo, and any
                # optional kw and pass those all on to the dispatch callback.
                task_args = fileinfo
                task_args['priority'] = 0  # Just a default.
                task_args.update(kw)
                task_args['srl'] = sha256
                task_args['original_filename'] = name
                task_args['sid'] = sid
                task_args['path'] = name

                # NOTE(review): when kw supplies a 'metadata' dict, task_args
                # holds that same object, so this update also alters
                # kw['metadata'] and carries al_meta over to the files
                # processed afterwards -- confirm that is intended.
                if 'metadata' in task_args:
                    task_args['metadata'].update(al_meta)
                else:
                    task_args['metadata'] = al_meta

                submissions.append(Task.create(**task_args))
            finally:
                # Best-effort removal of both the decoded file and the
                # downloaded copy; a file that is already gone is not an error.
                for leftover in (massaged_path, temporary_path):
                    if leftover:
                        try:
                            os.unlink(leftover)
                        except OSError:
                            pass

        if not submissions:
            raise SubmissionException('No files were specified for this submission.')

        # The submission record is built from the last task created.
        dispatch_request = submissions[-1]
        storage.create_submission(
            dispatch_request.sid,
            dispatch_request.as_submission_record(),
            files)

        dispatch_queue = forge.get_dispatch_queue()
        for submission in submissions:
            dispatch_queue.submit(submission)

        log.debug("Submission complete. Dispatched: %s", dispatch_request)
        return submissions[0].raw.copy()
Esempio n. 5
0
    def submit_inline(cls, storage, transport, file_paths, **kw):
        """ Submit local samples to the submission service.

            submit_inline can be used when the sample to submit is already
            local to the submission service. It does the presubmit, filestore
            upload and submit.

            Any kw are passed to the Task created to dispatch this submission.
            kw must contain 'classification'; 'ignore_size' (default False)
            disables the maximum size check.

            Returns a copy of the raw form of the first task created, with
            its 'fileinfo' entry replaced by the list of fileinfo entries of
            all submitted files.

            Raises SubmissionException when no file paths are given or when
            a file exceeds the configured maximum size and ignore_size is
            not set.
        """
        classification = kw['classification']

        kw['max_extracted'] = max_extracted(kw)
        kw['max_supplementary'] = max_supplementary(kw)
        kw['ttl'] = ttl = effective_ttl(kw)
        kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

        # These do not change from one file to the next.
        ignore_size = kw.get('ignore_size', False)
        max_size = config.submissions.max.size
        decode_file = forge.get_decode_file()

        submissions = []
        file_tuples = []
        # Generate static fileinfo data for each file.
        for file_path in file_paths:

            file_name = os.path.basename(file_path)
            fileinfo = identify.fileinfo(file_path)

            if fileinfo['size'] > max_size and not ignore_size:
                msg = "File too large (%d > %d). Submission Failed" % \
                      (fileinfo['size'], max_size)
                raise SubmissionException(msg)

            temp_path, original_name, fileinfo, al_meta = \
                decode_file(file_path, fileinfo)

            try:
                if temp_path:
                    # The decoded file replaces the original from here on.
                    file_path = temp_path
                    if not original_name:
                        original_name = os.path.splitext(file_name)[0]
                    file_name = original_name

                sha256 = fileinfo['sha256']

                storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

                file_tuples.append((file_name, sha256))

                if not transport.exists(sha256):
                    log.debug('File not on remote filestore. Uploading %s', sha256)
                    transport.put(file_path, sha256, location='near')
            finally:
                # Remove the decoded copy even when storing or uploading
                # fails, so it is not left behind.
                if temp_path:
                    os.remove(temp_path)

            # We'll just merge the mandatory arguments, fileinfo, and any
            # optional kw and pass those all on to the dispatch callback.
            task_args = fileinfo
            task_args['priority'] = 0  # Just a default.
            task_args.update(kw)
            task_args['srl'] = sha256
            task_args['original_filename'] = file_name
            task_args['path'] = file_name

            # NOTE(review): when kw supplies a 'metadata' dict, task_args
            # holds that same object, so this update also alters
            # kw['metadata'] and carries al_meta over to the files processed
            # afterwards -- confirm that is intended.
            if 'metadata' in task_args:
                task_args['metadata'].update(al_meta)
            else:
                task_args['metadata'] = al_meta

            submissions.append(Task.create(**task_args))

        if not submissions:
            raise SubmissionException('No files were specified for this submission.')

        # The submission record is built from the last task created.
        dispatch_request = submissions[-1]
        storage.create_submission(
            dispatch_request.sid,
            dispatch_request.as_submission_record(),
            file_tuples)

        dispatch_queue = forge.get_dispatch_queue()
        for submission in submissions:
            dispatch_queue.submit(submission)

        log.debug("Submission complete. Dispatched: %s", dispatch_request)

        # Ugly - fighting with task to give UI something that makes sense.
        # The loop uses its own names so the response copy built here is the
        # object that gets filled in and returned.
        response = submissions[0].raw.copy()
        fileinfos = []
        for original_path, request in zip(file_paths, submissions):
            finfo = request.raw['fileinfo']
            finfo['original_filename'] = os.path.basename(original_path)
            finfo['path'] = finfo['original_filename']
            fileinfos.append(finfo)
        response['fileinfo'] = fileinfos
        return response
Esempio n. 6
0
    def submit(cls, transport, storage, sha256, path, priority, submitter, **kw):
        """ Execute a submit.

        Any kw are passed along in the dispatched request. kw must contain
        'classification'; 'ignore_size' (default False) disables the maximum
        size check.

        Returns the raw form of the task that was sent to the dispatch queue.

        Raises SubmissionException when the file is not present on the
        transport or exceeds the configured maximum size, and
        CorruptedFileStoreException when the sha256 computed from the file
        differs from the one given.

        """
        assert_valid_sha256(sha256)
        queue = forge.get_dispatch_queue()

        classification = kw['classification']

        kw['max_extracted'] = max_extracted(kw)
        kw['max_supplementary'] = max_supplementary(kw)
        kw['ttl'] = ttl = effective_ttl(kw)
        kw['__expiry_ts__'] = expiry = ttl_to_expiry(ttl)

        # By the time submit is called, either the file was in our cache
        # and we freshed its ttl or the client has successfully transfered
        # the file to us.
        local_path = transport.local_path(sha256)

        if not transport.exists(sha256):
            raise SubmissionException('File specified is not on server: %s %s.' % (sha256, str(transport)))

        # Remember the hash that was submitted; sha256 may be replaced below
        # by the hash of the decoded file.
        root_sha256 = sha256
        temporary_path = massaged_path = None
        try:
            if not local_path:
                # NOTE(review): tempfile.mktemp is race-prone; switching to
                # mkstemp needs transport.download to accept an already
                # existing destination -- confirm before changing.
                temporary_path = tempfile.mktemp(prefix="submission.submit")
                transport.download(sha256, temporary_path)
                local_path = temporary_path

            fileinfo = identify.fileinfo(local_path)
            if fileinfo['sha256'] != sha256:
                raise CorruptedFileStoreException('SHA256 mismatch between received '
                                                  'and calculated sha256. %s != %s' % (sha256, fileinfo['sha256']))
            storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            decode_file = forge.get_decode_file()
            massaged_path, _, fileinfo, al_meta = decode_file(local_path, fileinfo)

            if massaged_path:
                # The decoded file replaces the original from here on.
                local_path = massaged_path
                sha256 = fileinfo['sha256']

                transport.put(local_path, sha256)
                storage.save_or_freshen_file(sha256, fileinfo, expiry, classification)

            ignore_size = kw.get('ignore_size', False)
            max_size = config.submissions.max.size
            if fileinfo['size'] > max_size and not ignore_size:
                msg = "File too large (%d > %d). Submission failed" % (fileinfo['size'], max_size)
                raise SubmissionException(msg)

            # We'll just merge the mandatory arguments, fileinfo, and any
            # optional kw and pass those all on to the dispatch callback.
            task_args = fileinfo
            task_args.update(kw)
            task_args.update({
                'original_selected': kw.get('selected', []),
                'root_sha256': root_sha256,
                'srl': sha256,
                'sha256': sha256,
                'priority': priority,
                'submitter': submitter,
                'path': safe_str(path)})

            if 'metadata' in task_args:
                # Merge into a copy so the metadata dict handed in by the
                # caller is not modified behind its back.
                merged_metadata = dict(task_args['metadata'])
                merged_metadata.update(al_meta)
                task_args['metadata'] = merged_metadata
            else:
                task_args['metadata'] = al_meta

            submit_task = Task.create(**task_args)
            # A submission record is only written when the task reports
            # itself as initial.
            if submit_task.is_initial():
                storage.create_submission(
                    submit_task.sid,
                    submit_task.as_submission_record(),
                    [(os.path.basename(path), submit_task.srl)])
            log.debug("Submission complete. Dispatching: %s", submit_task)

            queue.send(submit_task, shards=SHARDS)

            return submit_task.raw
        finally:
            # Best-effort removal of the decoded file and the downloaded
            # copy. Only filesystem errors are ignored, so interrupts and
            # interpreter exit requests are no longer swallowed here.
            for leftover in (massaged_path, temporary_path):
                if leftover:
                    try:
                        os.unlink(leftover)
                    except OSError:
                        pass
Esempio n. 7
0
#!/usr/bin/env python

import logging
import time

from assemblyline.al.common import forge
from assemblyline.al.common import log
from assemblyline.al.common.task import Task
from assemblyline.al.service.list_queue_sizes import get_service_queue_lengths

# Initialise logging under the 'plumber' name and fetch this script's logger.
log.init_logging('plumber')
logger = logging.getLogger('assemblyline.plumber')

# Shared handles, obtained once through forge when the module is loaded.
dispatch_queue = forge.get_dispatch_queue()
store = forge.get_datastore()
config = forge.get_config()
# Cache of service queues, keyed by the name passed to get_queue().
service_queue = {}
# NOTE(review): filled in outside the part of the script shown here --
# presumably per-service thresholds; confirm against the rest of the file.
threshold = {}


def get_queue(n):
    """Return the service queue for *n*, creating and caching it on demand.

    A queue already stored in ``service_queue`` is reused only when it is
    truthy; otherwise a new one is requested from forge and stored under *n*.
    """
    cached = service_queue.get(n)
    if cached:
        return cached

    fresh = forge.get_service_queue(n)
    service_queue[n] = fresh
    return fresh

for service in store.list_services():
    # noinspection PyBroadException
    try:
        name = service.get('name')