コード例 #1
0
    def __init__(self,
                 datastore=None,
                 redis=None,
                 redis_persist=None,
                 logger=None):
        super().__init__('assemblyline.dispatcher.file', logger)

        config: Config = forge.get_config()
        datastore: AssemblylineDatastore = datastore or forge.get_datastore(
            config)
        self.dispatcher = Dispatcher(redis=redis,
                                     redis_persist=redis_persist,
                                     datastore=datastore,
                                     logger=self.log)

        if config.core.metrics.apm_server.server_url is not None:
            self.log.info(
                f"Exporting application metrics to: {config.core.metrics.apm_server.server_url}"
            )
            elasticapm.instrument()
            self.apm_client = elasticapm.Client(
                server_url=config.core.metrics.apm_server.server_url,
                service_name="dispatcher")
        else:
            self.apm_client = None
コード例 #2
0
def test_dispatch_extracted(clean_redis, clean_datastore):
    redis = clean_redis
    ds = clean_datastore

    # def service_queue(name): return get_service_queue(name, redis)

    # Setup the fake datastore
    file_hash = get_random_hash(64)
    second_file_hash = get_random_hash(64)

    for fh in [file_hash, second_file_hash]:
        obj = random_model_obj(models.file.File)
        obj.sha256 = fh
        ds.file.save(fh, obj)

    # Inject the fake submission
    submission = random_model_obj(models.submission.Submission)
    submission.files = [dict(name='./file', sha256=file_hash)]
    sid = submission.sid = 'first-submission'

    disp = Dispatcher(ds, redis, redis)
    disp.running = ToggleTrue()
    client = DispatchClient(ds, redis, redis)
    client.dispatcher_data_age = time.time()
    client.dispatcher_data.append(disp.instance_id)

    # Launch the submission
    client.dispatch_submission(submission)
    disp.pull_submissions()
    disp.service_worker(disp.process_queue_index(sid))

    # Finish one service extracting a file
    job = client.request_work('0', 'extract', '0')
    assert job.fileinfo.sha256 == file_hash
    assert job.filename == './file'
    new_result: Result = random_minimal_obj(Result)
    new_result.sha256 = file_hash
    new_result.response.service_name = 'extract'
    new_result.response.extracted = [
        dict(sha256=second_file_hash,
             name='second-*',
             description='abc',
             classification='U')
    ]
    client.service_finished(sid, 'extracted-done', new_result)

    # process the result
    disp.pull_service_results()
    disp.service_worker(disp.process_queue_index(sid))
    disp.service_worker(disp.process_queue_index(sid))

    #
    job = client.request_work('0', 'extract', '0')
    assert job.fileinfo.sha256 == second_file_hash
    assert job.filename == 'second-*'
コード例 #3
0
def test_dispatch_submission(clean_redis):
    ds = MockDatastore(
        collections=['submission', 'result', 'service', 'error', 'file'])
    file_hash = get_random_hash(64)

    ds.file.save(file_hash, random_model_obj(models.file.File))
    ds.file.get(file_hash).sha256 = file_hash
    # ds.file.get(file_hash).sha256 = ''

    submission = random_model_obj(models.submission.Submission)
    submission.files.clear()
    submission.files.append(dict(name='./file', sha256=file_hash))

    submission.sid = 'first-submission'

    disp = Dispatcher(ds,
                      logger=logging,
                      redis=clean_redis,
                      redis_persist=clean_redis)
    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    task = SubmissionTask(dict(submission=submission))
    disp.dispatch_submission(task)

    file_task = FileTask(disp.file_queue.pop())
    assert file_task.sid == submission.sid
    assert file_task.file_info.sha256 == file_hash
    assert file_task.depth == 0
    assert file_task.file_info.type == ds.file.get(file_hash).type

    dh = DispatchHash(submission.sid, clean_redis)
    for service_name in disp.scheduler.services.keys():
        dh.fail_nonrecoverable(file_hash, service_name, 'error-code')

    disp.dispatch_submission(task)
    assert ds.submission.get(submission.sid).state == 'completed'
    assert ds.submission.get(
        submission.sid).errors == ['error-code'] * len(disp.scheduler.services)
コード例 #4
0
def test_simple(clean_redis, clean_datastore):
    ds = clean_datastore
    redis = clean_redis

    def service_queue(name):
        return get_service_queue(name, redis)

    file = random_model_obj(File)
    file_hash = file.sha256
    file.type = 'unknown'
    ds.file.save(file_hash, file)

    sub: Submission = random_model_obj(models.submission.Submission)
    sub.sid = sid = 'first-submission'
    sub.params.ignore_cache = False
    sub.params.max_extracted = 5
    sub.params.classification = get_classification().UNRESTRICTED
    sub.params.initial_data = json.dumps({'cats': 'big'})
    sub.files = [dict(sha256=file_hash, name='file')]

    disp = Dispatcher(ds, redis, redis)
    disp.running = ToggleTrue()
    client = DispatchClient(ds, redis, redis)
    client.dispatcher_data_age = time.time()
    client.dispatcher_data.append(disp.instance_id)

    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    logger.info('==== first dispatch')
    # task = SubmissionTask(sub.as_primitives(), 'some-completion-queue')
    client.dispatch_submission(sub)
    disp.pull_submissions()
    disp.service_worker(disp.process_queue_index(sid))
    task = disp.tasks.get(sid)

    assert task.queue_keys[(file_hash, 'extract')] is not None
    assert task.queue_keys[(file_hash, 'wrench')] is not None
    assert service_queue('extract').length() == 1
    assert service_queue('wrench').length() == 1

    # Making the same call again will queue it up again
    logger.info('==== second dispatch')
    disp.dispatch_file(task, file_hash)

    assert task.queue_keys[(file_hash, 'extract')] is not None
    assert task.queue_keys[(file_hash, 'wrench')] is not None
    assert service_queue('extract').length() == 1  # the queue doesn't pile up
    assert service_queue('wrench').length() == 1

    logger.info('==== third dispatch')
    job = client.request_work('0', 'extract', '0')
    assert job.temporary_submission_data == [{'name': 'cats', 'value': 'big'}]
    client.service_failed(sid, 'abc123', make_error(file_hash, 'extract'))
    # Deliberately do in the wrong order to make sure that works
    disp.pull_service_results()
    disp.service_worker(disp.process_queue_index(sid))

    assert task.queue_keys[(file_hash, 'extract')] is not None
    assert task.queue_keys[(file_hash, 'wrench')] is not None
    assert service_queue('extract').length() == 1

    # Mark extract as finished, wrench as failed
    logger.info('==== fourth dispatch')
    client.request_work('0', 'extract', '0')
    client.request_work('0', 'wrench', '0')
    client.service_finished(sid, 'extract-result',
                            make_result(file_hash, 'extract'))
    client.service_failed(sid, 'wrench-error',
                          make_error(file_hash, 'wrench', False))
    for _ in range(2):
        disp.pull_service_results()
        disp.service_worker(disp.process_queue_index(sid))

    assert wait_error(task, file_hash, 'wrench')
    assert wait_result(task, file_hash, 'extract')
    assert service_queue('av-a').length() == 1
    assert service_queue('av-b').length() == 1
    assert service_queue('frankenstrings').length() == 1

    # Have the AVs fail, frankenstrings finishes
    logger.info('==== fifth dispatch')
    client.request_work('0', 'av-a', '0')
    client.request_work('0', 'av-b', '0')
    client.request_work('0', 'frankenstrings', '0')
    client.service_failed(sid, 'av-a-error',
                          make_error(file_hash, 'av-a', False))
    client.service_failed(sid, 'av-b-error',
                          make_error(file_hash, 'av-b', False))
    client.service_finished(sid, 'f-result',
                            make_result(file_hash, 'frankenstrings'))
    for _ in range(3):
        disp.pull_service_results()
        disp.service_worker(disp.process_queue_index(sid))

    assert wait_result(task, file_hash, 'frankenstrings')
    assert wait_error(task, file_hash, 'av-a')
    assert wait_error(task, file_hash, 'av-b')
    assert service_queue('xerox').length() == 1

    # Finish the xerox service and check if the submission completion got checked
    logger.info('==== sixth dispatch')
    client.request_work('0', 'xerox', '0')
    client.service_finished(sid, 'xerox-result-key',
                            make_result(file_hash, 'xerox'))
    disp.pull_service_results()
    disp.service_worker(disp.process_queue_index(sid))
    disp.save_submission()

    assert wait_result(task, file_hash, 'xerox')
    assert disp.tasks.get(sid) is None
コード例 #5
0
from assemblyline_core.dispatching.dispatcher import Dispatcher

with Dispatcher() as server:
    server.serve_forever()
コード例 #6
0
def test_dispatch_file(clean_redis):
    service_queue = lambda name: get_service_queue(name, clean_redis)

    ds = MockDatastore(collections=[
        'submission', 'result', 'service', 'error', 'file', 'filescore'
    ])
    file_hash = get_random_hash(64)
    sub = random_model_obj(models.submission.Submission)
    sub.sid = sid = 'first-submission'
    sub.params.ignore_cache = False

    disp = Dispatcher(ds, clean_redis, clean_redis, logging)
    disp.active_submissions.add(
        sid,
        SubmissionTask(dict(submission=sub)).as_primitives())
    dh = DispatchHash(sid=sid, client=clean_redis)
    print('==== first dispatch')
    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    file_task = FileTask({
        'sid':
        'first-submission',
        'min_classification':
        get_classification().UNRESTRICTED,
        'file_info':
        dict(sha256=file_hash,
             type='unknown',
             magic='a',
             md5=get_random_hash(32),
             mime='a',
             sha1=get_random_hash(40),
             size=10),
        'depth':
        0,
        'max_files':
        5
    })
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1
    assert service_queue('wrench').length() == 1

    # Making the same call again will queue it up again
    print('==== second dispatch')
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 2
    assert service_queue('wrench').length() == 2
    # assert len(mq) == 4

    # Push back the timestamp in the dispatch hash to simulate a timeout,
    # make sure it gets pushed into that service queue again
    print('==== third dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.fail_recoverable(file_hash, 'extract')

    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1
    # assert len(mq) == 1

    # Mark extract as finished, wrench as failed
    print('==== fourth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.finish(file_hash, 'extract', 'result-key', 0, 'U')
    dh.fail_nonrecoverable(file_hash, 'wrench', 'error-key')

    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'extract')
    assert dh.finished(file_hash, 'wrench')
    assert service_queue('av-a').length() == 1
    assert service_queue('av-b').length() == 1
    assert service_queue('frankenstrings').length() == 1

    # Have the AVs fail, frankenstrings finishes
    print('==== fifth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.fail_nonrecoverable(file_hash, 'av-a', 'error-a')
    dh.fail_nonrecoverable(file_hash, 'av-b', 'error-b')
    dh.finish(file_hash, 'frankenstrings', 'result-key', 0, 'U')

    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'av-a')
    assert dh.finished(file_hash, 'av-b')
    assert dh.finished(file_hash, 'frankenstrings')
    assert service_queue('xerox').length() == 1

    # Finish the xerox service and check if the submission completion got checked
    print('==== sixth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.finish(file_hash, 'xerox', 'result-key', 0, 'U')

    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'xerox')
    assert len(disp.submission_queue) == 1
コード例 #7
0
def test_dispatch_extracted(clean_redis):
    # Setup the fake datastore
    ds = MockDatastore(
        collections=['submission', 'result', 'service', 'error', 'file'])
    file_hash = get_random_hash(64)
    second_file_hash = get_random_hash(64)

    for fh in [file_hash, second_file_hash]:
        ds.file.save(fh, random_model_obj(models.file.File))
        ds.file.get(fh).sha256 = fh

    # Inject the fake submission
    submission = random_model_obj(models.submission.Submission)
    submission.files.clear()
    submission.files.append(dict(name='./file', sha256=file_hash))
    submission.sid = 'first-submission'

    # Launch the dispatcher
    disp = Dispatcher(ds,
                      logger=logging,
                      redis=clean_redis,
                      redis_persist=clean_redis)

    # Launch the submission
    task = SubmissionTask(dict(submission=submission))
    disp.dispatch_submission(task)

    # Check that the right values were sent to the
    file_task = FileTask(disp.file_queue.pop(timeout=1))
    assert file_task.sid == submission.sid
    assert file_task.file_info.sha256 == file_hash
    assert file_task.depth == 0
    assert file_task.file_info.type == ds.file.get(file_hash).type

    # Finish the services
    dh = DispatchHash(submission.sid, clean_redis)
    for service_name in disp.scheduler.services.keys():
        dh.finish(file_hash, service_name, 'error-code', 0, 'U')

    # But one of the services extracted a file
    dh.add_file(second_file_hash, 10, file_hash)

    # But meanwhile, dispatch_submission has been recalled on the submission
    disp.dispatch_submission(task)

    # It should see the missing file, and we should get a new file dispatch message for it
    # to make sure it is getting processed properly, this should be at depth 1, the first layer of
    # extracted files
    file_task = disp.file_queue.pop(timeout=1)
    assert file_task is not None
    file_task = FileTask(file_task)
    assert file_task.sid == submission.sid
    assert file_task.file_info.sha256 == second_file_hash
    assert file_task.depth == 1
    assert file_task.file_info.type == ds.file.get(second_file_hash).type

    # Finish the second file
    for service_name in disp.scheduler.services.keys():
        dh.finish(second_file_hash, service_name, 'error-code', 0, 'U')

    # And now we should get the finished submission
    disp.dispatch_submission(task)
    submission = ds.submission.get(submission.sid)
    assert submission.state == 'completed'
    assert submission.errors == []
    assert len(submission.results) == 2 * len(disp.scheduler.services)
コード例 #8
0
def core(request, redis, filestore, config,
         clean_datastore: AssemblylineDatastore):
    # Block logs from being initialized, it breaks under pytest if you create new stream handlers
    from assemblyline.common import log as al_log
    al_log.init_logging = lambda *args: None
    dispatcher.TIMEOUT_EXTRA_TIME = 1
    dispatcher.TIMEOUT_TEST_INTERVAL = 3
    # al_log.init_logging("simulation")

    ds = clean_datastore

    # Register services
    stages = get_service_stage_hash(redis)

    services = []
    for svc, stage in [('pre', 'EXTRACT'), ('core-a', 'CORE'),
                       ('core-b', 'CORE'), ('finish', 'POST')]:
        ds.service.save(f'{svc}_0', dummy_service(svc, stage,
                                                  docid=f'{svc}_0'))
        ds.service_delta.save(
            svc, ServiceDelta({
                'name': svc,
                'version': '0',
                'enabled': True
            }))
        stages.set(svc, ServiceStage.Running)
        services.append(MockService(svc, ds, redis, filestore))

    ds.service.commit()
    ds.service_delta.commit()

    listed_services = ds.list_all_services(full=True)
    assert len(listed_services) == 4

    ingester = Ingester(datastore=ds,
                        redis=redis,
                        persistent_redis=redis,
                        config=config)

    fields = CoreSession(config, ingester)
    fields.redis = redis
    fields.ds = ds

    fields.config = config
    forge.config_cache[None] = fields.config

    threads = []
    fields.filestore = filestore
    fields.pre_service = services[0]
    threads: list[ServerBase] = [
        # Start the ingester components
        ingester,

        # Start the dispatcher
        Dispatcher(datastore=ds,
                   redis=redis,
                   redis_persist=redis,
                   config=config),

        # Start plumber
        Plumber(datastore=ds,
                redis=redis,
                redis_persist=redis,
                delay=0.5,
                config=config),
    ]

    threads = threads + services

    for t in threads:
        t.daemon = True
        t.start()

    def stop_core():
        [tr.stop() for tr in threads]
        [tr.raising_join() for tr in threads]

    request.addfinalizer(stop_core)
    return fields