def test_finish_missing_file(client, dispatch_client, heuristics):
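    # Build a result whose extracted files are all absent from the filestore, then
    # verify the API reports every missing extracted and supplementary hash back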
    heuristics.get.return_value = None
    task = random_minimal_obj(Task)
    fs = forge.get_filestore()

    result: Result = random_minimal_obj(Result)
    while not result.response.extracted:
        result: Result = random_model_obj(Result)
        result.response.extracted = [
            x for x in result.response.extracted if not fs.exists(x.sha256)
        ]
    missing = {
        x.sha256
        for x in result.response.extracted if not fs.exists(x.sha256)
    }
    missing |= {
        x.sha256
        for x in result.response.supplementary if not fs.exists(x.sha256)
    }

    message = {
        'task': task.as_primitives(),
        'result': result.as_primitives(),
        'freshen': True
    }
    resp = client.post('/api/v1/task/', headers=headers, json=message)
    assert resp.status_code == 200
    assert resp.json['api_response']['success'] is False
    assert set(resp.json['api_response']['missing_files']) == missing
def test_register_new_heuristics(client, storage):
    config_block = 'A CONFIG OBJECT FOR SURE'
    storage.get_service_with_delta.return_value = config_block
    storage.heuristic.get_if_exists.return_value = None

    service = random_minimal_obj(Service)
    service = service.as_primitives()
    service['heuristics'] = [random_minimal_obj(Heuristic).as_primitives()]

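    # Mock the datastore bulk call to report the heuristic as newly created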
    storage.heuristic.bulk.return_value = {
        "items": [{
            "update": {
                "result": "created",
                "_id": service['heuristics'][0]['heur_id']
            }
        }]
    }

    headers['Service-Name'] = service['name']
    headers['Service-Version'] = service['version']

    result = client.post("/api/v1/service/register/",
                         headers=headers,
                         json=service)
    assert result.status_code == 200
    assert storage.heuristic.bulk.call_count == 1

    assert result.json['api_response']['keep_alive'] is True
    assert len(result.json['api_response']['new_heuristics']) == 1
    assert service['heuristics'][0]['heur_id'] in result.json['api_response']['new_heuristics'][0]
    assert result.json['api_response']['service_config'] == config_block
def test_finish_error(client, dispatch_client):
    task = random_minimal_obj(Task)
    error = random_minimal_obj(Error)
    message = {'task': task.as_primitives(), 'error': error.as_primitives()}
    resp = client.post('/api/v1/task/', headers=headers, json=message)
    assert resp.status_code == 200
    assert dispatch_client.service_failed.call_count == 1
    assert dispatch_client.service_failed.call_args[0][0] == task.sid
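    # archive_ts/expiry_ts/created are stamped server-side; copy them over so the
    # equality check below only covers fields the client actually controls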
    error.archive_ts = dispatch_client.service_failed.call_args[0][2].archive_ts
    error.expiry_ts = dispatch_client.service_failed.call_args[0][2].expiry_ts
    error.created = dispatch_client.service_failed.call_args[0][2].created
    assert dispatch_client.service_failed.call_args[0][2] == error
def test_register_bad_heuristics(client, storage):
    service = random_minimal_obj(Service)
    service = service.as_primitives()
    service['heuristics'] = [random_minimal_obj(Heuristic).as_primitives()]
    service['heuristics'][0]['description'] = None

    headers['Service-Name'] = service['name']
    headers['Service-Version'] = service['version']

    result = client.post("/api/v1/service/register/",
                         headers=headers,
                         json=service)
    assert result.status_code == 400
def test_finish_heuristic(client, dispatch_client, heuristics):
    heuristics.get.return_value = None
    task = random_minimal_obj(Task)

    result: Result = random_model_obj(Result)
    while not any(sec.heuristic for sec in result.result.sections):
        result: Result = random_model_obj(Result)

    heuristics_count = sum(
        int(sec.heuristic is not None) for sec in result.result.sections)

    result.result.score = 99999
    result.response.extracted = []
    result.response.supplementary = []

    message = {
        'task': task.as_primitives(),
        'result': result.as_primitives(),
        'freshen': True
    }
    resp = client.post('/api/v1/task/', headers=headers, json=message)
    assert resp.status_code == 200
    assert dispatch_client.service_finished.call_count == 1
    assert dispatch_client.service_finished.call_args[0][0] == task.sid
    # The heuristic store is mocked to return None, so every section's heuristic is
    # dropped and the score is recomputed as 0 instead of the submitted 99999. This
    # proves the API loaded scores from the heuristic store rather than trusting us.
    assert dispatch_client.service_finished.call_args[0][2].result.score == 0
    assert heuristics.get.call_count == heuristics_count
def test_ingest_size_error(ingest_harness):
    datastore, ingester, in_queue = ingest_harness

    # Send a file that exceeds the maximum allowed size
    submission = make_message(
        files={'size': ingester.config.submission.max_file_size + 1},
        params={'ignore_size': False, 'never_drop': False})
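    # Save a matching file record so the ingester can resolve the submission's file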
    fo = random_minimal_obj(File)
    fo.sha256 = submission['files'][0]['sha256']
    datastore.file.save(submission['files'][0]['sha256'], fo)
    submission['notification'] = {'queue': 'drop_test'}
    in_queue.push(submission)
    ingester.handle_ingest()

    # No files in the internal buffer
    assert ingester.unique_queue.length() == 0
    assert ingester.ingest_queue.length() == 0

    # A file was dropped
    queue_name = _notification_queue_prefix + submission['notification']['queue']
    queue = ingester.notification_queues[queue_name]
    message = queue.pop()
    assert message is not None
def test_finish_minimal(client, dispatch_client):
    task = random_minimal_obj(Task)
    result = random_minimal_obj(Result)
    message = {
        'task': task.as_primitives(),
        'result': result.as_primitives(),
        'freshen': False
    }
    resp = client.post('/api/v1/task/', headers=headers, json=message)
    assert resp.status_code == 200
    assert dispatch_client.service_finished.call_count == 1
    assert dispatch_client.service_finished.call_args[0][0] == task.sid
    result.archive_ts = dispatch_client.service_finished.call_args[0][2].archive_ts
    result.expiry_ts = dispatch_client.service_finished.call_args[0][2].expiry_ts
    result.created = dispatch_client.service_finished.call_args[0][2].created
    assert dispatch_client.service_finished.call_args[0][2] == result
def test_dispatch_extracted(clean_redis, clean_datastore):
    redis = clean_redis
    ds = clean_datastore

    # Setup the fake datastore
    file_hash = get_random_hash(64)
    second_file_hash = get_random_hash(64)

    for fh in [file_hash, second_file_hash]:
        obj = random_model_obj(models.file.File)
        obj.sha256 = fh
        ds.file.save(fh, obj)

    # Inject the fake submission
    submission = random_model_obj(models.submission.Submission)
    submission.files = [dict(name='./file', sha256=file_hash)]
    sid = submission.sid = 'first-submission'

    disp = Dispatcher(ds, redis, redis)
    disp.running = ToggleTrue()
    client = DispatchClient(ds, redis, redis)
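    # Make the client aware of this dispatcher instance so work can be routed to it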
    client.dispatcher_data_age = time.time()
    client.dispatcher_data.append(disp.instance_id)

    # Launch the submission
    client.dispatch_submission(submission)
    disp.pull_submissions()
    disp.service_worker(disp.process_queue_index(sid))

    # Finish one service extracting a file
    job = client.request_work('0', 'extract', '0')
    assert job.fileinfo.sha256 == file_hash
    assert job.filename == './file'
    new_result: Result = random_minimal_obj(Result)
    new_result.sha256 = file_hash
    new_result.response.service_name = 'extract'
    new_result.response.extracted = [
        dict(sha256=second_file_hash,
             name='second-*',
             description='abc',
             classification='U')
    ]
    client.service_finished(sid, 'extracted-done', new_result)

    # process the result
    disp.pull_service_results()
    disp.service_worker(disp.process_queue_index(sid))
    disp.service_worker(disp.process_queue_index(sid))

    # The extracted file should now be queued for the extract service
    job = client.request_work('0', 'extract', '0')
    assert job.fileinfo.sha256 == second_file_hash
    assert job.filename == 'second-*'
def test_register_existing_heuristics(client, storage):
    config_block = 'A CONFIG OBJECT FOR SURE'
    storage.get_service_with_delta.return_value = config_block
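    # heuristic.get_if_exists is left unmocked (truthy), so every heuristic
    # already "exists" and the registration should create nothing new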

    service = random_minimal_obj(Service)
    service = service.as_primitives()
    service['heuristics'] = [random_minimal_obj(Heuristic).as_primitives()]

    headers['Service-Name'] = service['name']
    headers['Service-Version'] = service['version']

    result = client.post("/api/v1/service/register/",
                         headers=headers,
                         json=service)
    assert result.status_code == 200
    assert storage.heuristic.save.call_count == 0

    assert result.json['api_response']['keep_alive'] is True
    assert len(result.json['api_response']['new_heuristics']) == 0
    assert result.json['api_response']['service_config'] == config_block
def test_register_bad_service(client, storage):
    service = random_minimal_obj(Service).as_primitives()

    headers['Service-Name'] = service['name']
    headers['Service-Version'] = service['version']
    del service['name']

    result = client.post("/api/v1/service/register/",
                         headers=headers,
                         json=service)
    assert result.status_code == 400
def test_task_ignored_then_timeout(client, dispatch_client, storage):
    # Put a task "in the queue"
    task = random_minimal_obj(Task)
    task.ignore_cache = False
    dispatch_client.request_work.side_effect = [task, None]
    dispatch_client.service_data[service_name].timeout = 100
    dispatch_client.service_data[service_name].disable_cache = False
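    # The cached result lookup returns a truthy mock, so the API completes the task
    # itself; the follow-up request_work returns None and the client gets no task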

    resp = client.get('/api/v1/task/', headers=headers)
    assert resp.status_code == 200
    assert dispatch_client.service_finished.call_count == 1
    assert not resp.json['api_response']['task']
def test_ingest_simple(ingest_harness):
    datastore, ingester, in_queue = ingest_harness

    user = random_minimal_obj(User)
    user.name = 'user'
    custom_user_groups = ['users', 'the_user']
    user.groups = list(custom_user_groups)
    datastore.user.save('user', user)

    # Let the ingest loop run an extra time because we send two messages
    ingester.running.counter += 1

    # Send a message with a garbled sha, this should be dropped
    in_queue.push(make_message(files={'sha256': '1' * 10}))

    with pytest.raises(ValueError):
        # Process garbled message
        ingester.try_run(volatile=True)

    # Send a message that is fine, but has an illegal metadata field
    in_queue.push(make_message(
        dict(metadata={
            'tobig': 'a' * (ingester.ingester.config.submission.max_metadata_length + 2),
            'small': '100'
        }),
        params={'submitter': 'user', 'groups': []}))

    # Process the valid message
    ingester.try_run(volatile=True)

    mm = ingester.ingester
    # The only task that makes it all the way through must fit these parameters
    task = mm.unique_queue.pop()
    assert task
    task = IngestTask(task)
    assert task.submission.files[0].sha256 == '0' * 64  # Only the valid sha passed through
    assert 'tobig' not in task.submission.metadata  # The bad metadata was stripped
    assert task.submission.metadata['small'] == '100'  # The valid metadata is unchanged
    assert task.submission.params.submitter == 'user'
    assert task.submission.params.groups == custom_user_groups

    # None of the other tasks should reach the end
    assert mm.unique_queue.length() == 0
    assert mm.ingest_queue.length() == 0
def test_task_dispatch(client, dispatch_client, storage):
    # Put a task "in the queue"
    task = random_minimal_obj(Task)
    task.ignore_cache = False
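    # No cached result (full or empty) exists, so the task must be handed out as-is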
    storage.result.get_if_exists.return_value = None
    storage.emptyresult.get_if_exists.return_value = None
    dispatch_client.request_work.return_value = task
    dispatch_client.service_data[service_name].timeout = 100
    dispatch_client.service_data[service_name].disable_cache = False

    resp = client.get('/api/v1/task/', headers=headers)
    assert resp.status_code == 200
    assert resp.json['api_response']['task'] == task.as_primitives()
def test_register_service_auth_fail(client, storage):
    config_block = 'A CONFIG OBJECT FOR SURE'
    storage.get_service_with_delta.return_value = config_block
    service = random_minimal_obj(Service)

    _headers = dict(headers)
    _headers['X-APIKEY'] = '10'
    _headers['Service-Name'] = service.name
    _headers['Service-Version'] = service.version

    result = client.post("/api/v1/service/register/",
                         headers=_headers,
                         json=service.as_primitives())
    assert result.status_code == 401
def test_ingest_groups_custom(ingest_harness):
    datastore, ingester, in_queue = ingest_harness

    user = random_minimal_obj(User)
    user.name = 'user'
    custom_user_groups = ['users', 'the_user']
    user.groups = list(custom_user_groups)
    datastore.user.save('user', user)

    in_queue.push(
        make_message(params={
            'submitter': 'user',
            'groups': ['group_b']
        }))
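    # The explicitly submitted group list should override the user's default groups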
    ingester.handle_ingest()

    task = ingester.unique_queue.pop()
    assert task
    task = IngestTask(task)
    assert task.submission.params.submitter == 'user'
    assert task.submission.params.groups == ['group_b']
# Fragment of a (collection name, random document) listing; the surrounding code
# was not captured, so the list below is truncated and the wrapper name is
# illustrative only.
collection_documents = [
    ("file", random_model_obj(File)),
    ("filescore", random_model_obj(FileScore)),
    ("heuristic", random_model_obj(Heuristic)),
    ("result", random_model_obj(Result)),
    ("service", random_model_obj(Service)),
    ("service_delta", random_model_obj(ServiceDelta)),
    ("signature", random_model_obj(Signature)),
    ("submission", random_model_obj(Submission)),
    ("submission_summary", random_model_obj(SubmissionSummary)),
    ("submission_tree", random_model_obj(SubmissionTree)),
    ("user", random_model_obj(User)),
    ("user_favorites", random_model_obj(UserFavorites)),
    ("user_settings", random_model_obj(UserSettings)),
    ("vm", random_model_obj(VM)),
    ("workflow", random_model_obj(Workflow)),
    ("alert_min", random_minimal_obj(Alert)),
    ("cached_file_min", random_minimal_obj(CachedFile)),
    ("emptyresult_min", random_minimal_obj(EmptyResult)),
    ("error_min", random_minimal_obj(Error)),
    ("file_min", random_minimal_obj(File)),
    ("filescore_min", random_minimal_obj(FileScore)),
    ("heuristic_min", random_minimal_obj(Heuristic)),
    ("result_min", random_minimal_obj(Result)),
    ("service_min", random_minimal_obj(Service)),
    ("service_delta_min", random_minimal_obj(ServiceDelta)),
    ("signature_min", random_minimal_obj(Signature)),
    ("submission_min", random_minimal_obj(Submission)),
    ("submission_summary_min", random_minimal_obj(SubmissionSummary)),
    ("submission_tree_min", random_minimal_obj(SubmissionTree)),
    ("user_min", random_minimal_obj(User)),
    ("user_favorites_min", random_minimal_obj(UserFavorites)),
]
def _create_results_for_file(ds, fs, f, possible_childs=None, log=None):
    r_list = []
    services_done = []
    section_body_format = ["TEXT", "MEMORY_DUMP", "GRAPH_DATA", "URL", "JSON", "KEY_VALUE"]
    section_depth_list = [[1, 1, 2, 3, 1], [1, 2, 1], [1, 2, 3, 1], [1, 2]]
    section_depth = random.choice(section_depth_list)
    for _ in range(random.randint(2, 5)):
        r = random_model_obj(Result)

        # Only one result per service per file
        while r.response.service_name in services_done:
            r.response.service_name = random.choice(list(SERVICES.keys()))

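        # Shape each section: depth follows the chosen pattern and the body
        # payload matches the randomly selected body format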
        for depth_id, section in enumerate(r.result.sections):
            section.depth = section_depth[depth_id % len(section_depth)]
            section.body_format = random.choice(section_body_format)
            section.heuristic.heur_id = random.choice([f"{r.response.service_name.upper()}.{x+1}" for x in range(5)])
            if section.body_format == "GRAPH_DATA":
                cmap_min = 0
                cmap_max = random.choice([5, 10, 20])
                color_map_data = {
                    'type': 'colormap',
                    'data': {
                        'domain': [cmap_min, cmap_max],
                        'values': [random.random() * cmap_max for _ in range(50)]
                    }
                }
                section.body = json.dumps(color_map_data)
            elif section.body_format == "URL":
                data = [{"url": get_random_uri()} for _ in range(random.randint(1, 4))]
                section.body = json.dumps(data)
            elif section.body_format in ["JSON", "KEY_VALUE"]:
                data = {get_random_word(): get_random_id() for _ in range(random.randint(3, 9))}
                section.body = json.dumps(data)

        services_done.append(r.response.service_name)

        # Set the sha256
        r.sha256 = f

        if random.randint(1, 10) > 8:
            # Generate an empty result
            r_key = f"{r.build_key()}.e"
            ds.emptyresult.save(r_key, random_model_obj(EmptyResult))
        else:
            r_key = r.build_key()
            # Set random extracted files that are not top level
            if not possible_childs:
                r.response.extracted = []
            else:
                for e in r.response.extracted:
                    e.sha256 = random.choice(possible_childs)

            # Set random supplementary files that are not top level
            if r.response.supplementary:
                # Edit the first file to be an ontology file
                s = r.response.supplementary[0]

                # Create a random ontology
                onto = random_minimal_obj(ResultOntology).as_primitives(strip_null=True)
                onto['header']['sha256'] = f
                onto['header']['service_name'] = r.response.service_name
                onto['header']['service_version'] = r.response.service_version
                onto['header']['service_tool_version'] = r.response.service_tool_version

                # Create its file record
                supp_file = random_model_obj(File)
                byte_str = json.dumps(onto).encode('utf-8')
                sha256 = hashlib.sha256(byte_str).hexdigest()
                supp_file.sha256 = sha256
                ds.file.save(sha256, supp_file)
                fs.put(sha256, byte_str)

                # Point the supplementary entry at the ontology file
                s.sha256 = sha256
                s.name = "random.ontology"
                s.description = f"Random Ontology file for: {f}"

                r.response.supplementary = [s]

            ds.result.save(r_key, r)

        if log:
            log.info(f"\t\t\t{r_key}")
        r_list.append(r_key)

    return r_list
def make_result(file_hash, service):
    new_result: Result = random_minimal_obj(Result)
    new_result.sha256 = file_hash
    new_result.response.service_name = service
    return new_result
    def run(self):
        self.log.info("Random service result generator ready!")
        self.log.info("Monitoring queues:")
        for q in self.queues:
            self.log.info(f"\t{q.name}")

        self.log.info("Waiting for messages...")
        while self.running:
            # Reset Idle flags
            for s in self.service_info:
                if s['enabled']:
                    self.service_state_hash.set(
                        f"{self.client_id}_{s['name']}",
                        (s['name'], ServiceStatus.Idle, time.time() + 30 + 5))

            message = select(*self.queues, timeout=1)
            if not message:
                continue

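            # Compute lifecycle timestamps from the datastore/submission config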
            archive_ts = now_as_iso(self.config.datastore.ilm.days_until_archive * 24 * 60 * 60)
            if self.config.submission.dtl:
                expiry_ts = now_as_iso(self.config.submission.dtl * 24 * 60 * 60)
            else:
                expiry_ts = None
            queue, msg = message
            task = ServiceTask(msg)

            if not self.dispatch_client.running_tasks.add(
                    task.key(), task.as_primitives()):
                continue

            # Set service busy flag
            self.service_state_hash.set(
                f"{self.client_id}_{task.service_name}",
                (task.service_name, ServiceStatus.Running,
                 time.time() + 30 + 5))

            # METRICS
            self.counters[task.service_name].increment('execute')
            # METRICS (not caching here so always miss)
            self.counters[task.service_name].increment('cache_miss')

            self.log.info(
                f"\tQueue {queue} received a new task for sid {task.sid}.")
            action = random.randint(1, 10)
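            # action 1 -> error (10%); 2-8 -> full random result; 9-10 -> minimal result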
            if action >= 2:
                if action > 8:
                    result = random_minimal_obj(Result)
                else:
                    result = random_model_obj(Result)
                result.sha256 = task.fileinfo.sha256
                result.response.service_name = task.service_name
                result.archive_ts = archive_ts
                result.expiry_ts = expiry_ts
                result.response.extracted = result.response.extracted[task.depth + 2:]
                result.response.supplementary = result.response.supplementary[task.depth + 2:]
                result_key = Result.help_build_key(
                    sha256=task.fileinfo.sha256,
                    service_name=task.service_name,
                    service_version='0',
                    is_empty=result.is_empty())

                self.log.info(
                    f"\t\tA result was generated for this task: {result_key}")

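                # Make sure every referenced child file exists in both the datastore and filestore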
                new_files = result.response.extracted + result.response.supplementary
                for f in new_files:
                    if not self.datastore.file.get(f.sha256):
                        random_file = random_model_obj(File)
                        random_file.archive_ts = archive_ts
                        random_file.expiry_ts = expiry_ts
                        random_file.sha256 = f.sha256
                        self.datastore.file.save(f.sha256, random_file)
                    if not self.filestore.exists(f.sha256):
                        self.filestore.put(f.sha256, f.sha256)

                time.sleep(random.randint(0, 2))

                self.dispatch_client.service_finished(task.sid, result_key,
                                                      result)

                # METRICS
                if result.result.score > 0:
                    self.counters[task.service_name].increment('scored')
                else:
                    self.counters[task.service_name].increment('not_scored')

            else:
                error = random_model_obj(Error)
                error.archive_ts = archive_ts
                error.expiry_ts = expiry_ts
                error.sha256 = task.fileinfo.sha256
                error.response.service_name = task.service_name
                error.type = random.choice(
                    ["EXCEPTION", "SERVICE DOWN", "SERVICE BUSY"])

                error_key = error.build_key('0')

                self.log.info(
                    f"\t\tA {error.response.status}:{error.type} "
                    f"error was generated for this task: {error_key}")

                self.dispatch_client.service_failed(task.sid, error_key, error)

                # METRICS
                if error.response.status == "FAIL_RECOVERABLE":
                    self.counters[task.service_name].increment(
                        'fail_recoverable')
                else:
                    self.counters[task.service_name].increment(
                        'fail_nonrecoverable')