Example #1
    def upload_file(self,
                    file_path,
                    classification,
                    ttl,
                    is_section_image,
                    expected_sha256=None):
        # Identify the file info of the uploaded file
        file_info = self.identify.fileinfo(file_path)

        # Validate SHA256 of the uploaded file
        if expected_sha256 is None or expected_sha256 == file_info['sha256']:
            file_info['archive_ts'] = now_as_iso(
                self.config.datastore.ilm.days_until_archive * 24 * 60 * 60)
            file_info['classification'] = classification
            if ttl:
                file_info['expiry_ts'] = now_as_iso(ttl * 24 * 60 * 60)
            else:
                file_info['expiry_ts'] = None

            # Update the datastore with the uploaded file
            self.datastore.save_or_freshen_file(
                file_info['sha256'],
                file_info,
                file_info['expiry_ts'],
                file_info['classification'],
                is_section_image=is_section_image)

            # Upload file to the filestore (upload already checks if the file exists)
            self.filestore.upload(file_path, file_info['sha256'])
        else:
            raise TaskingClientException(
                "Uploaded file does not match expected file hash. "
                f"[{file_info['sha256']} != {expected_sha256}]")
Example #2
def create_fake_submission(current_ds, classification, file_count,
                           res_per_file, hours_to_live):
    # Update the default submission with random values
    submission = copy.deepcopy(SUBMISSION_TEMPLATE)
    submission['times']['submitted'] = now_as_iso()
    submission['__expiry_ts__'] = now_as_iso(hours_to_live * 60 * 60)
    sid = str(uuid.uuid4())
    submission['submission']['sid'] = sid
    submission['submission']['description'] %= sid
    submission['file_count'] = file_count
    submission['classification'] = classification

    # Add files to the submissions
    for _ in xrange(file_count):
        file_obj = create_fake_file()
        current_ds.save_or_freshen_file(file_obj['sha256'], file_obj,
                                        now_as_iso(hours_to_live * 60 * 60),
                                        classification)
        submission['files'].append((file_obj['sha256'], file_obj['sha256']))

        # Add results to files
        for y in xrange(res_per_file):
            res_obj, config_key = create_fake_result(y, classification,
                                                     file_obj['sha256'],
                                                     hours_to_live)
            res_key = current_ds.save_result(
                res_obj['response']['service_name'],
                res_obj['response']['service_version'], config_key,
                file_obj['sha256'], classification, res_obj)
            submission['results'].append(res_key)

    submission['times']['completed'] = now_as_iso()
    current_ds.save_submission(sid, submission)
Example #3
    def watch_service(self, service_name):
        service_queue = get_service_queue(service_name, self.redis)
        while self.running and not self.stop_signals[service_name].is_set():
            while service_queue.length() > self.service_limit[service_name]:
                task = self.dispatch_client.request_work(
                    'plumber',
                    service_name=service_name,
                    service_version='0',
                    blocking=False,
                    low_priority=True)
                if task is None:
                    break

                error = Error(
                    dict(
                        archive_ts=now_as_iso(
                            self.config.datastore.ilm.days_until_archive * 24 *
                            60 * 60),
                        created='NOW',
                        expiry_ts=now_as_iso(task.ttl * 24 * 60 *
                                             60) if task.ttl else None,
                        response=dict(
                            message="Task canceled due to execesive queuing.",
                            service_name=task.service_name,
                            service_version='0',
                            status='FAIL_NONRECOVERABLE',
                        ),
                        sha256=task.fileinfo.sha256,
                        type="TASK PRE-EMPTED",
                    ))

                error_key = error.build_key(task=task)
                self.dispatch_client.service_failed(task.sid, error_key, error)
            self.sleep(2)
Example #4
    def _handle_task_error(self, exec_time: int, task: ServiceTask,
                           error: Dict[str, Any], client_id, service_name,
                           metric_factory) -> None:
        self.log.info(
            f"[{task.sid}] {client_id} - {service_name} "
            f"failed to complete task {f' in {exec_time}ms' if exec_time else ''}"
        )

        # Add timestamps for creation, archive and expiry
        error['created'] = now_as_iso()
        error['archive_ts'] = now_as_iso(
            self.config.datastore.ilm.days_until_archive * 24 * 60 * 60)
        if task.ttl:
            error['expiry_ts'] = now_as_iso(task.ttl * 24 * 60 * 60)

        error = Error(error)
        error_key = error.build_key(
            service_tool_version=error.response.service_tool_version,
            task=task)
        self.dispatch_client.service_failed(task.sid, error_key, error)

        # Metrics
        if error.response.status == 'FAIL_RECOVERABLE':
            metric_factory.increment('fail_recoverable')
        else:
            metric_factory.increment('fail_nonrecoverable')

        self.status_table.set(
            client_id, (service_name, ServiceStatus.Idle, time.time() + 5))
Example #5
def _add_initial_riak_users(alsi, client):
    htpass_users = alsi.config['auth'].get('internal', {}).get('users', [])
    # add the service api user as well
    if htpass_users:
        users = client.bucket("user", bucket_type="data")
        for user in htpass_users.itervalues():
            alsi.milestone('Creating user in riak:' + user['uname'])
            u = users.new(key=user['uname'],
                          data={
                              "api_quota":
                              user.get('api_quota', 10),
                              "agrees_with_tos":
                              user.get('agrees_with_tos', now_as_iso()),
                              "dn":
                              user.get('dn', None),
                              "uname":
                              user['uname'],
                              "name":
                              user.get('name', user['uname']),
                              "avatar":
                              user.get('avatar', None),
                              "groups":
                              user.get('groups', ["DEFAULT_GROUP"]),
                              "is_admin":
                              user.get('is_admin', False),
                              "is_active":
                              user.get('is_active', True),
                              "classification":
                              user['classification'],
                              "password":
                              get_password_hash(user.get('password', None))
                          },
                          content_type='application/json')
            u.store()
Example #6
def main():
    for day in range(31):
        today = now_as_iso(24 * 60 * 60 * day)
        query = "__expiry_ts__:[%s TO %s+1DAY]" % (today, today)
        minutes_params = (
            ("rows", "0"),
            ("facet", "on"),
            ("facet.date", "__expiry_ts__"),
            ("facet.date.start", today),
            ("facet.date.end", today + "+1DAY"),
            ("facet.date.gap", "+1MINUTE"),
            ("facet.mincount", "1"),
        )
        res_minutes = datastore.direct_search("emptyresult", query, args=minutes_params)
        minutes = res_minutes.get("facet_counts", {}).get("facet_dates", {}).get("__expiry_ts__", {})
        for minute, minute_count in minutes.iteritems():
            if minute in ['end', 'gap', 'start']:
                continue

            if minute_count > 0:
                for x in datastore.stream_search('emptyresult', "__expiry_ts__:[%s TO %s+1MINUTE]" % (minute, minute)):
                    try:
                        created = epoch_to_iso(iso_to_epoch(today) - (15 * 24 * 60 * 60))
                        riak_key = x['_yz_rk']

                        path = os.path.join(directory, created[:10]) + '.index'
                        fh = get_filehandle(path)

                        fh.write(riak_key + "\n")
                        fh.flush()

                    except:  # pylint: disable=W0702
                        logger.exception('Unhandled exception:')
Example #7
def _reset_service_updates(signature_type):
    service_updates = Hash(
        'service-updates',
        get_client(
            host=config.core.redis.persistent.host,
            port=config.core.redis.persistent.port,
            private=False,
        ))

    for svc in service_updates.items():
        if svc.lower() == signature_type.lower():
            update_data = service_updates.get(svc)
            update_data['next_update'] = now_as_iso(120)
            update_data['previous_update'] = now_as_iso(-10**10)
            service_updates.set(svc, update_data)
            break
Example #8
def calculate_solr_metrics(es, cur_ip, cur_host):
    log.info("Starting solr metrics gathering...")
    excluded = [
        'searcherName', 'reader', 'indexVersion', 'openedAt', 'registeredAt',
        'readerDir', 'handlerStart', 'caching'
    ]
    stats = {}
    cores = get_cores()
    for core in cores:
        info = fetch_info(core)
        if info:
            for item in info.get('solr-mbeans'):
                if isinstance(item, dict):
                    for k, v in item.iteritems():
                        k = k.replace("/", "")
                        stats[k] = {
                            x: y
                            for x, y in v['stats'].iteritems()
                            if x not in excluded
                        }

        stats['host'] = cur_host
        stats['ip'] = cur_ip
        stats['core'] = core
        stats['timestamp'] = now_as_iso()

        try:
            es.create("solr-%s" % now_as_local()[:10].replace("-", "."),
                      "solr", stats)
        except Exception as e:
            log.exception(e)

    log.info("Solr metrics sent to elasticsearch... Waiting for next run.")
Example #9
def test_set_workflow(datastore, login_session):
    _, session, host = login_session

    workflow_id = random.choice(workflow_list)
    workflow_data = datastore.workflow.get(workflow_id, as_obj=False)
    workflow_data['edited_by'] = 'admin'
    workflow_data['hit_count'] = 111
    workflow_data['last_seen'] = now_as_iso()
    workflow_data['query'] = "query:[1 AND 'THIS IS INVALID'"

    with pytest.raises(APIError):
        resp = get_api_data(session,
                            f"{host}/api/v4/workflow/{workflow_id}/",
                            method="POST",
                            data=json.dumps(workflow_data))

    workflow_data['query'] = "file.sha256:12*"
    resp = get_api_data(session,
                        f"{host}/api/v4/workflow/{workflow_id}/",
                        method="POST",
                        data=json.dumps(workflow_data))
    assert resp['success']

    datastore.workflow.commit()
    new_workflow = datastore.workflow.get(workflow_id, as_obj=False)
    new_workflow['last_edit'] = workflow_data['last_edit']
    assert workflow_data == new_workflow
Example #10
def test_service_changes(updater: run_updater.ServiceUpdater):
    ds: MockDatastore = updater.datastore.ds
    # Base conditions, nothing anywhere
    assert updater.services.length() == 0
    assert len(updater.datastore.list_all_services()) == 0

    # Nothing does nothing
    updater.sync_services()
    assert updater.services.length() == 0
    assert len(updater.datastore.list_all_services()) == 0

    # Any non-disabled services should be picked up by the updater
    create_services(updater.datastore, limit=1)
    for data in ds._collections['service']._docs.values():
        data.enabled = True
        updater._service_stage_hash.set(data.name, ServiceStage.Update)
        data.update_config = random_model_obj(UpdateConfig)
    assert len(updater.datastore.list_all_services(full=True)) == 1
    updater.sync_services()
    assert updater.services.length() == 1
    assert len(updater.datastore.list_all_services(full=True)) == 1

    # It should be scheduled to update ASAP
    for data in updater.services.items().values():
        assert data['next_update'] <= now_as_iso()

    # Disable the service and it will disappear from redis
    for data in ds._collections['service']._docs.values():
        data.enabled = False
    updater.sync_services()
    assert updater.services.length() == 0
    assert len(updater.datastore.list_all_services(full=True)) == 1
Example #11
def get_stats_for_fields(fields, query, tc_start, tc, access_control):
    if not tc_start and "no_delay" not in request.args and config.core.alerter.delay != 0:
        tc_start = now_as_iso(config.core.alerter.delay * -1)
    if tc and config.ui.read_only:
        tc += config.ui.read_only_offset
    timming_filter = get_timming_filter(tc_start, tc)

    filters = [x for x in request.args.getlist("fq") if x != ""]
    if timming_filter:
        filters.append(timming_filter)

    try:
        if isinstance(fields, list):
            with concurrent.futures.ThreadPoolExecutor(
                    len(fields)) as executor:
                res = {
                    field: executor.submit(STORAGE.alert.facet,
                                           field,
                                           query=query,
                                           filters=filters,
                                           limit=100,
                                           access_control=access_control)
                    for field in fields
                }

            return make_api_response({k: v.result() for k, v in res.items()})
        else:
            return make_api_response(
                STORAGE.alert.facet(fields,
                                    query=query,
                                    filters=filters,
                                    limit=100,
                                    access_control=access_control))
    except SearchException as e:
        return make_api_response("", f"SearchException: {e}", 400)
Example #12
def _merge_safe_hashes(new, old):
    try:
        # Check if hash types match
        if new['type'] != old['type']:
            raise InvalidSafehash(
                f"Safe hash type mismatch: {new['type']} != {old['type']}")

        # Use max classification
        old['classification'] = CLASSIFICATION.max_classification(
            old['classification'], new['classification'])

        # Update updated time
        old['updated'] = now_as_iso()

        # Update hashes
        old['hashes'].update(new['hashes'])

        # Update type specific info
        if old['type'] == 'file':
            old.setdefault('file', {})
            new_names = new.get('file', {}).pop('name', [])
            if 'name' in old['file']:
                for name in new_names:
                    if name not in old['file']['name']:
                        old['file']['name'].append(name)
            elif new_names:
                old['file']['name'] = new_names
            old['file'].update(new.get('file', {}))
        elif old['type'] == 'tag':
            old['tag'] = new['tag']

        # Merge sources
        src_map = {x['name']: x for x in new['sources']}
        if not src_map:
            raise InvalidSafehash("No valid source found")

        old_src_map = {x['name']: x for x in old['sources']}
        for name, src in src_map.items():
            src_cl = src.get('classification', None)
            if src_cl:
                old['classification'] = CLASSIFICATION.max_classification(
                    old['classification'], src_cl)

            if name not in old_src_map:
                old_src_map[name] = src
            else:
                old_src = old_src_map[name]
                if old_src['type'] != src['type']:
                    raise InvalidSafehash(
                        f"Source {name} has a type conflict: {old_src['type']} != {src['type']}"
                    )

                for reason in src['reason']:
                    if reason not in old_src['reason']:
                        old_src['reason'].append(reason)
        old['sources'] = list(old_src_map.values())
        return old
    except Exception as e:
        raise InvalidSafehash(f"Invalid data provided: {str(e)}")
Example #13
    def run_archive_once(self):
        reached_max = False
        if not self.config.datastore.ilm.enabled:
            return reached_max

        now = now_as_iso()
        # Archive data
        for collection in self.archiveable_collections:
            # Send a heartbeat post-dated by 5 minutes. If a collection takes more than
            # 5 minutes to archive, this container could be seen as unhealthy. The
            # downside is that if it gets stuck on something, it will take more than
            # 5 minutes before the container is restarted.
            self.heartbeat(int(time.time() + 5 * 60))

            # Start of expiry transaction
            if self.apm_client:
                self.apm_client.begin_transaction("Archive older documents")

            archive_query = f"archive_ts:[* TO {now}]"
            sort = ["archive_ts asc", "id asc"]

            number_to_archive = collection.search(
                archive_query,
                rows=0,
                as_obj=False,
                use_archive=False,
                sort=sort,
                track_total_hits=ARCHIVE_SIZE)['total']

            if number_to_archive == ARCHIVE_SIZE:
                reached_max = True

            if self.apm_client:
                elasticapm.label(query=archive_query)
                elasticapm.label(number_to_archive=number_to_archive)

            self.log.info(f"Processing collection: {collection.name}")
            if number_to_archive != 0:
                # Proceed with archiving
                if collection.archive(archive_query,
                                      max_docs=number_to_archive,
                                      sort=sort):
                    self.counter_archive.increment(
                        f'{collection.name}', increment_by=number_to_archive)
                    self.log.info(
                        f"    Archived {number_to_archive} documents...")
                else:
                    self.log.warning(
                        f"    Failed to properly archive {number_to_archive} documents..."
                    )

            else:
                self.log.debug("    Nothing to archive in this collection.")

            # End of expiry transaction
            if self.apm_client:
                self.apm_client.end_transaction(collection.name, 'archived')

        return reached_max
Example #14
def test_isotime_iso():
    iso_date = now_as_iso()
    iso_format = re.compile(r'[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{6}Z')

    assert isinstance(iso_date, str)
    assert iso_format.match(iso_date)
    assert epoch_to_iso(iso_to_epoch(iso_date)) == iso_date
    assert iso_date == epoch_to_iso(local_to_epoch(epoch_to_local(iso_to_epoch(iso_date))))
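
The isotime helpers exercised by this test can be sketched roughly as follows, assuming a UTC epoch and the '%Y-%m-%dT%H:%M:%S.%fZ' format matched above; this is an illustration, not the module's actual implementation.

from datetime import datetime, timezone

ISO_FMT = '%Y-%m-%dT%H:%M:%S.%fZ'   # matches the regex asserted above

def epoch_to_iso(t: float) -> str:
    # Render epoch seconds as an ISO-8601 UTC string with microsecond precision.
    return datetime.fromtimestamp(t, tz=timezone.utc).strftime(ISO_FMT)

def iso_to_epoch(ts: str) -> float:
    # Inverse of epoch_to_iso.
    return datetime.strptime(ts, ISO_FMT).replace(tzinfo=timezone.utc).timestamp()

def now_as_iso(offset_seconds: float = 0) -> str:
    # Current UTC time shifted by an optional number of seconds
    # (positive offsets point into the future, negative into the past).
    return epoch_to_iso(datetime.now(tz=timezone.utc).timestamp() + offset_seconds)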
Example #15
    def _build_heartbeat(self):
        heartbeat = {'mac': self.mac, 'time': isotime.now_as_iso(), 'resources': {
            'cpu_usage.percent': psutil.cpu_percent(),
            'mem_usage.percent': psutil.phymem_usage().percent,
            'disk_usage.percent': psutil.disk_usage('/').percent,
            'disk_usage.free': psutil.disk_usage('/').free
        }}
        return heartbeat
Example #16
    def _create_aggregated_metrics(self):
        self.log.info("Copying counters ...")
        # APM Transaction start
        if self.apm_client:
            self.apm_client.begin_transaction('metrics')

        with self.counters_lock:
            counter_copy, self.counters = self.counters, {}

        self.log.info("Aggregating metrics ...")
        timestamp = now_as_iso()
        for component, counts in counter_copy.items():
            component_name, component_type = component
            output_metrics = {'name': component_name, 'type': component_type}

            for key, value in counts.items():
                # Skip counts, they will be paired with a time entry and we only want to count it once
                if key.endswith('.c'):
                    continue
                # We have an entry that is a timer, should also have a .c count
                elif key.endswith('.t'):
                    name = key[:-2]  # strip the '.t' suffix (rstrip would also eat trailing 't'/'.' chars)
                    output_metrics[name] = counts[key] / counts.get(
                        name + ".c", 1)
                    output_metrics[name + "_count"] = counts.get(
                        name + ".c", 0)
                # Plain old metric, no modifications needed
                else:
                    output_metrics[key] = value

            ensure_indexes(self.log,
                           self.es,
                           self.config.core.metrics.elasticsearch,
                           [component_type],
                           datastream_enabled=self.is_datastream)

            index = f"al_metrics_{component_type}"
            # Were data streams created for the index specified?
            try:
                if self.es.indices.get_index_template(name=f"{index}_ds"):
                    output_metrics['@timestamp'] = timestamp
                    index = f"{index}_ds"
            except elasticsearch.exceptions.TransportError:
                pass
            output_metrics['timestamp'] = timestamp
            output_metrics = cleanup_metrics(output_metrics)

            self.log.info(output_metrics)
            with_retries(self.log,
                         self.es.index,
                         index=index,
                         body=output_metrics)

        self.log.info("Metrics aggregated. Waiting for next run...")

        # APM Transaction end
        if self.apm_client:
            self.apm_client.end_transaction('aggregate_metrics', 'success')
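
A toy illustration of the '.t' / '.c' counter pairing handled in the loop above; the counter names and values are invented.

# Three 'execute' calls totalling 1500ms, plus one plain counter.
counts = {'execute.t': 1500, 'execute.c': 3, 'cache_hit': 7}

output_metrics = {}
for key, value in counts.items():
    if key.endswith('.c'):
        continue                                  # folded into the matching '.t' entry
    elif key.endswith('.t'):
        name = key[:-2]                           # 'execute'
        output_metrics[name] = value / counts.get(name + '.c', 1)      # average time: 500.0
        output_metrics[name + '_count'] = counts.get(name + '.c', 0)   # call count: 3
    else:
        output_metrics[key] = value               # plain counters pass through untouched

assert output_metrics == {'execute': 500.0, 'execute_count': 3, 'cache_hit': 7}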
Example #17
    def run_update(self, service_name):
        """Common setup and tear down for all update types."""
        # noinspection PyBroadException
        try:
            # Check for new update with service specified update method
            service = self.datastore.get_service_with_delta(service_name)
            update_method = service.update_config.method
            update_data = self.services.get(service_name)
            update_hash = None

            try:
                # Actually run the update method
                if update_method == 'run':
                    update_hash = self.do_file_update(
                        service=service,
                        previous_hash=update_data['sha256'],
                        previous_update=update_data['previous_update'])
                elif update_method == 'build':
                    update_hash = self.do_build_update()

                # If we have performed an update, write that data
                if update_hash is not None and update_hash != update_data[
                        'sha256']:
                    update_data['sha256'] = update_hash
                    update_data['previous_update'] = now_as_iso()
                else:
                    update_hash = None

            finally:
                # Update the next service update check time, don't update the config_hash,
                # as we don't want to disrupt being re-run if our config has changed during this run
                update_data['next_update'] = now_as_iso(
                    service.update_config.update_interval_seconds)
                self.services.set(service_name, update_data)

            if update_hash:
                self.log.info(
                    f"New update applied for {service_name}. Restarting service."
                )
                self.controller.restart(service_name=service_name)

        except BaseException:
            self.log.exception(
                "An error occurred while running an update for: " +
                service_name)
Example #18
def list_alerts(**kwargs):
    """
    List all alert in the system (per page)

    Variables:
    None

    Arguments:
    fq                => Post filter queries (you can have multiple of those)
    q                 => Query to apply to the alert list
    no_delay          => Do not delay alerts
    offset            => Offset at which we start giving alerts
    rows              => Numbers of alerts to return
    tc_start          => Time offset at which we start the time constraint
    tc                => Time constraint applied to the API
    use_archive       => List alerts from archive as well (Default: False)
    track_total_hits  => Track the total number of item that match the query (Default: 10 000)

    Data Block:
    None

    API call example:
    /api/v4/alert/list/

    Result example:
    {"total": 201,                # Total alerts found
     "offset": 0,                 # Offset in the alert list
     "count": 100,                # Number of alerts returned
     "items": []                  # List of alert blocks
    }
    """
    user = kwargs['user']

    use_archive = request.args.get('use_archive', 'false').lower() in ['true', '']
    offset = int(request.args.get('offset', 0))
    rows = int(request.args.get('rows', 100))
    query = request.args.get('q', "alert_id:*") or "alert_id:*"
    tc_start = request.args.get('tc_start', None)
    if not tc_start and "no_delay" not in request.args and config.core.alerter.delay != 0:
        tc_start = now_as_iso(config.core.alerter.delay * -1)
    tc = request.args.get('tc', None)
    if tc and config.ui.read_only:
        tc += config.ui.read_only_offset
    timming_filter = get_timming_filter(tc_start, tc)
    track_total_hits = request.args.get('track_total_hits', False)

    filters = [x for x in request.args.getlist("fq") if x != ""]
    if timming_filter:
        filters.append(timming_filter)

    try:
        return make_api_response(STORAGE.alert.search(
            query, offset=offset, rows=rows, fl="*", sort="reporting_ts desc",
            access_control=user['access_control'],
            filters=filters, as_obj=False, use_archive=use_archive, track_total_hits=track_total_hits))
    except SearchException as e:
        return make_api_response("", f"SearchException: {e}", 400)
Example #19
    def save(self, cache_key: str, data: AnyStr, ttl=DEFAULT_CACHE_LEN, force=False):
        if not COMPONENT_VALIDATOR.match(cache_key):
            raise ValueError("Invalid cache_key for cache item. "
                             "(Only letters, numbers, underscores and dots allowed)")

        new_key = f"{self.component}_{cache_key}" if self.component else cache_key

        self.datastore.cached_file.save(new_key, {'expiry_ts': now_as_iso(ttl), 'component': self.component})
        self.filestore.put(new_key, data, force=force)
Example #20
    def touch(self, cache_key: str, ttl=DEFAULT_CACHE_LEN):
        if not COMPONENT_VALIDATOR.match(cache_key):
            raise ValueError("Invalid cache_key for cache item. "
                             "(Only letters, numbers, underscores and dots allowed)")
        if not self.exists(cache_key):
            raise KeyError(cache_key)

        new_key = f"{self.component}_{cache_key}" if self.component else cache_key
        self.datastore.cached_file.save(new_key, {'expiry_ts': now_as_iso(ttl), 'component': self.component})
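
A short usage sketch tying the two cache methods above together; the class name and constructor arguments are assumptions, as they are not shown in these snippets.

# Hypothetical instance of the cache store exposing save() and touch().
cache = CacheStore(component="my_service")

cache.save("scan_report", b"serialized result bytes", ttl=3600)   # expires in one hour
cache.touch("scan_report", ttl=7200)                              # push the expiry out to two hours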
Example #21
    def save_or_freshen_file(self,
                             sha256,
                             fileinfo,
                             expiry,
                             classification,
                             cl_engine=forge.get_classification(),
                             redis=None):
        with Lock(f'save-or-freshen-file-{sha256}', 5, host=redis):
            current_fileinfo = self.ds.file.get(
                sha256, as_obj=False, force_archive_access=True) or {}

            # Remove control fields from file info and update current file info
            for x in ['classification', 'expiry_ts', 'seen', 'archive_ts']:
                fileinfo.pop(x, None)
            current_fileinfo.update(fileinfo)

            current_fileinfo['archive_ts'] = now_as_iso(days_until_archive *
                                                        24 * 60 * 60)

            # Update expiry time
            if isinstance(expiry, datetime):
                expiry = expiry.strftime(DATEFORMAT)
            current_expiry = current_fileinfo.get('expiry_ts', expiry)
            if current_expiry and expiry:
                current_fileinfo['expiry_ts'] = max(current_expiry, expiry)
            else:
                current_fileinfo['expiry_ts'] = None

            # Update seen counters
            now = now_as_iso()
            current_fileinfo['seen'] = seen = current_fileinfo.get('seen', {})
            seen['count'] = seen.get('count', 0) + 1
            seen['last'] = now
            seen['first'] = seen.get('first', now)

            # Update Classification
            classification = cl_engine.min_classification(
                str(current_fileinfo.get('classification', classification)),
                str(classification))
            current_fileinfo['classification'] = classification
            self.ds.file.save(sha256,
                              current_fileinfo,
                              force_archive_access=True)
Example #22
    def start(self, service_default_result_classification: Classification,
              service_version: str, service_tool_version: Optional[str] = None) -> None:
        self.service_version = service_version
        self.service_tool_version = service_tool_version
        self.service_default_result_classification = service_default_result_classification

        self._service_started = now_as_iso()

        self.clear_extracted()
        self.clear_supplementary()
Example #23
    def _compile_rules(self, rules_txt):
        tmp_dir = tempfile.mkdtemp(dir='/tmp')
        try:
            # Extract the first line of the rules which should look like this:
            # // Signatures last updated: LAST_UPDATE_IN_ISO_FORMAT
            first_line, clean_data = rules_txt.split('\n', 1)
            prefix = '// Signatures last updated: '

            if first_line.startswith(prefix):
                last_update = first_line.replace(prefix, '')
            else:
                self.log.warning(
                    "Couldn't read last update time from %s", rules_txt[:40]
                )
                last_update = now_as_iso()
                clean_data = rules_txt

            rules_file = os.path.join(tmp_dir, 'rules.yar')
            with open(rules_file, 'w') as f:
                f.write(rules_txt)
            try:
                validate = YaraValidator(externals=self.get_yara_externals, logger=self.log)
                edited = validate.validate_rules(rules_file, datastore=True)
            except Exception as e:
                raise e
            # Grab the final output if Yara Validator found problem rules
            if edited:
                with open(rules_file, 'r') as f:
                    sdata = f.read()
                first_line, clean_data = sdata.split('\n', 1)
                if first_line.startswith(prefix):
                    last_update = first_line.replace(prefix, '')
                else:
                    last_update = now_as_iso()
                    clean_data = sdata

            rules = yara.compile(rules_file, externals=self.get_yara_externals)
            rules_md5 = md5(clean_data).hexdigest()
            return last_update, rules, rules_md5
        except Exception as e:
            raise e
        finally:
            shutil.rmtree(tmp_dir)
Example #24
    def cleanup_stale(self):
        # We want containers that are updater managed, already finished, and that exited at least five minutes ago.
        # The reason for the delay is in development systems people may want to check the output
        # of failed update containers they are working on launching with the updater.
        filters = {'label': 'updater_launched=true', 'status': 'exited'}
        time_mark = isotime.now_as_iso(-60*5)

        for container in self.client.containers.list(all=True, ignore_removed=True, filters=filters):
            if container.attrs['State'].get('FinishedAt', '9999') < time_mark:
                container.remove()
Example #25
    def try_run(self):
        # Get an initial list of all the service queues
        # Slice off the 'service-queue-' prefix explicitly; str.lstrip() strips a
        # character set and would also eat leading letters of the service name.
        service_queues = {queue.decode('utf-8')[len('service-queue-'):]: None
                          for queue in self.redis.keys(service_queue_name('*'))}

        while self.running:
            self.heartbeat()
            # Reset the status of the service queues
            service_queues = {service_name: None for service_name in service_queues}

            # Update the service queue status based on current list of services
            for service in self.datastore.list_all_services(full=True):
                service_queues[service.name] = service

            for service_name, service in service_queues.items():
                if not service or not service.enabled or self.get_service_stage(service_name) != ServiceStage.Running:
                    while True:
                        task = self.dispatch_client.request_work(None, service_name=service_name,
                                                                 service_version='0', blocking=False)
                        if task is None:
                            break

                        error = Error(dict(
                            archive_ts=now_as_iso(self.config.datastore.ilm.days_until_archive * 24 * 60 * 60),
                            created='NOW',
                            expiry_ts=now_as_iso(task.ttl * 24 * 60 * 60) if task.ttl else None,
                            response=dict(
                                message='The service was disabled while processing this task.',
                                service_name=task.service_name,
                                service_version='0',
                                status='FAIL_NONRECOVERABLE',
                            ),
                            sha256=task.fileinfo.sha256,
                            type="TASK PRE-EMPTED",
                        ))

                        error_key = error.build_key(task=task)

                        self.dispatch_client.service_failed(task.sid, error_key, error)

            # Wait a while before checking status of all services again
            time.sleep(self.delay)
Example #26
    def check(self, value, **kwargs):
        if value is None:
            return None

        if value == "NOW":
            value = now_as_iso()

        try:
            return datetime.strptime(value, DATEFORMAT).replace(tzinfo=UTC_TZ)
        except (TypeError, ValueError):
            return arrow.get(value).datetime
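
For illustration, this is the check that lets earlier examples (#3 and #25) build Error objects with created='NOW'; date_field below is a hypothetical instance of the field class owning the method.

# Hypothetical field instance; only the check() method above is exercised.
assert date_field.check(None) is None
print(date_field.check("NOW"))                            # sentinel resolved to the current UTC time
print(date_field.check("2024-01-01T00:00:00.000000Z"))    # parsed with DATEFORMAT
print(date_field.check("2024-01-01"))                     # falls through to arrow's flexible parser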
Example #27
def CreateEntry(dispatcher, task, now):
    parent = None
    psrl = task.psrl
    sid = task.sid
    srl = task.srl

    if not psrl:
        if task.quota_item and task.submitter:
            log.info("Submission %s counts toward quota for %s", sid,
                     task.submitter)
            Hash('submissions-' + task.submitter,
                 **persistent).add(sid, now_as_iso())
    else:
        # This task has a parent.
        try:
            parent = dispatcher.entries[sid][psrl]
            parent_task = parent.task

            # Child inherits parent's selected (and skipped) services
            # + any specifically added by the child.
            task.selected = (task.selected or []) + \
                (parent_task.selected or []) + (parent_task.skipped or [])

            if eligible_parent(dispatcher.service_manager, task):
                # Child inherits parent's excluded services
                # + any specifically excluded by the child.
                task.excluded = (task.excluded or []) + \
                    (parent_task.excluded or [])
            else:
                task.excluded = task.selected
        except KeyError:
            # Couldn't find parent. It might have been filtered out.
            dispatcher.debug("Couldn't find parent (%s) of %s/%s", psrl, sid,
                             srl)
            return None

    # Create acked, completed, dispatched and outstanding service structures.
    a, c, d, o = dispatcher.service_manager.determine_services(task, now)

    # Make sure the initial score is set to 0.
    task.score = 0
    task.max_score = 0

    # Tuples are immutable so we have to store the entry's stage on the task.
    task.stage = 0
    entry = Entry(dispatcher, task, {}, [], {}, {}, {}, a, c, d, o)
    if parent:
        # Set up parent/child links.
        entry.parents.append(parent)
        if srl not in parent.outstanding_children and \
            srl not in parent.completed_children:
            parent.outstanding_children[srl] = entry

    return entry
Example #28
    def _notify_drop(self, task: IngestTask):
        self.send_notification(task)

        c12n = task.params.classification
        expiry = now_as_iso(86400)
        sha256 = task.submission.files[0].sha256

        self.datastore.save_or_freshen_file(sha256, {'sha256': sha256},
                                            expiry,
                                            c12n,
                                            redis=self.redis)
Example #29
def get_or_create_summary(sid, results, user_classification, completed):
    user_classification = CLASSIFICATION.normalize_classification(user_classification, long_format=False)
    cache_key = f"{sid}_{user_classification}_with_sections"
    for illegal_char in [" ", ":", "/"]:
        cache_key = cache_key.replace(illegal_char, "")

    summary_cache = STORAGE.submission_summary.get_if_exists(cache_key, as_obj=False)

    if not summary_cache:
        summary = STORAGE.get_summary_from_keys(
            results, cl_engine=CLASSIFICATION, user_classification=user_classification,
            keep_heuristic_sections=True)

        expiry = now_as_iso(config.datastore.ilm.days_until_archive * 24 * 60 * 60)
        partial = not completed or "missing_results" in summary or "missing_files" in summary

        # Do not cache partial summary
        if not partial:
            summary_cache = {
                "attack_matrix": json.dumps(summary['attack_matrix']),
                "tags": json.dumps(summary['tags']),
                "expiry_ts": expiry,
                "heuristics": json.dumps(summary['heuristics']),
                "classification": summary['classification'],
                "filtered": summary["filtered"],
                "heuristic_sections": json.dumps(summary['heuristic_sections']),
                "heuristic_name_map": json.dumps(summary['heuristic_name_map'])
            }
            STORAGE.submission_summary.save(cache_key, summary_cache)

        return {
            "attack_matrix": summary['attack_matrix'],
            "tags": summary['tags'],
            "expiry_ts": expiry,
            "heuristics": summary['heuristics'],
            "classification": summary['classification'],
            "filtered": summary["filtered"],
            "partial": partial,
            "heuristic_sections": summary['heuristic_sections'],
            "heuristic_name_map": summary['heuristic_name_map']
        }

    return {
        "attack_matrix": json.loads(summary_cache['attack_matrix']),
        "tags": json.loads(summary_cache['tags']),
        "expiry_ts": summary_cache["expiry_ts"],
        "heuristics": json.loads(summary_cache['heuristics']),
        "classification": summary_cache['classification'],
        "filtered": summary_cache["filtered"],
        "partial": False,
        "heuristic_sections": json.loads(summary_cache['heuristic_sections']),
        "heuristic_name_map": json.loads(summary_cache['heuristic_name_map'])
    }
Example #30
def edit_workflow(workflow_id, **kwargs):
    """
    Edit a workflow.

    Variables:
    workflow_id    => ID of the workflow to edit

    Arguments:
    None

    Data Block:
    {
     "name": "Workflow name",    # Name of the workflow
     "classification": "",       # Max classification for workflow
     "label": ['label1'],        # Labels for the workflow
     "priority": "LOW",          # Priority of the workflow
     "status": "MALICIOUS",      # Status of the workflow
     "query": "*:*"              # Query to match the data
    }

    Result example:
    {
     "success": true             # Saving the user info succeded
    }
    """
    data = request.json
    name = data.get('name', None)
    query = data.get('query', None)

    if not name:
        return make_api_response({"success": False}, err="Name field is required", status_code=400)

    if not query:
        return make_api_response({"success": False}, err="Query field is required", status_code=400)

    if not verify_query(query):
        return make_api_response({"success": False}, err="Query contains an error", status_code=400)

    wf = STORAGE.workflow.get(workflow_id, as_obj=False)
    if wf:
        uname = kwargs['user']['uname']
        wf.update(data)
        wf.update({
            "edited_by": uname,
            "last_edit": now_as_iso(),
            "workflow_id": workflow_id
        })

        return make_api_response({"success": STORAGE.workflow.save(workflow_id, wf)})
    else:
        return make_api_response({"success": False},
                                 err="Workflow ID %s does not exist" % workflow_id,
                                 status_code=404)
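
For completeness, a possible client-side POST mirroring the data block documented above, using the same /api/v4/workflow/<workflow_id>/ route exercised in Example #9; the host, authentication and field values are invented.

import json
import requests

workflow = {
    "name": "Phishing domains",          # invented example values
    "classification": "TLP:CLEAR",
    "label": ["PHISHING"],
    "priority": "LOW",
    "status": "MALICIOUS",
    "query": "file.sha256:12*",
}

workflow_id = "00000000-0000-0000-0000-000000000000"   # placeholder ID
resp = requests.post(
    f"https://localhost/api/v4/workflow/{workflow_id}/",
    data=json.dumps(workflow),
    verify=False,
)
print(resp.json())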