def __init__(self, sender, log, config=None, redis=None):
    self.sender = sender
    self.log = log

    self.config = config or forge.get_config()
    self.datastore = forge.get_datastore(self.config)

    # Redis connections: volatile and persistent
    self.redis = redis or get_client(
        host=self.config.core.redis.nonpersistent.host,
        port=self.config.core.redis.nonpersistent.port,
        private=False,
    )
    self.redis_persist = get_client(
        host=self.config.core.redis.persistent.host,
        port=self.config.core.redis.persistent.port,
        private=False,
    )

    # Queues and hashes this component observes
    self.status_queue = CommsQueue(STATUS_QUEUE, self.redis)
    self.dispatch_active_hash = Hash(DISPATCH_TASK_HASH, self.redis_persist)
    self.dispatcher_submission_queue = NamedQueue(SUBMISSION_QUEUE, self.redis)
    self.ingest_scanning = Hash('m-scanning-table', self.redis_persist)
    self.ingest_unique_queue = PriorityQueue('m-unique', self.redis_persist)
    self.ingest_queue = NamedQueue(INGEST_QUEUE_NAME, self.redis_persist)
    self.ingest_complete_queue = NamedQueue(COMPLETE_QUEUE_NAME, self.redis)
    self.alert_queue = NamedQueue(ALERT_QUEUE_NAME, self.redis_persist)

    # Priority ranges and sampling thresholds from the ingester configuration
    constants = forge.get_constants(self.config)
    self.c_rng = constants.PRIORITY_RANGES['critical']
    self.h_rng = constants.PRIORITY_RANGES['high']
    self.m_rng = constants.PRIORITY_RANGES['medium']
    self.l_rng = constants.PRIORITY_RANGES['low']
    self.c_s_at = self.config.core.ingester.sampling_at['critical']
    self.h_s_at = self.config.core.ingester.sampling_at['high']
    self.m_s_at = self.config.core.ingester.sampling_at['medium']
    self.l_s_at = self.config.core.ingester.sampling_at['low']

    # Query matching records whose expiry time has passed the configured delay
    self.to_expire = {k: 0 for k in metrics.EXPIRY_METRICS}
    if self.config.core.expiry.batch_delete:
        self.delete_query = f"expiry_ts:[* TO {self.datastore.ds.now}-{self.config.core.expiry.delay}" \
                            f"{self.datastore.ds.hour}/DAY]"
    else:
        self.delete_query = f"expiry_ts:[* TO {self.datastore.ds.now}-{self.config.core.expiry.delay}" \
                            f"{self.datastore.ds.hour}]"

    # Periodically reload the expiry queue counts in the background
    self.scheduler = BackgroundScheduler(daemon=True)
    self.scheduler.add_job(
        self._reload_expiry_queues, 'interval',
        seconds=self.config.core.metrics.export_interval * 4)
    self.scheduler.start()
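
# --- Illustrative sketch, not part of the original source ---
# The two delete_query branches above differ only in the trailing "/DAY"
# rounding token: batch deletes round the cut-off down to a day boundary,
# otherwise the exact "now - delay" instant is used. Assuming, purely for
# illustration, a datastore whose `ds.now` is "now", whose `ds.hour` is "h",
# and a 4 hour expiry delay, the resulting range queries would look roughly like:
#
#   batch_delete=True  -> "expiry_ts:[* TO now-4h/DAY]"
#   batch_delete=False -> "expiry_ts:[* TO now-4h]"
#
# A tiny hypothetical helper mirroring that f-string logic (all names here are
# assumptions, not part of the original code):
def _example_delete_query(now="now", hour="h", delay=4, batch_delete=True):
    rounding = "/DAY" if batch_delete else ""
    return f"expiry_ts:[* TO {now}-{delay}{hour}{rounding}]"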
import re

from assemblyline.common import forge
from assemblyline.common.digests import get_sha256_for_file
from assemblyline.common.identify_defaults import magic_patterns, trusted_mimes
from assemblyline.common.str_utils import safe_str
from assemblyline.odm.models.tagging import Tagging
from assemblyline.remote.datatypes.events import EventSender
from assemblyline_ui.config import STORAGE, UI_MESSAGING, config
from assemblyline_ui.api.base import api_login, make_api_response, make_subapi_blueprint

SUB_API = 'system'
system_api = make_subapi_blueprint(SUB_API, api_version=4)
system_api._doc = "Perform system actions"

ADMIN_FILE_TTL = 60 * 60 * 24 * 365 * 100  # Just keep the file for 100 years...
al_re = re.compile(r"^[a-z]+(?:/[a-z0-9\-.+]+)+$")
constants = forge.get_constants()
event_sender = EventSender('system',
                           host=config.core.redis.nonpersistent.host,
                           port=config.core.redis.nonpersistent.port)


@system_api.route("/system_message/", methods=["DELETE"])
@api_login(require_type=['admin'], required_priv=['W'])
def clear_system_message(**_):
    """
    Clear the current system message

    Variables:
    None
def __init__(self, datastore, logger, classification=None, redis=None,
             persistent_redis=None, metrics_name='ingester'):
    self.datastore = datastore
    self.log = logger

    # Cache the user groups
    self.cache_lock = threading.RLock()  # TODO: are middle man instances single threaded now?
    self._user_groups = {}
    self._user_groups_reset = time.time() // HOUR_IN_SECONDS
    self.cache = {}
    self.notification_queues = {}
    self.whitelisted = {}
    self.whitelisted_lock = threading.RLock()

    # Create a config cache that will refresh config values periodically
    self.config = forge.CachedObject(forge.get_config)

    # Module path parameters are fixed at start time. Changing these involves a restart
    self.is_low_priority = load_module_by_path(self.config.core.ingester.is_low_priority)
    self.get_whitelist_verdict = load_module_by_path(self.config.core.ingester.get_whitelist_verdict)
    self.whitelist = load_module_by_path(self.config.core.ingester.whitelist)

    # Constants are loaded based on a non-constant path, so this has to be done at init rather than load
    constants = forge.get_constants(self.config)
    self.priority_value = constants.PRIORITIES
    self.priority_range = constants.PRIORITY_RANGES
    self.threshold_value = constants.PRIORITY_THRESHOLDS

    # Connect to the redis servers
    self.redis = redis or get_client(
        host=self.config.core.redis.nonpersistent.host,
        port=self.config.core.redis.nonpersistent.port,
        private=False,
    )
    self.persistent_redis = persistent_redis or get_client(
        host=self.config.core.redis.persistent.host,
        port=self.config.core.redis.persistent.port,
        private=False,
    )

    # Classification engine
    self.ce = classification or forge.get_classification()

    # Metrics gathering factory
    self.counter = MetricsFactory(metrics_type='ingester', schema=Metrics, redis=self.redis,
                                  config=self.config, name=metrics_name)

    # State. The submissions in progress are stored in Redis in order to
    # persist this state and recover in case we crash.
    self.scanning = Hash('m-scanning-table', self.persistent_redis)

    # Input. The dispatcher creates a record when any submission completes.
    self.complete_queue = NamedQueue(_completeq_name, self.redis)

    # Internal. Dropped entries are placed on this queue.
    # self.drop_queue = NamedQueue('m-drop', self.persistent_redis)

    # Input. An external process places submission requests on this queue.
    self.ingest_queue = NamedQueue(INGEST_QUEUE_NAME, self.persistent_redis)

    # Output. Duplicate our input traffic into this queue so it may be cloned by other systems.
    self.traffic_queue = CommsQueue('submissions', self.redis)

    # Internal. Unique requests are placed in and processed from this queue.
    self.unique_queue = PriorityQueue('m-unique', self.persistent_redis)

    # Internal. Delay queue for retrying.
    self.retry_queue = PriorityQueue('m-retry', self.persistent_redis)

    # Internal. Timeout watch queue.
    self.timeout_queue = PriorityQueue('m-timeout', self.redis)

    # Internal. Queue for processing duplicates.
    #   When a duplicate file is detected (same cache key => same file, and same
    #   submission parameters) the file won't be ingested normally, but instead a reference
    #   will be written to a duplicate queue. Whenever a file is finished, in the complete
    #   method, not only is the original ingestion finalized, but all entries in the duplicate
    #   queue are finalized as well. This has the effect that all concurrent ingestions of the
    #   same file are 'merged' into a single submission to the system.
    self.duplicate_queue = MultiQueue(self.persistent_redis)

    # Output. Submissions that should have alerts generated.
    self.alert_queue = NamedQueue(ALERT_QUEUE_NAME, self.persistent_redis)

    # Utility object to help submit tasks to dispatching
    self.submit_client = SubmissionClient(datastore=self.datastore, redis=self.redis)
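
# --- Illustrative sketch, not part of the original source ---
# The duplicate-merging behaviour described in the comment on duplicate_queue,
# reduced to plain in-memory Python so the flow is easier to follow. All names
# here (pending, in_flight, ingest, complete) are hypothetical; the real code
# keeps this state in the Redis-backed MultiQueue and scanning Hash instead.
from collections import defaultdict

pending = defaultdict(list)   # cache key -> requests waiting on an in-flight scan
in_flight = set()             # cache keys currently being scanned

def ingest(cache_key, request):
    if cache_key in in_flight:
        # Same file and same submission parameters: don't submit again,
        # just remember the request so it can be finalized later.
        pending[cache_key].append(request)
    else:
        in_flight.add(cache_key)   # first request triggers the real submission

def complete(cache_key, result):
    in_flight.discard(cache_key)
    # Every duplicate that queued up behind the original is finalized with the
    # same result, merging concurrent ingestions of the same file.
    for request in pending.pop(cache_key, []):
        ...  # deliver `result` for `request` (notification queue, alerting, ...)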
import re
import threading
import uuid
import zipfile
from binascii import hexlify
from collections import defaultdict
from typing import Tuple, Union, Dict

import magic
import ssdeep
from cart import get_metadata_only

from assemblyline.common.digests import get_digests_for_file
from assemblyline.common.forge import get_constants
from assemblyline.common.str_utils import dotdump, safe_str

constants = get_constants()

STRONG_INDICATORS = {
    'code/vbs': [
        re.compile(rb'(^|\n)On Error Resume Next'),
        re.compile(rb'(^|\n)(?:Private)?[ \t]*Sub[ \t]+\w+\(*'),
        re.compile(rb'(^|\n)End Module'),
        re.compile(rb'(^|\n)ExecuteGlobal'),
    ],
    'code/javascript': [
        re.compile(rb'function([ \t]*|[ \t]+[\w]+[ \t]*)\([\w \t,]*\)[ \t]*{'),
        re.compile(rb'\beval[ \t]*\('),
        re.compile(rb'new[ \t]+ActiveXObject\('),
        re.compile(rb'xfa\.((resolve|create)Node|datasets|form)'),
        re.compile(rb'\.oneOfChild'),
        re.compile(rb'unescape\(')