def test_service_retry_limit(core):
    watch = WatcherServer(redis=core.redis, redis_persist=core.redis)
    watch.start()
    try:
        # This time have the service 'crash'
        sha, size = ready_body(core, {'pre': {'drop': 3}})

        core.ingest_queue.push(SubmissionInput(dict(
            metadata={},
            params=dict(
                description="file abc123",
                services=dict(selected=''),
                submitter='user',
                groups=['user'],
                max_extracted=10000
            ),
            notification=dict(queue='watcher-recover', threshold=0),
            files=[dict(sha256=sha, size=size, name='abc123')]
        )).as_primitives())

        notification_queue = NamedQueue('nq-watcher-recover', core.redis)
        dropped_task = notification_queue.pop(timeout=16)
        assert dropped_task
        dropped_task = IngestTask(dropped_task)
        sub = core.ds.submission.get(dropped_task.submission.sid)
        assert len(sub.errors) == 1
        assert len(sub.results) == 3
        assert core.pre_service.drops[sha] == 3
        assert core.pre_service.hits[sha] == 3
    finally:
        watch.stop()
        watch.join()
def test_named_queue(redis_connection):
    if redis_connection:
        from assemblyline.remote.datatypes.queues.named import NamedQueue, select
        with NamedQueue('test-named-queue') as nq:
            nq.delete()

            for x in range(5):
                nq.push(x)

            assert nq.length() == 5
            nq.push(*list(range(5)))
            assert nq.length() == 10

            assert nq.peek_next() == nq.pop()
            assert nq.peek_next() == 1
            v = nq.pop()
            assert v == 1
            assert nq.peek_next() == 2
            nq.unpop(v)
            assert nq.peek_next() == 1

            assert select(nq) == ('test-named-queue', 1)

        with NamedQueue('test-named-queue-1') as nq1:
            nq1.delete()
            with NamedQueue('test-named-queue-2') as nq2:
                nq2.delete()

                nq1.push(1)
                nq2.push(2)

                assert select(nq1, nq2) == ('test-named-queue-1', 1)
                assert select(nq1, nq2) == ('test-named-queue-2', 2)
def __init__(self, working_dir, worker_count=50, spawn_workers=True,
             use_threading=False, logger=None):
    self.working_dir = working_dir
    self.datastore = forge.get_datastore(archive_access=True)
    self.logger = logger
    self.plist = []
    self.use_threading = use_threading
    self.instance_id = get_random_id()
    self.worker_queue = NamedQueue(f"r-worker-{self.instance_id}", ttl=1800)
    self.done_queue = NamedQueue(f"r-done-{self.instance_id}", ttl=1800)
    self.hash_queue = Hash(f"r-hash-{self.instance_id}")
    self.bucket_error = []
    self.VALID_BUCKETS = sorted(list(self.datastore.ds.get_models().keys()))
    self.worker_count = worker_count
    self.spawn_workers = spawn_workers
    self.total_count = 0
    self.error_map_count = {}
    self.missing_map_count = {}
    self.map_count = {}
    self.last_time = 0
    self.last_count = 0
    self.error_count = 0
def test_ingest_retry(core: CoreSession, metrics):
    # -------------------------------------------------------------------------------
    sha, size = ready_body(core)
    original_retry_delay = assemblyline_core.ingester.ingester._retry_delay
    assemblyline_core.ingester.ingester._retry_delay = 1

    attempts = []
    failures = []
    original_submit = core.ingest.submit

    def fail_once(task):
        attempts.append(task)
        if len(attempts) > 1:
            original_submit(task)
        else:
            failures.append(task)
            raise ValueError()

    core.ingest.submit = fail_once

    try:
        core.ingest_queue.push(SubmissionInput(dict(
            metadata={},
            params=dict(
                description="file abc123",
                services=dict(selected=''),
                submitter='user',
                groups=['user'],
            ),
            notification=dict(queue='output-queue-one', threshold=0),
            files=[dict(sha256=sha, size=size, name='abc123')]
        )).as_primitives())

        notification_queue = NamedQueue('nq-output-queue-one', core.redis)
        first_task = notification_queue.pop(timeout=RESPONSE_TIMEOUT)

        # One of the submissions will get processed fully
        assert first_task is not None
        first_task = IngestTask(first_task)
        first_submission: Submission = core.ds.submission.get(first_task.submission.sid)
        assert len(attempts) == 2
        assert len(failures) == 1
        assert first_submission.state == 'completed'
        assert len(first_submission.files) == 1
        assert len(first_submission.errors) == 0
        assert len(first_submission.results) == 4

        metrics.expect('ingester', 'submissions_ingested', 1)
        metrics.expect('ingester', 'submissions_completed', 1)
        metrics.expect('ingester', 'files_completed', 1)
        metrics.expect('ingester', 'duplicates', 0)
        metrics.expect('dispatcher', 'submissions_completed', 1)
        metrics.expect('dispatcher', 'files_completed', 1)
    finally:
        core.ingest.submit = original_submit
        assemblyline_core.ingester.ingester._retry_delay = original_retry_delay
def test_max_extracted_in_several(core):
    # Make a set of files in a non-trivial tree that adds up to more than 3 (max_extracted) files
    children = [
        ready_extract(core, [ready_body(core)[0], ready_body(core)[0]])[0],
        ready_extract(core, [ready_body(core)[0], ready_body(core)[0]])[0]
    ]
    sha, size = ready_extract(core, children)

    core.ingest_queue.push(SubmissionInput(dict(
        metadata={},
        params=dict(
            description="file abc123",
            services=dict(selected=''),
            submitter='user',
            groups=['user'],
            max_extracted=3
        ),
        notification=dict(queue='test-extracted-in-several', threshold=0),
        files=[dict(sha256=sha, size=size, name='abc123')]
    )).as_primitives())

    notification_queue = NamedQueue('nq-test-extracted-in-several', core.redis)
    task = IngestTask(notification_queue.pop(timeout=10))
    sub: Submission = core.ds.submission.get(task.submission.sid)
    assert len(sub.files) == 1
    # We should only get results for each file up to the max_extracted limit
    assert len(sub.results) == 4 * (1 + 3)  # 4 services, 1 original file, 3 extracted files
    assert len(sub.errors) == 3  # The number of children that errored out
def test_max_extracted_in_one(core):
    # Make a set of files that is bigger than max_extracted (3 in this case)
    children = [ready_body(core)[0] for _ in range(5)]
    sha, size = ready_extract(core, children)

    core.ingest_queue.push(SubmissionInput(dict(
        metadata={},
        params=dict(
            description="file abc123",
            services=dict(selected=''),
            submitter='user',
            groups=['user'],
            max_extracted=3
        ),
        notification=dict(queue='test-extracted-in-one', threshold=0),
        files=[dict(sha256=sha, size=size, name='abc123')]
    )).as_primitives())

    notification_queue = NamedQueue('nq-test-extracted-in-one', core.redis)
    start = time.time()
    task = notification_queue.pop(timeout=10)
    print("notification time waited", time.time() - start)
    assert task is not None
    task = IngestTask(task)
    sub: Submission = core.ds.submission.get(task.submission.sid)
    assert len(sub.files) == 1
    # We should only get results for each file up to the max_extracted limit
    assert len(sub.results) == 4 * (1 + 3)
    assert len(sub.errors) == 2  # The number of children that errored out
def run_once():
    counter.reset_mock()

    core.ingest_queue.push(SubmissionInput(dict(
        metadata={},
        params=dict(
            description="file abc123",
            services=dict(selected=''),
            submitter='user',
            groups=['user'],
        ),
        notification=dict(queue='1', threshold=0),
        files=[dict(sha256=sha, size=size, name='abc123')]
    )).as_primitives())

    notification_queue = NamedQueue('nq-1', core.redis)
    first_task = notification_queue.pop(timeout=5)

    # One of the submissions will get processed fully
    assert first_task is not None
    first_task = IngestTask(first_task)
    first_submission: Submission = core.ds.submission.get(first_task.submission.sid)
    assert first_submission.state == 'completed'
    assert len(first_submission.files) == 1
    assert len(first_submission.errors) == 0
    assert len(first_submission.results) == 4
    return first_submission.sid
def __init__(self, working_dir: str, worker_count: int = 50, spawn_workers: bool = True,
             use_threading: bool = False, logger: logging.Logger = None):
    self.working_dir = working_dir
    self.datastore = forge.get_datastore(archive_access=True)
    self.logger = logger
    self.plist: list[Process] = []
    self.use_threading = use_threading
    self.instance_id = get_random_id()
    self.worker_queue: NamedQueue[dict[str, Any]] = NamedQueue(f"r-worker-{self.instance_id}", ttl=1800)
    self.done_queue: NamedQueue[dict[str, Any]] = NamedQueue(f"r-done-{self.instance_id}", ttl=1800)
    self.hash_queue: Hash[str] = Hash(f"r-hash-{self.instance_id}")
    self.bucket_error: list[str] = []
    self.valid_buckets: list[str] = sorted(list(self.datastore.ds.get_models().keys()))
    self.worker_count = worker_count
    self.spawn_workers = spawn_workers
    self.total_count = 0
    self.error_map_count: dict[str, int] = {}
    self.missing_map_count: dict[str, int] = {}
    self.map_count: dict[str, int] = {}
    self.last_time: float = 0
    self.last_count = 0
    self.error_count = 0
def test_extracted_file(core, metrics):
    sha, size = ready_extract(core, ready_body(core)[0])

    core.ingest_queue.push(SubmissionInput(dict(
        metadata={},
        params=dict(
            description="file abc123",
            services=dict(selected=''),
            submitter='user',
            groups=['user'],
            max_extracted=10000
        ),
        notification=dict(queue='text-extracted-file', threshold=0),
        files=[dict(sha256=sha, size=size, name='abc123')]
    )).as_primitives())

    notification_queue = NamedQueue('nq-text-extracted-file', core.redis)
    task = notification_queue.pop(timeout=RESPONSE_TIMEOUT)
    assert task
    task = IngestTask(task)
    sub = core.ds.submission.get(task.submission.sid)
    assert len(sub.files) == 1
    assert len(sub.results) == 8
    assert len(sub.errors) == 0

    metrics.expect('ingester', 'submissions_ingested', 1)
    metrics.expect('ingester', 'submissions_completed', 1)
    metrics.expect('dispatcher', 'submissions_completed', 1)
    metrics.expect('dispatcher', 'files_completed', 2)
def get_all_messages(notification_queue, **kwargs):
    """
    Get all messages on the specified notification queue

    Variables:
    notification_queue  => Queue to get the messages from

    Arguments:
    None

    Data Block:
    None

    Result example:
    []            # List of messages
    """
    resp_list = []
    u = NamedQueue("nq-%s" % notification_queue,
                   host=config.core.redis.persistent.host,
                   port=config.core.redis.persistent.port)

    while True:
        msg = u.pop(blocking=False)

        if msg is None:
            break

        resp_list.append(msg)

    return make_api_response(resp_list)
def __init__(self, datastore=None, redis=None, redis_persist=None, logger=None):
    self.config = forge.get_config()

    self.redis = redis or get_client(
        host=self.config.core.redis.nonpersistent.host,
        port=self.config.core.redis.nonpersistent.port,
        private=False,
    )

    redis_persist = redis_persist or get_client(
        host=self.config.core.redis.persistent.host,
        port=self.config.core.redis.persistent.port,
        private=False,
    )

    self.timeout_watcher = WatcherClient(redis_persist)

    self.submission_queue = NamedQueue(SUBMISSION_QUEUE, self.redis)
    self.file_queue = NamedQueue(FILE_QUEUE, self.redis)
    self.ds = datastore or forge.get_datastore(self.config)
    self.log = logger or logging.getLogger("assemblyline.dispatching.client")
    # Read the collections through self.ds so the default datastore is used when none is passed in
    self.results = self.ds.result
    self.errors = self.ds.error
    self.files = self.ds.file
    self.active_submissions = ExpiringHash(DISPATCH_TASK_HASH, host=redis_persist)
    self.running_tasks = ExpiringHash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)
    self.service_data = cast(Dict[str, Service], CachedObject(self._get_services))
def test_dropping_early(core, metrics):
    # -------------------------------------------------------------------------------
    # This time have a file get marked for dropping by a service
    sha, size = ready_body(core, {'pre': {'result': {'drop_file': True}}})

    core.ingest_queue.push(SubmissionInput(dict(
        metadata={},
        params=dict(
            description="file abc123",
            services=dict(selected=''),
            submitter='user',
            groups=['user'],
            max_extracted=10000
        ),
        notification=dict(queue='drop', threshold=0),
        files=[dict(sha256=sha, size=size, name='abc123')]
    )).as_primitives())

    notification_queue = NamedQueue('nq-drop', core.redis)
    dropped_task = notification_queue.pop(timeout=RESPONSE_TIMEOUT)
    dropped_task = IngestTask(dropped_task)
    sub = core.ds.submission.get(dropped_task.submission.sid)
    assert len(sub.files) == 1
    assert len(sub.results) == 1

    metrics.expect('ingester', 'submissions_ingested', 1)
    metrics.expect('ingester', 'submissions_completed', 1)
    metrics.expect('dispatcher', 'submissions_completed', 1)
    metrics.expect('dispatcher', 'files_completed', 1)
def test_service_retry_limit(core, metrics):
    # This time have the service 'crash'
    sha, size = ready_body(core, {'pre': {'drop': 3}})

    core.ingest_queue.push(SubmissionInput(dict(
        metadata={},
        params=dict(
            description="file abc123",
            services=dict(selected=''),
            submitter='user',
            groups=['user'],
            max_extracted=10000
        ),
        notification=dict(queue='watcher-recover', threshold=0),
        files=[dict(sha256=sha, size=size, name='abc123')]
    )).as_primitives())

    notification_queue = NamedQueue('nq-watcher-recover', core.redis)
    dropped_task = notification_queue.pop(timeout=RESPONSE_TIMEOUT)

    assert dropped_task
    dropped_task = IngestTask(dropped_task)
    sub = core.ds.submission.get(dropped_task.submission.sid)
    assert len(sub.errors) == 1
    assert len(sub.results) == 3
    assert core.pre_service.drops[sha] == 3
    assert core.pre_service.hits[sha] == 3

    # Wait until we get feedback from the metrics channel
    metrics.expect('ingester', 'submissions_ingested', 1)
    metrics.expect('ingester', 'submissions_completed', 1)
    metrics.expect('dispatcher', 'service_timeouts', 3)
    metrics.expect('service', 'fail_recoverable', 3)
    metrics.expect('service', 'fail_nonrecoverable', 1)
    metrics.expect('dispatcher', 'submissions_completed', 1)
    metrics.expect('dispatcher', 'files_completed', 1)
def __init__(self, redis=None, redis_persist=None):
    super().__init__('assemblyline.watcher', redis=redis, redis_persist=redis_persist)

    # Watcher structures
    self.hash = ExpiringHash(name=WATCHER_HASH, ttl=MAX_TIMEOUT, host=self.redis_persist)
    self.queue = UniquePriorityQueue(WATCHER_QUEUE, self.redis_persist)

    # Task management structures
    self.running_tasks = ExpiringHash(DISPATCH_RUNNING_TASK_HASH, host=self.redis)  # TODO: move to persistent?
    self.scaler_timeout_queue = NamedQueue(SCALER_TIMEOUT_QUEUE, host=self.redis_persist)

    # Metrics tracking
    self.counter = MetricsFactory(metrics_type='watcher', schema=Metrics, name='watcher',
                                  redis=self.redis, config=self.config)

    if self.config.core.metrics.apm_server.server_url is not None:
        self.log.info(f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}")
        elasticapm.instrument()
        self.apm_client = elasticapm.Client(
            server_url=self.config.core.metrics.apm_server.server_url,
            service_name="watcher")
    else:
        self.apm_client = None
def test_depth_limit(core):
    # Make a nested set of files that goes deeper than the max depth by one
    sha, size = ready_body(core)
    for _ in range(core.config.submission.max_extraction_depth + 1):
        sha, size = ready_extract(core, sha)

    core.ingest_queue.push(SubmissionInput(dict(
        metadata={},
        params=dict(
            description="file abc123",
            services=dict(selected=''),
            submitter='user',
            groups=['user'],
            # Make sure we can extract enough files that we will definitely hit the depth limit first
            max_extracted=core.config.submission.max_extraction_depth + 10
        ),
        notification=dict(queue='test-depth-limit', threshold=0),
        files=[dict(sha256=sha, size=size, name='abc123')]
    )).as_primitives())

    notification_queue = NamedQueue('nq-test-depth-limit', core.redis)
    start = time.time()
    task = notification_queue.pop(timeout=10)
    print("notification time waited", time.time() - start)
    assert task is not None
    task = IngestTask(task)
    sub: Submission = core.ds.submission.get(task.submission.sid)
    assert len(sub.files) == 1
    # We should only get results for each file up to the max depth
    assert len(sub.results) == 4 * core.config.submission.max_extraction_depth
    assert len(sub.errors) == 1
def try_run(self):
    counter = self.counter
    apm_client = self.apm_client

    while self.running:
        self.heartbeat()

        # Download all messages from the queue that have expired
        seconds, _ = retry_call(self.redis.time)
        messages = self.queue.dequeue_range(0, seconds)

        cpu_mark = time.process_time()
        time_mark = time.time()

        # Try to pass on all the messages to their intended recipient; don't let the
        # failure of one message prevent the others from going through
        for key in messages:
            # Start of transaction
            if apm_client:
                apm_client.begin_transaction('process_messages')

            message = self.hash.pop(key)
            if message:
                try:
                    if message['action'] == WatcherAction.TimeoutTask:
                        self.cancel_service_task(message['task_key'], message['worker'])
                    else:
                        queue = NamedQueue(message['queue'], self.redis)
                        queue.push(message['message'])

                    self.counter.increment('expired')
                    # End of transaction (success)
                    if apm_client:
                        apm_client.end_transaction('watch_message', 'success')
                except Exception as error:
                    # End of transaction (exception)
                    if apm_client:
                        apm_client.end_transaction('watch_message', 'error')
                    self.log.exception(error)
            else:
                # End of transaction (duplicate)
                if apm_client:
                    apm_client.end_transaction('watch_message', 'duplicate')
                self.log.warning(f'Handled watch twice: {key} {len(key)} {type(key)}')

        counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
        counter.increment_execution_time('busy_seconds', time.time() - time_mark)

        if not messages:
            time.sleep(0.1)
def default_authenticator(auth, req, ses, storage):
    # This is the assemblyline authentication procedure.
    # It tries to authenticate the user with the following methods, in order, until one succeeds:
    #     apikey
    #     username/password
    #     PKI DN
    #
    # During the authentication procedure, the user/pass and DN methods are subject to an OTP challenge
    # if OTP is allowed on the server and has been turned on by the user.
    #
    # Apikey authentication is not subject to the OTP challenge but has limited functionality.

    apikey = auth.get('apikey', None)
    otp = auth.get('otp', 0)
    webauthn_auth_resp = auth.get('webauthn_auth_resp', None)
    state = ses.pop('state', None)
    password = auth.get('password', None)
    uname = auth.get('username', None)
    oauth_token = auth.get('oauth_token', None)

    if not uname:
        raise AuthenticationException('No user specified for authentication')

    # Bruteforce protection
    auth_fail_queue = NamedQueue("ui-failed-%s" % uname, **nonpersistent_config)
    if auth_fail_queue.length() >= config.auth.internal.max_failures:
        # Failed 'max_failures' times, stop trying... This will timeout in 'failure_ttl' seconds
        raise AuthenticationException("Maximum password retry of {retry} was reached. "
                                      "This account is locked for the next {ttl} "
                                      "seconds...".format(retry=config.auth.internal.max_failures,
                                                          ttl=config.auth.internal.failure_ttl))

    try:
        validated_user, priv = validate_apikey(uname, apikey, storage)
        if validated_user:
            return validated_user, priv

        validated_user, priv = validate_oauth(uname, oauth_token)
        if not validated_user:
            validated_user, priv = validate_ldapuser(uname, password, storage)
        if not validated_user:
            validated_user, priv = validate_userpass(uname, password, storage)

        if validated_user:
            validate_2fa(validated_user, otp, state, webauthn_auth_resp, storage)
            return validated_user, priv

    except AuthenticationException:
        # Authentication failed, record the failure parameters for bruteforce protection
        auth_fail_queue.push({
            'remote_addr': req.remote_addr,
            'host': req.host,
            'full_path': req.full_path
        })

        raise

    raise AuthenticationException("None of the authentication methods succeeded")
def setup_watch_queue(self, sid):
    """
    This function takes a submission ID as a parameter and creates a unique queue where all service
    result keys for that given submission will be returned to as soon as they come in.

    If the submission is in the middle of processing, this will also send all currently received keys through
    the specified queue so the client that requests the watch queue is up to date.

    :param sid: Submission ID
    :return: The name of the watch queue that was created
    """
    # Create a unique queue
    queue_name = reply_queue_name(prefix="D", suffix="WQ")
    watch_queue = NamedQueue(queue_name, ttl=30)
    watch_queue.push(WatchQueueMessage({'status': 'START'}).as_primitives())

    # Add the newly created queue to the list of queues for the given submission
    self._get_watcher_list(sid).add(queue_name)

    # Push all current keys to the newly created queue (Queue should have a TTL of about 30 sec to 1 minute)
    # Download the entire status table from redis
    dispatch_hash = DispatchHash(sid, self.redis)
    if dispatch_hash.dispatch_count() == 0 and dispatch_hash.finished_count() == 0:
        # This table is empty? Do we have this submission at all?
        submission = self.ds.submission.get(sid)
        if not submission or submission.state == 'completed':
            watch_queue.push(WatchQueueMessage({"status": "STOP"}).as_primitives())
        else:
            # We do have a submission, remind the dispatcher to work on it
            self.submission_queue.push({'sid': sid})
    else:
        all_service_status = dispatch_hash.all_results()
        for status_values in all_service_status.values():
            for status in status_values.values():
                if status.is_error:
                    watch_queue.push(WatchQueueMessage({
                        "status": "FAIL",
                        "cache_key": status.key
                    }).as_primitives())
                else:
                    watch_queue.push(WatchQueueMessage({
                        "status": "OK",
                        "cache_key": status.key
                    }).as_primitives())

    return queue_name
def test_plumber_clearing(core, metrics):
    global _global_semaphore
    _global_semaphore = threading.Semaphore(value=0)
    start = time.time()

    try:
        # Have the plumber cancel tasks
        sha, size = ready_body(core, {'pre': {'hold': 60}})

        core.ingest_queue.push(SubmissionInput(dict(
            metadata={},
            params=dict(
                description="file abc123",
                services=dict(selected=''),
                submitter='user',
                groups=['user'],
                max_extracted=10000
            ),
            notification=dict(queue='test_plumber_clearing', threshold=0),
            files=[dict(sha256=sha, size=size, name='abc123')]
        )).as_primitives())

        metrics.expect('ingester', 'submissions_ingested', 1)
        service_queue = get_service_queue('pre', core.redis)

        start = time.time()
        while service_queue.length() < 1:
            if time.time() - start > RESPONSE_TIMEOUT:
                pytest.fail(f'Found {service_queue.length()}')
            time.sleep(0.1)

        service_delta = core.ds.service_delta.get('pre')
        service_delta['enabled'] = False
        core.ds.service_delta.save('pre', service_delta)

        notification_queue = NamedQueue('nq-test_plumber_clearing', core.redis)
        dropped_task = notification_queue.pop(timeout=RESPONSE_TIMEOUT)
        dropped_task = IngestTask(dropped_task)
        sub = core.ds.submission.get(dropped_task.submission.sid)
        assert len(sub.files) == 1
        assert len(sub.results) == 3
        assert len(sub.errors) == 1
        error = core.ds.error.get(sub.errors[0])
        assert "disabled" in error.response.message

        metrics.expect('ingester', 'submissions_completed', 1)
        metrics.expect('dispatcher', 'submissions_completed', 1)
        metrics.expect('dispatcher', 'files_completed', 1)
        metrics.expect('service', 'fail_recoverable', 1)
    finally:
        _global_semaphore.release()
        service_delta = core.ds.service_delta.get('pre')
        service_delta['enabled'] = True
        core.ds.service_delta.save('pre', service_delta)
def test_live_namespace(datastore, sio):
    wq_data = {'wq_id': get_random_id()}
    wq = NamedQueue(wq_data['wq_id'], private=True)

    start_msg = {'status_code': 200, 'msg': "Start listening..."}
    stop_msg = {'status_code': 200, 'msg': "All messages received, closing queue..."}
    cachekey_msg = {'status_code': 200, 'msg': get_random_id()}
    cachekeyerr_msg = {'status_code': 200, 'msg': get_random_id()}

    test_res_array = []

    @sio.on('start', namespace='/live_submission')
    def on_start(data):
        test_res_array.append(('on_start', data == start_msg))

    @sio.on('stop', namespace='/live_submission')
    def on_stop(data):
        test_res_array.append(('on_stop', data == stop_msg))

    @sio.on('cachekey', namespace='/live_submission')
    def on_cachekey(data):
        test_res_array.append(('on_cachekey', data == cachekey_msg))

    @sio.on('cachekeyerr', namespace='/live_submission')
    def on_cachekeyerr(data):
        test_res_array.append(('on_cachekeyerr', data == cachekeyerr_msg))

    try:
        sio.emit('listen', wq_data, namespace='/live_submission')
        sio.sleep(1)

        wq.push({"status": "START"})
        wq.push({"status": "OK", "cache_key": cachekey_msg['msg']})
        wq.push({"status": "FAIL", "cache_key": cachekeyerr_msg['msg']})
        wq.push({"status": "STOP"})

        start_time = time.time()
        while len(test_res_array) < 4 and time.time() - start_time < 5:
            sio.sleep(0.1)

        assert len(test_res_array) == 4

        for test, result in test_res_array:
            if not result:
                pytest.fail(f"{test} failed.")
    finally:
        sio.disconnect()
def test_plumber_clearing(core):
    global _global_semaphore
    _global_semaphore = threading.Semaphore(value=0)
    start = time.time()

    watch = WatcherServer(redis=core.redis, redis_persist=core.redis)
    watch.start()

    try:
        # Have the plumber cancel tasks
        sha, size = ready_body(core, {'pre': {'semaphore': 60}})

        core.ingest_queue.push(SubmissionInput(dict(
            metadata={},
            params=dict(
                description="file abc123",
                services=dict(selected=''),
                submitter='user',
                groups=['user'],
                max_extracted=10000
            ),
            notification=dict(queue='test_plumber_clearing', threshold=0),
            files=[dict(sha256=sha, size=size, name='abc123')]
        )).as_primitives())

        service_queue = get_service_queue('pre', core.redis)
        time.sleep(0.5)
        while service_queue.length() == 0 and time.time() - start < 20:
            time.sleep(0.1)

        service_delta = core.ds.service_delta.get('pre')
        service_delta['enabled'] = False
        core.ds.service_delta.save('pre', service_delta)

        notification_queue = NamedQueue('nq-test_plumber_clearing', core.redis)
        dropped_task = notification_queue.pop(timeout=5)
        dropped_task = IngestTask(dropped_task)
        sub = core.ds.submission.get(dropped_task.submission.sid)
        assert len(sub.files) == 1
        assert len(sub.results) == 3
        assert len(sub.errors) == 1
        error = core.ds.error.get(sub.errors[0])
        assert "disabled" in error.response.message
    finally:
        _global_semaphore.release()
        service_delta = core.ds.service_delta.get('pre')
        service_delta['enabled'] = True
        core.ds.service_delta.save('pre', service_delta)
        watch.stop()
        watch.join()
def service_failed(self, sid: str, error_key: str, error: Error):
    task_key = ServiceTask.make_key(sid=sid, service_name=error.response.service_name, sha=error.sha256)
    task = self.running_tasks.pop(task_key)
    if not task:
        self.log.warning(f"[{sid}/{error.sha256}] {error.response.service_name} could not find the specified "
                         f"task in its set of running tasks while processing an error.")
        return
    task = ServiceTask(task)

    self.log.debug(f"[{sid}/{error.sha256}] {task.service_name} Failed with {error.response.status} error.")
    if error.response.status == "FAIL_NONRECOVERABLE":
        # This is a NON_RECOVERABLE error, the error will be saved and transmitted to the user
        self.errors.save(error_key, error)

        # Send the result key to any watching systems
        msg = {'status': 'FAIL', 'cache_key': error_key}
        for w in self._get_watcher_list(task.sid).members():
            NamedQueue(w, host=self.redis).push(msg)

    dispatcher = task.metadata['dispatcher__']
    result_queue = self._get_queue_from_cache(DISPATCH_RESULT_QUEUE + dispatcher)
    result_queue.push({
        'sid': task.sid,
        'service_task': task.as_primitives(),
        'error': error.as_primitives(),
        'error_key': error_key
    })
def send_notification(self, task: IngestTask, failure=None, logfunc=None):
    if logfunc is None:
        logfunc = self.log.info

    if failure:
        task.failure = failure

    failure = task.failure
    if failure:
        logfunc("%s: %s", failure, str(task.json()))

    if not task.submission.notification.queue:
        return

    note_queue = _notification_queue_prefix + task.submission.notification.queue
    threshold = task.submission.notification.threshold

    if threshold is not None and task.score is not None and task.score < threshold:
        return

    q = self.notification_queues.get(note_queue, None)
    if not q:
        self.notification_queues[note_queue] = q = NamedQueue(note_queue, self.persistent_redis)
    q.push(task.as_primitives())
def _dispatching_error(self, task, process_table, error):
    error_key = error.build_key(task=task)
    if process_table.add_error(error_key):
        self.errors.save(error_key, error)

        msg = {'status': 'FAIL', 'cache_key': error_key}
        for w in self._get_watcher_list(task.sid).members():
            NamedQueue(w).push(msg)
def _cleanup_submission(self, task: SubmissionTask, file_list: List[str]):
    """Clean up code that is the same for canceled and finished submissions"""
    submission = task.submission
    sid = submission.sid

    # Erase the temporary data which may have accumulated during processing
    for file_hash in file_list:
        hash_name = get_temporary_submission_data_name(sid, file_hash=file_hash)
        ExpiringHash(hash_name, host=self.redis).delete()

    if submission.params.quota_item and submission.params.submitter:
        self.log.info(f"[{sid}] Submission no longer counts toward {submission.params.submitter.upper()} quota")
        Hash('submissions-' + submission.params.submitter, self.redis_persist).pop(sid)

    if task.completed_queue:
        self.volatile_named_queue(task.completed_queue).push(submission.as_primitives())

    # Send complete message to any watchers.
    watcher_list = ExpiringSet(make_watcher_list_name(sid), host=self.redis)
    for w in watcher_list.members():
        NamedQueue(w).push(WatchQueueMessage({'status': 'STOP'}).as_primitives())

    # Clear the timeout watcher
    watcher_list.delete()
    self.timeout_watcher.clear(sid)
    self.active_submissions.pop(sid)

    # Count the submission as 'complete' either way
    self.counter.increment('submissions_completed')
def test_create_single_alert(config, datastore):
    persistent_redis = get_client(
        host=config.core.redis.persistent.host,
        port=config.core.redis.persistent.port,
        private=False,
    )

    alerter = Alerter()
    # Swap our alerter onto a private queue so our test doesn't get intercepted
    alerter.alert_queue = alert_queue = NamedQueue(uuid.uuid4().hex, persistent_redis)

    # Get a random submission
    submission = random.choice(all_submissions)
    all_submissions.remove(submission)

    # Generate a task for the submission
    ingest_msg = random_model_obj(IngestTask)
    ingest_msg.submission.sid = submission.sid
    ingest_msg.submission.metadata = submission.metadata
    ingest_msg.submission.params = submission.params
    ingest_msg.submission.files = submission.files

    alert_queue.push(ingest_msg.as_primitives())
    alert_type = alerter.run_once()
    assert alert_type == 'create'
    datastore.alert.commit()

    res = datastore.alert.search("id:*", as_obj=False)
    assert res['total'] == 1

    alert = datastore.alert.get(res['items'][0]['alert_id'])
    assert alert.sid == submission.sid
def restore_worker(worker_id: str, instance_id: str, working_dir: str):
    datastore = forge.get_datastore(archive_access=True)
    done_queue: NamedQueue[dict[str, Any]] = NamedQueue(f"r-done-{instance_id}", ttl=1800)

    with open(os.path.join(working_dir, "backup.part%s" % worker_id), "rb") as input_file:
        for line in input_file:
            bucket_name, key, data = json.loads(line)

            success = True
            try:
                collection = datastore.get_collection(bucket_name)
                collection.save(key, data)
            except Exception:
                success = False

            done_queue.push({
                "success": success,
                "missing": False,
                "bucket_name": bucket_name,
                "key": key
            })

    done_queue.push({"stopped": True})
def test_service_error(core, metrics):
    # -------------------------------------------------------------------------------
    # Have a service produce an error
    # -------------------------------------------------------------------------------
    # This time have a file get marked for dropping by a service
    sha, size = ready_body(core, {
        'core-a': {
            'error': {
                'archive_ts': time.time() + 250,
                'sha256': 'a' * 64,
                'response': {
                    'message': 'words',
                    'status': 'FAIL_NONRECOVERABLE',
                    'service_name': 'core-a',
                    'service_tool_version': 0,
                    'service_version': '0'
                },
                'expiry_ts': time.time() + 500
            },
            'failure': True,
        }
    })

    core.ingest_queue.push(SubmissionInput(dict(
        metadata={},
        params=dict(
            description="file abc123",
            services=dict(selected=''),
            submitter='user',
            groups=['user'],
            max_extracted=10000
        ),
        notification=dict(queue='error', threshold=0),
        files=[dict(sha256=sha, size=size, name='abc123')]
    )).as_primitives())

    notification_queue = NamedQueue('nq-error', core.redis)
    task = IngestTask(notification_queue.pop(timeout=RESPONSE_TIMEOUT))
    sub = core.ds.submission.get(task.submission.sid)
    assert len(sub.files) == 1
    assert len(sub.results) == 3
    assert len(sub.errors) == 1

    metrics.expect('ingester', 'submissions_ingested', 1)
    metrics.expect('ingester', 'submissions_completed', 1)
    metrics.expect('dispatcher', 'submissions_completed', 1)
    metrics.expect('dispatcher', 'files_completed', 1)
def __init__(self, sender, log, config=None, redis=None):
    self.sender = sender
    self.log = log

    self.config = config or forge.get_config()
    self.datastore = forge.get_datastore(self.config)

    self.redis = redis or get_client(
        host=self.config.core.redis.nonpersistent.host,
        port=self.config.core.redis.nonpersistent.port,
        private=False,
    )

    self.redis_persist = get_client(
        host=self.config.core.redis.persistent.host,
        port=self.config.core.redis.persistent.port,
        private=False,
    )

    self.status_queue = CommsQueue(STATUS_QUEUE, self.redis)
    self.dispatch_active_hash = Hash(DISPATCH_TASK_HASH, self.redis_persist)
    self.dispatcher_submission_queue = NamedQueue(SUBMISSION_QUEUE, self.redis)
    self.ingest_scanning = Hash('m-scanning-table', self.redis_persist)
    self.ingest_unique_queue = PriorityQueue('m-unique', self.redis_persist)
    self.ingest_queue = NamedQueue(INGEST_QUEUE_NAME, self.redis_persist)
    self.ingest_complete_queue = NamedQueue(COMPLETE_QUEUE_NAME, self.redis)
    self.alert_queue = NamedQueue(ALERT_QUEUE_NAME, self.redis_persist)

    constants = forge.get_constants(self.config)
    self.c_rng = constants.PRIORITY_RANGES['critical']
    self.h_rng = constants.PRIORITY_RANGES['high']
    self.m_rng = constants.PRIORITY_RANGES['medium']
    self.l_rng = constants.PRIORITY_RANGES['low']
    self.c_s_at = self.config.core.ingester.sampling_at['critical']
    self.h_s_at = self.config.core.ingester.sampling_at['high']
    self.m_s_at = self.config.core.ingester.sampling_at['medium']
    self.l_s_at = self.config.core.ingester.sampling_at['low']

    self.to_expire = {k: 0 for k in metrics.EXPIRY_METRICS}

    if self.config.core.expiry.batch_delete:
        self.delete_query = f"expiry_ts:[* TO {self.datastore.ds.now}-{self.config.core.expiry.delay}" \
                            f"{self.datastore.ds.hour}/DAY]"
    else:
        self.delete_query = f"expiry_ts:[* TO {self.datastore.ds.now}-{self.config.core.expiry.delay}" \
                            f"{self.datastore.ds.hour}]"

    self.scheduler = BackgroundScheduler(daemon=True)
    self.scheduler.add_job(
        self._reload_expiry_queues, 'interval',
        seconds=self.config.core.metrics.export_interval * 4)
    self.scheduler.start()
def get_messages(wq_id, **_):
    """
    Get all messages currently on a watch queue.
    Note: This method is not optimal because it requires the UI to pull the information.
          The preferred method is the socket server when possible.

    Variables:
    wq_id       => Queue to get the messages from

    Arguments:
    None

    Data Block:
    None

    Result example:
    []            # List of messages
    """
    resp_list = []
    u = NamedQueue(wq_id)

    while True:
        msg = u.pop(blocking=False)

        if msg is None:
            break
        elif msg['status'] == 'STOP':
            response = {'type': 'stop', 'err_msg': None, 'status_code': 200,
                        'msg': "All messages received, closing queue..."}
        elif msg['status'] == 'START':
            response = {'type': 'start', 'err_msg': None, 'status_code': 200,
                        'msg': "Start listening..."}
        elif msg['status'] == 'OK':
            response = {'type': 'cachekey', 'err_msg': None, 'status_code': 200,
                        'msg': msg['cache_key']}
        elif msg['status'] == 'FAIL':
            response = {'type': 'cachekeyerr', 'err_msg': None, 'status_code': 200,
                        'msg': msg['cache_key']}
        else:
            response = {'type': 'error', 'err_msg': "Unknown message", 'status_code': 400,
                        'msg': msg}

        resp_list.append(response)

    return make_api_response(resp_list)