def get_summary_from_keys(self, keys, cl_engine=forge.get_classification(), user_classification=None):
    out = {
        "tags": [],
        "attack_matrix": [],
        "heuristics": {
            "info": [],
            "suspicious": [],
            "malicious": []
        },
        "classification": cl_engine.UNRESTRICTED,
        "filtered": False
    }
    done_map = {"heuristics": set(), "attack": set(), "tags": set()}

    if len(keys) == 0:
        return out

    keys = [x for x in list(keys) if not x.endswith(".e")]
    file_keys = list(set([x[:64] for x in keys]))

    try:
        items = self.result.multiget(keys, as_obj=False)
    except MultiKeyError as e:
        # Generate partial summaries even if results are missing
        log.warning(f"Trying to generate summary but we are missing result(s): {str(e.keys)}")
        items = e.partial_output
        out['missing_results'] = e.keys

    try:
        files = self.file.multiget(file_keys, as_obj=False)
    except MultiKeyError as e:
        # Generate partial summaries even if results are missing
        log.warning(f"Trying to generate summary but we are missing file(s): {str(e.keys)}")
        files = e.partial_output
        out['missing_files'] = e.keys

    for key, item in items.items():
        for section in item.get('result', {}).get('sections', []):
            file_classification = files.get(key[:64], {}).get('classification', section['classification'])
            if user_classification:
                if not cl_engine.is_accessible(user_classification, section['classification']):
                    out["filtered"] = True
                    continue
                if not cl_engine.is_accessible(user_classification, file_classification):
                    out["filtered"] = True
                    continue

            out["classification"] = cl_engine.max_classification(out["classification"], section['classification'])
            out["classification"] = cl_engine.max_classification(out["classification"], file_classification)

            h_type = "info"

            if section.get('heuristic', False):
                # Get the heuristics data
                if section['heuristic']['score'] < 100:
                    h_type = "info"
                elif section['heuristic']['score'] < 1000:
                    h_type = "suspicious"
                else:
                    h_type = "malicious"

                cache_key = f"{section['heuristic']['heur_id']}_{key}"
                if cache_key not in done_map['heuristics']:
                    out['heuristics'][h_type].append({
                        'heur_id': section['heuristic']['heur_id'],
                        'name': section['heuristic']['name'],
                        'key': key
                    })
                    done_map['heuristics'].add(cache_key)

                for attack in section['heuristic'].get('attack', []):
                    # Get attack matrix data
                    attack_id = attack['attack_id']

                    cache_key = f"{attack_id}_{key}"
                    if cache_key not in done_map['attack']:
                        out['attack_matrix'].append({
                            "key": key,
                            "attack_id": attack_id,
                            "h_type": h_type,
                            "name": attack['pattern'],
                            "categories": attack['categories']
                        })
                        done_map['attack'].add(cache_key)

            # Get tagging data
            for tag_type, tags in flatten(section.get('tags', {})).items():
                if tags is not None:
                    for tag in tags:
                        cache_key = f"{tag_type}_{tag}_{key}"
                        if cache_key not in done_map['tags']:
                            out['tags'].append({
                                'type': tag_type,
                                'h_type': h_type,
                                'short_type': tag_type.rsplit(".", 1)[-1],
                                'value': tag,
                                'key': key
                            })
                            done_map['tags'].add(cache_key)

    return out
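# --- Illustrative usage sketch (not part of the original module) ---
# Hedged example of driving get_summary_from_keys() from a submission record. `ds` stands
# for an AssemblylineDatastore-style helper exposing the method above; `sid` and
# `user_classification` are placeholders. Only the argument and return shapes shown above
# are relied on.
def print_submission_summary(ds, sid, user_classification=None):
    submission = ds.submission.get(sid, as_obj=False)
    summary = ds.get_summary_from_keys(submission['results'],
                                       user_classification=user_classification)
    # Heuristics are grouped by severity bucket and tags are de-duplicated per result key
    for bucket in ("malicious", "suspicious", "info"):
        print(bucket, [h['name'] for h in summary['heuristics'][bucket]])
    print("classification:", summary['classification'], "filtered:", summary['filtered'])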
import shutil

from flask import request

from assemblyline.common.dict_utils import flatten
from assemblyline_ui.api.base import api_login, make_api_response, make_subapi_blueprint
from assemblyline_ui.config import STORAGE, TEMP_SUBMIT_DIR
from assemblyline_ui.helper.service import ui_to_submission_params
from assemblyline_ui.helper.submission import safe_download, FileTooBigException, InvalidUrlException, ForbiddenLocation
from assemblyline_ui.helper.user import check_submission_quota, get_default_user_settings
from assemblyline.common import forge
from assemblyline.common.uid import get_random_id
from assemblyline.odm.messages.submission import Submission
from assemblyline_core.submission_client import SubmissionClient, SubmissionException

Classification = forge.get_classification()
config = forge.get_config()

SUB_API = 'submit'
submit_api = make_subapi_blueprint(SUB_API, api_version=4)
submit_api._doc = "Submit files to the system"


# noinspection PyUnusedLocal
@submit_api.route("/dynamic/<sha256>/", methods=["GET"])
@api_login(required_priv=['W'], allow_readonly=False)
def resubmit_for_dynamic(sha256, *args, **kwargs):
    """
    Resubmit a file for dynamic analysis

    Variables:
def delete_submission_tree(self, sid, cl_engine=forge.get_classification(), cleanup=True, transport=None):
    submission = self.submission.get(sid, as_obj=False)
    if not submission:
        return

    # Gather file list
    errors = submission['errors']
    results = submission["results"]
    files = set()
    fix_classification_files = set()
    supp_map = {}

    temp_files = [x[:64] for x in errors]
    temp_files.extend([x[:64] for x in results])
    temp_files = set(temp_files)

    # Inspect each file to see if it is reused
    for temp in temp_files:
        # Hunt for supplementary files
        supp_list = set()
        for res in self.result.stream_search(f"id:{temp}* AND response.supplementary.sha256:*",
                                             fl="id", as_obj=False):
            if res['id'] in results:
                result = self.result.get(res['id'], as_obj=False)
                for supp in result['response']['supplementary']:
                    supp_list.add(supp['sha256'])

        # Check if we delete or update classification
        if self.submission.search(f"errors:{temp}* OR results:{temp}*", rows=0, as_obj=False)["total"] < 2:
            files.add(temp)
            files = files.union(supp_list)
        else:
            fix_classification_files.add(temp)
            supp_map[temp] = supp_list

    # Filter results and errors
    errors = [x for x in errors if x[:64] in files]
    results = [x for x in results if x[:64] in files]

    # Delete files, errors and results that were only used once
    for e in errors:
        self.error.delete(e)
    for r in results:
        if r.endswith(".e"):
            self.emptyresult.delete(r)
        else:
            self.result.delete(r)
    for f in files:
        self.file.delete(f)
        if transport:
            transport.delete(f)

    if fix_classification_files and cleanup:
        # Fix classification for the files that remain in the system
        for f in fix_classification_files:
            cur_file = self.file.get(f, as_obj=False)
            if cur_file:
                # Find the possible classifications for the file in the system
                query = f"NOT id:{sid} AND (files.sha256:{f} OR results:{f}* OR errors:{f}*)"
                classifications = list(self.submission.facet('classification', query=query).keys())

                if len(classifications) > 0:
                    new_file_class = classifications[0]
                else:
                    new_file_class = cl_engine.UNRESTRICTED

                for c in classifications:
                    new_file_class = cl_engine.min_classification(new_file_class, c)

                # Find the results for that file and alter them if the new classification does not match
                for item in self.result.stream_search(f"id:{f}*", fl="classification,id", as_obj=False):
                    new_class = cl_engine.max_classification(
                        item.get('classification', cl_engine.UNRESTRICTED), new_file_class)
                    if item.get('classification', cl_engine.UNRESTRICTED) != new_class:
                        parts = cl_engine.get_access_control_parts(new_class)
                        update_params = [(Collection.UPDATE_SET, 'classification', new_class)]
                        update_params.extend([(Collection.UPDATE_SET, k, v) for k, v in parts.items()])
                        self.result.update(item['id'], update_params)

                # Alter the file classification if the new classification does not match
                if cur_file['classification'] != new_file_class:
                    parts = cl_engine.get_access_control_parts(new_file_class)
                    update_params = [(Collection.UPDATE_SET, 'classification', new_file_class)]
                    update_params.extend([(Collection.UPDATE_SET, k, v) for k, v in parts.items()])
                    self.file.update(f, update_params)

                # Fix associated supplementary files
                for supp in supp_map.get(f, set()):
                    cur_supp = self.file.get(supp, as_obj=False)
                    if cur_supp:
                        if cur_supp['classification'] != new_file_class:
                            parts = cl_engine.get_access_control_parts(new_file_class)
                            update_params = [(Collection.UPDATE_SET, 'classification', new_file_class)]
                            update_params.extend([(Collection.UPDATE_SET, k, v) for k, v in parts.items()])
                            self.file.update(supp, update_params)

    # Delete the submission and its cached trees and summaries
    self.submission.delete(sid)
    for t in [x['id'] for x in self.submission_tree.stream_search(f"id:{sid}*", fl="id", as_obj=False)]:
        self.submission_tree.delete(t)
    for s in [x['id'] for x in self.submission_summary.stream_search(f"id:{sid}*", fl="id", as_obj=False)]:
        self.submission_summary.delete(s)
def get_or_create_file_tree(self, submission, max_depth, cl_engine=forge.get_classification(),
                            user_classification=None):
    if user_classification is not None:
        user_classification = cl_engine.normalize_classification(user_classification, long_format=False)
        cache_key = f"{submission['sid']}_{user_classification}"
        for illegal_char in [" ", ":", "/"]:
            cache_key = cache_key.replace(illegal_char, "")
    else:
        cache_key = submission['sid']

    if isinstance(submission, Model):
        submission = submission.as_primitives()

    num_files = len(list(set([x[:64] for x in submission['results']])))
    max_score = submission['max_score']

    cached_tree = self.submission_tree.get_if_exists(cache_key, as_obj=False)
    if cached_tree:
        tree = json.loads(cached_tree['tree'])
        if self._is_valid_tree(tree, num_files, max_score):
            return {
                "tree": tree,
                "classification": cached_tree['classification'],
                "filtered": cached_tree['filtered'],
                "partial": False
            }

    partial = False
    files = {}
    scores = {}
    missing_files = []
    file_hashes = [x[:64] for x in submission['results']]
    file_hashes.extend([x[:64] for x in submission['errors']])
    file_hashes.extend([f['sha256'] for f in submission['files']])

    try:
        temp_file_data_map = self.file.multiget(list(set(file_hashes)), as_dictionary=True, as_obj=False)
    except MultiKeyError as e:
        log.warning(f"Trying to generate file tree but we are missing file(s): {str(e.keys)}")
        temp_file_data_map = e.partial_output
        missing_files = e.keys
        partial = True

    forbidden_files = set()
    max_classification = cl_engine.UNRESTRICTED
    file_data_map = {}
    for key, value in temp_file_data_map.items():
        if user_classification and not cl_engine.is_accessible(user_classification, value['classification']):
            partial = True
            forbidden_files.add(key)
            continue
        file_data_map[key] = value
        max_classification = cl_engine.max_classification(max_classification, value['classification'])

    try:
        results_data = self.result.multiget([x for x in submission['results'] if not x.endswith(".e")],
                                            as_obj=False)
    except MultiKeyError as e:
        log.warning(f"Trying to generate file tree but we are missing result(s): {str(e.keys)}")
        results_data = e.partial_output
        partial = True

    for key, item in results_data.items():
        sha256 = key[:64]

        # Get scores
        if sha256 not in scores:
            scores[sha256] = 0
        scores[sha256] += item["result"]["score"]

        # Get files
        extracted = item['response']['extracted']
        if len(extracted) == 0:
            continue
        if sha256 not in files:
            files[sha256] = []
        files[sha256].extend(extracted)

    tree_cache = []

    def recurse_tree(child_p, placeholder, parents_p, lvl=0):
        if lvl == max_depth + 1:
            # Enforce depth protection while building the tree
            return

        c_sha256 = child_p['sha256']
        c_name = child_p['name']
        if c_sha256 in placeholder:
            placeholder[c_sha256]['name'].append(c_name)
        else:
            children_list = {}
            truncated = False
            child_list = files.get(c_sha256, [])
            for new_child in child_list:
                if new_child['sha256'] in tree_cache:
                    truncated = True
                    continue
                tree_cache.append(new_child['sha256'])

                if new_child['sha256'] not in parents_p:
                    recurse_tree(new_child, children_list, parents_p + [c_sha256], lvl + 1)

            try:
                placeholder[c_sha256] = {
                    "name": [c_name],
                    "type": file_data_map[c_sha256]['type'],
                    "sha256": file_data_map[c_sha256]['sha256'],
                    "size": file_data_map[c_sha256]['size'],
                    "children": children_list,
                    "truncated": truncated,
                    "score": scores.get(c_sha256, 0),
                }
            except KeyError:
                if c_sha256 not in forbidden_files and c_sha256 not in missing_files:
                    file_data_map[c_sha256] = self.file.get(c_sha256, as_obj=False)
                    placeholder[c_sha256] = {
                        "name": [c_name],
                        "type": file_data_map[c_sha256]['type'],
                        "sha256": file_data_map[c_sha256]['sha256'],
                        "size": file_data_map[c_sha256]['size'],
                        "children": children_list,
                        "truncated": truncated,
                        "score": scores.get(c_sha256, 0),
                    }

    tree = {}
    for f in submission['files']:
        sha256 = f['sha256']
        name = f['name']

        if sha256 in tree:
            tree[sha256]['name'].append(name)
        else:
            parents = [sha256]
            children = {}
            c_list = files.get(sha256, [])
            for child in c_list:
                tree_cache.append(child['sha256'])
                recurse_tree(child, children, parents)

            try:
                tree[sha256] = {
                    "name": [name],
                    "children": children,
                    "type": file_data_map[sha256]['type'],
                    "sha256": file_data_map[sha256]['sha256'],
                    "size": file_data_map[sha256]['size'],
                    "truncated": False,
                    "score": scores.get(sha256, 0),
                }
            except KeyError:
                if sha256 not in forbidden_files and sha256 not in missing_files:
                    file_data_map[sha256] = self.file.get(sha256, as_obj=False)
                    tree[sha256] = {
                        "name": [name],
                        "children": children,
                        "type": file_data_map[sha256]['type'],
                        "sha256": file_data_map[sha256]['sha256'],
                        "size": file_data_map[sha256]['size'],
                        "truncated": False,
                        "score": scores.get(sha256, 0),
                    }

    if not partial:
        cached_tree = {
            'expiry_ts': now_as_iso(days_until_archive * 24 * 60 * 60),
            'tree': json.dumps(tree),
            'classification': max_classification,
            'filtered': len(forbidden_files) > 0
        }
        self.submission_tree.save(cache_key, cached_tree)

    return {
        'tree': tree,
        'classification': max_classification,
        'filtered': len(forbidden_files) > 0,
        'partial': partial
    }
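# --- Illustrative usage sketch (not part of the original module) ---
# Hedged example of consuming the structure returned by get_or_create_file_tree().
# `ds`, `sid` and `user_classification` are placeholders; the walk relies only on the
# "children", "name" and "score" keys built above.
def print_file_tree(ds, sid, max_depth=10, user_classification=None):
    submission = ds.submission.get(sid, as_obj=False)
    data = ds.get_or_create_file_tree(submission, max_depth,
                                      user_classification=user_classification)

    def walk(node, indent=0):
        for sha256, entry in node.items():
            print(" " * indent + f"{'/'.join(entry['name'])} [{sha256[:8]}] score={entry['score']}")
            walk(entry['children'], indent + 2)

    walk(data['tree'])
    if data['partial']:
        print("(partial tree: some files or results were missing or not accessible)")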
import os
import random
import tempfile

from assemblyline.common import forge
from assemblyline.common.dict_utils import flatten
from assemblyline.common.hexdump import hexdump
from assemblyline_v4_service.common.base import ServiceBase
from assemblyline_v4_service.common.result import Result, ResultSection, BODY_FORMAT, Heuristic

# DO NOT IMPORT IN YOUR SERVICE. These are just for creating randomized results.
from assemblyline.odm.randomizer import get_random_phrase, get_random_ip, get_random_host, get_random_tags

# DO NOT LIST BODY FORMATS LIKE THIS. This is again for the data randomizer.
FORMAT_LIST = [BODY_FORMAT.TEXT, BODY_FORMAT.MEMORY_DUMP]

cl_engine = forge.get_classification()


class ResultSample(ServiceBase):
    def __init__(self, config=None):
        super(ResultSample, self).__init__(config)

    def start(self):
        # ==================================================================
        # On Startup actions:
        #   Your service might have to do some warming up on startup to make things faster
        self.log.info(f"start() from {self.service_attributes.name} service called")

    def execute(self, request):
        # ==================================================================
def delete_submission_tree_bulk(self, sid, cl_engine=forge.get_classification(), cleanup=True, transport=None):
    submission = self.submission.get(sid, as_obj=False)
    if not submission:
        return

    # Create plans
    s_plan = self.submission.get_bulk_plan()
    st_plan = self.submission_tree.get_bulk_plan()
    ss_plan = self.submission_summary.get_bulk_plan()
    e_plan = self.error.get_bulk_plan()
    er_plan = self.emptyresult.get_bulk_plan()
    r_plan = self.result.get_bulk_plan()
    f_plan = self.file.get_bulk_plan()

    # Add delete operations for the submission and its cached data
    s_plan.add_delete_operation(sid)
    for x in self.submission_tree.stream_search(f"id:{sid}*", fl="id,_index", as_obj=False):
        st_plan.add_delete_operation(x['id'], index=x['_index'])
    for x in self.submission_summary.stream_search(f"id:{sid}*", fl="id,_index", as_obj=False):
        ss_plan.add_delete_operation(x['id'], index=x['_index'])

    # Gather file list
    errors = submission['errors']
    results = submission["results"]
    files = set()
    fix_classification_files = set()
    supp_map = {}

    temp_files = [x[:64] for x in errors]
    temp_files.extend([x[:64] for x in results])
    temp_files = set(temp_files)

    # Inspect each file to see if it is reused
    for temp in temp_files:
        # Hunt for supplementary files
        supp_list = set()
        for res in self.result.stream_search(f"id:{temp}* AND response.supplementary.sha256:*",
                                             fl="id", as_obj=False):
            if res['id'] in results:
                result = self.result.get(res['id'], as_obj=False)
                for supp in result['response']['supplementary']:
                    supp_list.add(supp['sha256'])

        # Check if we delete or update classification
        if self.submission.search(f"errors:{temp}* OR results:{temp}*", rows=0, as_obj=False)["total"] < 2:
            files.add(temp)
            files = files.union(supp_list)
        else:
            fix_classification_files.add(temp)
            supp_map[temp] = supp_list

    # Filter results and errors
    errors = [x for x in errors if x[:64] in files]
    results = [x for x in results if x[:64] in files]

    # Delete files, errors and results that were only used once
    for e in errors:
        e_plan.add_delete_operation(e)
    for r in results:
        if r.endswith(".e"):
            er_plan.add_delete_operation(r)
        else:
            r_plan.add_delete_operation(r)
    for f in files:
        f_plan.add_delete_operation(f)
        if transport:
            transport.delete(f)

    if fix_classification_files and cleanup:
        # Fix classification for the files that remain in the system
        for f in fix_classification_files:
            cur_file = self.file.get(f, as_obj=False)
            if cur_file:
                # Find the possible classifications for the file in the system
                query = f"NOT id:{sid} AND (files.sha256:{f} OR results:{f}* OR errors:{f}*)"
                classifications = list(self.submission.facet('classification', query=query).keys())

                if len(classifications) > 0:
                    new_file_class = classifications[0]
                else:
                    new_file_class = cl_engine.UNRESTRICTED

                for c in classifications:
                    new_file_class = cl_engine.min_classification(new_file_class, c)

                # Find the results for that file and alter them if the new classification does not match
                for item in self.result.stream_search(f"id:{f}*", fl="classification,id,_index", as_obj=False):
                    new_class = cl_engine.max_classification(
                        item.get('classification', cl_engine.UNRESTRICTED), new_file_class)
                    if item.get('classification', cl_engine.UNRESTRICTED) != new_class:
                        data = cl_engine.get_access_control_parts(new_class)
                        data['classification'] = new_class
                        r_plan.add_update_operation(item['id'], data, index=item['_index'])

                # Alter the file classification if the new classification does not match
                if cur_file['classification'] != new_file_class:
                    data = cl_engine.get_access_control_parts(new_file_class)
                    data['classification'] = new_file_class
                    f_plan.add_update_operation(f, data)

                # Fix associated supplementary files
                for supp in supp_map.get(f, set()):
                    cur_supp = self.file.get(supp, as_obj=False)
                    if cur_supp:
                        if cur_supp['classification'] != new_file_class:
                            data = cl_engine.get_access_control_parts(new_file_class)
                            data['classification'] = new_file_class
                            f_plan.add_update_operation(supp, data)

    # Proceed with the plans
    self.multi_index_bulk([s_plan, st_plan, ss_plan, e_plan, er_plan, r_plan, f_plan])
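# --- Illustrative usage sketch (not part of the original module) ---
# Hedged example: deleting a submission and its filestore content in one bulk pass.
# `ds` is a datastore helper exposing the method above; `filestore` is assumed to be a
# transport object with a delete(sha256) method, as implied by the `transport` argument.
def purge_submission(ds, sid, filestore=None):
    # cleanup=True re-computes classifications for files still referenced by other submissions
    ds.delete_submission_tree_bulk(sid, cleanup=True, transport=filestore)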
def try_run(self):
    try:
        self.service_class = load_module_by_path(SERVICE_PATH)
    except ValueError:
        raise
    except Exception:
        LOG.error("Could not find service in path. Check your environment variables.")
        raise

    self.load_service_manifest()

    if not os.path.isfile(FILE_PATH):
        LOG.info(f"File not found: {FILE_PATH}")
        return

    self.file_dir = os.path.dirname(FILE_PATH)

    # Get filename and working dir
    file_name = os.path.basename(FILE_PATH)
    working_dir = os.path.join(self.file_dir, f'{os.path.basename(FILE_PATH)}_{SERVICE_NAME.lower()}')

    # Start service
    self.service.start_service()

    # Identify the file
    file_info = self.identify.fileinfo(FILE_PATH)
    if file_info['type'] == "archive/cart" or file_info['magic'] == "custom: archive/cart":
        # This is a CaRT file, uncart it and recreate the file info object
        original_temp = os.path.join(tempfile.gettempdir(), file_info['sha256'])
        with open(FILE_PATH, 'rb') as ifile, open(original_temp, 'wb') as ofile:
            unpack_stream(ifile, ofile)

        file_info = self.identify.fileinfo(original_temp)
        target_file = os.path.join(tempfile.gettempdir(), file_info['sha256'])
        shutil.move(original_temp, target_file)
        LOG.info(f"File was a CaRT archive, it was un-CaRTed to {target_file} for processing")
    else:
        # It is not a CaRT, move the file to the right place to be processed
        target_file = os.path.join(tempfile.gettempdir(), file_info['sha256'])
        shutil.copyfile(FILE_PATH, target_file)

    # Create service processing task
    service_task = ServiceTask(dict(
        sid=get_random_id(),
        metadata={},
        service_name=SERVICE_NAME,
        service_config=self.submission_params,
        fileinfo=dict(
            magic=file_info['magic'],
            md5=file_info['md5'],
            mime=file_info['mime'],
            sha1=file_info['sha1'],
            sha256=file_info['sha256'],
            size=file_info['size'],
            type=file_info['type'],
        ),
        filename=file_name,
        min_classification=forge.get_classification().UNRESTRICTED,
        max_files=501,  # TODO: get the actual value
        ttl=3600))

    LOG.info(f"Starting task with SID: {service_task.sid}")

    # Set the working directory to a directory with the same parent as the input file
    if os.path.isdir(working_dir):
        shutil.rmtree(working_dir)
    if not os.path.isdir(working_dir):
        os.makedirs(os.path.join(working_dir, 'working_directory'))

    self.service.handle_task(service_task)

    # Move the result.json and extracted/supplementary files to the working directory
    source = os.path.join(tempfile.gettempdir(), 'working_directory')
    if not os.path.exists(source):
        os.makedirs(source)

    files = os.listdir(source)
    for f in files:
        shutil.move(os.path.join(source, f), os.path.join(working_dir, 'working_directory'))

    # Cleanup files from the original directory created by the service base
    shutil.rmtree(source)

    result_json = os.path.join(tempfile.gettempdir(),
                               f'{service_task.sid}_{service_task.fileinfo.sha256}_result.json')

    if not os.path.exists(result_json):
        raise Exception("A service error occurred and no result json was found.")

    # Validate the generated result
    with open(result_json, 'r') as fh:
        try:
            result = json.load(fh)
            result.pop('temp_submission_data', None)
            for file in result['response']['extracted'] + result['response']['supplementary']:
                file.pop('path', None)

            # Load heuristics
            heuristics = get_heuristics()

            # Transform heuristics and calculate score
            total_score = 0
            for section in result['result']['sections']:
                # Ignore tag and sig safe flags since we have no connection to the safelist
                section.pop('zeroize_on_tag_safe', None)
                section.pop('zeroize_on_sig_safe', None)
                if section['heuristic']:
                    heur_id = section['heuristic']['heur_id']
                    try:
                        section['heuristic'], new_tags = HeuristicHandler().service_heuristic_to_result_heuristic(
                            section['heuristic'], heuristics)
                        for tag in new_tags:
                            section['tags'].setdefault(tag[0], [])
                            if tag[1] not in section['tags'][tag[0]]:
                                section['tags'][tag[0]].append(tag[1])
                        total_score += section['heuristic']['score']
                    except InvalidHeuristicException:
                        section['heuristic'] = None

                    if section['heuristic']:
                        section['heuristic']['name'] = heuristics[heur_id]['name']
            result['result']['score'] = total_score

            # Add timestamps for creation, archive and expiry
            result['created'] = now_as_iso()
            result['archive_ts'] = now_as_iso(1 * 24 * 60 * 60)
            result['expiry_ts'] = now_as_iso(service_task.ttl * 24 * 60 * 60)

            result = Result(result)

            # Print the result on the console if in debug mode
            if args.debug:
                LOG.debug(f"{SERVICE_NAME.upper()}-RESULT".center(60, '-'))
                for line in pprint.pformat(result.result.as_primitives()).split('\n'):
                    LOG.debug(line)
        except Exception as e:
            LOG.error(f"Invalid result created: {str(e)}")

    LOG.info(f"Cleaning up file used for temporary processing: {target_file}")
    os.unlink(target_file)

    LOG.info(f"Moving {result_json} to the working directory: {working_dir}/result.json")
    shutil.move(result_json, os.path.join(working_dir, 'result.json'))

    LOG.info(f"Successfully completed task. Output directory: {working_dir}")
def __init__(self, config=None, datastore=None, redis=None, redis_persist=None):
    super().__init__('assemblyline.scaler', config=config, datastore=datastore,
                     redis=redis, redis_persist=redis_persist)

    self.scaler_timeout_queue = NamedQueue(SCALER_TIMEOUT_QUEUE, host=self.redis_persist)
    self.error_count_lock = threading.Lock()
    self.error_count: dict[str, list[float]] = {}
    self.status_table = ExpiringHash(SERVICE_STATE_HASH, host=self.redis, ttl=30 * 60)
    self.service_event_sender = EventSender('changes.services', host=self.redis)
    self.service_change_watcher = EventWatcher(self.redis, deserializer=ServiceChange.deserialize)
    self.service_change_watcher.register('changes.services.*', self._handle_service_change_event)

    core_env: dict[str, str] = {}
    # If we have privileged services, we must be able to pass the necessary environment variables
    # for them to function properly.
    for secret in re.findall(r'\${\w+}', open('/etc/assemblyline/config.yml', 'r').read()) + ['UI_SERVER']:
        env_name = secret.strip("${}")
        core_env[env_name] = os.environ[env_name]

    labels = {
        'app': 'assemblyline',
        'section': 'service',
        'privilege': 'service'
    }

    if self.config.core.scaler.additional_labels:
        labels.update({k: v for k, v in (_l.split("=") for _l in self.config.core.scaler.additional_labels)})

    if KUBERNETES_AL_CONFIG:
        self.log.info(f"Loading Kubernetes cluster interface on namespace: {NAMESPACE}")
        self.controller = KubernetesController(logger=self.log, prefix='alsvc_', labels=labels,
                                               namespace=NAMESPACE, priority='al-service-priority',
                                               cpu_reservation=self.config.services.cpu_reservation,
                                               log_level=self.config.logging.log_level,
                                               core_env=core_env)
        # If we know where to find it, mount the classification into the service containers
        if CLASSIFICATION_CONFIGMAP:
            self.controller.config_mount('classification-config',
                                         config_map=CLASSIFICATION_CONFIGMAP,
                                         key=CLASSIFICATION_CONFIGMAP_KEY,
                                         target_path='/etc/assemblyline/classification.yml')
        if CONFIGURATION_CONFIGMAP:
            self.controller.core_config_mount('assemblyline-config',
                                              config_map=CONFIGURATION_CONFIGMAP,
                                              key=CONFIGURATION_CONFIGMAP_KEY,
                                              target_path='/etc/assemblyline/config.yml')
    else:
        self.log.info("Loading Docker cluster interface.")
        self.controller = DockerController(logger=self.log, prefix=NAMESPACE, labels=labels,
                                           log_level=self.config.logging.log_level, core_env=core_env)
        self._service_stage_hash.delete()

        if DOCKER_CONFIGURATION_PATH and DOCKER_CONFIGURATION_VOLUME:
            self.controller.core_mounts.append((DOCKER_CONFIGURATION_VOLUME, '/etc/assemblyline/'))

            with open(os.path.join(DOCKER_CONFIGURATION_PATH, 'config.yml'), 'w') as handle:
                yaml.dump(self.config.as_primitives(), handle)

            with open(os.path.join(DOCKER_CONFIGURATION_PATH, 'classification.yml'), 'w') as handle:
                yaml.dump(get_classification().original_definition, handle)

        # If we know where to find it, mount the classification into the service containers
        if CLASSIFICATION_HOST_PATH:
            self.controller.global_mounts.append((CLASSIFICATION_HOST_PATH,
                                                  '/etc/assemblyline/classification.yml'))

    # Information about services
    self.profiles: dict[str, ServiceProfile] = {}
    self.profiles_lock = threading.RLock()

    # Prepare a single threaded scheduler
    self.state = collection.Collection(period=self.config.core.metrics.export_interval)
    self.stopping = threading.Event()
    self.main_loop_exit = threading.Event()

    # Load the APM connection if any
    self.apm_client = None
    if self.config.core.metrics.apm_server.server_url:
        elasticapm.instrument()
        self.apm_client = elasticapm.Client(server_url=self.config.core.metrics.apm_server.server_url,
                                            service_name="scaler")
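# --- Illustrative sketch (not part of the original module) ---
# A minimal, standalone version of the ${VAR} scan performed in __init__ above: every
# placeholder referenced in the core config file is resolved from the current environment.
# The path and the extra 'UI_SERVER' entry mirror the constructor; anything else here is an
# assumption for illustration.
import os
import re


def collect_core_env(config_path='/etc/assemblyline/config.yml'):
    with open(config_path, 'r') as handle:
        secrets = re.findall(r'\${\w+}', handle.read()) + ['UI_SERVER']
    # Strip the ${...} wrapper and copy each variable from the environment (KeyError if unset)
    return {secret.strip("${}"): os.environ[secret.strip("${}")] for secret in secrets}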
import functools

from assemblyline.common import version
from assemblyline.common.logformat import AL_LOG_FORMAT
from assemblyline.common import forge, log as al_log
from assemblyline.remote.datatypes.hash import Hash
from assemblyline.remote.datatypes.queues.comms import CommsQueue
from assemblyline.remote.datatypes.set import ExpiringSet
from assemblyline.remote.datatypes.user_quota_tracker import UserQuotaTracker

config = forge.get_config()

#################################################################
# Configuration

CLASSIFICATION = forge.get_classification()

ALLOW_RAW_DOWNLOADS = config.ui.allow_raw_downloads
APP_ID = "https://%s" % config.ui.fqdn
APP_NAME = "Assemblyline"
AUDIT = config.ui.audit

SECRET_KEY = config.ui.secret_key
DEBUG = config.ui.debug
DOWNLOAD_ENCODING = config.ui.download_encoding
MAX_CLASSIFICATION = CLASSIFICATION.UNRESTRICTED
ORGANISATION = config.system.organisation
SYSTEM_TYPE = config.system.type

BUILD_MASTER = version.FRAMEWORK_VERSION
BUILD_LOWER = version.SYSTEM_VERSION
def create_menu(user, path):
    user['groups'].insert(0, "ALL")

    submission_submenu = [
        {"class": "dropdown-header", "active": False, "link": None, "title": "Personal"},
        {"class": "", "active": (path == "/submissions.html?user=%s" % user['uname']),
         "link": "/submissions.html?user=%s" % user['uname'], "title": "My Submissions"},
        {"class": "divider", "active": False, "link": None, "title": None},
        {"class": "dropdown-header", "active": False, "link": None, "title": "Groups"}
    ]
    submission_submenu.extend([
        {"class": "", "active": (path == "/submissions.html?group=%s" % x),
         "link": "/submissions.html?group=%s" % x, "title": x}
        for x in user['groups']
    ])

    help_submenu = [
        {"class": "dropdown-header", "active": False, "link": None, "title": "Documentation"},
        {"class": "", "active": path.startswith("/api_doc.html"),
         "link": "/api_doc.html", "title": "API Documentation"}
    ]

    if forge.get_classification().enforce:
        help_submenu.extend([
            {"class": "", "active": path.startswith("/classification_help.html"),
             "link": "/classification_help.html", "title": "Classification Help"}
        ])

    if not config.ui.read_only:
        help_submenu.extend([
            {"class": "", "active": path.startswith("/configuration.html"),
             "link": "/configuration.html", "title": "Configuration Settings"}
        ])

    help_submenu.extend([
        {"class": "", "active": path.startswith("/search_help.html"),
         "link": "/search_help.html", "title": "Search Help"}
    ])

    if not config.ui.read_only:
        help_submenu.extend([
            {"class": "", "active": path.startswith("/services.html"),
             "link": "/services.html", "title": "Service Listing"},
            {"class": "divider", "active": False, "link": None, "title": None},
            {"class": "dropdown-header", "active": False, "link": None, "title": "Heuristics"},
            {"class": "", "active": path.startswith("/heuristics.html"),
             "link": "/heuristics.html", "title": "Malware Heuristics"},
            {"class": "divider", "active": False, "link": None, "title": None},
            {"class": "dropdown-header", "active": False, "link": None, "title": "Statistics"},
            {"class": "", "active": path.startswith("/heuristics_stats.html"),
             "link": "/heuristics_stats.html", "title": "Heuristic Statistics"},
            {"class": "", "active": path.startswith("/signature_statistics.html"),
             "link": "/signature_statistics.html", "title": "Signature Statistics"}
        ])

    alerting_submenu = [
        {"class": "", "active": path.startswith("/alerts.html"),
         "link": "/alerts.html", "title": "View Alerts", "has_submenu": False},
        {"class": "", "active": path.startswith("/workflows.html"),
         "link": "/workflows.html", "title": "Workflow filters", "has_submenu": False}
    ]

    menu = [
        {"class": "", "active": path.split("?")[0] == "/" or path.startswith("/submit.html"),
         "link": "/submit.html", "title": "Submit", "has_submenu": False},
        {"class": "", "active": path.startswith("/submissions.html"),
         "link": "#", "title": "Submissions", "has_submenu": True, "submenu": submission_submenu},
        {"class": "", "active": path.startswith("/alerts.html") or path.startswith("/workflows.html"),
         "link": "#", "title": "Alerts", "has_submenu": True, "submenu": alerting_submenu}
    ]

    if not config.ui.read_only:
        if 'admin' in user['type'] or 'signature_manager' in user['type']:
            signature_submenu = [
                {"class": "", "active": path.startswith("/signatures.html"),
                 "link": "/signatures.html", "title": "Signature management", "has_submenu": False},
                {"class": "", "active": path.startswith("/source_management.html"),
                 "link": "/source_management.html", "title": "Source management", "has_submenu": False}
            ]

            menu.append({
                "class": "",
                "active": path.startswith("/signatures.html") or path.startswith("/source_management.html"),
                "link": "#",
                "title": "Signatures",
                "has_submenu": True,
                "submenu": signature_submenu
            })
        else:
            menu.append({
                "class": "",
                "active": path.startswith("/signatures.html"),
                "link": "/signatures.html",
                "title": "Signatures",
                "has_submenu": False
            })

    search_submenu = [
        {"class": "",
         "active": path.startswith("/search.html") and ("search_scope=all" in path or "search_scope" not in path),
         "link": "/search.html", "title": "All indexes", "has_submenu": False},
        {"class": "divider", "active": False, "link": None, "title": None},
        {"class": "dropdown-header", "active": False, "link": None, "title": "Specific indexes"},
    ]

    for idx in ["Alert", "File", "Result", "Signature", "Submission"]:
        search_submenu.append({
            "class": "",
            "active": path.startswith("/search.html") and f"search_scope={idx.lower()}" in path,
            "link": f"/search.html?search_scope={idx.lower()}",
            "title": f"{idx} Index",
            "has_submenu": False
        })

    menu.extend([
        {"class": "", "active": path.startswith("/search.html"),
         "link": "/search.html", "title": "Search", "has_submenu": True, "submenu": search_submenu},
        {"class": "",
         "active": path.startswith("/api_doc.html") or
                   path.startswith("/classification_help.html") or
                   path.startswith("/configuration.html") or
                   path.startswith("/heuristics.html") or
                   path.startswith("/heuristics_stats.html") or
                   path.startswith("/signature_statistics.html") or
                   path.startswith("/search_help.html") or
                   path.startswith("/services.html"),
         "link": "#", "title": "Help", "has_submenu": True, "submenu": help_submenu}
    ])

    return menu
def __init__(self, *args, yml_config=None, **kwargs):
    super().__init__(*args, **kwargs)
    self.engine = forge.get_classification(yml_config=yml_config)
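# --- Illustrative sketch (not part of the original module) ---
# Hedged example of the classification-engine helpers used throughout this code base. It only
# exercises calls that appear elsewhere above (normalize_classification, is_accessible,
# min/max_classification, UNRESTRICTED); the concrete levels depend entirely on the loaded
# yml_config, and `engine`/`user_classification` are placeholders supplied by the caller.
def summarize_engine(engine, user_classification=None):
    baseline = engine.UNRESTRICTED
    if user_classification:
        user_classification = engine.normalize_classification(user_classification, long_format=False)
        print("user can see UNRESTRICTED data:", engine.is_accessible(user_classification, baseline))
    print("min(U, U) =", engine.min_classification(baseline, baseline))
    print("max(U, U) =", engine.max_classification(baseline, baseline))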
def test_dispatch_file(clean_redis):
    service_queue = lambda name: get_service_queue(name, clean_redis)

    ds = MockDatastore(collections=['submission', 'result', 'service', 'error', 'file', 'filescore'])
    file_hash = get_random_hash(64)
    sub = random_model_obj(models.submission.Submission)
    sub.sid = sid = 'first-submission'
    sub.params.ignore_cache = False

    disp = Dispatcher(ds, clean_redis, clean_redis, logging)
    disp.active_submissions.add(sid, SubmissionTask(dict(submission=sub)).as_primitives())
    dh = DispatchHash(sid=sid, client=clean_redis)

    print('==== first dispatch')
    # Submit a problem, and check that it gets added to the dispatch hash
    # and the right service queues
    file_task = FileTask({
        'sid': 'first-submission',
        'min_classification': get_classification().UNRESTRICTED,
        'file_info': dict(sha256=file_hash, type='unknown', magic='a', md5=get_random_hash(32),
                          mime='a', sha1=get_random_hash(40), size=10),
        'depth': 0,
        'max_files': 5
    })
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1
    assert service_queue('wrench').length() == 1

    # Making the same call again will queue it up again
    print('==== second dispatch')
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 2
    assert service_queue('wrench').length() == 2
    # assert len(mq) == 4

    # Push back the timestamp in the dispatch hash to simulate a timeout,
    # make sure it gets pushed into that service queue again
    print('==== third dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.fail_recoverable(file_hash, 'extract')
    disp.dispatch_file(file_task)

    assert dh.dispatch_time(file_hash, 'extract') > 0
    assert dh.dispatch_time(file_hash, 'wrench') > 0
    assert service_queue('extract').length() == 1
    # assert len(mq) == 1

    # Mark extract as finished, wrench as failed
    print('==== fourth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.finish(file_hash, 'extract', 'result-key', 0, 'U')
    dh.fail_nonrecoverable(file_hash, 'wrench', 'error-key')
    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'extract')
    assert dh.finished(file_hash, 'wrench')
    assert service_queue('av-a').length() == 1
    assert service_queue('av-b').length() == 1
    assert service_queue('frankenstrings').length() == 1

    # Have the AVs fail, frankenstrings finishes
    print('==== fifth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.fail_nonrecoverable(file_hash, 'av-a', 'error-a')
    dh.fail_nonrecoverable(file_hash, 'av-b', 'error-b')
    dh.finish(file_hash, 'frankenstrings', 'result-key', 0, 'U')
    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'av-a')
    assert dh.finished(file_hash, 'av-b')
    assert dh.finished(file_hash, 'frankenstrings')
    assert service_queue('xerox').length() == 1

    # Finish the xerox service and check if the submission completion got checked
    print('==== sixth dispatch')
    [service_queue(name).delete() for name in disp.scheduler.services]
    dh.finish(file_hash, 'xerox', 'result-key', 0, 'U')
    disp.dispatch_file(file_task)

    assert dh.finished(file_hash, 'xerox')
    assert len(disp.submission_queue) == 1
def __init__(self, datastore=None, logger=None, classification=None, redis=None,
             persistent_redis=None, metrics_name='ingester', config=None):
    super().__init__('assemblyline.ingester', logger, redis=redis,
                     redis_persist=persistent_redis, datastore=datastore, config=config)

    # Cache the user groups
    self.cache_lock = threading.RLock()
    self._user_groups = {}
    self._user_groups_reset = time.time() // HOUR_IN_SECONDS
    self.cache = {}
    self.notification_queues = {}
    self.whitelisted = {}
    self.whitelisted_lock = threading.RLock()

    # Module path parameters are fixed at start time. Changing these involves a restart
    self.is_low_priority = load_module_by_path(self.config.core.ingester.is_low_priority)
    self.get_whitelist_verdict = load_module_by_path(self.config.core.ingester.get_whitelist_verdict)
    self.whitelist = load_module_by_path(self.config.core.ingester.whitelist)

    # Constants are loaded based on a non-constant path, so this has to be done at init rather than load
    constants = forge.get_constants(self.config)
    self.priority_value: dict[str, int] = constants.PRIORITIES
    self.priority_range: dict[str, Tuple[int, int]] = constants.PRIORITY_RANGES
    self.threshold_value: dict[str, int] = constants.PRIORITY_THRESHOLDS

    # Classification engine
    self.ce = classification or forge.get_classification()

    # Metrics gathering factory
    self.counter = MetricsFactory(metrics_type='ingester', schema=Metrics, redis=self.redis,
                                  config=self.config, name=metrics_name)

    # State. The submissions in progress are stored in Redis in order to
    # persist this state and recover in case we crash.
    self.scanning = Hash('m-scanning-table', self.redis_persist)

    # Input. The dispatcher creates a record when any submission completes.
    self.complete_queue = NamedQueue(COMPLETE_QUEUE_NAME, self.redis)

    # Input. An external process places submission requests on this queue.
    self.ingest_queue = NamedQueue(INGEST_QUEUE_NAME, self.redis_persist)

    # Output. Duplicate our input traffic into this queue so it may be cloned by other systems
    self.traffic_queue = CommsQueue('submissions', self.redis)

    # Internal. Unique requests are placed in and processed from this queue.
    self.unique_queue = PriorityQueue('m-unique', self.redis_persist)

    # Internal, delay queue for retrying
    self.retry_queue = PriorityQueue('m-retry', self.redis_persist)

    # Internal, timeout watch queue
    self.timeout_queue: PriorityQueue[str] = PriorityQueue('m-timeout', self.redis)

    # Internal, queue for processing duplicates
    #   When a duplicate file is detected (same cache key => same file, and same
    #   submission parameters) the file won't be ingested normally, but instead a reference
    #   will be written to a duplicate queue. Whenever a file is finished, in the complete
    #   method, not only is the original ingestion finalized, but all entries in the duplicate queue
    #   are finalized as well. This has the effect that all concurrent ingestions of the same file
    #   are 'merged' into a single submission to the system.
    self.duplicate_queue = MultiQueue(self.redis_persist)

    # Output. Submissions that should have alerts generated
    self.alert_queue = NamedQueue(ALERT_QUEUE_NAME, self.redis_persist)

    # Utility object to help submit tasks to dispatching
    self.submit_client = SubmissionClient(datastore=self.datastore, redis=self.redis)

    if self.config.core.metrics.apm_server.server_url is not None:
        self.log.info(f"Exporting application metrics to: {self.config.core.metrics.apm_server.server_url}")
        elasticapm.instrument()
        self.apm_client = elasticapm.Client(server_url=self.config.core.metrics.apm_server.server_url,
                                            service_name="ingester")
    else:
        self.apm_client = None