def submit():
    if KEY_ANALYSIS not in request.values:
        abort(Response("missing {} field (see documentation)".format(KEY_ANALYSIS), 400))

    r = json.loads(request.values[KEY_ANALYSIS])

    # the specified company needs to match the company of this node
    # TODO eventually we'll have a single node that serves API to all configured companies
    if KEY_COMPANY_NAME in r and r[KEY_COMPANY_NAME] != saq.CONFIG['global']['company_name']:
        abort(Response("wrong company {} (are you sending to the correct system?)".format(r[KEY_COMPANY_NAME]), 400))

    if KEY_DESCRIPTION not in r:
        abort(Response("missing {} field in submission".format(KEY_DESCRIPTION), 400))

    root = RootAnalysis()
    root.uuid = str(uuid.uuid4())

    # does the engine use a different drive for the workload?
    analysis_mode = r[KEY_ANALYSIS_MODE] if KEY_ANALYSIS_MODE in r else saq.CONFIG['engine']['default_analysis_mode']
    if analysis_mode != ANALYSIS_MODE_CORRELATION:
        root.storage_dir = workload_storage_dir(root.uuid)
    else:
        root.storage_dir = storage_dir_from_uuid(root.uuid)

    root.initialize_storage()

    try:
        root.analysis_mode = analysis_mode
        root.company_id = saq.CONFIG['global'].getint('company_id')
        root.tool = r[KEY_TOOL] if KEY_TOOL in r else 'api'
        root.tool_instance = r[KEY_TOOL_INSTANCE] if KEY_TOOL_INSTANCE in r else 'api({})'.format(request.remote_addr)
        root.alert_type = r[KEY_TYPE] if KEY_TYPE in r else saq.CONFIG['api']['default_alert_type']
        root.description = r[KEY_DESCRIPTION]

        root.event_time = LOCAL_TIMEZONE.localize(datetime.datetime.now())
        if KEY_EVENT_TIME in r:
            try:
                root.event_time = parse_event_time(r[KEY_EVENT_TIME])
            except ValueError as e:
                abort(Response("invalid event time format for {} (use {} format)".format(
                    r[KEY_EVENT_TIME], event_time_format_json_tz), 400))

        root.details = r[KEY_DETAILS] if KEY_DETAILS in r else {}

        # go ahead and allocate storage
        # XXX use temp dir instead...

        if KEY_TAGS in r:
            for tag in r[KEY_TAGS]:
                root.add_tag(tag)

        # add the observables
        if KEY_OBSERVABLES in r:
            for o in r[KEY_OBSERVABLES]:
                # check for required fields
                for field in [KEY_O_TYPE, KEY_O_VALUE]:
                    if field not in o:
                        abort(Response("an observable is missing the {} field".format(field), 400))

                o_type = o[KEY_O_TYPE]
                o_value = o[KEY_O_VALUE]

                o_time = None
                if KEY_O_TIME in o:
                    try:
                        o_time = parse_event_time(o[KEY_O_TIME])
                    except ValueError:
                        abort(Response("an observable has an invalid time format {} (use {} format)".format(
                            o[KEY_O_TIME], event_time_format_json_tz), 400))

                observable = root.add_observable(o_type, o_value, o_time=o_time)

                if KEY_O_TAGS in o:
                    for tag in o[KEY_O_TAGS]:
                        observable.add_tag(tag)

                if KEY_O_DIRECTIVES in o:
                    for directive in o[KEY_O_DIRECTIVES]:
                        # is this a valid directive?
                        if directive not in VALID_DIRECTIVES:
                            abort(Response("observable {} has invalid directive {} (choose from {})".format(
                                '{}:{}'.format(o_type, o_value), directive, ','.join(VALID_DIRECTIVES)), 400))

                        observable.add_directive(directive)

                if KEY_O_LIMITED_ANALYSIS in o:
                    for module_name in o[KEY_O_LIMITED_ANALYSIS]:
                        observable.limit_analysis(module_name)

        # save the files to disk and add them as observables of type file
        for f in request.files.getlist('file'):
            logging.debug("recording file {}".format(f.filename))
            try:
                full_path = os.path.join(root.storage_dir, f.filename)
                try:
                    dest_dir = os.path.dirname(full_path)
                    if not os.path.isdir(dest_dir):
                        try:
                            os.makedirs(dest_dir)
                        except Exception as e:
                            logging.error("unable to create directory {}: {}".format(dest_dir, e))
                            abort(400)

                    logging.debug("saving file {}".format(full_path))
                    f.save(full_path)

                    # add this as a F_FILE type observable
                    root.add_observable(F_FILE, os.path.relpath(full_path, start=root.storage_dir))

                except Exception as e:
                    logging.error("unable to save file {} for root {}: {}".format(full_path, root, e))
                    abort(400)

            except Exception as e:
                logging.error("unable to deal with file {}: {}".format(f, e))
                report_exception()
                abort(400)

        try:
            if not root.save():
                logging.error("unable to save analysis")
                abort(Response("an error occurred trying to save the alert - review the logs", 400))

            # if we received a submission for correlation mode then we go ahead and add it to the database
            if root.analysis_mode == ANALYSIS_MODE_CORRELATION:
                ALERT(root)

            # add this analysis to the workload
            root.schedule()

        except Exception as e:
            logging.error("unable to sync to database: {}".format(e))
            report_exception()
            abort(Response("an error occurred trying to save the alert - review the logs", 400))

        return json_result({'result': {'uuid': root.uuid}})

    except Exception as e:
        logging.error("error processing submit: {}".format(e))
        report_exception()

        try:
            if os.path.isdir(root.storage_dir):
                logging.info("removing failed submit dir {}".format(root.storage_dir))
                shutil.rmtree(root.storage_dir)
        except Exception as e2:
            logging.error("unable to delete failed submit dir {}: {}".format(root.storage_dir, e2))

        raise e
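# Illustrative sketch (not part of the original module): roughly what a client-side
# call to this submit endpoint might look like. The endpoint URL, the literal string
# values of the submission fields ('analysis', 'description', 'observables', ...),
# and the use of the requests library are assumptions here; the authoritative field
# names are the KEY_* constants used above. The 'file' form field and the
# {'result': {'uuid': ...}} response shape do come from the code above.
def _example_submit_client(base_url='https://ace.example.com'):
    import json
    import requests

    analysis = {
        'description': 'example submission',      # required (KEY_DESCRIPTION)
        'analysis_mode': 'analysis',               # optional (KEY_ANALYSIS_MODE)
        'tags': ['example'],                       # optional (KEY_TAGS)
        'observables': [                           # optional (KEY_OBSERVABLES)
            {'type': 'ipv4', 'value': '1.2.3.4'},
        ],
    }

    with open('sample.txt', 'rb') as fp:
        response = requests.post('{}/api/analysis/submit'.format(base_url),
                                 data={'analysis': json.dumps(analysis)},
                                 files=[('file', ('sample.txt', fp))])

    response.raise_for_status()
    return response.json()['result']['uuid']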
class HTTPScanningEngine(ANPNodeEngine, MySQLCollectionEngine, Engine):
    # XXX do I need to specify Engine here?

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # if set to True then we don't delete the work directories
        self.keep_work_dir = False

        # the location of the incoming http streams
        self.bro_http_dir = os.path.join(saq.SAQ_HOME, self.config['bro_http_dir'])

        # the list of streams (connection ids) that we need to process
        self.stream_list = collections.deque()

        # http whitelist
        self.whitelist = None

        # path to the whitelist file
        self.whitelist_path = os.path.join(saq.SAQ_HOME, self.config['whitelist_path'])

    @property
    def name(self):
        return 'http_scanner'

    def initialize_collection(self, *args, **kwargs):
        # before we start collecting, make sure that everything in our local directory
        # has a matching entry in the workload database
        # TODO
        super().initialize_collection(*args, **kwargs)

    def anp_command_handler(self, anp, command):
        """Handle inbound ANP commands from remote http engines."""
        if command.command == ANP_COMMAND_COPY_FILE:
            anp.send_message(ANPCommandOK())
        elif command.command == ANP_COMMAND_PROCESS:
            self.add_sql_work_item(command.target)
            anp.send_message(ANPCommandOK())
        else:
            self.default_command_handler(anp, command)

    def get_next_stream(self):
        """Returns the next HTTP stream to be processed or None if nothing is available to be processed."""
        # do we have a list yet?
        if len(self.stream_list) == 0:
            for file_name in os.listdir(self.bro_http_dir):
                m = REGEX_CONNECTION_ID.match(file_name)
                if m:
                    self.stream_list.append(m.group(1))

        if len(self.stream_list) == 0:
            return None

        return self.stream_list.popleft()

    def submit_stream(self, stream_prefix, node_id):
        # submit http request files
        logging.info("sending stream {}".format(stream_prefix))
        source_files = [os.path.join(self.bro_http_dir, '{}.request'.format(stream_prefix)),
                        os.path.join(self.bro_http_dir, '{}.request.entity'.format(stream_prefix)),
                        os.path.join(self.bro_http_dir, '{}.reply'.format(stream_prefix)),
                        os.path.join(self.bro_http_dir, '{}.reply.entity'.format(stream_prefix)),
                        os.path.join(self.bro_http_dir, '{}.ready'.format(stream_prefix))]

        sent_files = []
        for source_file in source_files:
            if not os.path.exists(source_file):
                continue

            result = self.submit_command(ANPCommandCOPY_FILE(source_file, source_file), node_id)
            if result is None:
                # no servers available at the moment
                return False
            elif result.command == ANP_COMMAND_OK:
                sent_files.append(source_file)
                continue
            elif result.command == ANP_COMMAND_ERROR:
                raise RuntimeError("remote server returned error message: {}".format(result.error_message))
            else:
                raise ValueError("got unexpected command {}".format(result))

        # tell the remote system to process the files
        result = self.submit_command(ANPCommandPROCESS(stream_prefix), node_id)
        if result is None:
            logging.warning("did not receive a response for PROCESS command on {}".format(stream_prefix))
            return False
        elif result.command == ANP_COMMAND_OK:
            # if we get this far then all the files have been sent
            # so it is safe to remove the local copies
            for sent_file in sent_files:
                try:
                    logging.info("removing {}".format(sent_file))
                    os.remove(sent_file)
                except Exception as e:
                    logging.error("unable to delete {}: {}".format(sent_file, e))

            return True
        elif result.command == ANP_COMMAND_ERROR:
            logging.warning("remote server returned error message: {}".format(result.error_message))
            return False
        else:
            logging.error("got unexpected command {}".format(result))
            return False

    def collect_client_mode(self):
        while not self.collection_shutdown:
            # gather extracted http files and submit them to the server node
            stream_prefix = self.get_next_stream()
            if stream_prefix is None:
                # nothing to do right now...
                logging.debug("no streams available to send")
                return False

            # do we have an anp node to send data to?
            node_id = self.get_available_node()
            if node_id is None:
                logging.info("waiting for available ANP node...")
                return False

            try:
                self.submit_stream(stream_prefix, node_id)
            except Exception as e:
                logging.error("unable to submit stream {}: {}".format(stream_prefix, e))
                report_exception()

    def collect_local_mode(self):
        # gather extracted files and just process them
        stream_prefix = self.get_next_stream()
        if stream_prefix:
            self.add_work_item(stream_prefix)
            return True

        return False

    def collect_server_mode(self):
        # in server mode we just process our local workload
        return MySQLCollectionEngine.collect(self)

    def process(self, stream_prefix):
        # process the .ready file
        # the file format is as follows
        #
        # C7kebl1wNwKQ1qOPck.1.ready
        # time = 1537467014.49546
        # interrupted = F
        # finish_msg = message ends normally
        # body_length = 433994
        # content_gap_length = 0
        # header_length = 494
        #

        details = {
            HTTP_DETAILS_REQUEST: [],
            HTTP_DETAILS_REPLY: [],
            HTTP_DETAILS_READY: [],
        }

        base_path = os.path.join(self.bro_http_dir, stream_prefix)

        # the ready file contains stream summary info
        ready_path = '{}.ready'.format(base_path)
        # http request headers
        request_path = '{}.request'.format(base_path)
        # http request content (POST content for example)
        request_entity_path = '{}.request.entity'.format(base_path)
        # http response headers
        reply_path = '{}.reply'.format(base_path)
        # http response content
        reply_entity_path = '{}.reply.entity'.format(base_path)

        # make sure we have at least the files we expect (summary, and request headers)
        for path in [ready_path, request_path]:
            if not os.path.exists(path):
                logging.error("missing expected file {}".format(path))
                return False

        # parse the ready file
        stream_time = None
        interrupted = False
        content_gap_length = 0

        with open(ready_path, 'r') as fp:
            for line in fp:
                details[HTTP_DETAILS_READY].append(line.strip())
                key, value = [_.strip() for _ in line.split(' = ')]

                if key == 'time':
                    stream_time = datetime.datetime.fromtimestamp(float(value))
                elif key == 'interrupted':
                    interrupted = value == 'T'
                elif key == 'content_gap_length':
                    content_gap_length = int(value)

        # parse the request
        request_headers = []  # list of (key, value) tuples
        request_headers_lookup = {}  # key = key.lower()

        with open(request_path, 'r') as fp:
            request_ipv4 = fp.readline().strip()
            request_method = fp.readline().strip()
            request_original_uri = fp.readline().strip()
            request_unescaped_uri = fp.readline().strip()
            request_version = fp.readline().strip()

            logging.info("processing {} ipv4 {} method {} uri {}".format(
                stream_prefix, request_ipv4, request_method, request_original_uri))

            details[HTTP_DETAILS_REQUEST].append(request_ipv4)
            details[HTTP_DETAILS_REQUEST].append(request_method)
            details[HTTP_DETAILS_REQUEST].append(request_original_uri)
            details[HTTP_DETAILS_REQUEST].append(request_unescaped_uri)
            details[HTTP_DETAILS_REQUEST].append(request_version)

            for line in fp:
                details[HTTP_DETAILS_REQUEST].append(line.strip())
                key, value = [_.strip() for _ in line.split('\t')]
                request_headers.append((key, value))
                request_headers_lookup[key.lower()] = value

        # parse the response if it exists
        reply_headers = []  # list of (key, value) tuples
        reply_headers_lookup = {}  # key = key.lower()
        reply_version = None
        reply_code = None
        reply_reason = None
        reply_ipv4 = None
        reply_port = None

        if os.path.exists(reply_path):
            with open(reply_path, 'r') as fp:
                first_line = fp.readline()
                details[HTTP_DETAILS_REPLY].append(first_line)
                reply_ipv4, reply_port = [_.strip() for _ in first_line.split('\t')]
                reply_port = int(reply_port)
                reply_version = fp.readline().strip()
                reply_code = fp.readline().strip()
                reply_reason = fp.readline().strip()

                details[HTTP_DETAILS_REPLY].append(reply_version)
                details[HTTP_DETAILS_REPLY].append(reply_code)
                details[HTTP_DETAILS_REPLY].append(reply_reason)

                for line in fp:
                    details[HTTP_DETAILS_REPLY].append(line.strip())
                    key, value = [_.strip() for _ in line.split('\t')]
                    reply_headers.append((key, value))
                    reply_headers_lookup[key.lower()] = value

        self.root = RootAnalysis()
        self.root.uuid = str(uuid.uuid4())
        self.root.storage_dir = os.path.join(self.collection_dir, self.root.uuid[0:3], self.root.uuid)
        self.root.initialize_storage()
        self.root.tool = 'ACE - Bro HTTP Scanner'
        self.root.tool_instance = self.hostname
        self.root.alert_type = 'http'
        self.root.description = 'BRO HTTP Scanner Detection - {} {}'.format(request_method, request_original_uri)
        self.root.event_time = datetime.datetime.now() if stream_time is None else stream_time
        self.root.details = details

        self.root.add_observable(F_IPV4, request_ipv4)
        if reply_ipv4:
            self.root.add_observable(F_IPV4, reply_ipv4)
            self.root.add_observable(F_IPV4_CONVERSATION, create_ipv4_conversation(request_ipv4, reply_ipv4))

        if 'host' in request_headers_lookup:
            self.root.add_observable(F_FQDN, request_headers_lookup['host'])

        uri = request_original_uri[:]
        if 'host' in request_headers_lookup:
            # I don't think we'll ever see https here as that gets parsed as a different protocol in bro
            # we should only be seeing HTTP traffic
            uri = '{}://{}{}{}'.format('https' if reply_port == 443 else 'http',
                                       request_headers_lookup['host'],
                                       # if the default port is used then leave it out, otherwise include it in the url
                                       '' if reply_port == 80 else ':{}'.format(reply_port),
                                       uri)

        self.root.add_observable(F_URL, uri)

        if request_original_uri != request_unescaped_uri:
            uri = request_unescaped_uri[:]
            if 'host' in request_headers_lookup:
                uri = '{}:{}'.format(request_headers_lookup['host'], uri)
            self.root.add_observable(F_URL, uri)

        # move all the files into the work directory and add them as file observables
        shutil.move(ready_path, self.root.storage_dir)
        self.root.add_observable(F_FILE, os.path.basename(ready_path))

        shutil.move(request_path, self.root.storage_dir)
        self.root.add_observable(F_FILE, os.path.basename(request_path))

        if os.path.exists(request_entity_path):
            shutil.move(request_entity_path, self.root.storage_dir)
            self.root.add_observable(F_FILE, os.path.basename(request_entity_path))

        if os.path.exists(reply_path):
            shutil.move(reply_path, self.root.storage_dir)
            self.root.add_observable(F_FILE, os.path.basename(reply_path))

        if os.path.exists(reply_entity_path):
            shutil.move(reply_entity_path, self.root.storage_dir)
            self.root.add_observable(F_FILE, os.path.basename(reply_entity_path))

        try:
            self.root.save()
        except Exception as e:
            logging.error("unable to save {}: {}".format(self.root, e))
            report_exception()
            return False

        # has the destination host been whitelisted?
        try:
            if self.whitelist is None:
                self.whitelist = BrotexWhitelist(self.whitelist_path)
                self.whitelist.load_whitelist()
            else:
                self.whitelist.check_whitelist()

            if 'host' in request_headers_lookup and request_headers_lookup['host']:
                if self.whitelist.is_whitelisted_fqdn(request_headers_lookup['host']):
                    logging.debug("stream {} whitelisted by fqdn {}".format(
                        stream_prefix, request_headers_lookup['host']))
                    return

        except Exception as e:
            logging.error("whitelist check failed for {}: {}".format(stream_prefix, e))
            report_exception()

        # now analyze the file
        try:
            self.analyze(self.root)
        except Exception as e:
            logging.error("analysis failed for {}: {}".format(stream_prefix, e))
            report_exception()

    def post_analysis(self, root):
        if self.should_alert(self.root):
            self.root.submit()
            self.cancel_analysis()

    def cleanup(self, work_item):
        if not self.root:
            return

        if self.root.delayed:
            return

        if not self.keep_work_dir:
            logging.debug("deleting {}".format(self.root.storage_dir))
            self.root.delete()
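# Illustrative sketch (not part of the original module): writes a minimal synthetic
# bro http stream (the .ready/.request/.reply files that process() expects) into a
# directory, for example to exercise process() in a test. The field layout mirrors
# the parsing code above; the helper name, stream prefix, and values are made up.
def _write_example_stream(bro_http_dir, stream_prefix='CExample.1'):
    import os

    base_path = os.path.join(bro_http_dir, stream_prefix)

    # stream summary, one "key = value" per line
    with open('{}.ready'.format(base_path), 'w') as fp:
        fp.write('time = 1537467014.49546\n')
        fp.write('interrupted = F\n')
        fp.write('finish_msg = message ends normally\n')
        fp.write('body_length = 0\n')
        fp.write('content_gap_length = 0\n')
        fp.write('header_length = 494\n')

    # first five lines: source ipv4, method, original uri, unescaped uri, version;
    # remaining lines: tab-separated header key/value pairs
    with open('{}.request'.format(base_path), 'w') as fp:
        fp.write('192.0.2.10\n')
        fp.write('GET\n')
        fp.write('/index.html\n')
        fp.write('/index.html\n')
        fp.write('HTTP/1.1\n')
        fp.write('Host\twww.example.com\n')

    # first line: destination ipv4 and port (tab-separated), then version, code,
    # reason, then tab-separated header key/value pairs
    with open('{}.reply'.format(base_path), 'w') as fp:
        fp.write('203.0.113.5\t80\n')
        fp.write('HTTP/1.1\n')
        fp.write('200\n')
        fp.write('OK\n')
        fp.write('Content-Type\ttext/html\n')

    return stream_prefix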
def _create_analysis(url, reprocess, details, db, c):
    assert isinstance(url, str)
    assert isinstance(reprocess, bool)
    assert isinstance(details, dict)

    sha256_url = hash_url(url)

    if reprocess:
        # if we're reprocessing the url then we clear any existing analysis
        # IF the current analysis has completed
        # it's OK if we delete nothing here
        execute_with_retry(db, c, """DELETE FROM cloudphish_analysis_results
                                     WHERE sha256_url = UNHEX(%s) AND status = 'ANALYZED'""",
                           (sha256_url,), commit=True)

    # if we're at this point it means that when we asked the database for an entry from cloudphish_analysis_results
    # it was empty, OR, we cleared existing analysis
    # however, we could have multiple requests coming in at the same time for the same url
    # so we need to take that into account here

    # first we'll generate the analysis uuid we're going to use
    _uuid = str(uuid.uuid4())

    # then we try to insert it
    try:
        execute_with_retry(db, c, ["""INSERT INTO cloudphish_analysis_results ( sha256_url, uuid, insert_date )
                                      VALUES ( UNHEX(%s), %s, NOW() )""",
                                   """INSERT INTO cloudphish_url_lookup ( sha256_url, url )
                                      VALUES ( UNHEX(%s), %s )"""],
                           [(sha256_url, _uuid), (sha256_url, url)], commit=True)
    except pymysql.err.IntegrityError as e:
        # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
        # if we get a duplicate key entry here then it means that an entry was created between when we asked
        # and now
        if e.args[0] != 1062:
            raise e

        # so just return the one that was already created
        return get_cached_analysis(url)

    # at this point we've inserted an entry into cloudphish_analysis_results for this url
    # now we add its processing to the workload
    root = RootAnalysis()
    root.uuid = _uuid
    root.storage_dir = storage_dir_from_uuid(root.uuid)
    root.initialize_storage()
    root.analysis_mode = ANALYSIS_MODE_CLOUDPHISH
    # this is kind of a kludge but,
    # the company_id initially starts out as whatever the default is for this node
    # later, should the analysis turn into an alert, the company_id changes to whatever
    # is stored as the "d" field in the KEY_DETAILS_CONTEXT
    root.company_id = saq.COMPANY_ID
    root.tool = 'ACE - Cloudphish'
    root.tool_instance = saq.SAQ_NODE
    root.alert_type = ANALYSIS_TYPE_CLOUDPHISH
    root.description = 'ACE Cloudphish Detection - {}'.format(url)
    root.event_time = datetime.datetime.now()
    root.details = {
        KEY_DETAILS_URL: url,
        KEY_DETAILS_SHA256_URL: sha256_url,
        # this used to be configurable but it's always true now
        KEY_DETAILS_ALERTABLE: True,
        KEY_DETAILS_CONTEXT: details,  # <-- optionally contains the source company_id
    }

    url_observable = root.add_observable(F_URL, url)
    if url_observable:
        url_observable.add_directive(DIRECTIVE_CRAWL)

    root.save()
    root.schedule()

    return get_cached_analysis(url)
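# Illustrative sketch (not part of the original module): the INSERT above doubles as
# the synchronization between concurrent requests for the same URL - the first caller
# wins the row, everyone else hits MySQL duplicate-key error 1062 and falls back to
# reading what the winner created. A minimal standalone version of that pattern using
# plain pymysql; the function name and the claims table/columns are placeholders, not
# the cloudphish schema.
def _example_claim_or_reuse(db, key, value):
    try:
        with db.cursor() as c:
            c.execute("INSERT INTO claims ( claim_key, claim_value ) VALUES ( %s, %s )",
                      (key, value))
        db.commit()
        return value  # we won the race; our row is the one everyone else will see
    except pymysql.err.IntegrityError as e:
        if e.args[0] != 1062:  # 1062 = duplicate key
            raise

        # someone else inserted the row between our check and our INSERT - reuse theirs
        with db.cursor() as c:
            c.execute("SELECT claim_value FROM claims WHERE claim_key = %s", (key,))
            return c.fetchone()[0]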
def upload(uuid):
    validate_uuid(uuid)

    if KEY_UPLOAD_MODIFIERS not in request.values:
        abort(Response("missing key {} in request".format(KEY_UPLOAD_MODIFIERS), 400))

    if KEY_ARCHIVE not in request.files:
        abort(Response("missing files key {}".format(KEY_ARCHIVE), 400))

    upload_modifiers = json.loads(request.values[KEY_UPLOAD_MODIFIERS])
    if not isinstance(upload_modifiers, dict):
        abort(Response("{} should be a dict".format(KEY_UPLOAD_MODIFIERS), 400))

    overwrite = False
    if KEY_OVERWRITE in upload_modifiers:
        overwrite = upload_modifiers[KEY_OVERWRITE]
        if not isinstance(overwrite, bool):
            abort(Response("{} should be a boolean".format(KEY_OVERWRITE), 400))

    sync = False
    if KEY_SYNC in upload_modifiers:
        sync = upload_modifiers[KEY_SYNC]
        if not isinstance(sync, bool):
            abort(Response("{} should be a boolean".format(KEY_SYNC), 400))

    logging.info("requested upload for {}".format(uuid))

    # does the target directory already exist?
    target_dir = storage_dir_from_uuid(uuid)
    if os.path.exists(target_dir):
        # are we over-writing it?
        if not overwrite:
            abort(Response("{} already exists (specify overwrite modifier to replace the data)".format(target_dir), 400))

        # if we are overwriting the entry then we need to completely clear the directory
        # TODO implement this

    try:
        os.makedirs(target_dir)
    except Exception as e:
        logging.error("unable to create directory {}: {}".format(target_dir, e))
        report_exception()
        abort(Response("unable to create directory {}: {}".format(target_dir, e), 400))

    logging.debug("target directory for {} is {}".format(uuid, target_dir))

    # save the tar file so we can extract it
    fp, tar_path = tempfile.mkstemp(suffix='.tar', prefix='upload_{}'.format(uuid), dir=saq.TEMP_DIR)
    os.close(fp)

    try:
        request.files[KEY_ARCHIVE].save(tar_path)

        t = tarfile.open(tar_path, 'r|')
        t.extractall(path=target_dir)

        logging.debug("extracted {} to {}".format(uuid, target_dir))

        # update the root analysis to indicate its new location
        root = RootAnalysis(storage_dir=target_dir)
        root.load()
        root.location = saq.SAQ_NODE
        root.company_id = saq.COMPANY_ID
        root.company_name = saq.COMPANY_NAME
        root.save()

        if sync:
            root.schedule()

        # looks like it worked
        return json_result({'result': True})

    except Exception as e:
        logging.error("unable to upload {}: {}".format(uuid, e))
        report_exception()
        abort(Response("unable to upload {}: {}".format(uuid, e), 400))

    finally:
        try:
            os.remove(tar_path)
        except Exception as e:
            logging.error("unable to remove {}: {}".format(tar_path, e))
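# Illustrative sketch (not part of the original module): roughly how a client might
# package a storage directory and send it to this upload endpoint. The endpoint URL
# and the literal field names ('upload_modifiers', 'archive', 'overwrite', 'sync')
# are assumptions; the authoritative names are the KEY_* constants used above.
def _example_upload_client(storage_dir, uuid, base_url='https://ace.example.com'):
    import io
    import json
    import tarfile
    import requests

    # tar the contents of the storage directory into an in-memory archive
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode='w') as t:
        t.add(storage_dir, arcname='.')
    buf.seek(0)

    response = requests.post('{}/api/engine/upload/{}'.format(base_url, uuid),
                             data={'upload_modifiers': json.dumps({'overwrite': False, 'sync': True})},
                             files={'archive': ('archive.tar', buf)})
    response.raise_for_status()
    return response.json()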
def post_smtp_analysis(self, root):
    from saq.modules.email import EmailAnalysis, SMTPStreamAnalysis, \
                                  BrotexSMTPPackageAnalysis, \
                                  KEY_ENVELOPES_MAIL_FROM, KEY_ENVELOPES_RCPT_TO

    # get the paths to the email scanning system
    #email_scanner_dir = saq.CONFIG['engine_email_scanner']['collection_dir']
    email_scanner_dir = self.collection_dir

    # create a new analysis root for each email analysis we found
    for analysis in root.all_analysis:
        if not isinstance(analysis, EmailAnalysis) or not analysis.email:
            continue

        env_mail_from = None
        env_rcpt_to = None
        connection_id = None

        # the observable for this EmailAnalysis will be a file
        email_file = analysis.observable
        if email_file.type != F_FILE:
            logging.warning("the observable for {} should be F_FILE but it is {}".format(
                analysis, email_file.type))
        else:
            # this will be either an rfc822 file generated by the SMTPStreamAnalysis module
            # (which will have the envelope information)
            # OR it is a "broken stream" file, which does not
            stream_analysis = [a for a in root.all_analysis
                               if isinstance(a, SMTPStreamAnalysis) and email_file in a.observables]

            if len(stream_analysis) > 1:
                logging.error("there should not be more than one of these")
            elif len(stream_analysis) == 1:
                stream_analysis = stream_analysis[0]
                logging.debug("detected stream analysis for {}".format(email_file))

                # get the MAIL FROM and RCPT TO from this
                if not analysis.env_mail_from:
                    if email_file.value in stream_analysis.envelopes:
                        analysis.env_mail_from = stream_analysis.envelopes[email_file.value][KEY_ENVELOPES_MAIL_FROM]
                if not analysis.env_rcpt_to:
                    if email_file.value in stream_analysis.envelopes:
                        analysis.env_rcpt_to = stream_analysis.envelopes[email_file.value][KEY_ENVELOPES_RCPT_TO]

                # get the original brotex package file that the stream came from
                stream_package = stream_analysis.observable

                # get the BrotexSMTPPackageAnalysis for this stream package so we can get the connection id
                package_analysis = [a for a in root.all_analysis
                                    if isinstance(a, BrotexSMTPPackageAnalysis) and stream_package in a.observables]

                if len(package_analysis) > 1:
                    logging.error("there should not be more than one of these!")
                elif len(package_analysis) == 1:
                    package_analysis = package_analysis[0]
                    connection_id = package_analysis.connection_id

            # if we could not find the stream, we will want to find the brotex smtp package so we can have the connection id
            package_analysis = [a for a in root.all_analysis
                                if isinstance(a, BrotexSMTPPackageAnalysis) and email_file in a.observables]

            if len(package_analysis) > 1:
                logging.error("there should not be more than one of these!")
            elif len(package_analysis) == 1:
                package_analysis = package_analysis[0]
                connection_id = package_analysis.connection_id

        subroot = RootAnalysis()
        subroot.company_name = root.company_name
        subroot.tool = root.tool
        subroot.tool_instance = root.tool_instance
        subroot.alert_type = root.alert_type

        subroot.description = 'Brotex SMTP Stream Detection - '

        if analysis.decoded_subject:
            subroot.description += '{} '.format(analysis.decoded_subject)
        elif analysis.subject:
            subroot.description += '{} '.format(analysis.subject)
        else:
            subroot.description += '(no subject) '

        if analysis.env_mail_from:
            subroot.description += 'From {} '.format(normalize_email_address(analysis.env_mail_from))
        elif analysis.mail_from:
            subroot.description += 'From {} '.format(normalize_email_address(analysis.mail_from))

        if analysis.env_rcpt_to:
            if len(analysis.env_rcpt_to) == 1:
                subroot.description += 'To {} '.format(analysis.env_rcpt_to[0])
            else:
                subroot.description += 'To ({} recipients) '.format(len(analysis.env_rcpt_to))
        elif analysis.mail_to:
            if isinstance(analysis.mail_to, list):  # XXX I think this *has* to be a list
                if len(analysis.mail_to) == 1:
                    subroot.description += 'To {} '.format(analysis.mail_to[0])
                else:
                    subroot.description += 'To ({} recipients) '.format(len(analysis.mail_to))
            else:
                subroot.description += 'To {} '.format(analysis.mail_to)

        subroot.event_time = root.event_time
        subroot.details = analysis.details
        subroot.details['connection_id'] = connection_id
        subroot.uuid = str(uuid.uuid4())

        # we use a temporary directory while we process the file
        subroot.storage_dir = os.path.join(email_scanner_dir, subroot.uuid[0:3], subroot.uuid)
        subroot.initialize_storage()

        # copy the original file
        src_path = os.path.join(root.storage_dir, analysis.observable.value)
        dest_path = os.path.join(subroot.storage_dir, analysis.observable.value)

        subroot.add_observable(F_FILE, os.path.relpath(dest_path, start=subroot.storage_dir))

        # the EmailAnalysis that will trigger on the RFC822 file (or whatever you have)
        # will *not* have the envelope headers
        # so we add those here in the main alert
        env_mail_from = None
        if analysis.env_mail_from:
            # this is to handle this: <*****@*****.**> SIZE=80280
            # XXX assuming there can be no spaces in an email address
            env_mail_from = analysis.env_mail_from.split(' ', 1)
            env_mail_from = env_mail_from[0]

            # is this not the empty indicator?
            if env_mail_from != '<>':
                env_mail_from = normalize_email_address(env_mail_from)
                subroot.add_observable(F_EMAIL_ADDRESS, env_mail_from)

        if analysis.env_rcpt_to:
            for address in analysis.env_rcpt_to:
                address = normalize_email_address(address)
                if address:
                    subroot.add_observable(F_EMAIL_ADDRESS, address)
                    if env_mail_from:
                        subroot.add_observable(F_EMAIL_CONVERSATION,
                                               create_email_conversation(env_mail_from, address))

        try:
            subroot.save()
        except Exception as e:
            logging.error("unable to save {}: {}".format(subroot, e))
            report_exception()
            continue

        # TODO also add the stream and update any envelope headers and stuff

        try:
            logging.debug("copying {} to {}".format(src_path, dest_path))
            shutil.copy(src_path, dest_path)
        except Exception as e:
            logging.error("unable to copy {} to {}: {}".format(src_path, dest_path, e))
            report_exception()
            continue

        # submit the path to the database of the email scanner for analysis
        try:
            submit_sql_work_item('EMAIL', subroot.storage_dir)
        except Exception as e:
            logging.error("unable to add work item: {}".format(e))
            report_exception()
            continue
def post_http_analysis(self, root):
    from saq.modules.http import BrotexHTTPPackageAnalysis, \
                                 KEY_TIME, \
                                 KEY_SRC_IP, \
                                 KEY_SRC_PORT, \
                                 KEY_DEST_IP, \
                                 KEY_DEST_PORT, \
                                 KEY_METHOD, \
                                 KEY_HOST, \
                                 KEY_URI, \
                                 KEY_REFERRER, \
                                 KEY_USER_AGENT, \
                                 KEY_STATUS_CODE, \
                                 KEY_FILES

    # get the paths to the http scanning system
    #http_scanner_dir = saq.CONFIG['engine_http_scanner']['collection_dir']
    http_scanner_dir = self.collection_dir

    analysis = None
    for a in root.all_analysis:
        if isinstance(a, BrotexHTTPPackageAnalysis) and a.requests:
            analysis = a
            break

    # analysis can be missing here if the request was whitelisted
    if analysis:
        for request in analysis.requests:
            subroot = RootAnalysis()
            subroot.company_name = root.company_name
            subroot.tool = root.tool
            subroot.tool_instance = root.tool_instance
            subroot.alert_type = root.alert_type

            subroot.description = "Brotex HTTP Stream Detection - "

            if request[KEY_HOST]:
                subroot.description += " {} ".format(request[KEY_HOST])

            if request[KEY_DEST_IP]:
                subroot.description += " ({}) ".format(request[KEY_DEST_IP])

            if request[KEY_URI]:
                # don't want to show all the fragments and query params
                try:
                    parts = urlparse(request[KEY_URI])
                    subroot.description += parts.path
                except Exception as e:
                    logging.warning("unable to parse {}: {}".format(request[KEY_URI], e))
                    subroot.description += request[KEY_URI]

            subroot.event_time = root.event_time
            subroot.details = request
            subroot.uuid = str(uuid.uuid4())

            # we use a temporary directory while we process the file
            subroot.storage_dir = os.path.join(http_scanner_dir, subroot.uuid[0:3], subroot.uuid)
            subroot.initialize_storage()

            if request[KEY_SRC_IP]:
                subroot.add_observable(F_IPV4, request[KEY_SRC_IP])

            if request[KEY_DEST_IP]:
                subroot.add_observable(F_IPV4, request[KEY_DEST_IP])

            if request[KEY_SRC_IP] and request[KEY_DEST_IP]:
                subroot.add_observable(F_IPV4_CONVERSATION,
                                       create_ipv4_conversation(request[KEY_SRC_IP], request[KEY_DEST_IP]))

            if request[KEY_HOST]:
                subroot.add_observable(F_FQDN, request[KEY_HOST])

            if request[KEY_URI]:
                subroot.add_observable(F_URL, request[KEY_URI])

            if request[KEY_REFERRER]:
                subroot.add_observable(F_URL, request[KEY_REFERRER])

            for file_path in request[KEY_FILES]:
                src_path = os.path.join(root.storage_dir, file_path)
                dest_path = os.path.join(subroot.storage_dir, os.path.basename(file_path))
                try:
                    shutil.copy(src_path, dest_path)
                except Exception as e:
                    logging.error("unable to copy {} to {}: {}".format(src_path, dest_path, e))
                    report_exception()

                subroot.add_observable(F_FILE, os.path.basename(file_path))  # already relative

            try:
                subroot.save()
            except Exception as e:
                logging.error("unable to save {}: {}".format(subroot, e))
                report_exception()
                continue

            # submit the path to the database of the http scanner for analysis
            try:
                submit_sql_work_item('HTTP', subroot.storage_dir)  # XXX hard coded constant
            except:
                # failure is already logged inside the call
                continue