def _get_cached_analysis(url, db, c):
    sha256 = hash_url(url)

    # have we already requested and/or processed this URL before?
    c.execute("""SELECT ar.status, ar.result, ar.http_result_code, ar.http_message, HEX(ar.sha256_content),
                        cm.node, cm.name, ar.uuid
                 FROM cloudphish_analysis_results AS ar
                 LEFT JOIN cloudphish_content_metadata AS cm ON ar.sha256_content = cm.sha256_content
                 WHERE sha256_url = UNHEX(%s)""", (sha256,))

    row = c.fetchone()
    if row:
        status, result, http_result, http_message, sha256_content, node, file_name, uuid = row
        if file_name:
            file_name = file_name.decode('unicode_internal')

        storage_dir = storage_dir_from_uuid(uuid)
        root_details = None

        if os.path.exists(storage_dir):
            try:
                root = RootAnalysis(storage_dir=storage_dir)
                root.load()
                root_details = root.details
            except Exception as e:
                # this isn't really an error -- another process may be in the middle of processing this url
                # the database contents should be correct though
                logging.debug("unable to load cloudphish analysis {}: {}".format(uuid, e))
                #report_exception()

        # keep track of the most popular URLs
        # old URLs get cleaned out
        c.execute("UPDATE cloudphish_url_lookup SET last_lookup = NOW() WHERE sha256_url = UNHEX(%s)", (sha256,))
        db.commit()

        return CloudphishAnalysisResult(RESULT_OK,      # result
                                        root_details,   # details
                                        status=status,
                                        analysis_result=result,
                                        http_result=http_result,
                                        http_message=http_message,
                                        sha256_content=sha256_content,
                                        sha256_url=sha256,
                                        location=node,
                                        file_name=file_name,
                                        uuid=uuid)

    # we have not seen this URL before, so return None
    return None
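# Hedged sketch (not part of the original source): _get_cached_analysis() relies on a
# hash_url() helper that turns a URL into the hex digest keyed against
# cloudphish_analysis_results.sha256_url. A minimal stand-in, assuming the key is simply
# the SHA256 of the UTF-8 encoded URL:
import hashlib

def hash_url_sketch(url):
    """Hypothetical equivalent of hash_url(): SHA256 hex digest of the URL string."""
    return hashlib.sha256(url.encode('utf-8', errors='ignore')).hexdigest()

# example: hash_url_sketch('http://example.com/') yields the 64-character hex string
# that the queries above pass through UNHEX(%s)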
def clear(uuid, lock_uuid, db, c):
    validate_uuid(uuid)
    validate_uuid(lock_uuid)

    # make sure this uuid is locked with the given lock_uuid
    # this is less a security feature than it is a mistake-blocker :-)
    c.execute("SELECT uuid FROM locks WHERE uuid = %s AND lock_uuid = %s", (uuid, lock_uuid))
    row = c.fetchone()
    if row is None:
        logging.warning("request to clear uuid {} with invalid lock uuid {}".format(uuid, lock_uuid))
        abort(Response("nope", 400))

    target_dir = storage_dir_from_uuid(uuid)
    if saq.CONFIG['engine']['work_dir'] and not os.path.isdir(target_dir):
        target_dir = workload_storage_dir(uuid)

    if not os.path.isdir(target_dir):
        logging.error("request to clear unknown target {}".format(target_dir))
        abort(Response("unknown target {}".format(target_dir), 400))

    logging.info("received request to clear {} from {}".format(uuid, request.remote_addr))

    try:
        shutil.rmtree(target_dir)
    except Exception as e:
        logging.error("unable to clear {}: {}".format(target_dir, e))
        report_exception()
        abort(Response("clear failed"))

    # looks like it worked
    return json_result({'result': True})
def get_file(uuid, file_uuid_or_name):
    storage_dir = storage_dir_from_uuid(uuid)
    if saq.CONFIG['service_engine']['work_dir'] and not os.path.isdir(storage_dir):
        storage_dir = workload_storage_dir(uuid)

    root = RootAnalysis(storage_dir=storage_dir)
    root.load()

    # is this a UUID?
    try:
        validate_uuid(file_uuid_or_name)
        file_observable = root.get_observable(file_uuid_or_name)
        if file_observable is None:
            abort(Response("invalid file_uuid {}".format(file_uuid_or_name), 400))
    except ValueError:
        file_observable = root.find_observable(lambda o: o.type == F_FILE and o.value == file_uuid_or_name)
        if file_observable is None:
            abort(Response("invalid file name {}".format(file_uuid_or_name), 400))

    # NOTE we use an absolute path here because if we don't then
    # send_from_directory makes it relative to the app root path
    # (which is /opt/ace/aceapi)
    target_path = os.path.join(saq.SAQ_HOME, root.storage_dir, file_observable.value)
    if not os.path.exists(target_path):
        abort(Response("file path {} does not exist".format(target_path), 400))

    # XXX revisit how we save (name) files
    return send_from_directory(os.path.dirname(target_path),
                               os.path.basename(target_path),
                               as_attachment=True,
                               attachment_filename=os.path.basename(target_path).encode().decode('latin-1', errors='ignore'))
def resubmit(uuid):
    try:
        root = RootAnalysis(storage_dir=storage_dir_from_uuid(uuid))
        root.load()
        root.reset()
        root.schedule()
        return json_result({'result': 'success'})
    except Exception as e:
        return json_result({'result': 'failed', 'error': str(e)})
def get_details(uuid, name):
    root = RootAnalysis(storage_dir=storage_dir_from_uuid(uuid))
    root.load()

    # find the analysis with this name
    for analysis in root.all_analysis:
        if analysis.external_details_path == name:
            analysis.load()
            return json_result({'result': analysis.details})

    abort(Response("invalid uuid or invalid details name", 400))
def get_analysis(uuid):
    storage_dir = storage_dir_from_uuid(uuid)
    if saq.CONFIG['engine']['work_dir'] and not os.path.isdir(storage_dir):
        storage_dir = workload_storage_dir(uuid)

    if not os.path.exists(storage_dir):
        abort(Response("invalid uuid {}".format(uuid), 400))

    root = RootAnalysis(storage_dir=storage_dir)
    root.load()
    return json_result({'result': root.json})
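# Hedged sketch (not part of the original source): these handlers assume storage_dir_from_uuid()
# and workload_storage_dir() map a uuid to a directory on disk. The real mapping is defined
# elsewhere in the codebase; a hypothetical stand-in that shards by a uuid prefix under a data
# directory might look like this (paths and sharding scheme are assumptions for illustration):
import os

DATA_DIR_SKETCH = '/opt/ace/data'   # hypothetical permanent storage root
WORK_DIR_SKETCH = '/opt/ace/work'   # hypothetical engine work_dir

def storage_dir_from_uuid_sketch(uuid):
    """Hypothetical uuid -> storage directory mapping, sharded by the first characters of the uuid."""
    return os.path.join(DATA_DIR_SKETCH, uuid[0:3], uuid)

def workload_storage_dir_sketch(uuid):
    """Hypothetical alternate location used while the analysis is still in the engine workload."""
    return os.path.join(WORK_DIR_SKETCH, uuid[0:3], uuid)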
def test_mailbox_submission(self):
    from flask import url_for
    from saq.analysis import _JSONEncoder
    from saq.modules.email import EmailAnalysis

    t = saq.LOCAL_TIMEZONE.localize(datetime.datetime.now()).astimezone(pytz.UTC).strftime(event_time_format_json_tz)

    with open(os.path.join('test_data', 'emails', 'splunk_logging.email.rfc822'), 'rb') as fp:
        result = self.client.post(url_for('analysis.submit'), data={
            'analysis': json.dumps({
                'analysis_mode': 'email',
                'tool': 'unittest',
                'tool_instance': 'unittest_instance',
                'type': 'mailbox',
                'description': 'testing',
                'event_time': t,
                'details': { },
                'observables': [
                    { 'type': F_FILE, 'value': 'rfc822.email', 'time': t, 'tags': [],
                      'directives': [ DIRECTIVE_ORIGINAL_EMAIL ], 'limited_analysis': [] },
                ],
                'tags': [ ],
            }, cls=_JSONEncoder),
            'file': (fp, 'rfc822.email'),
        }, content_type='multipart/form-data')

    result = result.get_json()
    self.assertIsNotNone(result)
    self.assertTrue('result' in result)
    result = result['result']
    self.assertIsNotNone(result['uuid'])
    uuid = result['uuid']

    # make sure we don't clean up the analysis so we can check it
    saq.CONFIG['analysis_mode_email']['cleanup'] = 'no'

    engine = TestEngine(local_analysis_modes=['email'])
    engine.enable_module('analysis_module_file_type', 'email')
    engine.enable_module('analysis_module_email_analyzer', 'email')
    engine.enable_module('analysis_module_mailbox_email_analyzer', 'email')
    engine.controlled_stop()
    engine.start()
    engine.wait()

    root = RootAnalysis(storage_dir=storage_dir_from_uuid(uuid))
    root.load()
    observable = root.find_observable(lambda o: o.has_directive(DIRECTIVE_ORIGINAL_EMAIL))
    self.assertIsNotNone(observable)
    analysis = observable.get_analysis(EmailAnalysis)
    self.assertIsNotNone(analysis)

    # these should be the same
    self.assertEquals(analysis.details, root.details)
def get_submission(uuid):
    storage_dir = storage_dir_from_uuid(uuid)
    if saq.CONFIG['service_engine']['work_dir'] and not os.path.isdir(storage_dir):
        storage_dir = workload_storage_dir(uuid)

    if not os.path.exists(storage_dir):
        abort(Response("invalid uuid {}".format(uuid), 400))

    root = RootAnalysis(storage_dir=storage_dir)
    root.load()

    if root.submission is None:
        abort(Response("no submission data available", 404))

    return json_result({'result': root.submission})
def test_bro_smtp_stream_submission(self):
    from flask import url_for
    from saq.analysis import _JSONEncoder
    from saq.modules.email import EmailAnalysis, BroSMTPStreamAnalysis

    t = saq.LOCAL_TIMEZONE.localize(datetime.datetime.now()).astimezone(pytz.UTC).strftime(event_time_format_json_tz)

    with open(os.path.join('test_data', 'smtp_streams', 'CBmtfvapmTMqCEUw6'), 'rb') as fp:
        result = self.client.post(url_for('analysis.submit'), data={
            'analysis': json.dumps({
                'analysis_mode': ANALYSIS_MODE_EMAIL,
                'tool': 'unittest',
                'tool_instance': 'unittest_instance',
                'type': ANALYSIS_TYPE_BRO_SMTP,
                'description': 'BRO SMTP Scanner Detection - ',
                'event_time': t,
                'details': { },
                'observables': [
                    { 'type': F_FILE, 'value': 'CBmtfvapmTMqCEUw6', 'time': t, 'tags': [],
                      'directives': [ DIRECTIVE_ORIGINAL_SMTP ], 'limited_analysis': [] },
                ],
                'tags': [ ],
            }, cls=_JSONEncoder),
            'file': (fp, 'CBmtfvapmTMqCEUw6'),
        }, content_type='multipart/form-data')

    result = result.get_json()
    self.assertIsNotNone(result)
    self.assertTrue('result' in result)
    result = result['result']
    self.assertIsNotNone(result['uuid'])
    uuid = result['uuid']

    # make sure we don't clean up the analysis so we can check it
    saq.CONFIG['analysis_mode_email']['cleanup'] = 'no'

    engine = TestEngine(local_analysis_modes=[ANALYSIS_MODE_EMAIL])
    engine.enable_module('analysis_module_file_type', 'email')
    engine.enable_module('analysis_module_email_analyzer', 'email')
    engine.enable_module('analysis_module_bro_smtp_analyzer', 'email')
    engine.controlled_stop()
    engine.start()
    engine.wait()

    root = RootAnalysis(storage_dir=storage_dir_from_uuid(uuid))
    root.load()
    observable = root.find_observable(lambda o: o.has_directive(DIRECTIVE_ORIGINAL_SMTP))
    self.assertIsNotNone(observable)
    analysis = observable.get_analysis(BroSMTPStreamAnalysis)
    self.assertIsNotNone(analysis)
def get_details(uuid, name):
    storage_dir = storage_dir_from_uuid(uuid)
    if saq.CONFIG['engine']['work_dir'] and not os.path.isdir(storage_dir):
        storage_dir = workload_storage_dir(uuid)

    root = RootAnalysis(storage_dir=storage_dir)
    root.load()

    # find the analysis with this name
    for analysis in root.all_analysis:
        if analysis.external_details_path == name:
            #analysis.load()
            return json_result({'result': analysis.details})

    abort(Response("invalid uuid or invalid details name", 400))
def create_root_analysis(tool=None, tool_instance=None, alert_type=None, desc=None, event_time=None,
                         action_counts=None, details=None, name=None, remediation=None, state=None,
                         uuid=None, location=None, storage_dir=None, company_name=None, company_id=None,
                         analysis_mode=None):
    """Returns a default RootAnalysis object with expected values for testing."""
    return RootAnalysis(tool=tool if tool else EV_ROOT_ANALYSIS_TOOL,
                        tool_instance=tool_instance if tool_instance else EV_ROOT_ANALYSIS_TOOL_INSTANCE,
                        alert_type=alert_type if alert_type else EV_ROOT_ANALYSIS_ALERT_TYPE,
                        desc=desc if desc else EV_ROOT_ANALYSIS_DESCRIPTION,
                        event_time=event_time if event_time else EV_TEST_DATE,
                        action_counters=action_counts if action_counts else None,
                        details=details if details else None,
                        name=name if name else EV_ROOT_ANALYSIS_NAME,
                        remediation=remediation if remediation else None,
                        state=state if state else None,
                        uuid=uuid if uuid else EV_ROOT_ANALYSIS_UUID,
                        location=location if location else None,
                        storage_dir=storage_dir if storage_dir else storage_dir_from_uuid(uuid if uuid else EV_ROOT_ANALYSIS_UUID),
                        company_name=company_name if company_name else None,
                        company_id=company_id if company_id else None,
                        analysis_mode=analysis_mode if analysis_mode else 'test_groups')
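# Hedged usage sketch (not part of the original source): how a test might use
# create_root_analysis(). F_FILE and the EV_* defaults are assumed to be the test constants
# already imported by this module; the file name and details payload are illustrative.
import uuid as uuid_module

def example_build_test_root():
    root = create_root_analysis(uuid=str(uuid_module.uuid4()), analysis_mode='test_groups')
    root.initialize_storage()                   # allocate root.storage_dir on disk
    root.details = {'hello': 'world'}           # arbitrary details payload for the test
    root.add_observable(F_FILE, 'sample.dat')   # hypothetical file observable
    root.save()
    return root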
def download(uuid):
    validate_uuid(uuid)

    target_dir = storage_dir_from_uuid(uuid)
    if not os.path.isdir(target_dir):
        logging.error("request to download unknown target {}".format(target_dir))
        abort(make_response("unknown target {}".format(target_dir), 400))
        #abort(Response("unknown target {}".format(target_dir)))

    logging.info("received request to download {} to {}".format(uuid, request.remote_addr))

    # create the tar file we're going to send back
    fp, path = tempfile.mkstemp(prefix="download_{}".format(uuid), suffix='.tar', dir=saq.TEMP_DIR)
    try:
        tar = tarfile.open(fileobj=os.fdopen(fp, 'wb'), mode='w|')
        tar.add(target_dir, '.')
        tar.close()

        os.lseek(fp, 0, os.SEEK_SET)

        def _iter_send():
            while True:
                data = os.read(fp, io.DEFAULT_BUFFER_SIZE)
                if data == b'':
                    return  # PEP 479: raising StopIteration inside a generator becomes a RuntimeError
                yield data

        return Response(_iter_send(), mimetype='application/octet-stream')
    finally:
        try:
            os.remove(path)
        except:
            pass
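# Hedged client-side sketch (not part of the original source): one way a caller might consume
# the download() endpoint, streaming the tar response to disk and extracting it. The URL path
# and the use of the requests library are assumptions for illustration only.
import tarfile
import requests

def example_download(base_url, uuid, dest_dir):
    """Stream the tarball returned by download() into dest_dir."""
    tar_path = '{}.tar'.format(uuid)
    with requests.get('{}/engine/download/{}'.format(base_url, uuid), stream=True) as r:
        r.raise_for_status()
        with open(tar_path, 'wb') as fp:
            for chunk in r.iter_content(chunk_size=8192):
                fp.write(chunk)

    with tarfile.open(tar_path, 'r') as t:
        t.extractall(path=dest_dir)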
def test_upload(self):
    # first create something to upload
    root = create_root_analysis(uuid=str(uuid.uuid4()), storage_dir=os.path.join(saq.TEMP_DIR, 'test_upload'))
    root.initialize_storage()
    root.details = {'hello': 'world'}
    with open(os.path.join(root.storage_dir, 'test.dat'), 'w') as fp:
        fp.write('test')

    file_observable = root.add_observable(F_FILE, 'test.dat')
    root.save()

    # create a tar file of the entire thing
    fp, tar_path = tempfile.mkstemp(suffix='.tar', prefix='upload_{}'.format(root.uuid), dir=saq.TEMP_DIR)
    tar = tarfile.open(fileobj=os.fdopen(fp, 'wb'), mode='w|')
    tar.add(root.storage_dir, '.')
    tar.close()

    # upload it
    with open(tar_path, 'rb') as fp:
        result = self.client.post(url_for('engine.upload', uuid=root.uuid), data={
            'upload_modifiers': json.dumps({
                'overwrite': False,
                'sync': True,
            }),
            'archive': (fp, os.path.basename(tar_path))
        })

    # make sure it uploaded
    root = RootAnalysis(storage_dir=storage_dir_from_uuid(root.uuid))
    root.load()

    self.assertEquals(root.details, {'hello': 'world'})
def submit():
    if KEY_ANALYSIS not in request.values:
        abort(Response("missing {} field (see documentation)".format(KEY_ANALYSIS), 400))

    r = json.loads(request.values[KEY_ANALYSIS])

    # the specified company needs to match the company of this node
    # TODO eventually we'll have a single node that serves API to all configured companies
    if KEY_COMPANY_NAME in r and r[KEY_COMPANY_NAME] != saq.CONFIG['global']['company_name']:
        abort(Response("wrong company {} (are you sending to the correct system?)".format(r[KEY_COMPANY_NAME]), 400))

    if KEY_DESCRIPTION not in r:
        abort(Response("missing {} field in submission".format(KEY_DESCRIPTION), 400))

    root = RootAnalysis()
    root.uuid = str(uuid.uuid4())

    # does the engine use a different drive for the workload?
    analysis_mode = r[KEY_ANALYSIS_MODE] if KEY_ANALYSIS_MODE in r else saq.CONFIG['engine']['default_analysis_mode']
    if analysis_mode != ANALYSIS_MODE_CORRELATION:
        root.storage_dir = workload_storage_dir(root.uuid)
    else:
        root.storage_dir = storage_dir_from_uuid(root.uuid)

    root.initialize_storage()

    try:
        root.analysis_mode = r[KEY_ANALYSIS_MODE] if KEY_ANALYSIS_MODE in r else saq.CONFIG['engine']['default_analysis_mode']
        root.company_id = saq.CONFIG['global'].getint('company_id')
        root.tool = r[KEY_TOOL] if KEY_TOOL in r else 'api'
        root.tool_instance = r[KEY_TOOL_INSTANCE] if KEY_TOOL_INSTANCE in r else 'api({})'.format(request.remote_addr)
        root.alert_type = r[KEY_TYPE] if KEY_TYPE in r else saq.CONFIG['api']['default_alert_type']
        root.description = r[KEY_DESCRIPTION]

        root.event_time = LOCAL_TIMEZONE.localize(datetime.datetime.now())
        if KEY_EVENT_TIME in r:
            try:
                root.event_time = parse_event_time(r[KEY_EVENT_TIME])
            except ValueError as e:
                abort(Response("invalid event time format for {} (use {} format)".format(
                               r[KEY_EVENT_TIME], event_time_format_json_tz), 400))

        root.details = r[KEY_DETAILS] if KEY_DETAILS in r else {}

        # go ahead and allocate storage
        # XXX use temp dir instead...

        if KEY_TAGS in r:
            for tag in r[KEY_TAGS]:
                root.add_tag(tag)

        # add the observables
        if KEY_OBSERVABLES in r:
            for o in r[KEY_OBSERVABLES]:
                # check for required fields
                for field in [KEY_O_TYPE, KEY_O_VALUE]:
                    if field not in o:
                        abort(Response("an observable is missing the {} field".format(field), 400))

                o_type = o[KEY_O_TYPE]
                o_value = o[KEY_O_VALUE]

                o_time = None
                if KEY_O_TIME in o:
                    try:
                        o_time = parse_event_time(o[KEY_O_TIME])
                    except ValueError:
                        abort(Response("an observable has an invalid time format {} (use {} format)".format(
                                       o[KEY_O_TIME], event_time_format_json_tz), 400))

                observable = root.add_observable(o_type, o_value, o_time=o_time)

                if KEY_O_TAGS in o:
                    for tag in o[KEY_O_TAGS]:
                        observable.add_tag(tag)

                if KEY_O_DIRECTIVES in o:
                    for directive in o[KEY_O_DIRECTIVES]:
                        # is this a valid directive?
                        if directive not in VALID_DIRECTIVES:
                            abort(Response("observable {} has invalid directive {} (choose from {})".format(
                                           '{}:{}'.format(o_type, o_value), directive, ','.join(VALID_DIRECTIVES)), 400))

                        observable.add_directive(directive)

                if KEY_O_LIMITED_ANALYSIS in o:
                    for module_name in o[KEY_O_LIMITED_ANALYSIS]:
                        observable.limit_analysis(module_name)

        # save the files to disk and add them as observables of type file
        for f in request.files.getlist('file'):
            logging.debug("recording file {}".format(f.filename))
            try:
                full_path = os.path.join(root.storage_dir, f.filename)
                try:
                    dest_dir = os.path.dirname(full_path)
                    if not os.path.isdir(dest_dir):
                        try:
                            os.makedirs(dest_dir)
                        except Exception as e:
                            logging.error("unable to create directory {}: {}".format(dest_dir, e))
                            abort(400)

                    logging.debug("saving file {}".format(full_path))
                    f.save(full_path)

                    # add this as a F_FILE type observable
                    root.add_observable(F_FILE, os.path.relpath(full_path, start=root.storage_dir))

                except Exception as e:
                    logging.error("unable to save file {} for root {}: {}".format(full_path, root, e))
                    abort(400)

            except Exception as e:
                logging.error("unable to deal with file {}: {}".format(f, e))
                report_exception()
                abort(400)

        try:
            if not root.save():
                logging.error("unable to save analysis")
                abort(Response("an error occurred trying to save the alert - review the logs", 400))

            # if we received a submission for correlation mode then we go ahead and add it to the database
            if root.analysis_mode == ANALYSIS_MODE_CORRELATION:
                ALERT(root)

            # add this analysis to the workload
            root.schedule()

        except Exception as e:
            logging.error("unable to sync to database: {}".format(e))
            report_exception()
            abort(Response("an error occurred trying to save the alert - review the logs", 400))

        return json_result({'result': {'uuid': root.uuid}})

    except Exception as e:
        logging.error("error processing submit: {}".format(e))
        report_exception()

        try:
            if os.path.isdir(root.storage_dir):
                logging.info("removing failed submit dir {}".format(root.storage_dir))
                shutil.rmtree(root.storage_dir)
        except Exception as e2:
            logging.error("unable to delete failed submit dir {}: {}".format(root.storage_dir, e2))

        raise e
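# Hedged client-side sketch (not part of the original source): the shape of a multipart
# submit() request. The 'analysis' and 'file' field names and the JSON layout mirror the
# handler above; the endpoint path, the requests library, and the literal observable type
# string are illustration-only assumptions (the server-side code uses the F_FILE constant).
import json
import requests

def example_submit(base_url):
    analysis = {
        'analysis_mode': 'analysis',
        'tool': 'example_tool',
        'tool_instance': 'example_instance',
        'type': 'generic',
        'description': 'example submission',
        'details': {},
        'observables': [
            {'type': 'file', 'value': 'sample.txt', 'tags': [], 'directives': [], 'limited_analysis': []},
        ],
        'tags': [],
    }

    with open('sample.txt', 'rb') as fp:
        r = requests.post('{}/analysis/submit'.format(base_url),
                          data={'analysis': json.dumps(analysis)},
                          files=[('file', ('sample.txt', fp))])

    r.raise_for_status()
    return r.json()['result']['uuid']   # the handler returns {'result': {'uuid': ...}}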
def get_analysis(uuid):
    root = RootAnalysis(storage_dir=storage_dir_from_uuid(uuid))
    root.load()
    return json_result({'result': root.json})
def get_status(uuid):
    try:
        validate_uuid(uuid)
    except ValueError as e:
        abort(Response(str(e), 400))

    storage_dir = storage_dir_from_uuid(uuid)
    if saq.CONFIG['engine']['work_dir'] and not os.path.isdir(storage_dir):
        storage_dir = workload_storage_dir(uuid)

    if not os.path.exists(storage_dir):
        abort(Response("invalid uuid {}".format(uuid), 400))

    result = {
        'workload': None,
        'delayed_analysis': [],
        'locks': None,
        'alert': None
    }

    with get_db_connection() as db:
        c = db.cursor()

        # is this still in the workload?
        c.execute("""SELECT id, uuid, node_id, analysis_mode, insert_date
                     FROM workload
                     WHERE uuid = %s""", (uuid,))
        row = c.fetchone()
        if row is not None:
            result['workload'] = {
                'id': row[0],
                'uuid': row[1],
                'node_id': row[2],
                'analysis_mode': row[3],
                'insert_date': row[4]
            }

        # is this an alert?
        c.execute("""SELECT id, uuid, location, insert_date, storage_dir, disposition, disposition_time, detection_count
                     FROM alerts
                     WHERE uuid = %s""", (uuid,))
        row = c.fetchone()
        if row is not None:
            result['alert'] = {
                'id': row[0],
                'uuid': row[1],
                'location': row[2],
                'insert_date': row[3],
                'storage_dir': row[4],
                'disposition': row[5],
                'disposition_time': row[6],
                'detection_count': row[7]
            }

        # is there any delayed analysis scheduled for it?
        c.execute("""SELECT id, uuid, observable_uuid, analysis_module, insert_date, delayed_until, node_id
                     FROM delayed_analysis
                     WHERE uuid = %s
                     ORDER BY delayed_until""", (uuid,))
        for row in c:
            result['delayed_analysis'].append({
                'id': row[0],
                'uuid': row[1],
                'observable_uuid': row[2],
                'analysis_module': row[3],
                'insert_date': row[4],
                'delayed_until': row[5],
                'node_id': row[6]
            })

        # are there any locks on it?
        c.execute("""SELECT uuid, lock_uuid, lock_time, lock_owner
                     FROM locks
                     WHERE uuid = %s""", (uuid,))
        row = c.fetchone()
        if row is not None:
            result['locks'] = {
                'uuid': row[0],
                'lock_uuid': row[1],
                'lock_time': row[2],
                'lock_owner': row[3]
            }

    return json_result({'result': result})
def upload(uuid):
    validate_uuid(uuid)

    if KEY_UPLOAD_MODIFIERS not in request.values:
        abort(Response("missing key {} in request".format(KEY_UPLOAD_MODIFIERS), 400))

    if KEY_ARCHIVE not in request.files:
        abort(Response("missing files key {}".format(KEY_ARCHIVE), 400))

    upload_modifiers = json.loads(request.values[KEY_UPLOAD_MODIFIERS])
    if not isinstance(upload_modifiers, dict):
        abort(Response("{} should be a dict".format(KEY_UPLOAD_MODIFIERS), 400))

    overwrite = False
    if KEY_OVERWRITE in upload_modifiers:
        overwrite = upload_modifiers[KEY_OVERWRITE]
        if not isinstance(overwrite, bool):
            abort(Response("{} should be a boolean".format(KEY_OVERWRITE), 400))

    sync = False
    if KEY_SYNC in upload_modifiers:
        sync = upload_modifiers[KEY_SYNC]
        if not isinstance(sync, bool):
            abort(Response("{} should be a boolean".format(KEY_SYNC), 400))

    logging.info("requested upload for {}".format(uuid))

    # does the target directory already exist?
    target_dir = storage_dir_from_uuid(uuid)
    if os.path.exists(target_dir):
        # are we over-writing it?
        if not overwrite:
            abort(Response("{} already exists (specify overwrite modifier to replace the data)".format(target_dir), 400))

        # if we are overwriting the entry then we need to completely clear the existing data
        # TODO implement this

    try:
        os.makedirs(target_dir)
    except Exception as e:
        logging.error("unable to create directory {}: {}".format(target_dir, e))
        report_exception()
        abort(Response("unable to create directory {}: {}".format(target_dir, e), 400))

    logging.debug("target directory for {} is {}".format(uuid, target_dir))

    # save the tar file so we can extract it
    fp, tar_path = tempfile.mkstemp(suffix='.tar', prefix='upload_{}'.format(uuid), dir=saq.TEMP_DIR)
    os.close(fp)

    try:
        request.files[KEY_ARCHIVE].save(tar_path)

        t = tarfile.open(tar_path, 'r|')
        t.extractall(path=target_dir)

        logging.debug("extracted {} to {}".format(uuid, target_dir))

        # update the root analysis to indicate its new location
        root = RootAnalysis(storage_dir=target_dir)
        root.load()
        root.location = saq.SAQ_NODE
        root.company_id = saq.COMPANY_ID
        root.company_name = saq.COMPANY_NAME
        root.save()

        if sync:
            root.schedule()

        # looks like it worked
        return json_result({'result': True})

    except Exception as e:
        logging.error("unable to upload {}: {}".format(uuid, e))
        report_exception()
        abort(Response("unable to upload {}: {}".format(uuid, e)))

    finally:
        try:
            os.remove(tar_path)
        except Exception as e:
            logging.error("unable to remove {}: {}".format(tar_path, e))
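# Hedged client-side sketch (not part of the original source): building and posting the tar
# archive that upload() expects. The 'upload_modifiers' and 'archive' field names come from
# the handler above; the endpoint path and the requests library usage are assumptions.
import json
import tarfile
import requests

def example_upload(base_url, uuid, storage_dir):
    tar_path = 'upload_{}.tar'.format(uuid)
    with tarfile.open(tar_path, 'w') as t:
        t.add(storage_dir, '.')   # archive the contents of the storage dir relative to '.'

    with open(tar_path, 'rb') as fp:
        r = requests.post('{}/engine/upload/{}'.format(base_url, uuid),
                          data={'upload_modifiers': json.dumps({'overwrite': False, 'sync': True})},
                          files={'archive': (tar_path, fp)})

    r.raise_for_status()
    return r.json()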
def _create_analysis(url, reprocess, details, db, c):
    assert isinstance(url, str)
    assert isinstance(reprocess, bool)
    assert isinstance(details, dict)

    sha256_url = hash_url(url)

    if reprocess:
        # if we're reprocessing the url then we clear any existing analysis
        # IF the current analysis has completed
        # it's OK if we delete nothing here
        execute_with_retry(db, c, """DELETE FROM cloudphish_analysis_results
                                     WHERE sha256_url = UNHEX(%s) AND status = 'ANALYZED'""",
                           (sha256_url,), commit=True)

    # if we're at this point it means that when we asked the database for an entry from cloudphish_analysis_results
    # it was empty, OR, we cleared existing analysis
    # however, we could have multiple requests coming in at the same time for the same url
    # so we need to take that into account here

    # first we'll generate the analysis uuid we're going to use
    _uuid = str(uuid.uuid4())

    # then we try to insert it
    try:
        execute_with_retry(db, c, ["""INSERT INTO cloudphish_analysis_results ( sha256_url, uuid, insert_date )
                                      VALUES ( UNHEX(%s), %s, NOW() )""",
                                   """INSERT INTO cloudphish_url_lookup ( sha256_url, url )
                                      VALUES ( UNHEX(%s), %s )"""],
                           [(sha256_url, _uuid), (sha256_url, url)], commit=True)
    except pymysql.err.IntegrityError as e:
        # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
        # if we get a duplicate key entry here then it means that an entry was created between when we asked
        # and now
        if e.args[0] != 1062:
            raise e

        # so just return the one that was already created
        return get_cached_analysis(url)

    # at this point we've inserted an entry into cloudphish_analysis_results for this url
    # now we add it to the workload
    root = RootAnalysis()
    root.uuid = _uuid
    root.storage_dir = storage_dir_from_uuid(root.uuid)
    root.initialize_storage()
    root.analysis_mode = ANALYSIS_MODE_CLOUDPHISH
    # this is kind of a kludge but,
    # the company_id initially starts out as whatever the default is for this node
    # later, should the analysis turn into an alert, the company_id changes to whatever
    # is stored as the "d" field in the KEY_DETAILS_CONTEXT
    root.company_id = saq.COMPANY_ID
    root.tool = 'ACE - Cloudphish'
    root.tool_instance = saq.SAQ_NODE
    root.alert_type = ANALYSIS_TYPE_CLOUDPHISH
    root.description = 'ACE Cloudphish Detection - {}'.format(url)
    root.event_time = datetime.datetime.now()
    root.details = {
        KEY_DETAILS_URL: url,
        KEY_DETAILS_SHA256_URL: sha256_url,
        # this used to be configurable but it's always true now
        KEY_DETAILS_ALERTABLE: True,
        KEY_DETAILS_CONTEXT: details, # <-- optionally contains the source company_id
    }

    url_observable = root.add_observable(F_URL, url)
    if url_observable:
        url_observable.add_directive(DIRECTIVE_CRAWL)

    root.save()
    root.schedule()

    return get_cached_analysis(url)
def test_complete_processing(self):
    from saq.modules.email import BroSMTPStreamAnalysis

    # disable cleanup so we can check the results after
    saq.CONFIG['analysis_mode_email']['cleanup'] = 'no'

    self.process_pcap(os.path.join(saq.SAQ_HOME, 'test_data', 'pcaps', 'smtp.pcap'))
    self.start_api_server()

    engine = TestEngine()
    engine.enable_module('analysis_module_bro_smtp_analyzer', 'email')
    engine.start()

    collector = BroSMTPStreamCollector()
    collector.load_groups()
    collector.start()

    # look for all the expected log entries
    wait_for_log_count('found smtp stream', 1, 5)
    wait_for_log_count('copied file from', 1, 5)
    wait_for_log_count('scheduled BRO SMTP Scanner Detection -', 1, 5)
    wait_for_log_count('completed analysis RootAnalysis', 1, 20)

    engine.controlled_stop()
    engine.wait()

    collector.stop()
    collector.wait()

    # get the uuids returned by the api calls
    r = re.compile(r' uuid ([a-f0-9-]+)')
    for result in search_log('submit remote'):
        m = r.search(result.getMessage())
        self.assertIsNotNone(m)
        uuid = m.group(1)

        with self.subTest(uuid=uuid):
            root = RootAnalysis(uuid=uuid, storage_dir=storage_dir_from_uuid(uuid))
            root.load()

            # find the SMTP stream
            file_observable = root.find_observable(lambda x: x.type == F_FILE)
            self.assertTrue(bool(file_observable))

            # ensure it has the required directives
            self.assertTrue(file_observable.has_directive(DIRECTIVE_ORIGINAL_SMTP))
            self.assertTrue(file_observable.has_directive(DIRECTIVE_NO_SCAN))

            # ensure the bro smtp analyzer ran on it
            smtp_analysis = file_observable.get_analysis(BroSMTPStreamAnalysis)
            self.assertIsNotNone(smtp_analysis)

            # ensure it extracted a file
            email_observable = smtp_analysis.find_observable(lambda x: x.type == F_FILE)
            self.assertTrue(bool(email_observable))

            # and then ensure that it was treated as an email
            self.assertTrue(email_observable.has_directive(DIRECTIVE_NO_SCAN))
            self.assertTrue(email_observable.has_directive(DIRECTIVE_ORIGINAL_EMAIL))
            self.assertTrue(email_observable.has_directive(DIRECTIVE_ARCHIVE))
def test_bro_http_submission(self):
    saq.CONFIG['analysis_mode_http']['cleanup'] = 'no'

    from flask import url_for
    from saq.analysis import _JSONEncoder
    from saq.modules.email import EmailAnalysis

    t = saq.LOCAL_TIMEZONE.localize(datetime.datetime.now()).astimezone(pytz.UTC).strftime(event_time_format_json_tz)

    ready_fp = open(os.path.join('test_data', 'http_streams', 'CZZiJd1zicZKNMMrV1.0.ready'), 'rb')
    reply_fp = open(os.path.join('test_data', 'http_streams', 'CZZiJd1zicZKNMMrV1.0.reply'), 'rb')
    reply_entity_fp = open(os.path.join('test_data', 'http_streams', 'CZZiJd1zicZKNMMrV1.0.reply.entity'), 'rb')
    request_fp = open(os.path.join('test_data', 'http_streams', 'CZZiJd1zicZKNMMrV1.0.request'), 'rb')

    result = self.client.post(url_for('analysis.submit'), data={
        'analysis': json.dumps({
            'analysis_mode': ANALYSIS_MODE_HTTP,
            'tool': 'unittest',
            'tool_instance': 'unittest_instance',
            'type': ANALYSIS_TYPE_BRO_HTTP,
            'description': 'BRO HTTP Scanner Detection - {}'.format('CZZiJd1zicZKNMMrV1.0'),
            'event_time': t,
            'details': {},
            'observables': [
                { 'type': F_FILE, 'value': 'CZZiJd1zicZKNMMrV1.0.ready' },
                { 'type': F_FILE, 'value': 'CZZiJd1zicZKNMMrV1.0.reply' },
                { 'type': F_FILE, 'value': 'CZZiJd1zicZKNMMrV1.0.reply.entity' },
                { 'type': F_FILE, 'value': 'CZZiJd1zicZKNMMrV1.0.request' },
            ],
            'tags': [],
        }, cls=_JSONEncoder),
        'file': [
            (ready_fp, 'CZZiJd1zicZKNMMrV1.0.ready'),
            (reply_fp, 'CZZiJd1zicZKNMMrV1.0.reply'),
            (reply_entity_fp, 'CZZiJd1zicZKNMMrV1.0.reply.entity'),
            (request_fp, 'CZZiJd1zicZKNMMrV1.0.request'),
        ],
    }, content_type='multipart/form-data')

    ready_fp.close()
    reply_fp.close()
    reply_entity_fp.close()
    request_fp.close()

    result = result.get_json()
    self.assertIsNotNone(result)
    self.assertTrue('result' in result)
    result = result['result']
    self.assertIsNotNone(result['uuid'])
    uuid = result['uuid']

    # make sure we have a job ready
    engine = TestEngine(analysis_pools={ANALYSIS_MODE_HTTP: 1}, local_analysis_modes=[ANALYSIS_MODE_HTTP])
    engine.enable_module('analysis_module_bro_http_analyzer', ANALYSIS_MODE_HTTP)
    engine.controlled_stop()
    engine.start()
    engine.wait()

    root = RootAnalysis(storage_dir=storage_dir_from_uuid(uuid))
    root.load()
    self.verify(root)