def test_detections_000_ole(self):
    submissions = {} # key = storage_dir, value = path to file

    for file_name in os.listdir(OFFICE_SAMPLES):
        source_path = os.path.join(OFFICE_SAMPLES, file_name)
        root = create_root_analysis(uuid=str(uuid.uuid4()))
        root.initialize_storage()
        shutil.copy(source_path, root.storage_dir)
        root.add_observable(F_FILE, file_name)
        root.save()
        root.schedule()
        submissions[root.storage_dir] = source_path

    engine = TestEngine()
    engine.enable_module('analysis_module_archive')
    engine.enable_module('analysis_module_file_type')
    engine.enable_module('analysis_module_olevba_v1_1')
    engine.enable_module('analysis_module_officeparser_v1_0')
    engine.enable_module('analysis_module_yara_scanner_v3_4')
    engine.controlled_stop()
    engine.start()
    engine.wait()

    for storage_dir in submissions:
        with self.subTest(storage_dir=storage_dir, source_path=submissions[storage_dir]):
            root = RootAnalysis()
            root.storage_dir = storage_dir
            root.load()

            detections = root.all_detection_points
            self.assertGreater(len(detections), 0)
def submit():
    """Submits a new analysis request to this node. Expects the JSON analysis
       specification in the analysis field of the POST, with any file observables
       attached as file uploads."""

    if KEY_ANALYSIS not in request.values:
        abort(Response("missing {} field (see documentation)".format(KEY_ANALYSIS), 400))

    r = json.loads(request.values[KEY_ANALYSIS])

    # the specified company needs to match the company of this node
    # TODO eventually we'll have a single node that serves API to all configured companies
    if KEY_COMPANY_NAME in r and r[KEY_COMPANY_NAME] != saq.CONFIG['global']['company_name']:
        abort(Response("wrong company {} (are you sending to the correct system?)".format(r[KEY_COMPANY_NAME]), 400))

    if KEY_DESCRIPTION not in r:
        abort(Response("missing {} field in submission".format(KEY_DESCRIPTION), 400))

    root = RootAnalysis()
    root.uuid = str(uuid.uuid4())

    # does the engine use a different drive for the workload?
    analysis_mode = r[KEY_ANALYSIS_MODE] if KEY_ANALYSIS_MODE in r else saq.CONFIG['engine']['default_analysis_mode']
    if analysis_mode != ANALYSIS_MODE_CORRELATION:
        root.storage_dir = workload_storage_dir(root.uuid)
    else:
        root.storage_dir = storage_dir_from_uuid(root.uuid)

    root.initialize_storage()

    try:
        root.analysis_mode = analysis_mode
        root.company_id = saq.CONFIG['global'].getint('company_id')
        root.tool = r[KEY_TOOL] if KEY_TOOL in r else 'api'
        root.tool_instance = r[KEY_TOOL_INSTANCE] if KEY_TOOL_INSTANCE in r else 'api({})'.format(request.remote_addr)
        root.alert_type = r[KEY_TYPE] if KEY_TYPE in r else saq.CONFIG['api']['default_alert_type']
        root.description = r[KEY_DESCRIPTION]
        root.event_time = LOCAL_TIMEZONE.localize(datetime.datetime.now())
        if KEY_EVENT_TIME in r:
            try:
                root.event_time = parse_event_time(r[KEY_EVENT_TIME])
            except ValueError as e:
                abort(Response("invalid event time format for {} (use {} format)".format(
                               r[KEY_EVENT_TIME], event_time_format_json_tz), 400))

        root.details = r[KEY_DETAILS] if KEY_DETAILS in r else {}

        # go ahead and allocate storage
        # XXX use temp dir instead...

        if KEY_TAGS in r:
            for tag in r[KEY_TAGS]:
                root.add_tag(tag)

        # add the observables
        if KEY_OBSERVABLES in r:
            for o in r[KEY_OBSERVABLES]:
                # check for required fields
                for field in [KEY_O_TYPE, KEY_O_VALUE]:
                    if field not in o:
                        abort(Response("an observable is missing the {} field".format(field), 400))

                o_type = o[KEY_O_TYPE]
                o_value = o[KEY_O_VALUE]
                o_time = None
                if KEY_O_TIME in o:
                    try:
                        o_time = parse_event_time(o[KEY_O_TIME])
                    except ValueError:
                        abort(Response("an observable has an invalid time format {} (use {} format)".format(
                                       o[KEY_O_TIME], event_time_format_json_tz), 400))

                observable = root.add_observable(o_type, o_value, o_time=o_time)

                if KEY_O_TAGS in o:
                    for tag in o[KEY_O_TAGS]:
                        observable.add_tag(tag)

                if KEY_O_DIRECTIVES in o:
                    for directive in o[KEY_O_DIRECTIVES]:
                        # is this a valid directive?
                        if directive not in VALID_DIRECTIVES:
                            abort(Response("observable {} has invalid directive {} (choose from {})".format(
                                           '{}:{}'.format(o_type, o_value), directive,
                                           ','.join(VALID_DIRECTIVES)), 400))

                        observable.add_directive(directive)

                if KEY_O_LIMITED_ANALYSIS in o:
                    for module_name in o[KEY_O_LIMITED_ANALYSIS]:
                        observable.limit_analysis(module_name)

        # save the files to disk and add them as observables of type file
        for f in request.files.getlist('file'):
            logging.debug("recording file {}".format(f.filename))
            #temp_dir = tempfile.mkdtemp(dir=saq.CONFIG.get('api', 'incoming_dir'))
            #_path = os.path.join(temp_dir, secure_filename(f.filename))
            try:
                #if os.path.exists(_path):
                    #logging.error("duplicate file name {}".format(_path))
                    #abort(400)

                #logging.debug("saving file to {}".format(_path))
                #try:
                    #f.save(_path)
                #except Exception as e:
                    #logging.error("unable to save file to {}: {}".format(_path, e))
                    #abort(400)

                full_path = os.path.join(root.storage_dir, f.filename)

                try:
                    dest_dir = os.path.dirname(full_path)
                    if not os.path.isdir(dest_dir):
                        try:
                            os.makedirs(dest_dir)
                        except Exception as e:
                            logging.error("unable to create directory {}: {}".format(dest_dir, e))
                            abort(400)

                    logging.debug("saving file {}".format(full_path))
                    f.save(full_path)

                    # add this as a F_FILE type observable
                    root.add_observable(F_FILE, os.path.relpath(full_path, start=root.storage_dir))

                except Exception as e:
                    logging.error("unable to copy file {} to {} for root {}: {}".format(
                                  f.filename, full_path, root, e))
                    abort(400)

            except Exception as e:
                logging.error("unable to deal with file {}: {}".format(f, e))
                report_exception()
                abort(400)

            #finally:
                #try:
                    #shutil.rmtree(temp_dir)
                #except Exception as e:
                    #logging.error("unable to delete temp dir {}: {}".format(temp_dir, e))

        try:
            if not root.save():
                logging.error("unable to save analysis")
                abort(Response("an error occurred trying to save the alert - review the logs", 400))

            # if we received a submission for correlation mode then we go ahead and add it to the database
            if root.analysis_mode == ANALYSIS_MODE_CORRELATION:
                ALERT(root)

            # add this analysis to the workload
            root.schedule()

        except Exception as e:
            logging.error("unable to sync to database: {}".format(e))
            report_exception()
            abort(Response("an error occurred trying to save the alert - review the logs", 400))

        return json_result({'result': {'uuid': root.uuid}})

    except Exception as e:
        logging.error("error processing submit: {}".format(e))
        report_exception()

        try:
            if os.path.isdir(root.storage_dir):
                logging.info("removing failed submit dir {}".format(root.storage_dir))
                shutil.rmtree(root.storage_dir)
        except Exception as e2:
            logging.error("unable to delete failed submit dir {}: {}".format(root.storage_dir, e2))

        raise e
def _create_analysis(url, reprocess, details, db, c):
    """Creates a new cloudphish analysis request for the given url, returning
       the cached analysis if another request created it first."""

    assert isinstance(url, str)
    assert isinstance(reprocess, bool)
    assert isinstance(details, dict)

    sha256_url = hash_url(url)

    if reprocess:
        # if we're reprocessing the url then we clear any existing analysis
        # IF the current analysis has completed
        # it's OK if we delete nothing here
        execute_with_retry(db, c, """DELETE FROM cloudphish_analysis_results
                                     WHERE sha256_url = UNHEX(%s) AND status = 'ANALYZED'""",
                           (sha256_url,), commit=True)

    # if we're at this point it means that when we asked the database for an entry from cloudphish_analysis_results
    # it was empty, OR, we cleared existing analysis
    # however, we could have multiple requests coming in at the same time for the same url
    # so we need to take that into account here

    # first we'll generate our analysis uuid we're going to use
    _uuid = str(uuid.uuid4())

    # so first we try to insert it
    try:
        execute_with_retry(db, c, ["""INSERT INTO cloudphish_analysis_results ( sha256_url, uuid, insert_date )
                                      VALUES ( UNHEX(%s), %s, NOW() )""",
                                   """INSERT INTO cloudphish_url_lookup ( sha256_url, url )
                                      VALUES ( UNHEX(%s), %s )"""],
                           [(sha256_url, _uuid), (sha256_url, url)], commit=True)
    except pymysql.err.IntegrityError as e:
        # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
        # if we get a duplicate key entry here then it means that an entry was created between when we asked
        # and now
        if e.args[0] != 1062:
            raise e

        # so just return the one that was already created
        return get_cached_analysis(url)

    # at this point we've inserted an entry into cloudphish_analysis_results for this url
    # now we add it to the workload
    root = RootAnalysis()
    root.uuid = _uuid
    root.storage_dir = storage_dir_from_uuid(root.uuid)
    root.initialize_storage()
    root.analysis_mode = ANALYSIS_MODE_CLOUDPHISH
    # this is kind of a kludge but,
    # the company_id initially starts out as whatever the default is for this node
    # later, should the analysis turn into an alert, the company_id changes to whatever
    # is stored as the "d" field in the KEY_DETAILS_CONTEXT
    root.company_id = saq.COMPANY_ID
    root.tool = 'ACE - Cloudphish'
    root.tool_instance = saq.SAQ_NODE
    root.alert_type = ANALYSIS_TYPE_CLOUDPHISH
    root.description = 'ACE Cloudphish Detection - {}'.format(url)
    root.event_time = datetime.datetime.now()
    root.details = {
        KEY_DETAILS_URL: url,
        KEY_DETAILS_SHA256_URL: sha256_url,
        # this used to be configurable but it's always true now
        KEY_DETAILS_ALERTABLE: True,
        KEY_DETAILS_CONTEXT: details, # <-- optionally contains the source company_id
    }

    url_observable = root.add_observable(F_URL, url)
    if url_observable:
        url_observable.add_directive(DIRECTIVE_CRAWL)

    root.save()
    root.schedule()

    return get_cached_analysis(url)
def execute_analysis(self, url):

    analysis = url.get_analysis(CloudphishAnalysis)
    if analysis is None:
        analysis = self.create_analysis(url)

    try:
        parsed_url = urlparse(url.value)
        if parsed_url.hostname and '.' not in parsed_url.hostname:
            logging.debug("ignoring invalid FQDN {} in url {}".format(parsed_url.hostname, url.value))
            return False

        # only analyze http, https and ftp schemes
        if parsed_url.scheme not in ['http', 'https', 'ftp']:
            logging.debug("{} is not a supported scheme for cloudphish".format(parsed_url.scheme))
            return False
    except:
        pass

    # start the clock
    if analysis.query_start is None:
        analysis.query_start = int(time.time())
    #else:
        ## or has the clock expired?
        #if int(time.time()) - analysis.query_start > self.query_timeout:
            #logging.warning("cloudphish query for {} has timed out".format(url.value))
            #analysis.result = RESULT_ERROR
            #analysis.result_details = 'QUERY TIMED OUT'
            #return

    # do we have a local cache result for this url?
    sha256_url = hash_url(url.value)
    cache_dir = os.path.join(self.local_cache_dir, sha256_url[0:2])
    cache_path = os.path.join(cache_dir, sha256_url)
    alert_cache_path = '{}.ace.tar.gz'.format(cache_path)
    used_cache = False

    json_result = None

    # XXX need to fix this correctly
    #if os.path.exists(cache_path):
    if False:
        logging.debug("using local cache results for {}".format(url.value))
        try:
            with open(cache_path, 'r') as fp:
                json_result = json.load(fp)

            used_cache = True
        except Exception as e:
            logging.warning("unable to load local cache result for {} from {}: {}".format(
                            url.value, cache_path, e))
            #report_exception()
    else:
        logging.debug("making cloudphish query for {}".format(url.value))

        try:
            response = requests.request('POST', self.get_submit_url(),
                params={
                    'url': url.value,
                    'c': self.root.uuid, # context
                    'i': self.root.company_name if self.root.company_name else saq.CONFIG['global']['company_name'],
                    'd': self.root.company_id if self.root.company_id else saq.CONFIG['global'].getint('company_id'),
                    'a': '1' if self.generate_alert else '0',
                    's': self.engine.name,
                },
                data={
                    't': json.dumps(self.engine.get_tracking_information(self.root)),
                },
                timeout=self.timeout,
                proxies=saq.PROXIES if self.use_proxy else {},
                verify=saq.CA_CHAIN_PATH,
                stream=False)
        except Exception as e:
            logging.warning("cloudphish request failed: {}".format(e))
            analysis.result = RESULT_ERROR
            analysis.result_details = 'REQUEST FAILED ({})'.format(e)
            return False

        if response.status_code != 200:
            logging.error("cloudphish returned status {} for {} - {}".format(
                          response.status_code, url.value, response.reason))
            analysis.result = RESULT_ERROR
            analysis.result_details = 'REQUEST FAILED ({}:{})'.format(response.status_code, response.reason)
            return False

        # check the results first
        # if the analysis isn't ready yet then we come back later
        json_result = response.json()
        if json_result[KEY_RESULT] == RESULT_OK:
            if json_result[KEY_STATUS] == STATUS_ANALYZING or json_result[KEY_STATUS] == STATUS_NEW:
                # deal with the possibility that cloudphish messed up
                if json_result[KEY_ANALYSIS_RESULT] != SCAN_RESULT_ALERT:
                    # has the clock expired?
                    if int(time.time()) - analysis.query_start > self.query_timeout:
                        logging.warning("cloudphish query for {} has timed out".format(url.value))
                        analysis.result = RESULT_ERROR
                        analysis.result_details = 'QUERY TIMED OUT'
                        return False

                    # otherwise we delay analysis
                    logging.info("waiting for cloudphish analysis of {} ({})".format(
                                 url.value, json_result[KEY_STATUS]))
                    return self.delay_analysis(url, analysis, seconds=self.frequency)

        # cache the analysis results if we didn't load it from cache
        while True:
            if not os.path.isdir(cache_dir):
                try:
                    os.mkdir(cache_dir)
                except Exception as e:
                    logging.error("unable to create directory {}: {}".format(cache_dir, e))
                    report_exception()
                    break

            cache_path = os.path.join(cache_dir, sha256_url)
            if os.path.exists(cache_path):
                logging.debug("cloudphish cache entry {} already exists".format(cache_path))
                #break

            try:
                logging.debug("saving cloudphish cache entry {} for {}".format(cache_path, url.value))
                with open(cache_path, 'wb') as fp:
                    fp.write(response.content)
            except Exception as e:
                logging.error("unable to save cloudphish cache entry for {} at {}: {}".format(
                              url.value, cache_path, e))
                report_exception()
                cache_path = None
                break

            break

    # save the analysis results
    analysis.query_result = json_result

    # sha256 E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855 is the hash for the empty string
    # we ignore this case
    if analysis.sha256_content and analysis.sha256_content.upper() == \
            'E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855':
        logging.debug("ignoring result of 0 length data for {}".format(url.value))
        return False

    # what did cloudphish see?
    if analysis.analysis_result == SCAN_RESULT_ALERT:
        temp_dir = None
        try:
            # create a temporary directory to load the alert into
            temp_dir = tempfile.mkdtemp(prefix='cloudphish_',
                                        dir=os.path.join(saq.SAQ_HOME, saq.CONFIG['global']['tmp_dir']))

            # is the alert cached?
            if os.path.exists(alert_cache_path):
                logging.debug("using alert cache {} for url {}".format(alert_cache_path, url.value))
                p = Popen(['tar', 'zxf', alert_cache_path, '-C', temp_dir], stdout=PIPE, stderr=PIPE)
            else:
                # grab the alert it created
                logging.info("downloading alert info for {}".format(url.value))
                response = requests.request('GET', self.get_download_alert_url(),
                    params={ 's': analysis.sha256_content },
                    timeout=self.timeout,
                    proxies=saq.PROXIES if self.use_proxy else {},
                    verify=saq.CA_CHAIN_PATH,
                    stream=True)

                p = Popen(['tar', 'zxf', '-', '-C', temp_dir], stdin=PIPE, stdout=PIPE, stderr=PIPE)

                alert_cache_fp = None
                try:
                    alert_cache_fp = open(alert_cache_path, 'wb')
                except Exception as e:
                    logging.error("unable to cache alert data for {} at {}: {}".format(
                                  url.value, alert_cache_path, e))
                    report_exception()

                for chunk in response.iter_content(chunk_size=None):
                    if alert_cache_fp:
                        try:
                            alert_cache_fp.write(chunk)
                        except Exception as e:
                            logging.error("error writing data to cache alert data for {} at {}: {}".format(
                                          url.value, alert_cache_path, e))
                            report_exception()

                            try:
                                alert_cache_fp.close()
                            except:
                                pass
                            finally:
                                alert_cache_fp = None

                    p.stdin.write(chunk)

                if alert_cache_fp:
                    try:
                        alert_cache_fp.close()
                    except:
                        pass

            stdout, stderr = p.communicate()

            if stderr:
                logging.warning("tar produced output on stderr for {}: {}".format(url.value, stderr))

            # load the new alert
            cloudphish_alert = RootAnalysis()
            cloudphish_alert.storage_dir = temp_dir
            try:
                cloudphish_alert.load()
            except Exception as e:
                logging.warning("unable to load cloudphish alert for {}: {}".format(url.value, e))
                # XXX there is a reason for this but I forget what it was

            # merge this alert into the analysis for this url
            self.root.merge(analysis, cloudphish_alert)

        finally:
            # make sure we clean up these temp directories
            try:
                if temp_dir:
                    shutil.rmtree(temp_dir)
            except Exception as e:
                logging.error("unable to delete directory {}: {}".format(temp_dir, e))
                report_exception()

    # are we forcing the download of the URL?
    elif url.has_directive(DIRECTIVE_FORCE_DOWNLOAD) and analysis.file_name:
        target_file = os.path.join(self.root.storage_dir, analysis.file_name)
        if os.path.exists(target_file):
            logging.warning("target file {} exists".format(target_file))
            return

        try:
            logging.info("downloading file {} from {}".format(target_file, url.value))
            response = requests.request('GET', self.get_download_url(),
                params={ 's': analysis.sha256_content },
                timeout=self.timeout,
                proxies=saq.PROXIES if self.use_proxy else {},
                verify=saq.CA_CHAIN_PATH,
                stream=True)

            with open(target_file, 'wb') as fp:
                for chunk in response.iter_content(chunk_size=io.DEFAULT_BUFFER_SIZE):
                    if chunk:
                        fp.write(chunk)

            analysis.add_observable(F_FILE, os.path.relpath(target_file, start=self.root.storage_dir))
        except Exception as e:
            logging.error("unable to download file {} for url {} from cloudphish: {}".format(
                          target_file, url.value, e))
            report_exception()

    return True
def process(self, binary_path):
    """Processes a single binary downloaded from Carbon Black and submits it for local analysis."""

    logging.debug("processing {0}".format(binary_path))
    analysis_start_time = datetime.datetime.now()

    # load the JSON acquired from Carbon Black
    try:
        with open('{0}.json'.format(binary_path), 'r') as fp:
            binary_json = json.load(fp)
    except Exception as e:
        logging.error("unable to parse JSON from Carbon Black for {}: {}".format(binary_path, str(e)))
        return

    # we have to copy the file into the new storage directory for it to be analyzed
    # we use the file name that Carbon Black saw on the endpoint
    try:
        file_name = binary_json['observed_filename'][-1]
    except Exception as e:
        logging.error("cannot determine file name for {}".format(binary_path))
        file_name = 'unknown'

    # we need to figure out if this is a path from a Windows machine or a Unix machine
    # so we count the number of backslashes and forward slashes
    # it's a hack but it should work 99.9% of the time
    if file_name.count('\\') > file_name.count('/'):
        logging.debug("{0} appears to be a windows path".format(file_name))
        file_name = ntpath.basename(file_name)
    else:
        logging.debug("{0} appears to be a unix path".format(file_name))
        file_name = os.path.basename(file_name)

    # figure out when this binary arrived to the carbon black server
    # sometimes the time does not have the .%fZ at the end for some reason
    time_stamp_format = "%Y-%m-%dT%H:%M:%SZ"
    if '.' in binary_json['server_added_timestamp']:
        time_stamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"

    event_time = datetime.datetime.strptime(binary_json['server_added_timestamp'],
                                            time_stamp_format).replace(tzinfo=pytz.utc)
    event_time = pytz.timezone('US/Eastern').normalize(event_time)

    # create the root analysis object
    root = RootAnalysis()
    # set all of the properties individually
    # XXX fix me
    # it looks like the construction logic doesn't quite work here
    # when loading from the arguments to the constructor, the internal
    # variables with leading underscores get set rather than the properties
    # representing the database columns
    # it was designed that way to allow the JSON stuff to work correctly, so I'll need to revisit that later
    root.tool = 'ACE - Carbon Black Binary Analysis'
    root.tool_instance = socket.gethostname()
    root.alert_type = 'carbon_black_binary'
    root.description = 'Carbon Black binary {0}'.format(file_name)
    root.event_time = event_time
    root.details = binary_json

    # XXX database.Alert does not automatically create this
    root.uuid = str(uuid.uuid4())

    # we use a temporary directory while we process the file
    root.storage_dir = os.path.join(self.work_dir, root.uuid[0:3], root.uuid)
    root.initialize_storage()

    logging.debug("using storage directory {0} for {1}".format(root.storage_dir, binary_path))

    dest_path = os.path.join(root.storage_dir, file_name)
    try:
        shutil.copy(binary_path, dest_path)
    except Exception as e:
        logging.error("unable to copy {0} to {1}: {2}".format(binary_path, dest_path, str(e)))
        report_exception()
        return

    # note that the path is relative to the storage directory
    root.add_observable(F_FILE, file_name)

    # the endpoints are stored as an array of host names optionally appended with a pipe and count
    # I assume the count is the number of times that executable has executed on that host
    for endpoint in binary_json['endpoint']:
        if '|' in endpoint:
            endpoint = endpoint[:endpoint.index('|')]
        root.add_observable(F_HOSTNAME, endpoint)

    for file_path in binary_json['observed_filename']:
        root.add_observable(F_FILE_PATH, file_path)

    # now analyze the file
    try:
        self.analyze(root)
    except Exception as e:
        logging.error("analysis failed for {0}: {1}".format(binary_path, str(e)))
        report_exception()

    logging.info("completed {0} analysis time {1}".format(
                 binary_path, datetime.datetime.now() - analysis_start_time))
def execute_analysis(self, url):

    # don't run cloudphish on cloudphish alerts
    if self.root.alert_type == ANALYSIS_TYPE_CLOUDPHISH:
        return False

    # we keep track of what URLs we've given to cloudphish to process
    if self.state is None:
        self.state = {}
        self.state['requests'] = {}

    analysis = url.get_analysis(CloudphishAnalysis)
    if analysis is None:
        try:
            if len(self.state['requests']) >= self.cloudphish_request_limit:
                logging.info(f"skipping cloudphish analysis for {url.value} - reached cloudphish limit for {self.root}")
                return False

            # do basic URL sanity checks
            parsed_url = urlparse(url.value)

            #if parsed_url.hostname and '.' not in parsed_url.hostname:
                #logging.debug("ignoring invalid FQDN {} in url {}".format(parsed_url.hostname, url.value))
                #return False

            # only analyze http, https and ftp schemes
            if parsed_url.scheme not in ['http', 'https', 'ftp']:
                logging.debug("{} is not a supported scheme for cloudphish".format(parsed_url.scheme))
                return False

            # URL seems ok
            analysis = self.create_analysis(url)

        except Exception as e:
            logging.debug("possible invalid URL: {}: {}".format(url.value, e))
            return False

    # start the clock XXX isn't this built-in to the delay analysis system?
    if analysis.query_start is None:
        analysis.query_start = int(time.time())
    #else:
        ## or has the clock expired?
        #if int(time.time()) - analysis.query_start > self.query_timeout:
            #logging.warning("cloudphish query for {} has timed out".format(url.value))
            #analysis.result = RESULT_ERROR
            #analysis.result_details = 'QUERY TIMED OUT'
            #return

    # do we have a local cache result for this url?
    sha256_url = hash_url(url.value)
    json_result = None

    # once we decide on a cloudphish server to use we need to keep using the same one
    # for the same url
    if 'cloudphish_server' in self.state:
        cloudphish_server = self.state['cloudphish_server']
    else:
        cloudphish_server = self.get_cloudphish_server()
        self.state['cloudphish_server'] = cloudphish_server

    logging.debug("making cloudphish query against {} for {}".format(cloudphish_server, url.value))

    try:
        context = {
            'c': self.root.uuid, # context
            't': None, # tracking (see below)
        }

        tracking = []
        for o in self.root.all_observables:
            if o.has_directive(DIRECTIVE_TRACKED):
                tracking.append({
                    'type': o.type,
                    'value': o.value,
                    'time': None if o.time is None else o.time.strftime(event_time_format_json_tz)})

        context['t'] = json.dumps(tracking, cls=_JSONEncoder)

        response = ace_api.cloudphish_submit(url.value,
                                             context=context,
                                             remote_host=cloudphish_server,
                                             ssl_verification=saq.CA_CHAIN_PATH,
                                             proxies=saq.PROXIES if self.use_proxy else None,
                                             timeout=self.timeout)

        logging.debug("got result {} for cloudphish query @ {} for {}".format(
                      response, cloudphish_server, url.value))

    except Exception as e:
        logging.warning("cloudphish request failed: {}".format(e))
        analysis.result = RESULT_ERROR
        analysis.result_details = 'REQUEST FAILED ({})'.format(e)
        return True

    # check the results first
    # if the analysis isn't ready yet then we come back later
    if response[KEY_RESULT] == RESULT_OK:
        if response[KEY_STATUS] == STATUS_ANALYZING or response[KEY_STATUS] == STATUS_NEW:
            # keep track of the requests that resulted in work for ACE
            self.state['requests'][url.value] = True

            # otherwise we delay analysis
            logging.info("waiting for cloudphish analysis of {} ({})".format(url.value, response[KEY_STATUS]))
            if not self.delay_analysis(url, analysis, seconds=self.frequency, timeout_seconds=self.query_timeout):
                # analysis timed out
                analysis.result = RESULT_ERROR
                analysis.result_details = 'QUERY TIMED OUT'
                return True

    # sha256 E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855 is the hash for the empty string
    # we ignore this case
    if response[KEY_SHA256_CONTENT] and response[KEY_SHA256_CONTENT].upper() == \
            'E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855':
        logging.debug("ignoring result of 0 length data for {}".format(url.value))
        analysis.result = RESULT_ERROR
        analysis.result_details = 'EMPTY CONTENT'
        return True

    # save the analysis results
    analysis.query_result = response

    # did cloudphish generate an alert?
    if analysis.analysis_result == SCAN_RESULT_ALERT:
        # if cloudphish generated an alert then we'll need to wait for the alert correlation to finish
        # TODO

        temp_dir = None
        try:
            # create a temporary directory to load the alert into
            temp_dir = tempfile.mkdtemp(prefix='cloudphish_', dir=saq.TEMP_DIR)

            # grab the alert it created
            logging.info("downloading alert info for {}".format(url.value))
            ace_api.download(analysis.uuid, temp_dir,
                             remote_host=cloudphish_server,
                             ssl_verification=saq.CA_CHAIN_PATH,
                             proxies=saq.PROXIES if self.use_proxy else None,
                             timeout=self.timeout)

            #response = requests.request('GET', self.get_download_alert_url(),
                #params={ 's': analysis.sha256_content },
                #timeout=self.timeout,
                #proxies=saq.PROXIES if self.use_proxy else {},
                #verify=saq.CA_CHAIN_PATH,
                #stream=True)

            # load the new alert
            cloudphish_alert = RootAnalysis()
            cloudphish_alert.storage_dir = temp_dir
            try:
                cloudphish_alert.load()
            except Exception as e:
                logging.warning("unable to load cloudphish alert for {}: {}".format(url.value, e))
                # XXX there is a reason for this but I forget what it was

            # merge this alert into the analysis for this url
            self.root.merge(analysis, cloudphish_alert)

        finally:
            # make sure we clean up these temp directories
            try:
                if temp_dir:
                    shutil.rmtree(temp_dir)
            except Exception as e:
                logging.error("unable to delete directory {}: {}".format(temp_dir, e))
                report_exception()

    # are we forcing the download of the URL?
    elif url.has_directive(DIRECTIVE_FORCE_DOWNLOAD) and analysis.file_name:
        # TODO fix this file naming scheme
        target_file = os.path.join(self.root.storage_dir, analysis.file_name)
        if os.path.exists(target_file):
            logging.warning("target file {} exists".format(target_file))
            return True

        try:
            logging.info("downloading file {} from {}".format(target_file, url.value))
            ace_api.cloudphish_download(url=url.value,
                                        output_path=target_file,
                                        remote_host=cloudphish_server,
                                        ssl_verification=saq.CA_CHAIN_PATH,
                                        proxies=saq.PROXIES if self.use_proxy else None,
                                        timeout=self.timeout)

            #response = requests.request('GET', self.get_download_url(),
                #params={ 's': analysis.sha256_content },
                #timeout=self.timeout,
                #proxies=saq.PROXIES if self.use_proxy else {},
                #verify=saq.CA_CHAIN_PATH,
                #stream=True)

            #with open(target_file, 'wb') as fp:
                #for chunk in response.iter_content(chunk_size=io.DEFAULT_BUFFER_SIZE):
                    #if chunk:
                        #fp.write(chunk)

            analysis.add_observable(F_FILE, os.path.relpath(target_file, start=self.root.storage_dir))
        except Exception as e:
            logging.error("unable to download file {} for url {} from cloudphish: {}".format(
                          target_file, url.value, e))
            report_exception()

    return True
def post_smtp_analysis(self, root):
    """Creates and submits a new analysis root for each EmailAnalysis found in the given brotex SMTP root."""

    from saq.modules.email import EmailAnalysis, SMTPStreamAnalysis, \
                                  BrotexSMTPPackageAnalysis, \
                                  KEY_ENVELOPES_MAIL_FROM, KEY_ENVELOPES_RCPT_TO

    # get the paths to the email scanning system
    #email_scanner_dir = saq.CONFIG['engine_email_scanner']['collection_dir']
    email_scanner_dir = self.collection_dir

    # create a new analysis root for each email analysis we found
    for analysis in root.all_analysis:
        if not isinstance(analysis, EmailAnalysis) or not analysis.email:
            continue

        env_mail_from = None
        env_rcpt_to = None
        connection_id = None

        # the observable for this EmailAnalysis will be a file
        email_file = analysis.observable
        if email_file.type != F_FILE:
            logging.warning("the observable for {} should be F_FILE but it is {}".format(
                            analysis, email_file.type))
        else:
            # this will be either an rfc822 file generated by the SMTPStreamAnalysis module
            # (which will have the envelope information)
            # OR it is a "broken stream" file, which does not
            stream_analysis = [a for a in root.all_analysis if isinstance(a, SMTPStreamAnalysis)
                               and email_file in a.observables]
            if len(stream_analysis) > 1:
                logging.error("there should not be more than one of these")
            elif len(stream_analysis) == 1:
                stream_analysis = stream_analysis[0]
                logging.debug("detected stream analysis for {}".format(email_file))
                # get the MAIL FROM and RCPT TO from this
                if not analysis.env_mail_from:
                    if email_file.value in stream_analysis.envelopes:
                        analysis.env_mail_from = stream_analysis.envelopes[email_file.value][KEY_ENVELOPES_MAIL_FROM]
                if not analysis.env_rcpt_to:
                    if email_file.value in stream_analysis.envelopes:
                        analysis.env_rcpt_to = stream_analysis.envelopes[email_file.value][KEY_ENVELOPES_RCPT_TO]

                # get the original brotex package file that the stream came from
                stream_package = stream_analysis.observable
                # get the BrotexSMTPPackageAnalysis for this stream package so we can get the connection id
                package_analysis = [a for a in root.all_analysis if isinstance(a, BrotexSMTPPackageAnalysis)
                                    and stream_package in a.observables]
                if len(package_analysis) > 1:
                    logging.error("there should not be more than one of these!")
                elif len(package_analysis) == 1:
                    package_analysis = package_analysis[0]
                    connection_id = package_analysis.connection_id

            # if we could not find the stream, we will want to find the brotex smtp package so we can have the connection id
            package_analysis = [a for a in root.all_analysis if isinstance(a, BrotexSMTPPackageAnalysis)
                                and email_file in a.observables]
            if len(package_analysis) > 1:
                logging.error("there should not be more than one of these!")
            elif len(package_analysis) == 1:
                package_analysis = package_analysis[0]
                connection_id = package_analysis.connection_id

        subroot = RootAnalysis()
        subroot.company_name = root.company_name
        subroot.tool = root.tool
        subroot.tool_instance = root.tool_instance
        subroot.alert_type = root.alert_type

        subroot.description = 'Brotex SMTP Stream Detection - '

        if analysis.decoded_subject:
            subroot.description += '{} '.format(analysis.decoded_subject)
        elif analysis.subject:
            subroot.description += '{} '.format(analysis.subject)
        else:
            subroot.description += '(no subject) '

        if analysis.env_mail_from:
            subroot.description += 'From {} '.format(normalize_email_address(analysis.env_mail_from))
        elif analysis.mail_from:
            subroot.description += 'From {} '.format(normalize_email_address(analysis.mail_from))

        if analysis.env_rcpt_to:
            if len(analysis.env_rcpt_to) == 1:
                subroot.description += 'To {} '.format(analysis.env_rcpt_to[0])
            else:
                subroot.description += 'To ({} recipients) '.format(len(analysis.env_rcpt_to))
        elif analysis.mail_to:
            if isinstance(analysis.mail_to, list): # XXX I think this *has* to be a list
                if len(analysis.mail_to) == 1:
                    subroot.description += 'To {} '.format(analysis.mail_to[0])
                else:
                    subroot.description += 'To ({} recipients) '.format(len(analysis.mail_to))
            else:
                subroot.description += 'To {} '.format(analysis.mail_to)

        subroot.event_time = root.event_time
        subroot.details = analysis.details
        subroot.details['connection_id'] = connection_id
        subroot.uuid = str(uuid.uuid4())

        # we use a temporary directory while we process the file
        subroot.storage_dir = os.path.join(email_scanner_dir, subroot.uuid[0:3], subroot.uuid)
        subroot.initialize_storage()

        # copy the original file
        src_path = os.path.join(root.storage_dir, analysis.observable.value)
        dest_path = os.path.join(subroot.storage_dir, analysis.observable.value)

        subroot.add_observable(F_FILE, os.path.relpath(dest_path, start=subroot.storage_dir))

        # the EmailAnalysis that will trigger on the RFC822 file (or whatever you have)
        # will *not* have the envelope headers
        # so we do that here in the main alert
        env_mail_from = None
        if analysis.env_mail_from:
            # this is to handle this: <*****@*****.**> SIZE=80280
            # XXX assuming there can be no spaces in an email address
            env_mail_from = analysis.env_mail_from.split(' ', 1)
            env_mail_from = env_mail_from[0]

            # is this not the empty indicator?
            if env_mail_from != '<>':
                env_mail_from = normalize_email_address(env_mail_from)
                subroot.add_observable(F_EMAIL_ADDRESS, env_mail_from)

        if analysis.env_rcpt_to:
            for address in analysis.env_rcpt_to:
                address = normalize_email_address(address)
                if address:
                    subroot.add_observable(F_EMAIL_ADDRESS, address)
                    if env_mail_from:
                        subroot.add_observable(F_EMAIL_CONVERSATION,
                                               create_email_conversation(env_mail_from, address))

        try:
            subroot.save()
        except Exception as e:
            logging.error("unable to save {}: {}".format(subroot, e))
            report_exception()
            continue

        # TODO also add the stream and update any envelope headers and stuff

        try:
            logging.debug("copying {} to {}".format(src_path, dest_path))
            shutil.copy(src_path, dest_path)
        except Exception as e:
            logging.error("unable to copy {} to {}: {}".format(src_path, dest_path, e))
            report_exception()
            continue

        # submit the path to the database of the email scanner for analysis
        try:
            submit_sql_work_item('EMAIL', subroot.storage_dir)
        except Exception as e:
            logging.error("unable to add work item: {}".format(e))
            report_exception()
            continue
def post_http_analysis(self, root):
    """Creates and submits a new analysis root for each HTTP request found in the brotex HTTP package analysis."""

    from saq.modules.http import BrotexHTTPPackageAnalysis, \
                                 KEY_TIME, \
                                 KEY_SRC_IP, \
                                 KEY_SRC_PORT, \
                                 KEY_DEST_IP, \
                                 KEY_DEST_PORT, \
                                 KEY_METHOD, \
                                 KEY_HOST, \
                                 KEY_URI, \
                                 KEY_REFERRER, \
                                 KEY_USER_AGENT, \
                                 KEY_STATUS_CODE, \
                                 KEY_FILES

    # get the paths to the http scanning system
    #http_scanner_dir = saq.CONFIG['engine_http_scanner']['collection_dir']
    http_scanner_dir = self.collection_dir

    analysis = None
    for a in root.all_analysis:
        if isinstance(a, BrotexHTTPPackageAnalysis) and a.requests:
            analysis = a
            break

    # analysis can be missing if the request was whitelisted
    if analysis:
        for request in analysis.requests:
            subroot = RootAnalysis()
            subroot.company_name = root.company_name
            subroot.tool = root.tool
            subroot.tool_instance = root.tool_instance
            subroot.alert_type = root.alert_type

            subroot.description = "Brotex HTTP Stream Detection - "

            if request[KEY_HOST]:
                subroot.description += " {} ".format(request[KEY_HOST])

            if request[KEY_DEST_IP]:
                subroot.description += " ({}) ".format(request[KEY_DEST_IP])

            if request[KEY_URI]:
                # don't want to show all the fragments and query params
                try:
                    parts = urlparse(request[KEY_URI])
                    subroot.description += parts.path
                except Exception as e:
                    logging.warning("unable to parse {}: {}".format(request[KEY_URI], e))
                    subroot.description += request[KEY_URI]

            subroot.event_time = root.event_time
            subroot.details = request
            subroot.uuid = str(uuid.uuid4())

            # we use a temporary directory while we process the file
            subroot.storage_dir = os.path.join(http_scanner_dir, subroot.uuid[0:3], subroot.uuid)
            subroot.initialize_storage()

            if request[KEY_SRC_IP]:
                subroot.add_observable(F_IPV4, request[KEY_SRC_IP])

            if request[KEY_DEST_IP]:
                subroot.add_observable(F_IPV4, request[KEY_DEST_IP])

            if request[KEY_SRC_IP] and request[KEY_DEST_IP]:
                subroot.add_observable(F_IPV4_CONVERSATION,
                                       create_ipv4_conversation(request[KEY_SRC_IP], request[KEY_DEST_IP]))

            if request[KEY_HOST]:
                subroot.add_observable(F_FQDN, request[KEY_HOST])

            if request[KEY_URI]:
                subroot.add_observable(F_URL, request[KEY_URI])

            if request[KEY_REFERRER]:
                subroot.add_observable(F_URL, request[KEY_REFERRER])

            for file_path in request[KEY_FILES]:
                src_path = os.path.join(root.storage_dir, file_path)
                dest_path = os.path.join(subroot.storage_dir, os.path.basename(file_path))
                try:
                    shutil.copy(src_path, dest_path)
                except Exception as e:
                    logging.error("unable to copy {} to {}: {}".format(src_path, dest_path, e))
                    report_exception()

                subroot.add_observable(F_FILE, os.path.basename(file_path)) # already relative

            try:
                subroot.save()
            except Exception as e:
                logging.error("unable to save {}: {}".format(subroot, e))
                report_exception()
                continue

            # submit the path to the database of the http scanner for analysis
            try:
                submit_sql_work_item('HTTP', subroot.storage_dir) # XXX hard coded constant
            except:
                # failure is already logged inside the call
                continue
def process(self, process):
    """Processes a single Carbon Black process JSON document and submits it for local analysis."""

    logging.debug("processing json")
    analysis_start_time = datetime.datetime.now()

    try:
        file_path = process['cmdline'].split('"')[-2]
    except:
        logging.error("cannot determine file path for {}".format(process['cmdline']))
        file_path = 'unknown'

    try:
        file_name = file_path.split('\\')[-1]
    except:
        logging.error("cannot determine file name for {}".format(file_path))
        file_name = 'unknown'

    # figure out when this process started
    # sometimes the time does not have the .%fZ at the end for some reason
    time_stamp_format = "%Y-%m-%dT%H:%M:%SZ"
    if '.' in process['start']:
        time_stamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"

    event_time = datetime.datetime.strptime(process['start'], time_stamp_format).replace(tzinfo=pytz.utc)
    event_time = pytz.timezone('US/Eastern').normalize(event_time)

    # create the root analysis object
    root = RootAnalysis()
    # set all of the properties individually
    # XXX fix me
    # it looks like the construction logic doesn't quite work here
    # when loading from the arguments to the constructor, the internal
    # variables with leading underscores get set rather than the properties
    # representing the database columns
    # it was designed that way to allow the JSON stuff to work correctly, so I'll need to revisit that later
    root.tool = 'ACE - Carbon Black Internet Office File Analysis'
    root.tool_instance = socket.gethostname()
    root.alert_type = 'carbon_black_internet_office_file'
    root.description = 'Carbon Black Internet Office File {0}'.format(file_name)
    root.event_time = event_time
    root.details = process

    # XXX database.Alert does not automatically create this
    root.uuid = str(uuid.uuid4())

    # we use a temporary directory while we process the file
    root.storage_dir = os.path.join(self.work_dir, root.uuid[0:3], root.uuid)
    root.initialize_storage()

    # note that the path is relative to the storage directory
    fl_observable = root.add_observable(F_FILE_LOCATION,
                                        create_file_location(process['hostname'], file_path))
    if fl_observable:
        fl_observable.add_directive(DIRECTIVE_COLLECT_FILE)
    root.add_observable(F_FILE_PATH, file_path)
    root.add_observable(F_FILE_NAME, file_name)
    root.add_observable(F_HOSTNAME, process['hostname'])

    # now analyze the file
    try:
        self.analyze(root)
    except Exception as e:
        logging.error("analysis failed for {}: {}".format(process['id'], e))
        report_exception()

    logging.info("completed {} analysis time {}".format(
                 process['id'], datetime.datetime.now() - analysis_start_time))
def process(self, work_item):
    url, alertable, details = work_item

    # any other result means we should process it
    logging.info("processing url {} (alertable {})".format(url, alertable))
    #logging.debug("details = {}".format(details))

    sha256_url = hash_url(url)

    # create or update our analysis entry
    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        c.execute("""UPDATE analysis_results SET status = %s WHERE sha256_url = UNHEX(%s)""",
                  (STATUS_ANALYZING, sha256_url))
        db.commit()

    root = RootAnalysis()

    # create a temporary storage directory for this work
    root.tool = 'ACE - Cloudphish'
    root.tool_instance = self.location
    root.alert_type = 'cloudphish'
    root.description = 'ACE Cloudphish Detection - {}'.format(url)
    root.event_time = datetime.datetime.now()
    root.uuid = str(uuid.uuid4())
    root.storage_dir = os.path.join(self.work_dir, root.uuid[0:2], root.uuid)
    root.initialize_storage()

    if 'i' in details:
        root.company_name = details['i']

    if 'd' in details:
        root.company_id = details['d']

    root.details = {
        KEY_DETAILS_URL: url,
        KEY_DETAILS_SHA256_URL: sha256_url,
        KEY_DETAILS_ALERTABLE: alertable,
        KEY_DETAILS_CONTEXT: details,
    }

    url_observable = root.add_observable(F_URL, url)
    if url_observable is None:
        logging.error("request for invalid url received: {}".format(url))
        return

    url_observable.add_directive(DIRECTIVE_CRAWL)

    # the "details context" can also contain observables
    for key in root.details[KEY_DETAILS_CONTEXT].keys():
        if key in VALID_OBSERVABLE_TYPES:
            root.add_observable(key, root.details[KEY_DETAILS_CONTEXT][key])

    try:
        self.analyze(root)
    except Exception as e:
        logging.error("analysis failed for {}: {}".format(url, e))
        report_exception()

        with get_db_connection('cloudphish') as db:
            c = db.cursor()
            c.execute("""UPDATE analysis_results SET
                             result = %s,
                             status = %s,
                             http_result_code = NULL,
                             http_message = NULL,
                             sha256_content = NULL
                         WHERE sha256_url = UNHEX(%s)""",
                      (SCAN_RESULT_ERROR, STATUS_ANALYZED, sha256_url))
            db.commit()

        return