def setUp(self, *args, **kwargs):
    super().setUp(*args, **kwargs)

    # make sure we have a connection to carbon black
    import cbapi_legacy as cbapi
    cb_url = saq.CONFIG['carbon_black']['url']
    cb_token = saq.CONFIG['carbon_black']['token']
    cb = cbapi.CbApi(cb_url, token=cb_token, ssl_verify=False) # XXX <-- get rid of that

    try:
        info = cb.info()
    except Exception as e:
        self.skipTest("carbon black not available at {}".format(cb_url))
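
# A minimal sketch of how the repeated CbApi construction (here and in the
# collectors below) could be factored into one helper, addressing the
# "XXX <-- get rid of that" notes about the hard-coded ssl_verify=False.
# The helper name create_cb_connection and the verify_ssl config option are
# assumptions for illustration, not part of the existing codebase.
def create_cb_connection(config):
    """Build a CbApi client from a config section (hypothetical helper)."""
    import cbapi_legacy as cbapi
    return cbapi.CbApi(
        config['url'],
        token=config['token'],
        # read SSL verification from config rather than hard-coding False
        ssl_verify=config.getboolean('verify_ssl', fallback=True))
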
def collect_binaries(self):
    # get the list of hashes available to download in the past X minutes
    cb = cbapi.CbApi(self.cb_url, token=self.cb_token, ssl_verify=False) # XXX <-- get rid of that

    # do we need a new query to execute?
    if self.current_query is None:
        # build the time range for the carbon black query
        if self.last_search_time is not None:
            # have we already searched at least one time?
            time_range = 'server_added_timestamp:[{} TO *]'.format(
                self.last_search_time.strftime('%Y-%m-%dT%H:%M:%S'))
                #(datetime.datetime.utcnow() - datetime.timedelta(minutes=self.search_offset)).strftime('%Y-%m-%dT%H:%M:%S'))
        elif self.initial_search_offset == 0:
            # get EVERYTHING available (useful when running this entire system
            # for the first time or to get caught up)
            time_range = ''
        else:
            # first time running, go back N hours
            time_range = 'server_added_timestamp:[{} TO *]'.format(
                (datetime.datetime.utcnow()
                 - datetime.timedelta(hours=self.initial_search_offset)).strftime('%Y-%m-%dT%H:%M:%S'))

        self.current_query = 'is_executable_image:true -digsig_result:Signed {}'.format(time_range)
        self.current_index = 0

    try:
        json_result = cb.binary_search(self.current_query,
                                       start=self.current_index,
                                       rows=self.download_batch_size,
                                       sort='server_added_timestamp asc')

        # if we're executing a new query for the first time
        # then remember how many binaries we need to get
        if self.current_result_count is None:
            self.current_result_count = json_result['total_results']

    except requests.exceptions.HTTPError as e:
        logging.error("carbon black server returned an error: {}".format(e))
        return
    except Exception as e:
        logging.error("communication error with carbon black server: {}".format(e))
        return

    logging.info("requested binary data from {} at index {} result count {} with query {}".format(
        self.cb_url, self.current_index, self.current_result_count, self.current_query))

    if len(json_result['results']) < 1:
        logging.debug("got no more results from search")
        self.current_query = None
        self.current_index = None
        self.current_result_count = None
        return

    for binary in json_result['results']:
        if self.shutdown_event.is_set():
            return

        # move to the next set of items after processing these
        self.current_index += 1

        # figure out when this binary arrived at the carbon black server
        # sometimes the timestamp is missing the .%fZ at the end for some reason
        time_stamp_format = "%Y-%m-%dT%H:%M:%SZ"
        if '.' in binary['server_added_timestamp']:
            time_stamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"

        event_time = datetime.datetime.strptime(
            binary['server_added_timestamp'], time_stamp_format).replace(tzinfo=pytz.utc)

        # this also becomes our new starting point the next time we search
        if self.last_search_time is None or event_time > self.last_search_time:
            # we move one second past the last time we saw something added
            self.last_search_time = event_time + datetime.timedelta(seconds=1)

        binary_dir = os.path.join(self.storage_dir, binary['md5'][0:2])
        binary_path = os.path.join(binary_dir, binary['md5'])
        binary_zip_path = '{}.zip'.format(binary_path)
        binary_json_path = '{}.json'.format(binary_path)
        submit_path = '{}.submit'.format(binary_path)

        # have we already submitted this one for analysis?
        if os.path.exists(submit_path):
            logging.debug("already submitted {}".format(binary['md5']))
            continue

        # have we already downloaded this md5?
        if os.path.exists(binary_path):
            logging.debug("already have binary {} at {}".format(binary['md5'], binary_path))
            continue
        else:
            # go get it from Carbon Black
            if not os.path.isdir(binary_dir):
                try:
                    os.makedirs(binary_dir)
                except Exception as e:
                    logging.error("unable to create directory {}: {}".format(binary_dir, e))
                    continue

            logging.info("downloading {}".format(binary['md5']))

            try:
                # XXX see if you can do this without pulling the entire binary into memory
                binary_content = cb.binary(binary['md5'])
            except Exception as e:
                logging.info("unable to download {}: {}".format(binary['md5'], e))
                continue

            if len(binary_content) == 0:
                logging.warning("got 0 bytes for {}".format(binary_zip_path))
                continue

            with open(binary_zip_path, 'wb') as fp:
                try:
                    fp.write(binary_content)
                except Exception as e:
                    logging.error("unable to write to {}: {}".format(binary_zip_path, e))

            # also save the json that came with the file
            with open(binary_json_path, 'w') as fp:
                json.dump(binary, fp, indent=4)

            # extract the file
            with zipfile.ZipFile(binary_zip_path) as zip_fp:
                with zip_fp.open('filedata') as unzipped_fp:
                    with open(binary_path, 'wb') as fp:
                        fp.write(unzipped_fp.read())

            # delete the zip file
            os.remove(binary_zip_path)
            logging.debug("downloaded {}".format(binary_path))

        # we have to copy the file into the new storage directory for it to be analyzed
        # we use the file name that Carbon Black saw on the endpoint
        try:
            file_name = binary['observed_filename'][-1]
        except Exception as e:
            logging.error("cannot determine file name for {}".format(binary_path))
            file_name = 'unknown'

        # we need to figure out if this is a path from a Windows machine or a Unix machine
        # so we count the number of backslashes and forward slashes
        # it's a hack but it should work 99.9% of the time
        if file_name.count('\\') > file_name.count('/'):
            logging.debug("{} appears to be a windows path".format(file_name))
            file_name = ntpath.basename(file_name)
        else:
            logging.debug("{} appears to be a unix path".format(file_name))
            file_name = os.path.basename(file_name)

        observables = []
        for endpoint in binary['endpoint']:
            if '|' in endpoint:
                endpoint = endpoint[:endpoint.index('|')]
            observables.append({'type': F_HOSTNAME, 'value': endpoint})

        for file_path in binary['observed_filename']:
            observables.append({'type': F_FILE_PATH, 'value': file_path})

        # create a new submission request for this
        self.work_list.append(CarbonBlackBinarySubmission(
            description='Carbon Black binary {}'.format(file_name),
            analysis_mode=ANALYSIS_MODE_BINARY,
            tool='ACE - Carbon Black Binary Analysis',
            tool_instance=self.fqdn,
            type='carbon_black_binary',
            event_time=event_time,
            details=binary,
            observables=observables,
            tags=[],
            files=[binary_path]))
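
# A minimal, self-contained sketch of the slash-counting heuristic above as a
# reusable helper. The name basename_from_observed_path is an assumption for
# illustration; posixpath is used instead of os.path so the guess does not
# depend on the platform the collector itself runs on.
import ntpath
import posixpath

def basename_from_observed_path(file_name):
    """Guess the base name of a path observed on an endpoint whose OS is unknown."""
    # more backslashes than forward slashes suggests a Windows path
    if file_name.count('\\') > file_name.count('/'):
        return ntpath.basename(file_name)
    return posixpath.basename(file_name)

# example (hypothetical values):
# basename_from_observed_path(r'c:\windows\temp\payload.exe')  -> 'payload.exe'
# basename_from_observed_path('/usr/local/bin/payload')        -> 'payload'
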
def collect(self):
    # get the list of hashes available to download in the past X minutes
    # TODO past X minutes
    cb = cbapi.CbApi(self.config['url'], ssl_verify=False, token=self.config['token'])
    total_results = None

    # how far back do we look?
    # normally we look back over some period of time for any new binaries that were uploaded
    if self.last_search_time is not None:
        # have we already searched at least one time?
        # NOTE remember to use UTC time here
        self.time_range = 'server_added_timestamp:[{0} TO *]'.format(
            (datetime.datetime.utcnow()
             - datetime.timedelta(minutes=self.config.getint('search_offset'))).strftime('%Y-%m-%dT%H:%M:%S'))
    elif self.config.getint('initial_search_offset') == 0:
        # get EVERYTHING available (useful when running this entire system
        # for the first time or to get caught up)
        self.time_range = ''
    else:
        # first time running, go back N hours
        self.time_range = 'server_added_timestamp:[{0} TO *]'.format(
            (datetime.datetime.utcnow()
             - datetime.timedelta(hours=self.config.getint('initial_search_offset'))).strftime('%Y-%m-%dT%H:%M:%S'))

    # remember the last time we searched
    # this was used to determine the next time range
    # now it's just a marker that at least one search was performed
    self.last_search_time = datetime.datetime.utcnow()

    while not self.shutdown:
        query = 'is_executable_image:true -digsig_result:Signed {}'.format(self.time_range)

        try:
            json_result = cb.binary_search(query, start=self.index, rows=self.increment)
        except requests.exceptions.HTTPError as e:
            logging.error("carbon black server returned an error: {}".format(e))
            return
        except Exception as e:
            logging.error("communication error with carbon black server: {}".format(e))
            #report_exception()
            return

        logging.info("requested binary data from {0} index {1} of {2} with query {3}".format(
            self.config['url'], self.index, json_result['total_results'], query))

        self.index += self.increment

        if len(json_result['results']) < 1:
            logging.debug("got no more results from search")
            # then we reset and use a new time range next time
            self.index = 0
            self.time_range = None
            return

        for binary in json_result['results']:
            if self.shutdown:
                return

            binary_dir = os.path.join(self.config['storage_dir'], binary['md5'][0:2])
            binary_path = os.path.join(binary_dir, binary['md5'])
            binary_zip_path = '{0}.zip'.format(binary_path)
            binary_json_path = '{0}.json'.format(binary_path)

            # have we already downloaded this md5?
            if os.path.exists(binary_path):
                logging.debug("already have binary {0} at {1}".format(binary['md5'], binary_path))
            else:
                # go get it from Carbon Black
                if not os.path.isdir(binary_dir):
                    os.makedirs(binary_dir)

                logging.info("downloading {0}".format(binary['md5']))
                with open(binary_zip_path, 'wb') as fp:
                    try:
                        fp.write(cb.binary(binary['md5']))
                    except Exception as e:
                        logging.warning("unable to download {0}: {1}".format(binary['md5'], str(e)))
                        continue

                # also save the json that came with the file
                with open(binary_json_path, 'w') as fp:
                    json.dump(binary, fp, indent=4)

                # extract the file
                with ZipFile(binary_zip_path) as zip_fp:
                    with zip_fp.open('filedata') as unzipped_fp:
                        with open(binary_path, 'wb') as fp:
                            fp.write(unzipped_fp.read())

                # delete the zip file
                os.remove(binary_zip_path)
                logging.debug("downloaded {0}".format(binary_path))

            # add this file to the work queue
            while not self.shutdown:
                try:
                    self.work_queue.put(binary_path, block=True, timeout=1)
                    break
                except Full:
                    logging.debug("work queue is full...")

        # in SINGLE_THREADED mode we only loop through once
        if saq.SINGLE_THREADED:
            return
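
# The server_added_timestamp range construction above is duplicated across
# collect_binaries() and both collect() implementations. A minimal sketch of
# factoring it into one function; the name build_time_range is hypothetical.
import datetime

def build_time_range(last_search_time, initial_search_offset):
    """Build the server_added_timestamp clause for a carbon black query.

    last_search_time -- UTC datetime the last search left off at, or None
    initial_search_offset -- hours to look back on the first run (0 = everything)
    """
    if last_search_time is not None:
        # continue from where the previous search left off
        start = last_search_time
    elif initial_search_offset == 0:
        # get EVERYTHING available
        return ''
    else:
        # first time running, go back N hours
        start = datetime.datetime.utcnow() - datetime.timedelta(hours=initial_search_offset)

    return 'server_added_timestamp:[{} TO *]'.format(start.strftime('%Y-%m-%dT%H:%M:%S'))
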
def collect(self):
    cb = cbapi.CbApi(self.config['url'], ssl_verify=False, token=self.config['token'])
    total_results = None

    # how far back do we look?
    # normally we look back over some period of time for any new binaries that were uploaded
    if self.last_search_time is not None:
        # have we already searched at least one time?
        # NOTE remember to use UTC time here
        self.time_range = 'server_added_timestamp:[{0} TO *]'.format(
            (datetime.datetime.utcnow()
             - datetime.timedelta(minutes=self.config.getint('search_offset'))).strftime('%Y-%m-%dT%H:%M:%S'))
    elif self.config.getint('initial_search_offset') == 0:
        # get EVERYTHING available (useful when running this entire system
        # for the first time or to get caught up)
        self.time_range = ''
    else:
        # first time running, go back N hours
        self.time_range = 'server_added_timestamp:[{0} TO *]'.format(
            (datetime.datetime.utcnow()
             - datetime.timedelta(hours=self.config.getint('initial_search_offset'))).strftime('%Y-%m-%dT%H:%M:%S'))

    # remember the last time we searched
    # this was used to determine the next time range
    # now it's just a marker that at least one search was performed
    self.last_search_time = datetime.datetime.utcnow()

    while not self.shutdown:
        query = 'path:microsoft\\ office* '
        query += '-process_name:Moc.exe '
        query += '-process_name:xlview.exe '
        query += '-hostname:PC* '
        query += '-hostname:NAKYLEXRDA* '
        query += 'username:ASHLAND username:i50* '
        query += 'cmdline:AppData\\Local\\Microsoft\\Windows\\Temporary\ Internet\ Files\\Content.IE5 '
        query += self.time_range

        try:
            logging.info("searching {} for {} starting at {}".format(
                self.config['url'], query, self.index))
            json_result = cb.process_search(query, start=self.index, rows=self.increment)
        except requests.exceptions.HTTPError as e:
            logging.error("carbon black server returned an error: {}".format(e))
            return
        except Exception as e:
            logging.error("communication error with carbon black server: {}".format(e))
            #report_exception()
            return

        self.index += self.increment

        if len(json_result['results']) < 1:
            logging.debug("got no more results from search")
            # then we reset and use a new time range next time
            self.index = 0
            self.time_range = None
            return

        for process in json_result['results']:
            if self.shutdown:
                return

            # have we already downloaded this file?
            logging.debug("checking for {}".format(process['id']))
            with shelve.open(self.tracking_db) as db:
                if process['id'] in db:
                    logging.debug("already downloaded {}".format(process['id']))
                    continue

            # add this process json to the work queue
            while not self.shutdown:
                try:
                    self.work_queue.put(process, block=True, timeout=1)
                    break
                except Full:
                    logging.debug("work queue is full...")

        # in SINGLE_THREADED mode we only loop through once
        if saq.SINGLE_THREADED:
            return
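
# A minimal sketch of the shelve-based tracking used above, wrapped in a pair of
# helpers. The names already_processed and mark_processed are assumptions for
# illustration; the existing code performs the shelve check inline.
import shelve

def already_processed(tracking_db, process_id):
    """Return True if this process id has been seen in a previous search."""
    with shelve.open(tracking_db) as db:
        return process_id in db

def mark_processed(tracking_db, process_id):
    """Record a process id so later searches skip it."""
    with shelve.open(tracking_db) as db:
        db[process_id] = True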