def add_httplog(fqdn, db, c):
    for fqdn_part in iterate_fqdn_parts(fqdn):
        execute_with_retry(db, c, """
            INSERT INTO httplog ( host, numconnections, firstconnectdate )
            VALUES ( LOWER(%s), 1, UNIX_TIMESTAMP(NOW()) )
            ON DUPLICATE KEY UPDATE numconnections = numconnections + 1""",
            (fqdn_part,))

    db.commit()
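# iterate_fqdn_parts() is not defined in this section. A minimal sketch of what
# add_httplog() appears to assume: it yields each suffix of the FQDN so a count is
# recorded for the host and every parent domain. The name is taken from the call
# above; the behavior here is an assumption, not the project's confirmed helper.
def iterate_fqdn_parts(fqdn):
    """Yield 'a.b.c', 'b.c', 'c' for an input of 'a.b.c'."""
    parts = fqdn.split('.')
    for index in range(len(parts)):
        yield '.'.join(parts[index:])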
def execute_workload_cleanup(self, db, c):
    # look up all the work that is currently completed
    # a completed work item has no entries in the work_distribution table with a status of 'READY'
    c.execute("""
        SELECT
            i.id,
            i.work
        FROM
            incoming_workload i JOIN work_distribution w ON i.id = w.work_id
            JOIN incoming_workload_type t ON i.type_id = t.id
        WHERE
            t.id = %s
        GROUP BY i.id, i.work
        HAVING SUM(IF(w.status = 'READY', 1, 0)) = 0""", (self.workload_type_id,))

    # fetch the full result set first since we issue DELETE statements on the same cursor below
    completed_work = c.fetchall()

    submission_count = 0
    for work_id, submission_blob in completed_work:
        submission_count += 1
        logging.debug(f"completed work item {work_id}")
        submission = None

        try:
            submission = pickle.loads(submission_blob)
        except Exception as e:
            logging.error(f"unable to un-pickle submission blob for id {work_id}: {e}")

        # clear any files that back the submission
        if submission and submission.files:
            try:
                target_dir = os.path.join(self.incoming_dir, submission.uuid)
                shutil.rmtree(target_dir)
                logging.debug(f"deleted incoming dir {target_dir}")
            except Exception as e:
                logging.error(f"unable to delete directory {target_dir}: {e}")

        # finally clear the database entry for this workload item
        execute_with_retry(db, c, "DELETE FROM incoming_workload WHERE id = %s", (work_id,), commit=True)

    return submission_count
def _t2():
    with get_db_connection() as db:
        c = db.cursor()
        lock_user0.wait(5)

        # acquire lock on user1
        execute_with_retry(db, c, "UPDATE users SET email = 'user1@_t2' WHERE username = '******'")
        lock_user1.set()

        # this will block waiting for lock on user0
        execute_with_retry(db, c, "UPDATE users SET email = 'user0@_t2' WHERE username = '******'")
        db.commit()
def schedule_submission(self, submission, db, c):
    # we don't really need to change the file paths that are stored in the Submission object
    # we just remember where we've moved them to (later)
    try:
        # add this as a workload item to the database queue
        work_id = execute_with_retry(db, c, self.insert_workload, (submission,), commit=True)
        assert isinstance(work_id, int)
        logging.info(f"scheduled {submission.description} mode {submission.analysis_mode}")
    except Exception as e:
        # something went wrong -- delete our incoming directory if we created one
        target_dir = self.get_submission_target_dir(submission)
        if os.path.exists(target_dir):
            try:
                shutil.rmtree(target_dir)
            except Exception as e:
                logging.error("unable to delete directory {}: {}".format(target_dir, e))

        raise e

    self.submission_count += 1
def test_execute_with_retry(self, db, c):
    # simple single statement transaction
    execute_with_retry(db, c, ['SELECT 1'], [tuple()])
    db.commit()

    # multi statement transaction
    _uuid = str(uuid.uuid4())
    _lock_uuid = str(uuid.uuid4())
    execute_with_retry(db, c, [
        'INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )',
        'UPDATE locks SET lock_uuid = %s WHERE uuid = %s',
        'DELETE FROM locks WHERE uuid = %s',
    ], [
        (_uuid,),
        (_lock_uuid, _uuid),
        (_uuid,),
    ])
    db.commit()
def _t2():
    _uuid = str(uuid.uuid4())
    _lock_uuid = str(uuid.uuid4())

    try:
        with get_db_connection() as db:
            c = db.cursor()
            execute_with_retry(db, c, "UPDATE locks SET lock_owner = 'whatever'")

            # wait for signal to continue
            time.sleep(2)

            execute_with_retry(db, c, "INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )", (_uuid,))
            db.commit()
    except pymysql.err.OperationalError as e:
        if e.args[0] == 1213 or e.args[0] == 1205:
            deadlock_event.set()
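# execute_with_retry() itself is not shown in this section. The sketch below is an
# assumption reconstructed from how it is called above: it accepts either a single SQL
# string with one parameter tuple or parallel lists of statements and parameter tuples,
# retries when MySQL reports a deadlock (1213) or lock wait timeout (1205), optionally
# commits, and returns the last cursor.lastrowid when one is available (callers such as
# schedule_submission() expect an integer id back, clear_alert() treats the return value
# as a row count). Treat this as illustrative, not the project's actual implementation.
import logging
import pymysql

def execute_with_retry(db, cursor, sql, params=None, attempts=2, commit=False):
    # normalize the single-statement form into the list form
    if isinstance(sql, str):
        sql, params = [sql], [params if params is not None else tuple()]

    for attempt in range(attempts):
        try:
            result = None
            for statement, parameters in zip(sql, params):
                cursor.execute(statement, parameters)
                # prefer the generated key of an INSERT, fall back to the affected row count
                result = cursor.lastrowid if cursor.lastrowid else cursor.rowcount

            if commit:
                db.commit()

            return result
        except pymysql.err.OperationalError as e:
            # 1213 = deadlock detected, 1205 = lock wait timeout exceeded
            if e.args[0] not in (1213, 1205) or attempt == attempts - 1:
                raise

            logging.warning(f"deadlock or lock timeout detected (attempt {attempt + 1}): {e}")
            db.rollback()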
def update_content_metadata(sha256_content, node, file_name, db, c):
    return execute_with_retry(db, c, """
        INSERT INTO cloudphish_content_metadata ( sha256_content, node, name )
        VALUES ( UNHEX(%s), %s, %s )
        ON DUPLICATE KEY UPDATE node = %s, name = %s""",
        (sha256_content, node, file_name, node, file_name),
        commit=True)
def save_persistent_data(self, key_name, key_value=None):
    """Creates a new persistent key with the given value recorded. The key must not already exist."""
    if key_value is not None:
        key_value = pickle.dumps(key_value)

    with get_db_connection() as db:
        c = db.cursor()
        execute_with_retry(db, c, """
            INSERT INTO persistence ( source_id, uuid, value )
            VALUES ( %s, %s, %s )
            ON DUPLICATE KEY UPDATE last_update = CURRENT_TIMESTAMP""",
            (self.persistence_source.id, key_name, key_value),
            commit=True)
def test_execute_with_retry_commit(self):
    _uuid = str(uuid.uuid4())
    _lock_uuid = str(uuid.uuid4())
    disable_cached_db_connections()

    # simple insert statement with commit option
    with get_db_connection() as db:
        c = db.cursor()
        execute_with_retry(db, c, 'INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )',
                           (_uuid,), commit=True)

    # check it on another connection
    with get_db_connection() as db:
        c = db.cursor()
        c.execute("SELECT uuid FROM locks WHERE uuid = %s", (_uuid,))
        self.assertIsNotNone(c.fetchone())

    _uuid = str(uuid.uuid4())
    _lock_uuid = str(uuid.uuid4())

    # and then this one should fail since we did not commit it
    with get_db_connection() as db:
        c = db.cursor()
        execute_with_retry(db, c, 'INSERT INTO locks ( uuid, lock_time ) VALUES ( %s, NOW() )',
                           (_uuid,), commit=False)

    with get_db_connection() as db:
        c = db.cursor()
        c.execute("SELECT uuid FROM locks WHERE uuid = %s", (_uuid,))
        self.assertIsNone(c.fetchone())

    enable_cached_db_connections()
def clear_alert():
    url, sha256_url = _get_url_and_hash()

    row_count = 0
    with get_db_connection() as db:
        c = db.cursor()
        row_count = execute_with_retry(db, c, """UPDATE cloudphish_analysis_results
                                                 SET result = 'CLEAR'
                                                 WHERE sha256_url = UNHEX(%s)""",
                                       (sha256_url,), commit=True)

    logging.info("request to clear cloudphish alert for {} row_count {}".format(
                 url if url else sha256_url, row_count))

    response = make_response(json.dumps({'result': 'OK', 'row_count': row_count}))
    response.mimetype = 'application/json'
    response.headers['Access-Control-Allow-Origin'] = '*'
    return response, 200
def _create_analysis(url, reprocess, alertable, **kwargs):
    assert isinstance(url, str)
    assert isinstance(reprocess, bool)
    assert isinstance(alertable, bool)
    assert isinstance(kwargs, dict)

    sha256_url = hash_url(url)
    new_entry = False

    try:
        with get_db_connection('cloudphish') as db:
            c = db.cursor()
            execute_with_retry(db, c, """INSERT INTO analysis_results ( sha256_url ) VALUES ( UNHEX(%s) )""",
                               (sha256_url,))
            db.commit()
            new_entry = True
    except pymysql.err.IntegrityError as e:
        # timing issue -- created as we were getting ready to create
        # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
        if e.args[0] != 1062:
            raise e

        logging.debug("entry for {} already created".format(url))

    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        # if we didn't just create this then we update the status of the existing entry
        # we don't need to do this if we just created it
        if reprocess or not new_entry:
            execute_with_retry(db, c, """UPDATE analysis_results SET status = %s WHERE sha256_url = UNHEX(%s)""",
                               (STATUS_NEW, sha256_url))

        try:
            execute_with_retry(db, c, """INSERT INTO workload ( sha256_url, url, alertable, details )
                                         VALUES ( UNHEX(%s), %s, %s, %s )""",
                               (sha256_url, url, alertable, pickle.dumps(kwargs)))
        except pymysql.err.IntegrityError as e:
            # timing issue -- created as we were getting ready to create
            # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
            if e.args[0] != 1062:
                raise e

            logging.debug("analysis request for {} already exists".format(url))

        db.commit()

    return get_cached_analysis(url)
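# hash_url() is called by _create_analysis() but not defined in this section. A minimal
# sketch of the assumed behavior: cloudphish keys its tables on the SHA2-256 hash of the
# URL text, passed to MySQL as a hex string and stored via UNHEX(). Any normalization the
# project applies to the URL is not shown here, so this is an assumption for illustration.
import hashlib

def hash_url(url):
    """Return the hex digest used as the sha256_url key for a URL."""
    return hashlib.sha256(url.encode('utf-8', errors='ignore')).hexdigest()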
def update_cloudphish_result(sha256_url, http_result_code=None, http_message=None, sha256_content=None,
                             result=None, status=None, db=None, c=None):
    sql = []
    params = []

    if http_result_code is not None:
        sql.append('http_result_code = %s')
        params.append(http_result_code)
    if http_message is not None:
        sql.append('http_message = %s')
        params.append(http_message[:256])
    if sha256_content is not None:
        sql.append('sha256_content = UNHEX(%s)')
        params.append(sha256_content)
    if result is not None:
        sql.append('result = %s')
        params.append(result)
    if status is not None:
        sql.append('status = %s')
        params.append(status)

    if not sql:
        logging.warning("update_cloudphish_result called for {} but nothing was passed in to update?".format(
                        sha256_url))
        return

    params.append(sha256_url)
    sql = "UPDATE cloudphish_analysis_results SET {} WHERE sha256_url = UNHEX(%s)".format(', '.join(sql))
    logging.debug("executing cloudphish update {} with {}".format(sql, params))
    return execute_with_retry(db, c, sql, tuple(params), commit=True)
def execute(self, db, c):
    # first we get a list of all the distinct analysis modes available in the work queue
    c.execute("""
        SELECT DISTINCT(incoming_workload.mode)
        FROM
            incoming_workload JOIN work_distribution ON incoming_workload.id = work_distribution.work_id
        WHERE
            incoming_workload.type_id = %s
            AND work_distribution.group_id = %s
            AND work_distribution.status = 'READY'
        """, (self.workload_type_id, self.group_id,))
    available_modes = c.fetchall()
    db.commit()

    # if we get nothing from this query then no work is available for this group
    if not available_modes:
        if saq.UNIT_TESTING:
            logging.debug("no work available for {}".format(self))
        return NO_WORK_AVAILABLE

    # flatten this out to a list of analysis modes
    available_modes = [_[0] for _ in available_modes]

    # given this list of modes that need remote targets, see what is currently available
    with get_db_connection(self.database) as node_db:
        node_c = node_db.cursor()
        sql = """
            SELECT
                nodes.id,
                nodes.name,
                nodes.location,
                nodes.any_mode,
                nodes.last_update,
                node_modes.analysis_mode,
                COUNT(workload.id) AS 'WORKLOAD_COUNT'
            FROM
                nodes LEFT JOIN node_modes ON nodes.id = node_modes.node_id
                LEFT JOIN workload ON nodes.id = workload.node_id
            WHERE
                nodes.company_id = %s
                AND nodes.is_local = 0
                AND TIMESTAMPDIFF(SECOND, nodes.last_update, NOW()) <= %s
                AND ( nodes.any_mode OR node_modes.analysis_mode IN ( {} ) )
            GROUP BY
                nodes.id, nodes.name, nodes.location, nodes.any_mode, nodes.last_update, node_modes.analysis_mode
            ORDER BY
                WORKLOAD_COUNT ASC,
                nodes.last_update ASC
            """.format(','.join(['%s' for _ in available_modes]))

        params = [self.company_id, self.node_status_update_frequency * 2]
        params.extend(available_modes)
        node_c.execute(sql, tuple(params))
        node_status = node_c.fetchall()

    if not node_status:
        logging.warning("no remote nodes are available for all analysis modes {} for {}".format(
                        ','.join(available_modes), self))

        if not self.full_delivery:
            # if this node group is NOT in full_delivery mode and there are no nodes available at all
            # then we just clear out the work queue for this group
            # if this isn't done then the work will pile up waiting for a node to come online
            execute_with_retry(db, c, "UPDATE work_distribution SET status = 'ERROR' WHERE group_id = %s",
                               (self.group_id,), commit=True)

        return NO_NODES_AVAILABLE

    # now figure out what analysis modes are actually available for processing
    analysis_mode_mapping = {}  # key = analysis_mode, value = [ RemoteNode ]
    any_mode_nodes = []  # list of nodes with any_mode set to True

    for node_id, name, location, any_mode, last_update, analysis_mode, workload_count in node_status:
        remote_node = RemoteNode(node_id, name, location, any_mode, last_update, analysis_mode, workload_count)
        if any_mode:
            any_mode_nodes.append(remote_node)

        if analysis_mode:
            if analysis_mode not in analysis_mode_mapping:
                analysis_mode_mapping[analysis_mode] = []

            analysis_mode_mapping[analysis_mode].append(remote_node)

    # now we trim our list of analysis modes down to what is available
    # if we don't have a node that supports any mode
    if not any_mode_nodes:
        available_modes = [m for m in available_modes if m in analysis_mode_mapping.keys()]
        logging.debug("available_modes = {} after checking available nodes".format(available_modes))

    if not available_modes:
        logging.debug("no nodes are available that support the available analysis modes")
        return NO_NODES_AVAILABLE

    # now we get the next things to submit from the database that have an analysis mode that is currently
    # available to be submitted to
    sql = """
        SELECT
            incoming_workload.id,
            incoming_workload.mode,
            incoming_workload.work
        FROM
            incoming_workload JOIN work_distribution ON incoming_workload.id = work_distribution.work_id
        WHERE
            incoming_workload.type_id = %s
            AND work_distribution.group_id = %s
            AND incoming_workload.mode IN ( {} )
            AND work_distribution.status = 'READY'
        ORDER BY
            incoming_workload.id ASC
        LIMIT %s""".format(','.join(['%s' for _ in available_modes]))

    params = [self.workload_type_id, self.group_id]
    params.extend(available_modes)
    params.append(self.batch_size)

    c.execute(sql, tuple(params))
    work_batch = c.fetchall()
    db.commit()

    logging.info("submitting {} items".format(len(work_batch)))

    # simple flag that gets set if ANY submission is successful
    submission_success = False

    # we should have a small list of things to submit to remote nodes for this group
    for work_id, analysis_mode, submission_blob in work_batch:
        # first make sure we can un-pickle this
        try:
            submission = pickle.loads(submission_blob)
        except Exception as e:
            execute_with_retry(db, c, """UPDATE work_distribution SET status = 'ERROR'
                                         WHERE group_id = %s AND work_id = %s""",
                               (self.group_id, work_id), commit=True)
            logging.error("unable to un-pickle submission blob for id {}: {}".format(work_id, e))
            continue

        # simple flag to remember if we failed to send
        submission_failed = False
        # the result of the submission (we pass to Submission.success later)
        submission_result = None

        self.coverage_counter += self.coverage
        if self.coverage_counter < 100:
            # we'll be skipping this one
            logging.debug("skipping work id {} for group {} due to coverage constraints".format(
                          work_id, self.name))
        else:
            # otherwise we try to submit it
            self.coverage_counter -= 100

            # sort the list of RemoteNode objects by the workload_count
            available_targets = any_mode_nodes[:]
            if analysis_mode in analysis_mode_mapping:
                available_targets.extend(analysis_mode_mapping[analysis_mode])

            target = sorted(available_targets, key=lambda n: n.workload_count)
            target = target[0]

            # attempt the send
            try:
                submission_result = target.submit(submission)
                logging.info("{} got submission result {} for {}".format(self, submission_result, submission))
                submission_success = True
            except Exception as e:
                log_function = logging.warning
                if not self.full_delivery:
                    log_function = logging.warning
                else:
                    if not isinstance(e, urllib3.exceptions.MaxRetryError) \
                    and not isinstance(e, urllib3.exceptions.NewConnectionError) \
                    and not isinstance(e, requests.exceptions.ConnectionError):
                        # if it's not a connection issue then report it
                        #report_exception()
                        pass

                log_function("unable to submit work item {} to {} via group {}: {}".format(
                             submission, target, self, e))

                # if we are in full delivery mode then we need to try this one again later
                if self.full_delivery and (isinstance(e, urllib3.exceptions.MaxRetryError) \
                or isinstance(e, urllib3.exceptions.NewConnectionError) \
                or isinstance(e, requests.exceptions.ConnectionError)):
                    continue

                # otherwise we consider it a failure
                submission_failed = True
                execute_with_retry(db, c, """UPDATE work_distribution SET status = 'ERROR'
                                             WHERE group_id = %s AND work_id = %s""",
                                   (self.group_id, work_id), commit=True)

        # if we skipped it or we sent it, then we're done with it
        if not submission_failed:
            execute_with_retry(db, c, """UPDATE work_distribution SET status = 'COMPLETED'
                                         WHERE group_id = %s AND work_id = %s""",
                               (self.group_id, work_id), commit=True)

        if submission_failed:
            try:
                submission.fail(self)
            except Exception as e:
                logging.error(f"call to {submission}.fail() failed: {e}")
                report_exception()
        else:
            try:
                submission.success(self, submission_result)
            except Exception as e:
                logging.error(f"call to {submission}.success() failed: {e}")
                report_exception()

    if submission_success:
        return WORK_SUBMITTED

    return NO_WORK_SUBMITTED
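# RemoteNode is constructed and used above but not defined in this section. The sketch
# below only captures the shape that execute() relies on: the constructor arguments in
# the order they arrive from the node status query, a workload_count used to pick the
# least busy node, and a submit() method that forwards a Submission to the node's API
# location. The submit() body is a hypothetical stand-in, not the project's actual
# remote submission code.
class RemoteNode(object):
    def __init__(self, node_id, name, location, any_mode, last_update, analysis_mode, workload_count):
        self.node_id = node_id
        self.name = name
        self.location = location              # address of the remote node's API
        self.any_mode = any_mode              # True if the node accepts any analysis mode
        self.last_update = last_update        # last status update reported by the node
        self.analysis_mode = analysis_mode    # a single mode supported by the node (may be None)
        self.workload_count = workload_count  # current queue depth, used for load-based sorting

    def __str__(self):
        return "RemoteNode({}@{})".format(self.name, self.location)

    def submit(self, submission):
        # hypothetical: hand the submission to the remote node's API and return its result;
        # connection failures are expected to surface as urllib3/requests exceptions
        raise NotImplementedError("illustrative sketch only")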
def _create_analysis(url, reprocess, details, db, c):
    assert isinstance(url, str)
    assert isinstance(reprocess, bool)
    assert isinstance(details, dict)

    sha256_url = hash_url(url)

    if reprocess:
        # if we're reprocessing the url then we clear any existing analysis
        # IF the current analysis has completed
        # it's OK if we delete nothing here
        execute_with_retry(db, c, """DELETE FROM cloudphish_analysis_results
                                     WHERE sha256_url = UNHEX(%s) AND status = 'ANALYZED'""",
                           (sha256_url,), commit=True)

    # if we're at this point it means that when we asked the database for an entry from cloudphish_analysis_results
    # it was empty, OR, we cleared existing analysis
    # however, we could have multiple requests coming in at the same time for the same url
    # so we need to take that into account here

    # first we'll generate our analysis uuid we're going to use
    _uuid = str(uuid.uuid4())

    # so first we try to insert it
    try:
        execute_with_retry(db, c, ["""INSERT INTO cloudphish_analysis_results ( sha256_url, uuid, insert_date )
                                      VALUES ( UNHEX(%s), %s, NOW() )""",
                                   """INSERT INTO cloudphish_url_lookup ( sha256_url, url )
                                      VALUES ( UNHEX(%s), %s )"""],
                           [(sha256_url, _uuid),
                            (sha256_url, url)],
                           commit=True)
    except pymysql.err.IntegrityError as e:
        # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
        # if we get a duplicate key entry here then it means that an entry was created between when we asked
        # and now
        if e.args[0] != 1062:
            raise e

        # so just return the one that was already created
        return get_cached_analysis(url)

    # at this point we've inserted an entry into cloudphish_analysis_results for this url
    # now we add it to the workload
    root = RootAnalysis()
    root.uuid = _uuid
    root.storage_dir = storage_dir_from_uuid(root.uuid)
    root.initialize_storage()
    root.analysis_mode = ANALYSIS_MODE_CLOUDPHISH
    # this is kind of a kludge but...
    # the company_id initially starts out as whatever the default is for this node
    # later, should the analysis turn into an alert, the company_id changes to whatever
    # is stored as the "d" field in the KEY_DETAILS_CONTEXT
    root.company_id = saq.COMPANY_ID
    root.tool = 'ACE - Cloudphish'
    root.tool_instance = saq.SAQ_NODE
    root.alert_type = ANALYSIS_TYPE_CLOUDPHISH
    root.description = 'ACE Cloudphish Detection - {}'.format(url)
    root.event_time = datetime.datetime.now()
    root.details = {
        KEY_DETAILS_URL: url,
        KEY_DETAILS_SHA256_URL: sha256_url,
        # this used to be configurable but it's always true now
        KEY_DETAILS_ALERTABLE: True,
        KEY_DETAILS_CONTEXT: details,  # <-- optionally contains the source company_id
    }

    url_observable = root.add_observable(F_URL, url)
    if url_observable:
        url_observable.add_directive(DIRECTIVE_CRAWL)

    root.save()
    root.schedule()

    return get_cached_analysis(url)
def execute_post_analysis(self):
    import saq.database

    # if we are already an Alert AND we have a disposition...
    if isinstance(self.root, saq.database.Alert) and self.root.id and self.root.disposition:
        # keep track of the observables we've already updated in hal
        _updated_observables = set()  # of md5 hash hexdigest

        # did we already set a disposition for this alert before?
        previous_disposition = None
        if self.state and 'previous_disposition' in self.state:
            previous_disposition = self.state['previous_disposition']
            logging.debug("loaded previous disposition of {} for {}".format(previous_disposition, self))

        new_disposition = self.root.disposition

        # if the disposition didn't change then we don't care
        if previous_disposition == new_disposition:
            logging.debug("same disposition {} == {} - not updating".format(new_disposition, self.root.disposition))
            return

        with get_db_connection('hal9000') as db:
            c = db.cursor()
            update_count = 0

            # update counts for all observables
            for observable in self.root.all_observables:
                md5_hasher = md5()
                md5_hasher.update(observable.type.encode('utf-8', errors='ignore'))
                md5_hasher.update(observable.value.encode('utf-8', errors='ignore'))
                id = md5_hasher.hexdigest()

                # keep track of the ones we've already updated
                # we only update any single observable value ONCE for each alert
                if id in _updated_observables:
                    continue

                _updated_observables.add(id)

                # we have three major groups of dispositions: IGNORE, MAL and BENIGN
                # if we've changed state from what we were previously then we want to "undo" what we did previously
                if previous_disposition is None or previous_disposition in IGNORE_ALERT_DISPOSITIONS:
                    if new_disposition in MAL_ALERT_DISPOSITIONS:
                        execute_with_retry(db, c, """
                            INSERT INTO observables (id, mal_count)
                            VALUES (UNHEX(%s), 1)
                            ON DUPLICATE KEY UPDATE total_count = total_count + 1, mal_count = mal_count + 1
                            """, (id,))
                    elif new_disposition in BENIGN_ALERT_DISPOSITIONS:
                        execute_with_retry(db, c, """
                            INSERT INTO observables (id)
                            VALUES (UNHEX(%s))
                            ON DUPLICATE KEY UPDATE total_count = total_count + 1
                            """, (id,))
                elif previous_disposition in BENIGN_ALERT_DISPOSITIONS:
                    if new_disposition in MAL_ALERT_DISPOSITIONS:
                        execute_with_retry(db, c, """
                            UPDATE observables
                            SET mal_count = mal_count + 1
                            WHERE id = UNHEX(%s)
                            """, (id,))
                    elif new_disposition in IGNORE_ALERT_DISPOSITIONS:
                        execute_with_retry(db, c, """
                            UPDATE observables
                            SET total_count = total_count - 1
                            WHERE id = UNHEX(%s) AND total_count > 0
                            """, (id,))
                elif previous_disposition in MAL_ALERT_DISPOSITIONS:
                    if new_disposition in BENIGN_ALERT_DISPOSITIONS:
                        execute_with_retry(db, c, """
                            UPDATE observables
                            SET mal_count = mal_count - 1
                            WHERE id = UNHEX(%s) AND mal_count > 0
                            """, (id,))
                    elif new_disposition in IGNORE_ALERT_DISPOSITIONS:
                        execute_with_retry(db, c, """
                            UPDATE observables
                            SET total_count = total_count - 1, mal_count = mal_count - 1
                            WHERE id = UNHEX(%s) AND total_count > 0 AND mal_count > 0
                            """, (id,))

                update_count += 1

            db.commit()

        # remember what our disposition was
        self.state = {}
        self.state['previous_disposition'] = self.root.disposition

        logging.debug("updated {} observables in hal9000".format(update_count))
        return

    # if we're not in the database AND we're not going to be an Alert...
    elif not self.root.has_detections:
        # sanity check
        if not hasattr(self, 'hal9000_observables'):
            logging.error("missing hal9000_observables property")
            return

        with get_db_connection('hal9000') as db:
            c = db.cursor()
            # record appearance of all hal9000 observables
            for id in self.root.hal9000_observables:
                execute_with_retry(db, c, """
                    INSERT INTO observables (id)
                    VALUES (UNHEX(%s))
                    ON DUPLICATE KEY UPDATE total_count = total_count + 1""", (id,))

            db.commit()

        return

    # otherwise we don't care
    logging.debug("{} is not an alert or does not have a disposition".format(self))
    return
def execute_post_analysis(self):
    import saq.database

    self.initialize_state({
        STATE_KEY_ID_TRACKING: {},  # key = return value of _compute_hal9000_md5, value = { } (see below)
        STATE_KEY_PREVIOUS_DISPOSITION: None })

    # start tracking what we do with all the observables
    for observable in self.root.all_observables:
        hal9000_id = _compute_hal9000_md5(observable)
        if hal9000_id not in self.state[STATE_KEY_ID_TRACKING]:
            # we keep track of how we modified the total count and the malicious count for each observable
            # (we record what we ADDED to the value so that we can undo it later if the disposition changes)
            self.state[STATE_KEY_ID_TRACKING][hal9000_id] = { 'id': observable.id,
                                                              KEY_TOTAL_COUNT: None,
                                                              KEY_MAL_COUNT: None }

    if self.root.analysis_mode != ANALYSIS_MODE_CORRELATION:
        # TODO check to see if this analysis mode has cleanup set to True
        # really what we want to do is see if we can possibly end up in a different analysis mode
        with get_db_connection('hal9000') as db:
            c = db.cursor()
            placeholder_clause = ','.join(['(UNHEX(%s))' for _ in self.state[STATE_KEY_ID_TRACKING].keys()])
            parameters = tuple(self.state[STATE_KEY_ID_TRACKING].keys())

            # record appearance of all hal9000 observables
            execute_with_retry(db, c, f"""
                INSERT INTO observables (id)
                VALUES {placeholder_clause}
                ON DUPLICATE KEY UPDATE total_count = total_count + 1""", parameters, commit=True)

        # we don't really need to record any more state here because
        # we expect this entire analysis to get deleted
        return True  # all we do here

    # are we an alert with a disposition?
    new_disposition = None
    with get_db_connection() as db:
        c = db.cursor()
        c.execute("SELECT disposition FROM alerts WHERE uuid = %s", (self.root.uuid,))
        result = c.fetchone()
        db.commit()

        if result:
            new_disposition = result[0]

    if new_disposition is None:
        return False  # no alert or no disposition -- check again later

    # did we already set a disposition for this alert before?
    previous_disposition = self.state[STATE_KEY_PREVIOUS_DISPOSITION]
    logging.debug("loaded previous disposition of {} for {}".format(previous_disposition, self))

    # if the disposition didn't change then we don't care
    if previous_disposition == new_disposition:
        logging.debug("same disposition {} == {} - not updating".format(previous_disposition, new_disposition))
        return False  # check again later

    all_sql = []  # list of SQL commands to execute
    all_parameters = []  # list of SQL parameter tuples for the SQL commands

    # if we've changed state from what we were previously then we want to undo what we did previously
    total_count_parameters = []
    mal_count_parameters = []

    for hal9000_id, value in self.state[STATE_KEY_ID_TRACKING].items():
        if self.state[STATE_KEY_ID_TRACKING][hal9000_id][KEY_TOTAL_COUNT] is not None:
            total_count_parameters.append(hal9000_id)
        if self.state[STATE_KEY_ID_TRACKING][hal9000_id][KEY_MAL_COUNT] is not None:
            mal_count_parameters.append(hal9000_id)

    if total_count_parameters:
        placeholder_clause = ','.join(['UNHEX(%s)' for _ in total_count_parameters])
        all_sql.append(f"""
            UPDATE observables
            SET total_count = IF(total_count > 0, total_count - 1, 0)
            WHERE id IN ( {placeholder_clause} )""")
        all_parameters.append(tuple(total_count_parameters))

    if mal_count_parameters:
        placeholder_clause = ','.join(['UNHEX(%s)' for _ in mal_count_parameters])
        all_sql.append(f"""
            UPDATE observables
            SET mal_count = IF(mal_count > 0, mal_count - 1, 0)
            WHERE id IN ( {placeholder_clause} )""")
        all_parameters.append(tuple(mal_count_parameters))

    # we have three major groups of dispositions: IGNORE, MAL and BENIGN
    parameters = tuple(self.state[STATE_KEY_ID_TRACKING].keys())

    if new_disposition in MAL_ALERT_DISPOSITIONS:
        placeholder_clause = ','.join(['(UNHEX(%s), 1)' for _ in self.state[STATE_KEY_ID_TRACKING].keys()])
        all_sql.append(f"""
            INSERT INTO observables (id, mal_count)
            VALUES {placeholder_clause}
            ON DUPLICATE KEY UPDATE total_count = total_count + 1, mal_count = mal_count + 1
            """)
        all_parameters.append(parameters)
    elif new_disposition in BENIGN_ALERT_DISPOSITIONS:
        placeholder_clause = ','.join(['(UNHEX(%s))' for _ in self.state[STATE_KEY_ID_TRACKING].keys()])
        all_sql.append(f"""
            INSERT INTO observables (id)
            VALUES {placeholder_clause}
            ON DUPLICATE KEY UPDATE total_count = total_count + 1
            """)
        all_parameters.append(parameters)

    with get_db_connection('hal9000') as db:
        c = db.cursor()
        execute_with_retry(db, c, all_sql, all_parameters, commit=True)

    # remember what we did so we can undo it later if we need to
    for hal9000_id in self.state[STATE_KEY_ID_TRACKING].keys():
        if new_disposition in MAL_ALERT_DISPOSITIONS:
            self.state[STATE_KEY_ID_TRACKING][hal9000_id][KEY_TOTAL_COUNT] = 1
            self.state[STATE_KEY_ID_TRACKING][hal9000_id][KEY_MAL_COUNT] = 1
        elif new_disposition in BENIGN_ALERT_DISPOSITIONS:
            self.state[STATE_KEY_ID_TRACKING][hal9000_id][KEY_TOTAL_COUNT] = 1
            self.state[STATE_KEY_ID_TRACKING][hal9000_id][KEY_MAL_COUNT] = None
        else:
            self.state[STATE_KEY_ID_TRACKING][hal9000_id][KEY_TOTAL_COUNT] = None
            self.state[STATE_KEY_ID_TRACKING][hal9000_id][KEY_MAL_COUNT] = None

    # remember what our disposition was
    self.state[STATE_KEY_PREVIOUS_DISPOSITION] = new_disposition
    return False  # check again later
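# _compute_hal9000_md5() is used above but not defined in this section. Based on the
# older inline version of this module (which hashes observable.type followed by
# observable.value), a minimal sketch might look like the following. This is an
# assumption for illustration; the project's actual helper may normalize differently.
from hashlib import md5

def _compute_hal9000_md5(observable):
    """Return the hex MD5 used as the hal9000 observables.id key for an observable."""
    hasher = md5()
    hasher.update(observable.type.encode('utf-8', errors='ignore'))
    hasher.update(observable.value.encode('utf-8', errors='ignore'))
    return hasher.hexdigest()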
def execute(self, db, c):
    if self.test_mode == TEST_MODE_STARTUP:
        next_submission = None
    elif self.test_mode == TEST_MODE_SINGLE_SUBMISSION and self.submission_count > 0:
        next_submission = None
    else:
        next_submission = self.get_next_submission()

    # did we not get anything to submit?
    if next_submission is None:
        if self.service_is_debug:
            return

        # wait until we check again (defaults to 1 second, passed in on constructor)
        self.service_shutdown_event.wait(self.collection_frequency)
        return

    if not isinstance(next_submission, Submission):
        logging.critical("get_next_submission() must return an object derived from Submission")

    # we COPY the files over to another directory for transfer
    # we'll DELETE them later if we are able to copy them all and then insert the entry into the database
    target_dir = None
    if next_submission.files:
        target_dir = os.path.join(self.incoming_dir, next_submission.uuid)
        if os.path.exists(target_dir):
            logging.error("target directory {} already exists".format(target_dir))
        else:
            try:
                os.mkdir(target_dir)
                for f in next_submission.files:
                    # this could be a tuple of (source_file, target_name)
                    if isinstance(f, tuple):
                        f = f[0]

                    target_path = os.path.join(target_dir, os.path.basename(f))
                    # TODO use hard links instead of copies to reduce I/O
                    shutil.copy2(f, target_path)
                    logging.debug("copied file from {} to {}".format(f, target_path))
            except Exception as e:
                logging.error("I/O error moving files into {}: {}".format(target_dir, e))
                report_exception()

    # we don't really need to change the file paths that are stored in the Submission object
    # we just remember where we've moved them to (later)
    try:
        # add this as a workload item to the database queue
        work_id = execute_with_retry(db, c, self.insert_workload, (next_submission,), commit=True)
        assert isinstance(work_id, int)
        logging.info("scheduled {} mode {}".format(next_submission.description, next_submission.analysis_mode))
    except Exception as e:
        # something went wrong -- delete our incoming directory if we created one
        if target_dir:
            try:
                shutil.rmtree(target_dir)
            except Exception as e:
                logging.error("unable to delete directory {}: {}".format(target_dir, e))

        raise e

    # all is well -- delete the files we've copied into our incoming directory
    if self.delete_files:
        for f in next_submission.files:
            # this could be a tuple of (source_file, target_name)
            if isinstance(f, tuple):
                f = f[0]

            try:
                os.remove(f)
            except Exception as e:
                logging.error("unable to delete file {}: {}".format(f, e))

    self.submission_count += 1
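# Submission is the interface the collector code above relies on, but its definition is
# not part of this section. The sketch below only lists the attributes and callbacks that
# execute(), schedule_submission() and the node group code actually touch; the field
# types and the no-op success()/fail() bodies are assumptions for illustration.
import uuid

class Submission(object):
    def __init__(self, description, analysis_mode, files=None):
        self.uuid = str(uuid.uuid4())        # used as the name of the incoming directory
        self.description = description       # human readable description for logging
        self.analysis_mode = analysis_mode   # which analysis mode should process this work
        self.files = files or []             # file paths or (source_file, target_name) tuples

    def success(self, group, result):
        """Called when a node group successfully delivers this submission."""
        pass

    def fail(self, group):
        """Called when delivery of this submission permanently fails."""
        pass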
def collect(self):
    # allow persistence to load
    while not self.shutdown and not self.collection_shutdown and self.incomplete_analysis:
        try:
            logging.debug("adding persisted workload item {}".format(self.incomplete_analysis[0]))
            self.work_queue.put(self.incomplete_analysis[0], block=not saq.SINGLE_THREADED, timeout=1)
            self.incomplete_analysis.pop(0)
        except Full:
            if not saq.SINGLE_THREADED:
                continue

    if self.shutdown or self.collection_shutdown:
        return

    # grab the workload from the database
    with get_db_connection() as db:
        c = db.cursor()

        # how many items on the workload stack have already been acquired by this node?
        c.execute("SELECT COUNT(*) FROM workload WHERE node = %s", (saq.SAQ_NODE,))
        row = c.fetchone()
        assigned_count = row[0]
        if assigned_count:
            logging.debug("{} work items are currently assigned to {}".format(assigned_count, saq.SAQ_NODE))

        # if there is nothing currently assigned then go ahead and assign some
        # (there is some sql trickery in here to do subselect magic in MySQL)
        if assigned_count < self.analysis_pool_size:
            sql = """
                UPDATE workload SET node = %s WHERE id IN (
                    SELECT id FROM (
                        SELECT w.id
                        FROM workload w JOIN alerts a ON a.id = w.alert_id
                        WHERE w.node IS NULL AND a.location = %s
                        ORDER BY w.id DESC
                        LIMIT %s ) as t)"""

            # the number of assigned work items should equal our analysis_pool_size
            execute_with_retry(db, c, sql, (saq.SAQ_NODE, saq.SAQ_NODE,
                               self.analysis_pool_size - assigned_count), attempts=10)
            db.commit()

            if c.rowcount != -1 and c.rowcount is not None:
                if c.rowcount:
                    logging.debug("assigned {} work items to {}".format(c.rowcount, saq.SAQ_NODE))

        # what we've done so far is marked specific alerts as acquired by this node
        # now we'll actually go *get* them, add them to the workload, and remove them from the database
        # we go ahead and remove the item from the database *before* we're able to execute the analysis
        sql = "SELECT w.id, a.id, a.uuid, a.storage_dir FROM workload w JOIN alerts a ON w.alert_id = a.id WHERE w.node = %s"
        c.execute(sql, (saq.SAQ_NODE,))

        # we'll keep a list of these so we can remove them later
        assigned_workload_ids = []  # of workload_id

        for workload_id, alert_id, uuid, storage_dir in c:
            logging.debug("got workload {} alert {} uuid {} storage_dir {}".format(
                          workload_id, alert_id, uuid, storage_dir))

            # make sure this alert is still around
            if not os.path.exists(storage_dir):
                logging.warning("invalid or missing storage_dir {}".format(storage_dir))
                continue

            # add this alert to the workload
            self.add_work_item(AnalysisRequest(uuid, storage_dir, alert_id))
            assigned_workload_ids.append(workload_id)

        for workload_id in assigned_workload_ids:
            logging.debug("deleting workload_id {}".format(workload_id))
            c.execute("DELETE FROM workload WHERE id = %s", (workload_id,))

        db.commit()
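# AnalysisRequest is constructed above but not defined in this section. collect() only
# needs a simple container carrying the alert uuid, its storage directory and the alert
# database id onto the work queue, so a minimal sketch might be a dataclass like this.
# The field names are assumptions drawn from the constructor call above.
from dataclasses import dataclass

@dataclass
class AnalysisRequest:
    uuid: str         # uuid of the alert to analyze
    storage_dir: str  # directory holding the alert data on disk
    alert_id: int     # primary key of the alert row in the database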