def test_concurrency(self):
    producer_threads = []
    consumer_threads = []

    # handle things that were in-process when we died
    self.work_queue.reprocess_on_restart()

    db_controller = BinaryDatabaseController(self.work_queue)
    db_controller.start()

    for i in range(10):
        producer_threads.append(ProducerThread(db_controller.register("producer"), 20))
    for t in producer_threads:
        t.start()

    sleep(0.1)

    for i in range(5):
        consumer_threads.append(ConsumerThread(db_controller.register("consumer")))
    for t in consumer_threads:
        t.start()

    for t in producer_threads:
        t.join()
    for t in consumer_threads:
        t.join()

    md5s_produced = []
    md5s_consumed = []
    for t in producer_threads:
        md5s_produced.extend(t.produced)
    for t in consumer_threads:
        md5s_consumed.extend(t.errors)
        md5s_consumed.extend(t.good)

    self.assertSetEqual(set(md5s_produced), set(md5s_consumed))
class DetonationDaemon(CbIntegrationDaemon):
    def __init__(self, name, **kwargs):
        work_directory = kwargs.pop('work_directory', None)
        CbIntegrationDaemon.__init__(self, name, **kwargs)
        self.cb = None
        self.work_queue = None
        # default work directory: /usr/share/cb/integrations/<integration name>
        self.work_directory = work_directory or os.path.join("/usr", "share", "cb", "integrations",
                                                             "%s" % self.name)
        self.database_file = os.path.join(self.work_directory, "sqlite.db")
        self._queue_initialized = False
        self.done = False
        self.feed_dirty = Event()
        self.feed_url = None
        self.feed_base_url = None

    ### Start: Functions which must be overridden in subclasses of DetonationDaemon ###

    @property
    def num_quick_scan_threads(self):
        return 1

    @property
    def num_deep_scan_threads(self):
        return 5

    @property
    def filter_spec(self):
        return ''

    @property
    def historical_rate_limiter(self):
        return 0.5

    @property
    def up_to_date_rate_limiter(self):
        return 0.1

    def get_provider(self):
        raise IntegrationError("Integration did not provide a 'get_provider' function, which is required")

    def get_metadata(self):
        raise IntegrationError("Integration did not provide a 'get_metadata' function, which is required")

    ### End: Functions which must be overridden in subclasses of DetonationDaemon ###

    def validate_config(self):
        if not self.cfg.has_section('bridge'):
            raise ConfigurationError("Configuration file does not have required section 'bridge'")

        self.check_required_options(['carbonblack_server_url', 'carbonblack_server_token'])
        ssl_verify = self.get_config_boolean("carbonblack_server_sslverify", False)
        server_url = self.cfg.get("bridge", "carbonblack_server_url")
        server_token = self.cfg.get("bridge", "carbonblack_server_token")

        try:
            self.cb = cbapi.CbApi(server_url, token=server_token, ssl_verify=ssl_verify)
        except Exception as e:
            raise ConfigurationError("Could not create CbAPI instance to %s: %s" % (server_url, e.message))

        if self.get_config_boolean("use_streaming", False):
            self.check_required_options(['carbonblack_streaming_host', 'carbonblack_streaming_username',
                                         'carbonblack_streaming_password'])
            self.streaming_host = self.cfg.get('bridge', 'carbonblack_streaming_host')
            self.streaming_username = self.cfg.get('bridge', 'carbonblack_streaming_username')
            self.streaming_password = self.cfg.get('bridge', 'carbonblack_streaming_password')
            self.use_streaming = True
        else:
            self.use_streaming = False

        self.feed_base_url = "http://%s:%d" % (self.get_config_string('feed_host', '127.0.0.1'),
                                               self.get_config_integer('listener_port', 8080))
        self.feed_url = "%s%s" % (self.feed_base_url, '/feed.json')

        try:
            cbinfo = self.cb.info()
            self.cb_version = cbinfo['version']
        except Exception as e:
            raise ConfigurationError("Could not connect to Cb server at %s: %s" % (server_url, str(e)))

        return True

    def initialize_queue(self):
        if not self._queue_initialized:
            self.work_queue = SqliteQueue(self.database_file)
            self.work_queue.reprocess_on_restart()
            self._queue_initialized = True

        return self.work_queue

    def migrate_legacy_reports(self, legacy_directory):
        migrated_count = 0

        if not os.path.isdir(legacy_directory):
            log.info("Legacy directory %s doesn't exist, nothing to migrate" % legacy_directory)
            return migrated_count

        if os.path.isfile(os.path.join(legacy_directory, '.migrated')):
            log.info("Feed reports from %s already migrated" % legacy_directory)
            return migrated_count

        for fn in (f for f in os.listdir(legacy_directory) if os.path.isfile(os.path.join(legacy_directory, f))):
            try:
                d = json.load(open(os.path.join(legacy_directory, fn), 'rb'))
                short_result = d['title']
                timestamp = int(d['timestamp'])
                iocs = d['iocs']
                score = int(d['score'])
                link = d['link']

                # NOTE: we are assuming the first md5 in the list is the md5sum of the binary.
                md5_iocs = iocs.get('md5', [])
                if not md5_iocs:
                    log.warning("No MD5 IOCs in file %s" % fn)
                    continue
                md5sum = md5_iocs[0]
                md5_iocs.remove(md5sum)
                if not md5_iocs:
                    del iocs['md5']
                if not iocs:
                    iocs = None
                succeeded = (score >= 0)
            except Exception as e:
                log.warning("Could not parse file %s: %s" % (fn, e))
                continue

            try:
                if not self.work_queue.binary_exists_in_database(md5sum):
                    self.work_queue.append(md5sum)
                    self.work_queue.mark_as_analyzed(md5sum, succeeded, 0, short_result, '',
                                                     score=score, link=link, iocs=iocs)
                    migrated_count += 1
            except Exception as e:
                log.warning("Could not migrate file %s to new database: %s" % (fn, e))
                import traceback
                log.warning(traceback.format_exc())
                continue

            # try:
            #     os.remove(os.path.join(legacy_directory, fn))
            # except IOError:
            #     log.warning("Could not remove old file %s after migration: %s" % (fn, e))

        touch(os.path.join(legacy_directory, '.migrated'))
        log.info("Migrated %d reports from %s into database" % (migrated_count, legacy_directory))

        return migrated_count

    def start_binary_collectors(self, filter_spec):
        collectors = []
        now = datetime.datetime.utcnow()

        # historical query
        collectors.append(CbAPIHistoricalProducerThread(self.database_controller.register("producer"),
                                                        self.cb, self.name,
                                                        sleep_between=self.get_config_integer('sleep_between_batches', 1200),
                                                        rate_limiter=self.historical_rate_limiter,
                                                        start_time=now, filter_spec=filter_spec))
        # constantly up-to-date query
        collectors.append(CbAPIUpToDateProducerThread(self.database_controller.register("producer"),
                                                      self.cb, self.name,
                                                      sleep_between=self.get_config_integer('sleep_between_batches', 30),
                                                      rate_limiter=self.up_to_date_rate_limiter,
                                                      start_time=now, filter_spec=filter_spec))

        # if self.use_streaming:
        #     # TODO: need filter_spec for streaming
        #     collectors.append(CbStreamingProducerThread(self.database_controller.register("producer"),
        #                                                 self.streaming_host, self.streaming_username,
        #                                                 self.streaming_password))

        for collector in collectors:
            collector.start()

        return collectors

    def start_feed_server(self, feed_metadata):
        self.feed_server = SqliteFeedServer(self.database_file,
                                            self.get_config_integer('listener_port', 8080),
                                            feed_metadata, self.feed_base_url, self.work_directory,
                                            listener_address=self.get_config_string('listener_address', '0.0.0.0'))
        self.feed_server.start()

    def get_or_create_feed(self):
        feed_id = self.cb.feed_get_id_by_name(self.name)
        self.logger.info("Feed id for %s: %s" % (self.name, feed_id))

        if not feed_id:
            self.logger.info("Creating %s feed for the first time" % self.name)
            # TODO: clarification of feed_host vs listener_address
            result = self.cb.feed_add_from_url(self.feed_url, True, False, False)

            # TODO: defensive coding around these self.cb calls
            feed_id = result.get('id', 0)

        return feed_id

    def run(self):
        work_queue = self.initialize_queue()
        self.database_controller = BinaryDatabaseController(work_queue)
        self.database_controller.start()

        # Import previous work, if enabled
        legacy_feed_directory = self.get_config_string("legacy_feed_directory", None)
        if legacy_feed_directory:
            self.migrate_legacy_reports(legacy_feed_directory)

        # Prepare binary analysis ("detonation") provider
        consumer_threads = []
        provider = self.get_provider()
        for i in range(self.num_quick_scan_threads):
            database_arbiter = self.database_controller.register("consumer", quick_scan=True)
            t = QuickScanThread(database_arbiter, self.cb, provider, dirty_event=self.feed_dirty)
            consumer_threads.append(t)
            t.start()
        for i in range(self.num_deep_scan_threads):
            database_arbiter = self.database_controller.register("consumer", quick_scan=False)
            t = DeepAnalysisThread(database_arbiter, self.cb, provider, dirty_event=self.feed_dirty)
            consumer_threads.append(t)
            t.start()

        # Start feed server
        metadata = self.get_metadata()
        self.start_feed_server(metadata)

        # Start collecting binaries
        collectors = self.start_binary_collectors(self.filter_spec)

        # Synchronize feed with Carbon Black
        self.get_or_create_feed()
        if cbint.utils.cbserver.is_server_at_least(self.cb, "4.1"):
            feed_synchronizer = FeedSyncRunner(self.cb, self.name, self.feed_dirty)
            feed_synchronizer.start()

        try:
            while True:
                sleep(1)
        except KeyboardInterrupt:
            print 'stopping...'
            for t in consumer_threads + collectors:
                t.stop()
            for t in consumer_threads + collectors:
                t.join()
                print 'stopped %s' % t
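

# A minimal sketch of how an integration might subclass DetonationDaemon.
# Everything below is illustrative and not part of the daemon above:
# `MyBinaryAnalysisProvider` and `my_feed_metadata()` stand in for an
# integration's own analysis provider and feed metadata, 'os_type:windows'
# is just an example filter, and the `configfile` keyword plus
# `daemon.start()` are assumed from the CbIntegrationDaemon base class and
# may differ in a real connector.
class MyDetonationDaemon(DetonationDaemon):

    @property
    def num_quick_scan_threads(self):
        # optional tuning knob: number of QuickScanThread consumers started by run()
        return 2

    @property
    def filter_spec(self):
        # optional: narrow which binaries the producer threads collect
        return 'os_type:windows'

    def get_provider(self):
        # required: the object handed to QuickScanThread/DeepAnalysisThread
        # as the binary analysis ("detonation") provider
        return MyBinaryAnalysisProvider()

    def get_metadata(self):
        # required: the feed metadata served by the SqliteFeedServer that run() starts
        return my_feed_metadata()


if __name__ == '__main__':
    daemon = MyDetonationDaemon('my-detonation-connector',
                                configfile='/etc/cb/integrations/my-detonation-connector/connector.conf')
    daemon.start()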