def __init__(self):
    """Entry point: parse CLI args/config, then dispatch to eval/train/test.

    Side effects: configures logging, reads command-line arguments and the
    configuration file, collects input traffic files, builds a Model, and
    immediately runs the requested operation for the requested algorithm.
    """
    ## Set logging information for instance
    self.logger = logging.getLogger(__name__)
    logging.basicConfig(level=logging.INFO)

    ## Take arguments from command line
    self.args = None
    self.read_args()

    ## Take input from configuration file
    self.get_config()
    self.common = Common(config=self.config)

    ## Instantiate a logger to log messages to aid debugging
    self.logger = Common().setup_logger(self.logger)

    ## Add network traffic files for parsing
    self.get_files()

    self.model_hash = None
    self.model = Model(duration=self.duration, hidden_size=None,
                       model_type=self.args.algorithm)

    def create_base_alg():
        ## Helper closure: builds a BaseAlgorithm wired to this instance's
        ## current state, so each branch below doesn't repeat the call.
        return BaseAlgorithm(files=self.files, config=self.config,
                             model=self.model, model_hash=self.model_hash,
                             model_path=self.args.trained_model,
                             sos_model=self.args.sos_model)

    ## Check whether operation is evaluation, train, or test
    ## Evaluation returns predictions that are useful for the deployment
    ## of networkml in an operational environment.
    if self.args.operation == 'eval':
        self.load_model()
        if (self.args.algorithm == 'onelayer'
                or self.args.algorithm == 'randomforest'):
            base_alg = create_base_alg()
            base_alg.eval(self.args.algorithm)
        ## SOS refers to statistical outlier selection model
        elif self.args.algorithm == 'sos':
            from networkml.algorithms.sos.eval_SoSModel import eval_pcap
            eval_pcap(self.args.path, self.args.sos_model,
                      self.conf_labels, self.time_const)
    ## Train entails training a new model on specific packet captures
    elif self.args.operation == 'train':
        ## Check for model type specified
        ## onelayer refers to a one-layer neural network
        if self.args.algorithm == 'onelayer':
            ## NOTE(review): (self.state_size) is a parenthesized int, not a
            ## 1-tuple -- confirm MLPClassifier accepts this hidden-size form
            m = MLPClassifier((self.state_size), alpha=0.1,
                              activation='relu', max_iter=1000)
            base_alg = create_base_alg()
            base_alg.train(self.args.path, self.args.save, m,
                           self.args.algorithm)
        ## Random forests refers to a decision tree-based model
        elif self.args.algorithm == 'randomforest':
            m = RandomForestClassifier(n_estimators=100,
                                       min_samples_split=5,
                                       class_weight='balanced')
            base_alg = create_base_alg()
            base_alg.train(self.args.path, self.args.save, m,
                           self.args.algorithm)
        ## SOS refers to statistical outlier selection model
        elif self.args.algorithm == 'sos':
            from networkml.algorithms.sos.train_SoSModel import train
            train(self.args.path, self.args.sos_model, self.time_const,
                  self.rnn_size, self.conf_labels, self.args.save)
    ## Test is for checking overall performance of networkML models for
    ## the device classification task. It is a benchmarking operation.
    elif self.args.operation == 'test':
        self.load_model()
        ## Check for model type specified
        ## onelayer refers to a one-layer neural network
        ## Random forests refers to a decision tree-based model
        if (self.args.algorithm == 'onelayer'
                or self.args.algorithm == 'randomforest'):
            base_alg = create_base_alg()
            base_alg.test(self.args.path, self.args.save)
        ## SOS refers to statistical outlier selection model
        elif self.args.algorithm == 'sos':
            self.logger.info(
                'There is no testing operation for the SoSModel.')
def eval(self, algorithm):
    """
    This operation uses a specified algorithm to predict--for particular
    network traffic--what devices types are present and whether the device is
    acting normally or abnormally. This is the function that should be used in
    production when a user wants to actually employ networkML to classify and
    assess traffic.

    Args:
        algorithm: type of algorithm (random forest, neural network, or
        stochastic outlier selection (SOS).
    """
    ## Pre-process all pcaps into sessions before the per-file loop
    if self.files:
        self.model.sessionize_pcaps(self.files)
    for fi in self.files:
        self.logger.info('Processing {0}...'.format(fi))
        base_pcap = os.path.basename(fi)
        ## Derive the message key and labels from the pcap filename;
        ## files whose names don't parse are skipped entirely
        pcap_key, pcap_labels = self.parse_pcap_name(base_pcap)
        if pcap_key is None:
            self.logger.debug('Ignoring unknown pcap name %s', base_pcap)
            continue

        ## Get representations from the model
        reps, source_mac, timestamps, preds, others, capture_ip_source = self.model.get_representation(
            str(fi), source_ip=None, mean=False)

        ## If no predictions are made, send a message with explanation
        if preds is None:
            message = {}
            message[pcap_key] = {'valid': False, 'pcap': base_pcap}
            message = {'data': message}
            self.logger.info('Not enough sessions in file \'%s\'', str(fi))
            self.publish_message(message)
            continue
        else:
            ## If a prediction is made, send message with prediction
            self.logger.debug('Generating predictions')

            ## Update the stored representation
            if reps is not None:
                self.logger.debug('Updating stored data')
                r_key = self.common.update_data(source_mac, reps, timestamps,
                                                preds, others,
                                                self.model_hash)

            ## Get the sessions that the model looked at
            sessions = self.model.sessions
            ## Clean the sessions
            clean_sessions = []
            inferred_mac = None
            for session_dict in sessions:
                cleaned_sessions, inferred_mac = \
                    clean_session_dict(
                        session_dict,
                        source_address=source_mac
                    )
                clean_sessions.append(cleaned_sessions)

            ## Fall back to the MAC inferred from sessions if the model
            ## did not supply one
            if source_mac is None:
                source_mac = inferred_mac

            ## Make simple decisions based on vector differences and update
            ## times
            timestamp = timestamps[0].timestamp()
            labels, confs = zip(*preds)
            abnormality = 0.0
            ## Abnormality scoring needs AVX CPU support (SoS RNN model)
            if self.has_avx():
                from networkml.algorithms.sos.eval_SoSModel import eval_pcap
                try:
                    abnormality = eval_pcap(str(fi), self.sos_model,
                                            self.conf_labels,
                                            self.time_const, label=labels[0],
                                            rnn_size=self.rnn_size,
                                            model_path=self.model_path,
                                            model_type=algorithm)
                except ValueError:
                    self.logger.warning(
                        "Can't run abnormality detection because not a big enough sample size"
                    )
            else:
                self.logger.warning(
                    "Can't run abnormality detection because this CPU doesn't support AVX"
                )

            prev_s = self.common.get_address_info(source_mac, timestamp)
            decision = self.common.basic_decision(pcap_key, source_mac,
                                                  prev_s, timestamp, labels,
                                                  confs, abnormality)
            ## Attach source info to whichever key basic_decision used
            sources = {
                'source_ip': capture_ip_source,
                'source_mac': source_mac,
                'pcap_labels': pcap_labels,
            }
            if pcap_key in decision:
                decision[pcap_key].update(sources)
            elif source_mac in decision:
                decision[source_mac].update(sources)
            self.logger.debug('Created message')
            ## Log the top three label predictions with confidences
            for i in range(3):
                self.logger.info(labels[i] + ' : ' + str(round(confs[i], 3)))

            # update Redis with decision
            if self.common.use_redis:
                redis_decision = {}
                for k in decision:
                    redis_decision[k] = str(decision[k])
                try:
                    self.common.r.hmset(r_key, redis_decision)
                except Exception as e:  # pragma: no cover
                    self.logger.error(
                        'Failed to update keys in Redis because: {0}'.
                        format(str(e)))

            message = {'data': decision}
            message['data']['pcap'] = base_pcap
            self.publish_message(message)

    ## After all files: publish an empty message to signal completion and
    ## close the connection (placement after the loop inferred -- confirm)
    message = {'data': ''}
    self.publish_message(message, close=True)
def eval(self, algorithm):
    """Predict device types (and abnormality) for each input pcap file.

    For every file: derives a lookup key from the filename, gets model
    representations and predictions, scores abnormality via the SoS model
    when the CPU supports AVX, stores results in Redis (if enabled), and
    publishes a JSON metadata message over RabbitMQ (if enabled).

    Args:
        algorithm: model type string passed through to the SoS evaluator.
    """
    for fi in self.files:
        self.logger.info('Processing {0}...'.format(fi))
        source_mac = None
        key = None
        split_path = 'None'
        ## Extract the key from the filename pattern; e.g. the token after
        ## the first '_' in the first '-'-separated segment of the basename
        try:
            split_path = os.path.split(fi)[-1]
            split_path = split_path.split('.')
            split_path = split_path[0].split('-')
            key = split_path[0].split('_')[1]
        except Exception as e:  # pragma: no cover
            self.logger.debug('Could not get key because %s', str(e))

        # ignore misc files
        if (split_path[-1] == 'miscellaneous'):
            continue

        # Get representations from the model
        reps, source_mac, timestamps, preds, others, capture_ip_source = self.model.get_representation(
            str(fi), source_ip=source_mac, mean=False)

        ## No predictions: publish an 'invalid' message and skip this file
        if preds is None:
            message = {}
            message[key] = {'valid': False, 'pcap': os.path.split(fi)[-1]}
            uid = os.getenv('id', 'None')
            file_path = os.getenv('file_path', 'None')
            message = {
                'id': uid,
                'type': 'metadata',
                'file_path': file_path,
                'data': message,
                'results': {
                    'tool': 'networkml',
                    'version': networkml.__version__
                }
            }
            message = json.dumps(message)
            self.logger.info('Not enough sessions in file \'%s\'', str(fi))
            if self.common.use_rabbit:
                ## delivery_mode=2 makes the message persistent
                self.common.channel.basic_publish(
                    exchange=self.common.exchange,
                    routing_key=self.common.routing_key,
                    body=message,
                    properties=pika.BasicProperties(delivery_mode=2, ))
            continue
        else:
            self.logger.debug('Generating predictions')
            last_update, prev_rep = self.common.get_previous_state(
                source_mac, timestamps[0])

            # TODO are these calls actually needed???
            _, mean_rep = self.common.average_representation(
                reps, timestamps, prev_representation=prev_rep,
                last_update=last_update)
            mean_preds = self.model.classify_representation(mean_rep)

            # Update the stored representation
            if reps is not None:
                self.logger.debug('Updating stored data')
                r_key = self.common.update_data(source_mac, reps, timestamps,
                                                preds, others,
                                                self.model_hash)

            # Get the sessions that the model looked at
            sessions = self.model.sessions
            # Clean the sessions
            clean_sessions = []
            inferred_mac = None
            for session_dict in sessions:
                cleaned_sessions, inferred_mac = \
                    clean_session_dict(
                        session_dict,
                        source_address=source_mac
                    )
                clean_sessions.append(cleaned_sessions)

            ## Fall back to the MAC inferred during session cleaning
            if source_mac is None:
                source_mac = inferred_mac

            # Make simple decisions based on vector differences and update times
            timestamp = timestamps[0].timestamp()
            labels, confs = zip(*preds)
            abnormality = 0.0
            ## Abnormality detection requires AVX CPU support
            has_avx = False
            if 'flags' in get_cpu_info() and (
                    'avx' in get_cpu_info()['flags'] or
                    'avx2' in get_cpu_info()['flags']):
                has_avx = True
            if has_avx:
                from networkml.algorithms.sos.eval_SoSModel import eval_pcap
                abnormality = eval_pcap(str(fi), self.conf_labels,
                                        self.time_const, label=labels[0],
                                        rnn_size=self.rnn_size,
                                        model_path=self.model_path,
                                        model_type=algorithm)
            else:
                self.logger.warning(
                    "Can't run abnormality detection because this CPU doesn't support AVX"
                )

            prev_s = self.common.get_address_info(source_mac, timestamp)
            decision = self.common.basic_decision(key, source_mac, prev_s,
                                                  timestamp, labels, confs,
                                                  abnormality)
            ## Attach source info to whichever key basic_decision used
            if key in decision:
                decision[key]['source_ip'] = capture_ip_source
                decision[key]['source_mac'] = source_mac
            elif source_mac in decision:
                decision[source_mac]['source_ip'] = capture_ip_source
                decision[source_mac]['source_mac'] = source_mac
            self.logger.debug('Created message')
            ## Log the top three label predictions with confidences
            for i in range(3):
                self.logger.info(labels[i] + ' : ' + str(round(confs[i], 3)))

            # update Redis with decision
            if self.common.use_redis:
                redis_decision = {}
                for k in decision:
                    redis_decision[k] = str(decision[k])
                try:
                    self.common.r.hmset(r_key, redis_decision)
                except Exception as e:  # pragma: no cover
                    self.logger.error(
                        'Failed to update keys in Redis because: {0}'.
                        format(str(e)))

            # Get json message
            uid = os.getenv('id', 'None')
            file_path = os.getenv('file_path', 'None')
            message = {
                'id': uid,
                'type': 'metadata',
                'file_path': file_path,
                'data': decision,
                'results': {
                    'tool': 'networkml',
                    'version': networkml.__version__
                }
            }
            message['data']['pcap'] = os.path.split(fi)[-1]
            message = json.dumps(message)
            self.logger.info('Message: ' + message)
            if self.common.use_rabbit:
                self.common.channel.basic_publish(
                    exchange=self.common.exchange,
                    routing_key=self.common.routing_key,
                    body=message,
                    properties=pika.BasicProperties(delivery_mode=2, ))

    ## After all files: publish a final empty message, then close the
    ## rabbit connection (placement after the loop inferred -- confirm)
    uid = os.getenv('id', 'None')
    file_path = os.getenv('file_path', 'None')
    message = {
        'id': uid,
        'type': 'metadata',
        'file_path': file_path,
        'data': '',
        'results': {
            'tool': 'networkml',
            'version': networkml.__version__
        }
    }
    message = json.dumps(message)
    if self.common.use_rabbit:
        self.common.channel.basic_publish(
            exchange=self.common.exchange,
            routing_key=self.common.routing_key,
            body=message,
            properties=pika.BasicProperties(delivery_mode=2, ))
        try:
            self.common.connection.close()
        except Exception as e:  # pragma: no cover
            self.logger.error(
                'Unable to close rabbit connection because: {0}'.format(
                    str(e)))
    return
def __init__(self):
    """Entry point: parse CLI args/config, then dispatch to eval/train/test.

    Side effects: configures logging, reads command-line arguments and the
    configuration file, collects input traffic files, builds a Model, and
    immediately runs the requested operation for the requested algorithm.
    """
    ## Set up logging for this instance
    self.logger = logging.getLogger(__name__)
    logging.basicConfig(level=logging.INFO)

    ## Read command-line arguments and the configuration file
    self.args = None
    self.read_args()
    self.get_config()
    self.common = Common(config=self.config)
    self.logger = Common().setup_logger(self.logger)

    ## Collect network traffic files for parsing
    self.get_files()

    self.model_hash = None
    self.model = Model(duration=self.duration, hidden_size=None,
                       model_type=self.args.algorithm)

    def create_base_alg():
        ## Helper: build a BaseAlgorithm wired to this instance's state.
        ## Factored out so each branch below doesn't repeat the same call.
        return BaseAlgorithm(files=self.files, config=self.config,
                             model=self.model, model_hash=self.model_hash,
                             model_path=self.args.trained_model)

    ## Dispatch on operation: eval (production predictions), train
    ## (fit a new model), or test (benchmark an existing model).
    if self.args.operation == 'eval':
        self.load_model()
        ## onelayer and randomforest share the same evaluation path
        if self.args.algorithm in ('onelayer', 'randomforest'):
            create_base_alg().eval(self.args.algorithm)
        ## SOS refers to statistical outlier selection model
        elif self.args.algorithm == 'sos':
            from networkml.algorithms.sos.eval_SoSModel import eval_pcap
            eval_pcap(self.args.path, self.conf_labels, self.time_const)
    elif self.args.operation == 'train':
        ## onelayer: a one-layer neural network
        if self.args.algorithm == 'onelayer':
            ## NOTE(review): (self.state_size) is a parenthesized int, not a
            ## 1-tuple -- confirm MLPClassifier accepts this hidden-size form
            m = MLPClassifier((self.state_size), alpha=0.1,
                              activation='relu', max_iter=1000)
            create_base_alg().train(self.args.path, self.args.save, m,
                                    self.args.algorithm)
        ## randomforest: a decision tree-based model
        elif self.args.algorithm == 'randomforest':
            m = RandomForestClassifier(n_estimators=100,
                                       min_samples_split=5,
                                       class_weight='balanced')
            create_base_alg().train(self.args.path, self.args.save, m,
                                    self.args.algorithm)
        ## SOS refers to statistical outlier selection model
        elif self.args.algorithm == 'sos':
            from networkml.algorithms.sos.train_SoSModel import train
            train(self.args.path, self.time_const, self.rnn_size,
                  self.conf_labels, self.args.save)
    elif self.args.operation == 'test':
        self.load_model()
        ## onelayer and randomforest share the same test path
        if self.args.algorithm in ('onelayer', 'randomforest'):
            create_base_alg().test(self.args.path, self.args.save)
        elif self.args.algorithm == 'sos':
            self.logger.info(
                'There is no testing operation for the SoSModel.')
def eval(self, algorithm):
    """
    This operation uses a specified algorithm to predict--for particular
    network traffic--what devices types are present and whether the device is
    acting normally or abnormally. This is the function that should be used in
    production when a user wants to actually employ networkML to classify and
    assess traffic.

    Args:
        algorithm: type of algorithm (random forest, neural network, or
        stochastic outlier selection (SOS).
    """
    for fi in self.files:
        self.logger.info('Processing {0}...'.format(fi))
        base_pcap = os.path.basename(fi)
        ## Derive the message key from the pcap filename; files whose
        ## names don't parse are skipped entirely
        key = self.parse_pcap_name(base_pcap)
        if key is None:
            self.logger.debug('Ignoring unknown pcap name %s', base_pcap)
            continue

        ## Get representations from the model
        reps, source_mac, timestamps, preds, others, capture_ip_source = self.model.get_representation(
            str(fi), source_ip=None, mean=False)

        ## If no predictions are made, send a message with explanation
        if preds is None:
            message = {}
            message[key] = {'valid': False, 'pcap': base_pcap}
            uid = os.getenv('id', 'None')
            file_path = os.getenv('file_path', 'None')
            message = {
                'id': uid,
                'type': 'metadata',
                'file_path': file_path,
                'data': message,
                'results': {
                    'tool': 'networkml',
                    'version': networkml.__version__
                }
            }
            message = json.dumps(message)
            self.logger.info('Not enough sessions in file \'%s\'', str(fi))
            if self.common.use_rabbit:
                ## delivery_mode=2 makes the message persistent
                self.common.channel.basic_publish(
                    exchange=self.common.exchange,
                    routing_key=self.common.routing_key,
                    body=message,
                    properties=pika.BasicProperties(delivery_mode=2, ))
            continue
        else:
            ## If a prediction is made, send message with prediction
            self.logger.debug('Generating predictions')
            ## NOTE(review): last_update/prev_rep appear unused after this
            ## call in this version -- confirm the call is still needed
            last_update, prev_rep = self.common.get_previous_state(
                source_mac, timestamps[0])

            ## Update the stored representation
            if reps is not None:
                self.logger.debug('Updating stored data')
                r_key = self.common.update_data(source_mac, reps, timestamps,
                                                preds, others,
                                                self.model_hash)

            ## Get the sessions that the model looked at
            sessions = self.model.sessions
            ## Clean the sessions
            clean_sessions = []
            inferred_mac = None
            for session_dict in sessions:
                cleaned_sessions, inferred_mac = \
                    clean_session_dict(
                        session_dict,
                        source_address=source_mac
                    )
                clean_sessions.append(cleaned_sessions)

            ## Fall back to the MAC inferred during session cleaning
            if source_mac is None:
                source_mac = inferred_mac

            ## Make simple decisions based on vector differences and update
            ## times
            timestamp = timestamps[0].timestamp()
            labels, confs = zip(*preds)
            abnormality = 0.0
            has_avx = False
            ## Check if CPU supports AVX (advanced vector extension),
            ## which speeds up certain calculations
            if 'flags' in get_cpu_info() and (
                    'avx' in get_cpu_info()['flags'] or
                    'avx2' in get_cpu_info()['flags']):
                has_avx = True
            if has_avx:
                from networkml.algorithms.sos.eval_SoSModel import eval_pcap
                abnormality = eval_pcap(str(fi), self.conf_labels,
                                        self.time_const, label=labels[0],
                                        rnn_size=self.rnn_size,
                                        model_path=self.model_path,
                                        model_type=algorithm)
            else:
                self.logger.warning(
                    "Can't run abnormality detection because this CPU doesn't support AVX"
                )

            prev_s = self.common.get_address_info(source_mac, timestamp)
            decision = self.common.basic_decision(key, source_mac, prev_s,
                                                  timestamp, labels, confs,
                                                  abnormality)
            ## Attach source info to whichever key basic_decision used
            if key in decision:
                decision[key]['source_ip'] = capture_ip_source
                decision[key]['source_mac'] = source_mac
            elif source_mac in decision:
                decision[source_mac]['source_ip'] = capture_ip_source
                decision[source_mac]['source_mac'] = source_mac
            self.logger.debug('Created message')
            ## Log the top three label predictions with confidences
            for i in range(3):
                self.logger.info(labels[i] + ' : ' + str(round(confs[i], 3)))

            # update Redis with decision
            if self.common.use_redis:
                redis_decision = {}
                for k in decision:
                    redis_decision[k] = str(decision[k])
                try:
                    self.common.r.hmset(r_key, redis_decision)
                except Exception as e:  # pragma: no cover
                    self.logger.error(
                        'Failed to update keys in Redis because: {0}'.
                        format(str(e)))

            # Get json message
            uid = os.getenv('id', 'None')
            file_path = os.getenv('file_path', 'None')
            message = {
                'id': uid,
                'type': 'metadata',
                'file_path': file_path,
                'data': decision,
                'results': {
                    'tool': 'networkml',
                    'version': networkml.__version__
                }
            }
            message['data']['pcap'] = base_pcap
            message = json.dumps(message)
            self.logger.info('Message: ' + message)
            if self.common.use_rabbit:
                self.common.channel.basic_publish(
                    exchange=self.common.exchange,
                    routing_key=self.common.routing_key,
                    body=message,
                    properties=pika.BasicProperties(delivery_mode=2, ))

    ## After all files: publish a final empty message, then close the
    ## rabbit connection (placement after the loop inferred -- confirm)
    uid = os.getenv('id', 'None')
    file_path = os.getenv('file_path', 'None')
    message = {
        'id': uid,
        'type': 'metadata',
        'file_path': file_path,
        'data': '',
        'results': {
            'tool': 'networkml',
            'version': networkml.__version__
        }
    }
    message = json.dumps(message)
    if self.common.use_rabbit:
        self.common.channel.basic_publish(
            exchange=self.common.exchange,
            routing_key=self.common.routing_key,
            body=message,
            properties=pika.BasicProperties(delivery_mode=2, ))
        try:
            self.common.connection.close()
        except Exception as e:  # pragma: no cover
            self.logger.error(
                'Unable to close rabbit connection because: {0}'.format(
                    str(e)))
    return