key = None

# Extra check in case running the first time: only proceed for captures that
# are not filed under 'miscellaneous' and whose stored key address is either
# unknown or matches the source address.
if (split_path[-1] != 'miscellaneous'
        and (key_address == source_ip or key_address is None)):
    # Initialize and load the model; argv[2] overrides the default path
    if len(sys.argv) > 2:
        load_path = sys.argv[2]
    else:
        load_path = os.path.join('models', 'RandomForestModel.pkl')

    # Compute model hash (MD5 hex digest of the serialized model file)
    with open(load_path, 'rb') as handle:
        model_hash = hashlib.md5(handle.read()).hexdigest()

    model = RandomForestModel(duration=None)
    model.load(load_path)
    logger.debug("Loaded model from %s", load_path)

    # Get representations from the model; note that source_ip is rebound to
    # the value returned by the model.
    reps, source_ip, timestamps, preds, others = model.get_representation(
        pcap_path, source_ip=source_ip, mean=False)
    if preds is not None:
        logger.debug("Generating predictions")
        # Fold the new representations into the previously stored state
        last_update, prev_rep = get_previous_state(source_ip, timestamps[0])
        _, mean_rep = average_representation(
            reps,
            timestamps,
            prev_representation=prev_rep,
            last_update=last_update,
        )
data directory specified by the first argument. The model is saved to the location specified by the second argument. ''' import sys import json from utils.RandomForestModel import RandomForestModel if __name__ == '__main__': # Load model params from config with open('config.json') as config_file: config = json.load(config_file) duration = config['duration'] labels = config['labels'] # Get the data directory if len(sys.argv) < 2: data_dir = "/pcaps" else: data_dir = sys.argv[1] # Initialize the model model = RandomForestModel(duration=duration, labels=labels) # Train the model model.train(data_dir) # Save the model to the specified path if len(sys.argv) == 3: save_path = sys.argv[2] else: save_path = "/models/RandomForestModel.pkl" model.save(save_path)
def test_RandomForestModel():
    """Smoke test: building a RandomForestModel from the single positional
    argument 15 must not raise."""
    model_under_test = RandomForestModel(15)
logger = logging.getLogger(__name__)

# Optional positional arguments, in order: data directory, path of the model
# to load, and save path. Each falls back to its default when omitted.
data_dir = sys.argv[1] if len(sys.argv) >= 2 else "/pcaps"

# Load model from specified path
load_path = (
    sys.argv[2]
    if len(sys.argv) > 2
    else os.path.join('models', 'RandomForestModel.pkl')
)
save_path = sys.argv[3] if len(sys.argv) > 3 else "/models/RandomForestModel.pkl"

model = RandomForestModel(duration=None, hidden_size=None)
logger.info("Loading model from %s", load_path)
model.load(load_path)

# Initialize results dictionary, seeded with the labels of the loaded model
results = {'labels': model.labels}

# Get the true label assignments
logger.info("Getting label assignments")
with open(os.path.join(data_dir, 'label_assignments.json')) as handle:
    label_assignments = json.load(handle)

# Walk through testing directory and get all the pcaps
logger.info("Getting pcaps")
pcaps = []
def eval_rnn(pcap, input_label=None):
    '''
    Evaluate the RNN model on a single pcap.

    Loads the RandomForestModel from models/RandomForestModel.pkl, obtains
    representations and predictions for ``pcap``, builds a (label, weight)
    list and feeds every cleaned session through the AbnormalDetector loaded
    from models/AbnormalRNN, printing each session key with ``score[0, 0]``.

    Args:
        pcap: the pcap handed to ``model.get_representation``.
        input_label: when given, this label gets weight 1 and every other
            label weight 0; when None, weights come from the model's
            predictions (0 for labels without a prediction).

    Returns:
        None. Results are printed; nothing is evaluated when the model
        yields no predictions.
    '''
    # NOTE(review): logger, labels and model_hash are not defined in this
    # function; presumably they are module-level names -- confirm.
    load_path = os.path.join('models', 'RandomForestModel.pkl')
    model = RandomForestModel(duration=None)
    model.load(load_path)

    # Get representations from the model. Use the ``pcap`` argument rather
    # than the free name ``pcap_path`` so the function evaluates what it is
    # asked to evaluate.
    reps, source_ip, timestamps, preds, others = model.get_representation(
        pcap, source_ip=None, mean=False)

    # Without predictions there is no mean representation to classify, so
    # bail out instead of reading an unbound ``mean_preds`` further down.
    if preds is None:
        logger.debug("No predictions for %s; skipping RNN evaluation", pcap)
        return

    logger.debug("Generating predictions")
    last_update, prev_rep = get_previous_state(source_ip, timestamps[0])
    _, mean_rep = average_representation(
        reps,
        timestamps,
        prev_representation=prev_rep,
        last_update=last_update,
    )
    mean_preds = model.classify_representation(mean_rep)
    if len(sys.argv) > 2:
        for p in mean_preds:
            logger.debug(p)

    # Update the stored representation (called for its side effect only)
    if reps is not None and is_private(source_ip):
        logger.debug("Updating stored data")
        update_data(source_ip, reps, timestamps, preds, others, model_hash)

    # Get the sessions that the model looked at and clean them
    clean_sessions, _ = clean_session_dict(model.sessions, source_address=None)

    # Build the (label, weight) input for the RNN
    if input_label is None:
        pred_labels = {pred[0]: pred[1] for pred in mean_preds}
        L_in = [(label, pred_labels.get(label, 0)) for label in labels]
    else:
        L_in = [(label, 0) for label in labels if label != input_label]
        L_in.append((input_label, 1))

    # Use the RNN model to compute abnormality scores
    rnnmodel = AbnormalDetector(num_labels=len(labels))
    rnnpath = os.path.join('models', 'AbnormalRNN')
    rnnmodel.load(rnnpath)

    for session_dict in clean_sessions:
        for k, session in session_dict.items():
            X, L = create_inputs(L_in, session, 116)
            score = rnnmodel.get_output(X, L)
            print(k, score[0, 0])