def main(self):
    """Classify a pcap with the stored RandomForest model and publish the result.

    Reads the pcap path from ``sys.argv[1]`` (default ``/pcaps/eval.pcap``)
    and an optional model path from ``sys.argv[2]`` (default
    ``/models/RandomForestModel.pkl``). Extracts a key from the pcap filename,
    computes per-session representations and predictions with the model,
    updates the stored representation in Redis, builds a decision message,
    and publishes it over RabbitMQ unless ``self.skip_rabbit`` is set.

    Side effects: reads the model file, writes to Redis (``self.r``), and
    publishes to RabbitMQ via ``self.common``. Returns ``None``.
    """
    # Path to the pcap to get the update from
    if len(sys.argv) < 2:
        pcap_path = '/pcaps/eval.pcap'
    else:
        pcap_path = sys.argv[1]

    source_mac = None
    key = None
    # String sentinel kept so split_path[-1] below is safe even when the
    # filename parse fails ('None' != 'miscellaneous').
    split_path = 'None'
    try:
        # Expected filename shape: <prefix>_<key>-...<rest>.<ext>
        # e.g. "capture_abcd-miscellaneous.pcap" -> key 'abcd'
        split_path = os.path.split(pcap_path)[-1]
        split_path = split_path.split('.')
        split_path = split_path[0].split('-')
        key = split_path[0].split('_')[1]
    except Exception as e:
        self.logger.debug('Could not get key because %s', str(e))

    # Ignore miscellaneous capture files entirely
    if split_path[-1] != 'miscellaneous':
        # Initialize and load the model
        if len(sys.argv) > 2:
            load_path = sys.argv[2]
        else:
            load_path = '/models/RandomForestModel.pkl'

        # Compute model hash so stored representations can be tied to the
        # exact model file that produced them
        with open(load_path, 'rb') as handle:
            model_hash = hashlib.md5(handle.read()).hexdigest()

        model = Model(duration=None, hidden_size=None,
                      model_type='RandomForest')
        model.load(load_path)
        self.logger.debug('Loaded model from %s', load_path)

        # Get representations from the model
        reps, source_mac, timestamps, preds, others = model.get_representation(
            pcap_path, source_ip=source_mac, mean=False)

        if preds is not None:
            self.logger.debug('Generating predictions')
            last_update, prev_rep = self.common.get_previous_state(
                source_mac, timestamps[0])
            _, mean_rep = self.common.average_representation(
                reps,
                timestamps,
                prev_representation=prev_rep,
                last_update=last_update)
            mean_preds = model.classify_representation(mean_rep)
            if len(sys.argv) > 2:
                for p in mean_preds:
                    self.logger.debug(p)

            # Update the stored representation.
            # FIX: r_key was previously unbound when reps is None, making the
            # hmset call below raise NameError instead of a meaningful error.
            r_key = None
            if reps is not None:
                self.logger.debug('Updating stored data')
                r_key = self.common.update_data(
                    source_mac, reps, timestamps, preds, others, model_hash)

            # Get the sessions that the model looked at and clean them,
            # inferring the source MAC if we do not have one yet
            sessions = model.sessions
            clean_sessions = []
            inferred_mac = None
            for session_dict in sessions:
                cleaned_sessions, inferred_mac = \
                    clean_session_dict(
                        session_dict,
                        source_address=source_mac
                    )
                clean_sessions.append(cleaned_sessions)

            if source_mac is None:
                source_mac = inferred_mac

            # Make simple decisions based on vector differences and update
            # times
            timestamp = timestamps[0].timestamp()
            labels, confs = zip(*preds)
            # Abnormality scoring is currently disabled; kept for reference.
            abnormality = 0
            # abnormality = eval_pcap(
            #     pcap_path, self.conf_labels, self.time_const,
            #     label=labels[0], rnn_size=self.rnn_size,
            #     model_path='/models/RandomForestModel.pkl',
            #     model_type='RandomForest')
            prev_s = self.common.get_address_info(source_mac, timestamp)
            decision = self.common.basic_decision(
                key, source_mac, prev_s, timestamp, labels, confs, abnormality)
            self.logger.debug('Created message')
            # FIX: guard against fewer than 3 predictions (was range(3),
            # which would raise IndexError on short prediction lists)
            for i in range(min(3, len(labels))):
                self.logger.info(labels[i] + ' : ' + str(round(confs[i], 3)))

            # Update Redis with decision.
            # NOTE(review): hmset is deprecated in redis-py 3.x in favor of
            # hset(key, mapping=...) — confirm client version before migrating.
            try:
                self.r.hmset(r_key, decision)
            except Exception as e:
                self.logger.error(
                    'Failed to update keys in Redis because: {0}'.format(
                        str(e)))

            # Get json message and publish it
            message = json.dumps(decision)
            self.logger.info('Message: ' + message)
            if not self.skip_rabbit:
                self.common.connect_rabbit()
                self.common.channel.basic_publish(
                    exchange=self.common.exchange,
                    routing_key=self.common.routing_key,
                    body=message)
        else:
            # Not enough sessions to classify: publish an invalid marker
            message = {}
            message[key] = {'valid': False}
            message = json.dumps(message)
            self.logger.info('Not enough sessions in pcap')
            if not self.skip_rabbit:
                self.common.connect_rabbit()
                self.common.channel.basic_publish(
                    exchange=self.common.exchange,
                    routing_key=self.common.routing_key,
                    body=message)

    # Close the RabbitMQ connection once at the end (best-effort).
    # NOTE(review): original indentation was lost; this block is assumed to
    # be at method level, outside the miscellaneous-file guard — confirm.
    if not self.skip_rabbit:
        try:
            self.common.connection.close()
        except Exception as e:
            self.logger.error(
                'Unable to close rabbit connection because: {0}'.format(
                    str(e)))
    return
single_result['label'] = true_label logger.info('Reading ' + pcap_file + ' as ' + true_label) # Get the internal representations representations, _, _, p, _ = model.get_representation(pcap, mean=False) if representations is not None: file_size += os.path.getsize(pcap) file_num += 1 length = representations.shape[0] time_slices += length single_result['aggregate'] = p individual_dict = {} # Classify each slice logger.info('Computing classifications by slice') for i in range(length): p_r = model.classify_representation(representations[i]) individual_dict[i] = p_r single_result['individual'] = individual_dict results[pcap] = single_result tock = time.clock() # Save results to path specified by third argument if len(sys.argv) >= 4: with open(save_path, 'w') as output_file: json.dump(results, output_file) logger.info('-' * 80) logger.info('Results with unknowns') logger.info('-' * 80) calc_f1(results, logger) logger.info('-' * 80) logger.info('Results forcing decisions')