def test_all_features(self):
    for cur_log_file in self.log_files:
        self.test_ip_sieve.add_log_file(cur_log_file)
        self.test_ip_sieve.parse_log()

    for CurrentFeatureType in Learn2BanFeature.__subclasses__():
        cur_feature_tester = CurrentFeatureType(
            self.test_ip_sieve, self.test_ip_feature_db)
        cur_feature_tester.compute()

    print self.test_ip_feature_db
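# For reference, test_all_features ends by printing the accumulated
# feature dictionary. A hypothetical sketch of its shape is below: each
# IP maps to a dict of feature index -> computed value. The IPs,
# indices and values are made up for illustration; the real keys depend
# on the parsed logs and the registered feature classes.
example_ip_feature_db = {
    "203.0.113.7": {1: 4.2, 2: 0.75, 3: 12},    # feature_index -> value
    "198.51.100.9": {1: 0.3, 2: 0.10, 3: 2},
}
print example_ip_feature_db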
def _build_available_feature_list(self):
    """
    Search all the available feature classes and store them in a
    dictionary indexed by their names
    """
    self._available_features = {}
    self._feature_list = list()
    for CurrentFeatureType in Learn2BanFeature.__subclasses__():
        self._available_features[
            CurrentFeatureType.__name__] = CurrentFeatureType
        self._feature_list.append(CurrentFeatureType.__name__)
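# The discovery above relies on Learn2BanFeature.__subclasses__():
# merely defining (importing) a subclass registers it, with no explicit
# registration step. A minimal, self-contained sketch of the mechanism,
# with made-up class names:
class _DemoBase(object):
    pass

class _DemoFeatureA(_DemoBase):
    pass

class _DemoFeatureB(_DemoBase):
    pass

# prints ['_DemoFeatureA', '_DemoFeatureB']
print [cls.__name__ for cls in _DemoBase.__subclasses__()]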
def gather_all_features(self, log_files):
    """
    Gathers all features from the given log files.

    INPUT:
        log_files: the log files to parse and extract features from.
    """
    for cur_log_file in log_files:
        self.ip_sieve.add_log_file(cur_log_file)
        self.ip_sieve.parse_log()

    for CurrentFeatureType in Learn2BanFeature.__subclasses__():
        cur_feature_tester = CurrentFeatureType(
            self.ip_sieve, self.ip_feature_db)
        cur_feature_tester.compute()

    return self.ip_feature_db
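# A hypothetical usage sketch for gather_all_features; "analyzer"
# stands in for whatever instance carries ip_sieve and ip_feature_db,
# and the log paths are made up:
log_files = ["/var/log/nginx/access.log.1", "/var/log/nginx/access.log.2"]
ip_feature_db = analyzer.gather_all_features(log_files)
for ip, features in ip_feature_db.items():
    print ip, features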
def _process_logs(self):
    """
    Get the log names from the db and gather all features.
    """
    #this is not an OOP way of retrieving the logs, but it keeps
    #db access out of every class besides l2btools
    cur_experiment_logs = self.l2btools.retrieve_experiment_logs(self.id)

    #if there is no log associated with this experiment then there is
    #nothing to do
    if len(cur_experiment_logs) == 0:
        logging.info("Giving up on experiment %i with no training log" % self.expr_dict['id'])
        return

    #the log id needs to be sent to the trainer so that the trainer
    #knows which regex is detecting the bots for which log
    self.trainer.add_malicious_history_log_files([
        (cur_log_info['log_id'], cur_log_info['file_name'])
        for cur_log_info in cur_experiment_logs
    ])

    #extract the filenames to compute the IP features
    log_filenames = tuple(cur_log['file_name'] for cur_log in cur_experiment_logs)

    #at this stage it is only a preliminary list; we might lose features
    #due to 0 variance
    self._active_feature_list = []

    #do a dry run on all features just to gather the indices of all
    #available features
    for CurrentFeatureType in Learn2BanFeature.__subclasses__():
        cur_feature_tester = CurrentFeatureType(self.ip_sieve, self.ip_feature_db)
        self._active_feature_list.append(cur_feature_tester._FEATURE_INDEX)

    #in theory it might be more memory efficient to crunch the logs one
    #by one, but Python's memory management gains us little either way
    for cur_log_file in log_filenames:
        try:
            self.ip_sieve.add_log_file(cur_log_file)
            self.ip_sieve.parse_log()
        except IOError:
            print "Unable to read ", cur_log_file, "skipping..."

    for CurrentFeatureType in Learn2BanFeature.__subclasses__():
        cur_feature_tester = CurrentFeatureType(self.ip_sieve, self.ip_feature_db)
        logging.info("Computing feature %i..." % cur_feature_tester._FEATURE_INDEX)
        cur_feature_tester.compute()

    #there is a known memory problem here: Python does not release the
    #parsed records on its own, so drop the references explicitly
    del self.ip_sieve._ordered_records
    del self.ip_sieve

    self.trainer.add_to_sample(self.ip_feature_db)

    #store the non-normalized vectors in a json file; the keys are
    #stringified because json only accepts string keys
    jsonized_ip_feature_db = {}
    for k, v in self.ip_feature_db.items():
        jsonized_ip_feature_db[str(k)] = v

    import json
    with open(self.base_analyse_log_file + ".prenormal_ip_feature_db.json", "w") as ip_feature_file:
        json.dump(jsonized_ip_feature_db, ip_feature_file)

    del self.ip_feature_db
    del jsonized_ip_feature_db

    #normalise the training set; normalisation should happen after the
    #whole sample has been gathered
    self.trainer.normalise(self.expr_dict['norm_mode'])
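# The str(k) conversion above matters because json.dump only accepts
# string keys; tuple or object keys raise TypeError. A minimal
# demonstration with a made-up tuple key:
import json

try:
    json.dumps({("10.0.0.1", 80): [1, 2]})
except TypeError as err:
    print "json refuses non-string keys:", err

# stringifying the keys first, as _process_logs does, works:
print json.dumps(dict((str(k), v) for k, v in {("10.0.0.1", 80): [1, 2]}.items()))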
def process_incident(self, incident):
    """
    Get the incident time from the db and gather all features.

    INPUT:
        incident: the incident record to process.
    """
    if incident is None:
        return

    ip_sieve = IPSieve()
    ip_records = {}
    banned_ips = []

    if (incident["file_name"] is None) or (len(incident["file_name"]) == 0):
        # get the logs from ES
        banned_ips = self.es_handler.get_banjax(
            incident['start'], incident['stop'], incident['target'])
        ats_records = self.es_handler.get(
            incident['start'], incident['stop'], incident['target'])

        # calculate the IP dictionary with ATS records
        ip_records = ip_sieve.process_ats_records(ats_records)
    else:
        # read the sessions from the log file
        ip_sieve.add_log_file(incident["file_name"])
        ip_records = ip_sieve.parse_log("nginx")

    # calculate features
    ip_feature_db = {}

    #at this stage it is only a preliminary list; we might lose features
    #due to 0 variance
    self._active_feature_list = []

    #do a dry run on all features just to gather the indices of all
    #available features
    for CurrentFeature in Learn2BanFeature.__subclasses__():
        f = CurrentFeature(ip_records, ip_feature_db)
        self._active_feature_list.append(f._FEATURE_INDEX)

    for CurrentFeature in Learn2BanFeature.__subclasses__():
        f = CurrentFeature(ip_records, ip_feature_db)
        print "Computing feature %i..." % f._FEATURE_INDEX
        f.compute()

    # post process the features
    ip_feature_db = self.bothound_tools.post_process(ip_feature_db)

    # delete the old sessions for this incident
    self.bothound_tools.delete_sessions(incident['id'])

    self.bothound_tools.add_sessions(incident['id'], ip_feature_db, banned_ips)
    self.bothound_tools.set_incident_process(incident['id'], False)
    print "Incident {} processed.".format(incident['id'])
    return ip_feature_db
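# A hypothetical call to process_incident; the field names mirror those
# the method reads, but the values, the timestamp format and the
# "processor" instance are all assumptions for illustration:
incident = {
    "id": 42,
    "start": "2016-01-01 00:00:00",
    "stop": "2016-01-01 06:00:00",
    "target": "example.com",
    "file_name": None,   # None/empty means: fetch the records from ES
}
features = processor.process_incident(incident)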