def run(start_time: datetime, end_time: datetime, window_size: timedelta):
    """Sweep [start_time, end_time) in window_size steps, feeding each window
    through a src/dst-address detector and an internal/external detector,
    then save the accumulated divergence arrays to disk.

    Relies on module-level connection settings (server, index, username,
    password) and the int_ext_filter helper defined elsewhere in the project.
    """
    eq = ElasticQuery(server, index, username, password)
    pool = DetectorPool()

    src_dst = Detector(name='src_dst', n_seeds=1, n_bins=1024,
                       features=['src_addr', 'dst_addr'], filt=None,
                       thresh=10, flag_th=1)
    int_ext = Detector(name='int_ext', n_seeds=1, n_bins=1024,
                       features=['internal', 'external'], filt=int_ext_filter,
                       thresh=10, flag_th=1)
    pool.add_detector(src_dst)
    pool.add_detector(int_ext)

    src_dst_divs = []
    int_ext_divs = []

    current_time = start_time
    while current_time < end_time:
        frame = eq.query_time(current_time, window_size)
        # Detection output is intentionally discarded; only the per-window
        # divergence values are collected here.
        pool.run_next_timestep(frame)
        src_dst_divs.append(src_dst.get_divs())
        int_ext_divs.append(int_ext.get_divs())
        current_time += window_size

    # Merge the per-window divergence arrays into one array per detector
    # and persist them (paths encode the 15-min window / 1024-bin config).
    np.save('output/src_dst_divs_15_1024', np.concatenate(src_dst_divs))
    np.save('output/int_ext_divs_15_1024', np.concatenate(int_ext_divs))
def run(start_time: datetime, end_time: datetime, window_size: timedelta):
    """Sweep [start_time, end_time) in window_size steps, running four
    8-seed "two_step" detectors (external, internal, src_addr, dst_addr)
    over each window, then pickle the detection frames, the detection list,
    and the per-detector divergence histories to the output/ directory.

    Relies on module-level connection settings (server, index, username,
    password), the int_ext_filter helper, detection_list_to_df, and logger.
    """
    eq = ElasticQuery(server, index, username, password)
    pool = DetectorPool()

    detectors = [
        Detector(name='ext_4_sigma', n_seeds=8, n_bins=1024,
                 features=['external'], filt=int_ext_filter,
                 thresh=0.36, flag_th=6, detection_rule='two_step'),
        Detector(name='int_4_sigma', n_seeds=8, n_bins=1024,
                 features=['internal'], filt=int_ext_filter,
                 thresh=0.44, flag_th=6, detection_rule='two_step'),
        Detector(name='src_4_sigma', n_seeds=8, n_bins=1024,
                 features=['src_addr'], filt=None,
                 thresh=0.32, flag_th=6, detection_rule='two_step'),
        Detector(name='dst_4_sigma', n_seeds=8, n_bins=1024,
                 features=['dst_addr'], filt=None,
                 thresh=0.32, flag_th=6, detection_rule='two_step'),
    ]

    # Register every detector with the pool and prepare a per-name
    # divergence accumulator.
    name_list = []
    all_divs = {}
    for det in detectors:
        pool.add_detector(det)
        name_list.append(det.name)
        all_divs[det.name] = []

    detections = []
    detection_frames = []
    # NOTE(review): ext_divs tracks the first detector's divs via a second
    # get_divs() call each step, duplicating all_divs[detectors[0].name] —
    # kept as-is to preserve behavior.
    divs_detector = detectors[0]
    ext_divs = []

    # Main operation loop: one query + one detection pass per window.
    current_time = start_time
    while current_time < end_time:
        df = eq.query_time(current_time, window_size)
        current_time += window_size

        results = pool.run_next_timestep(df)

        # results[0]: detections, results[1]: detection frame — per pool API.
        detections.append(results[0])
        detection_frames.append(results[1])
        logger.debug(' '.join(str(len(part)) for part in results))

        for det in detectors:
            all_divs[det.name].append(det.get_divs())
        ext_divs.append(divs_detector.get_divs())

    full_detections = pd.concat(detection_frames)
    # Window size expressed in whole minutes for the output file names.
    window_size_fmt = int(window_size.total_seconds() / 60)

    pd.to_pickle(
        full_detections,
        'output/detection_frame_{}-{}_{}.pkl'.format(
            start_time.day, start_time.month, window_size_fmt))
    pd.to_pickle(
        detection_list_to_df(detections),
        'output/detections_{}-{}_{}.pkl'.format(
            start_time.day, start_time.month, window_size_fmt))

    with open(
            'output/ext_divs_{}-{}_{}.pkl'.format(
                start_time.day, start_time.month, window_size_fmt),
            'wb') as fp:
        pickle.dump(ext_divs, fp, protocol=pickle.HIGHEST_PROTOCOL)

    for det in detectors:
        with open(
                'output/divs_{}_{}-{}_{}.pkl'.format(
                    det.name, start_time.day, start_time.month,
                    window_size_fmt),
                'wb') as fp:
            pickle.dump(all_divs[det.name], fp,
                        protocol=pickle.HIGHEST_PROTOCOL)