class Main:
    """ Main.

    Holds the network settings, the frame handlers and the trace manager, and
    runs every trace returned by the trace manager against the configured
    access point, marking each one as SUCCESS or FAILURE.
    """

    def __init__(self):
        """ Initializer.

        Selects the network settings, configures the wireless interface,
        installs the SIGALRM timeout handler and wires up the logger, the
        frame handlers, the cryptographic handlers and the trace manager.
        """

        # Network Settings.
        self.iface = None
        self.addr1 = None  # Destination
        self.addr2 = None  # Source
        self.addr3 = None  # Basic Service Set ID (BSSID)
        self.ssid = None
        self.channel = None
        self.wepKey = None
        self.passphrase = None
        self.arpIpSource = None
        self.arpIpDestination = None
        self.__initNetworkSettings()

        # Initialize the Operating System.
        self.__initOperatingSystem()

        # Signal Settings for the timeout interval (in seconds).
        signal.signal(signal.SIGALRM, self.__signal_handler)
        self.signalInterval = 0.4

        # Logger, and Handlers for management, EAPoL and ICMP frames.
        self.logger = Logger(filename='log.txt', terminal=True)
        self.handleMgmt = HandleManagement(self.logger, self.iface, self.addr1, self.addr2, self.addr3, self.ssid, self.channel)
        self.handleEAPoL = HandleEAPoL(self.logger, self.iface, self.addr1, self.addr2, self.addr3, self.ssid)
        self.handleARP = HandleARP(self.logger, self.iface, self.addr1, self.addr2, self.addr3, self.arpIpSource, self.arpIpDestination)

        # Set the keys and passphrases used in the above handlers.
        self.handleMgmt.setWEPKey(self.wepKey)
        self.handleEAPoL.setPassphrase(self.passphrase)

        # Cryptographic handlers for encapsulation and decapsulation.
        self.handleWEP = HandleWEP()
        self.handleTKIP = HandleTKIP()
        self.handleAES = HandleAES()

        # Set the cryptographic handlers.
        self.handleMgmt.setCryptographicHandlers(wep=self.handleWEP)
        self.handleEAPoL.setCryptographicHandlers(tkip=self.handleTKIP, aes=self.handleAES)
        self.handleARP.setCryptographicHandlers(wep=self.handleWEP, tkip=self.handleTKIP, aes=self.handleAES)

        # Traces holding the list of all traces, and its Trace Manager.
        self.traceManager = TraceManager(self.handleMgmt, self.handleEAPoL)
        self.traces = self.traceManager.getTraces()

        # Trace and helpers, holding information about the trace under test.
        self.trace = None
        self.tracePosition = None
        self.traceBeaconed = False
        self.traceFinished = False

    def printResults(self):
        """ Print the obtained results. """
        self.traceManager.printResults()

    def __initOperatingSystem(self):
        """ Initialize requirements in the Operating System (OS).
        NOTE: It might be necessary to restart the interface with:
        ifconfig self.iface down; ifconfig self.iface up
        """

        # Set the requested channel on the interface. The command is built from
        # the hard-coded settings selected in __initNetworkSettings.
        os.system('iwconfig ' + self.iface + ' channel ' + str(self.channel))

    def __initNetworkSettings(self):
        """ Initialize the network settings; select the simulated or one of the
        physical routers.
        """
        # Swap the two calls below to run against the simulated network.
        #self.__setSettingsSimulation()
        self.__setSettingsForSomeRouter()

    def __setSettingsSimulation(self):
        """ Network settings for the simulated network. """
        self.iface = 'wlan1'
        self.addr1 = '02:00:00:00:00:00'  # Destination MAC
        self.addr2 = '02:00:00:00:01:00'  # Source MAC
        self.addr3 = '02:00:00:00:00:00'  # Basic Service Set ID (BSSID)
        self.ssid = 'TEST_NETWORK'
        self.channel = 1
        self.wepKey = 'abcde'
        self.passphrase = 'abcdefgh'
        self.arpIpSource = '192.168.1.2'  # Within the same subnet of the destination.
        self.arpIpDestination = '192.168.1.1'

    def __setSettingsForSomeRouter(self):
        """ Network settings for a physical router.
        NOTE: No WEP key is set here, so self.wepKey keeps its initial value.
        """
        self.iface = 'wlan0'
        self.addr1 = 'aa:bb:cc:dd:ee:ff'  # Destination MAC
        self.addr2 = 'ff:ee:dd:cc:bb:aa'  # Source MAC
        self.addr3 = self.addr1  # Basic Service Set ID (BSSID)
        self.ssid = 'TEST_NETWORK'
        self.channel = 1
        self.passphrase = 'abcdefgh'
        self.arpIpSource = '192.168.1.2'  # Within the same subnet of the destination.
        self.arpIpDestination = '192.168.1.1'

    def __signal_handler(self, signum, frame):
        """ Handler for signal events; turns the signal into a 'Timeout.'
        exception so that it is caught by the handler in __traceRun.
        """
        raise Exception("Timeout.")

    def __isPacketIntendedForUs(self, packet):
        """ Check if we are the intended receivers of the packet by comparing
        the MAC addresses in the given packet against our own MAC addresses.
        Returns True only if both addresses match, False otherwise.
        """

        # The packet destination address (addr1) must equal our source address (addr2).
        if packet.addr1 != self.addr2:
            return False

        # The packet source address (addr2) must equal our destination address (addr1).
        if packet.addr2 != self.addr1:
            return False

        return True

    def run(self):
        """ Run all available traces.
        Every entry of self.traces is indexed as (trace, validationType).
        Raises an AssertionError if self.traces is None.
        """
        assert( self.traces is not None ), \
            'There are no traces available to be executed.'

        printTerminalLine('=')
        for x, trace in enumerate(self.traces):
            self.__traceRun(trace[0], trace[1])
            # Print a separator between traces, but not after the last one.
            if x < len(self.traces) - 1:
                printTerminalLine('-')

    def __traceRun(self, trace, validationType):
        """ Run the given trace, from now on known as the trace under test.
        Note: The current implementation assumes that the first message in the
        trace under test waits for a message to be received. Most commonly this
        will be a beacon frame from the access point.
        Raises an AssertionError if the trace is None or an empty list.
        """
        # Compare by value: 'trace is not []' is an identity test against a
        # brand new list and is therefore always true.
        assert( trace is not None and trace != [] ), \
            'There is no trace given, or it is empty, and can therefore not be executed.'

        self.trace = trace
        self.tracePosition = 0
        self.traceBeaconed = False
        self.traceFinished = False

        # Run the trace and upon completion validate the connection. If either of them
        # fails the exception handler will catch the error message and mark the trace as
        # failed. If the connection is validated positively, the trace is marked as
        # success.
        # FIXME: http://stackoverflow.com/questions/1112343/how-do-i-capture-sigint-in-python
        try:
            self.logger.log(self.logger.ACTION, 'Starting Trace...')
            sniff(iface=self.iface, lfilter=self.__traceFilter, stop_filter=self.__traceStopCondition, prn=self.__traceHandler)
            signal.setitimer(signal.ITIMER_REAL, 0.0)  # Reset the timer.
            if validationType is None:
                raise Exception('No Validation Type was given.')
            sleep(100.00 / 1000.00)  # Sleep for 100ms before validation.
            self.__traceValidateConnection(validationType)
            self.traceManager.markTrace(type='SUCCESS', trace=self.trace, validation=validationType)

        # Catch exceptions; log the message and mark the trace as failed.
        # The 'as' form is valid on Python 2.6+ as well as Python 3.
        except Exception as message:
            self.logger.log(self.logger.EXCEPTION, str(message))
            self.traceManager.markTrace(type='FAILURE', trace=self.trace, validation=validationType)

        # Reset the connection state with the access point.
        self.__traceReset()
# Collect every distinct categorical value found in the raw crimes CSV and
# serialize each collection to its own JSON file.
logger.time_log('Starting Category Transformation.')

# newline='' lets the csv module do its own newline handling, so that newlines
# embedded in quoted fields are parsed correctly.
with open('data_scratch/Crimes_-_2001_to_present.csv', newline='') as input_file:
    reader = csv.reader(input_file)
    for line in reader:
        # The first row read is skipped as the header.
        # NOTE(review): this relies on raw_record_count being initialised to
        # -1 before this section — confirm against the code above.
        raw_record_count += 1
        if raw_record_count == 0:
            continue

        iucr = line[4]
        primary_type = line[5]  # Renamed so the builtin `type` is not shadowed.
        description = line[6]
        location = line[7]
        fbi_code = line[14]

        # The dictionaries are used as sets; the stored value is a placeholder.
        iucr_codes[iucr] = 1
        type_codes[primary_type] = 1
        description_codes[description] = 1
        location_codes[location] = 1
        fbi_codes[fbi_code] = 1

logger.time_log('Category Transformation Complete.\n')
logger.log('Processed %s total records' % raw_record_count)

serialize_codes(iucr_codes, 'data_scratch/iucr_codes.json', logger)
serialize_codes(type_codes, 'data_scratch/type_codes.json', logger)
serialize_codes(description_codes, 'data_scratch/description_codes.json', logger)
serialize_codes(location_codes, 'data_scratch/location_codes.json', logger)
serialize_codes(fbi_codes, 'data_scratch/fbi_codes.json', logger)
longitude = None if (violation_code is not None and issue_month is not None and issue_weekday is not None and issue_hour is not None and issue_time is not None and car_state is not None and latitude is not None and longitude is not None): # With required source fields scrubbed, it is time to engineer a couple of features. out_of_state = 0 if car_state != 5: out_of_state = 1 luxury_make = luxury_make_types[car_make] domestic_make = domestic_make_types[car_make] writer.writerow([ violation_code, issue_month, issue_weekday, issue_hour, car_state, car_make, car_color, latitude, longitude, out_of_state, luxury_make, domestic_make ]) processed_records += 1 logger.time_log('Data Pre-Processing Complete.\n') logger.log(' Total Records: %s' % raw_record_count) logger.log('Processed Records: %s\n' % processed_records) logger.log('Records with missing or incomplete data:') for column in column_names: logger.log("%s: %s" % (column, missing_data[column])) logger.close()
# Draw a reproducible random sample of the cleaned crimes data set.
sample_seed = 1029
sample_rate = 0.10
raw_record_count = -1  # Starts at -1 so the header row is not counted.
processed_records = 0

# A fixed seed makes the same rows get selected on every run.
random.seed(sample_seed)

logger.time_log('Starting Data Sampling.')

# newline='' on both files lets the csv module do its own newline handling.
with open('data_scratch/cleaned_crimes.csv', newline='') as input_file, \
        open('data_scratch/sampled_cleaned_crimes.csv', 'w', newline='') as output_file:
    reader = csv.reader(input_file)
    writer = csv.writer(output_file)
    writer.writerow(column_names)

    for line in reader:
        # Skip the input header; the output header was written above.
        raw_record_count += 1
        if raw_record_count == 0:
            continue

        # Keep each data row with probability sample_rate.
        if random.random() < sample_rate:
            writer.writerow(line)
            processed_records += 1

logger.time_log('Data Sampling Complete.\n')
logger.log('Total Records: %s' % raw_record_count)
logger.log('Sampled Records: %s' % processed_records)
with open('model/experiment/output/%s_predict_proba.p' % predictions, 'rb') as file: frame = pickle.load(file) optimal_threshold, optimal_f1_score = find_optimal_f1_threshold(frame) y_actual = frame.y_actual y_predict_proba = frame.y_predict y_predict = (y_predict_proba[:, 1] >= optimal_threshold).astype(bool) score = [ predictions, optimal_threshold, optimal_f1_score, accuracy_score(y_actual, y_predict), precision_score(y_actual, y_predict), recall_score(y_actual, y_predict) ] scores.append(score) print(score) score_frame = pd.DataFrame(scores, columns=[ 'model', 'threshold', 'f1', 'accuracy', 'precision', 'recall' ]) logger.log("Sorted For Accuracy:") score_frame.sort_values('accuracy', ascending=False, inplace=True) logger.log(score_frame) logger.log("Sorted For F1:") score_frame.sort_values('f1', ascending=False, inplace=True) logger.log(score_frame)
year, month, weekday, hour, iucr_codes[iucr], type_codes[type], description_codes[description], location_codes[location], domestic, beat, district, ward, community, fbi_codes[fbi_code], latitude, longitude, index_crime, non_index_crime, violent_crime, property_crime, public_violence, ]) processed_records += 1 logger.time_log('Pre-Processing Complete.\n') logger.log('Total Records: %s' % raw_record_count) logger.log('Processed Records: %s\n' % processed_records) logger.log('Missing Data:') for column in missing_columns.keys(): logger.log(' %s: %s' % (column, missing_columns[column]))
def run_classification_experiment(self,
                                  sample=None,
                                  random_state=None,
                                  test_size=0.20,
                                  multiclass=False,
                                  record_predict_proba=False,
                                  sampling=None,
                                  cv=5,
                                  verbose=True,
                                  transformer=None,
                                  fit_increment=None,
                                  warm_start=False,
                                  max_iters=None,
                                  n_jobs=-1):
    """Train, cross-validate and evaluate ``self.estimator`` on ``self.df``.

    The data is split into a training and a holdout partition. The training
    partition is optionally re-sampled and cross-validated, then used to fit
    the estimator. Predictions on both partitions are evaluated and logged to
    ``'<self.name>.txt'``. Holdout predictions are saved to
    ``'<self.name>_predict.p'``. The fitted estimator is stored in
    ``self.trained_estimator``.

    Args:
        sample: Number of rows to sample from ``self.df``; None uses all rows.
        random_state: Seed forwarded to row sampling, the train/test split,
            shuffling and the cross-validation workers.
        test_size: Holdout size forwarded to ``train_test_split``.
        multiclass: Forwarded to the evaluator.
        record_predict_proba: When true, holdout probabilities are also
            predicted and saved to ``'<self.name>_predict_proba.p'``.
        sampling: Optional re-sampler exposing ``fit_resample(x, y)``.
        cv: Number of stratified folds; None skips cross validation.
        verbose: Forwarded to the batch fit / predict helpers.
        transformer: Optional transformer, fit on the training partition and
            forwarded to the fit / predict helpers.
        fit_increment: When not None the estimator is fit in batches of this
            increment through ``batch_fit_classifier``.
        warm_start: Forwarded to the cross-validation workers.
        max_iters: Number of shuffled passes for incremental fitting; also
            forwarded to the cross-validation workers.
        n_jobs: Number of parallel cross-validation jobs.
    """
    use_project_path()

    logger = Logger('%s.txt' % self.name)
    # The try/finally ensures the log is closed even when a step fails.
    try:
        evaluator = Evaluator(logger)

        data_frame = self.df
        if sample is not None:
            data_frame = data_frame.sample(n=sample, random_state=random_state)

        # random_state is forwarded so that a seeded experiment also gets a
        # reproducible split; the default (None) keeps the random split.
        # NOTE(review): the feature frame still contains the self.target
        # column; presumably the transformer / estimator selects its own
        # feature columns — verify.
        x_train, x_test, y_train, y_test = train_test_split(
            data_frame, data_frame[self.target], test_size=test_size,
            random_state=random_state)

        if transformer is not None:
            logger.time_log('Fitting Transformer...')
            transformer.fit(x_train)
            logger.time_log('Transformer Fit Complete.\n')

        if sampling is not None:
            logger.time_log('Starting Data Re-Sampling...')
            logger.log('Original Training Shape is %s' % Counter(y_train))
            x_new, y_new = sampling.fit_resample(x_train, y_train)
            logger.log('Balanced Training Shape is %s' % Counter(y_new))
            # Rebuild a DataFrame with the original column names.
            if hasattr(x_train, 'columns'):
                x_new = pd.DataFrame(x_new, columns=x_train.columns)
            x_train, y_train = x_new, y_new
            logger.time_log('Re-Sampling Complete.\n')
            logger.time_log('Shuffling Re-Sampled Data.\n')
            x_train, y_train = shuffle(x_train, y_train, random_state=random_state)
            logger.time_log('Shuffling Complete.\n')

        if self.hyper_parameters is not None:
            self.estimator.set_params(**self.hyper_parameters.params)

        if cv is not None:
            # random_state is deliberately not passed: without shuffle=True it
            # has no effect, and current scikit-learn raises a ValueError for
            # that combination. The folds stay unshuffled, as before.
            kfold = StratifiedKFold(n_splits=cv)
            logger.time_log('Cross Validating Model...')
            fold_scores = Parallel(n_jobs=n_jobs, verbose=3)(
                delayed(crossfold_classifier)
                (clone(self.estimator), transformer, x_train, y_train,
                 train_index, test_index, record_predict_proba, verbose,
                 fit_increment, warm_start, max_iters, random_state)
                for train_index, test_index in kfold.split(x_train, y_train))
            logger.time_log('Cross Validation Complete.\n')

        logger.time_log('Training Model...')
        if fit_increment is not None:
            if max_iters is not None:
                for _ in range(max_iters):
                    # NOTE(review): with an integer random_state every pass
                    # gets the same permutation — confirm this is intended.
                    x_iter_train, y_iter_train = shuffle(
                        x_train, y_train, random_state=random_state)
                    batch_fit_classifier(self.estimator,
                                         x_iter_train,
                                         y_iter_train,
                                         transformer=transformer,
                                         increment=fit_increment,
                                         verbose=verbose)
            else:
                batch_fit_classifier(self.estimator,
                                     x_train,
                                     y_train,
                                     transformer=transformer,
                                     increment=fit_increment,
                                     verbose=verbose)
        else:
            if transformer is not None:
                x_train_transformed = transformer.transform(x_train)
                self.estimator.fit(x_train_transformed, y_train)
            else:
                self.estimator.fit(x_train, y_train)
        logger.time_log('Training Complete.\n')

        logger.time_log('Testing Training Partition...')
        y_train_predict = batch_predict(self.estimator,
                                        x_train,
                                        transformer=transformer,
                                        verbose=verbose)
        logger.time_log('Testing Complete.\n')

        train_evaluation_frame = EvaluationFrame(y_train, y_train_predict)

        logger.time_log('Testing Holdout Partition...')
        y_test_predict = batch_predict(self.estimator,
                                       x_test,
                                       transformer=transformer,
                                       verbose=verbose)
        logger.time_log('Testing Complete.\n')

        test_evaluation_frame = EvaluationFrame(y_test, y_test_predict)
        test_evaluation_frame.save('%s_predict.p' % self.name)

        test_proba_evaluation_frame = None
        if record_predict_proba:
            logger.time_log('Testing Holdout Partition (probability)...')
            y_test_predict_proba = batch_predict_proba(self.estimator,
                                                       x_test,
                                                       transformer=transformer,
                                                       verbose=verbose)
            test_proba_evaluation_frame = EvaluationFrame(
                y_test, y_test_predict_proba)
            test_proba_evaluation_frame.save('%s_predict_proba.p' % self.name)
            logger.time_log('Testing Complete.\n')

        if cv is not None:
            evaluator.evaluate_fold_scores(fold_scores)

        evaluator.evaluate_classifier_result(
            self.estimator,
            test_evaluation_frame,
            train=train_evaluation_frame,
            test_proba=test_proba_evaluation_frame,
            multiclass=multiclass)
    finally:
        logger.close()

    if self.hyper_parameters is not None:
        self.hyper_parameters.save('%s_params.p' % self.name)

    self.trained_estimator = self.estimator
def run_classification_search_experiment(self,
                                         scoring,
                                         sample=None,
                                         random_state=None,
                                         test_size=0.20,
                                         n_jobs=-1,
                                         n_iter=2,
                                         cv=5,
                                         verbose=3,
                                         multiclass=False,
                                         record_predict_proba=False,
                                         sampling=None):
    """Run a Bayesian hyper-parameter search and evaluate the best estimator.

    The data in ``self.df`` is split into a training and a holdout partition.
    ``BayesSearchCV`` searches ``self.hyper_parameters.search_space`` on the
    (optionally re-sampled) training partition. The best estimator is then
    evaluated on both partitions and the results are logged to
    ``'<self.name>.txt'``. Holdout predictions are saved to
    ``'<self.name>_predict.p'``, the best parameters to
    ``'<self.name>_params.p'``, and the best estimator is stored in
    ``self.trained_estimator``.

    Args:
        scoring: Scoring forwarded to ``BayesSearchCV``.
        sample: Number of rows to sample from ``self.df``; None uses all rows.
        random_state: Seed forwarded to row sampling, the train/test split,
            the search and shuffling.
        test_size: Holdout size forwarded to ``train_test_split``.
        n_jobs: Parallel jobs forwarded to ``BayesSearchCV``.
        n_iter: Number of search iterations forwarded to ``BayesSearchCV``.
        cv: Cross-validation setting forwarded to ``BayesSearchCV``.
        verbose: Verbosity forwarded to ``BayesSearchCV``.
        multiclass: Forwarded to the evaluator.
        record_predict_proba: When true, holdout probabilities are also
            predicted and saved to ``'<self.name>_predict_proba.p'``.
        sampling: Optional re-sampler exposing ``fit_resample(x, y)``.
    """
    use_project_path()

    logger = Logger('%s.txt' % self.name)
    # The try/finally ensures the log is closed even when a step fails.
    try:
        # random_state is forwarded so a seeded experiment is reproducible;
        # the default (None) keeps the previous unseeded behaviour.
        search = BayesSearchCV(self.estimator,
                               self.hyper_parameters.search_space,
                               n_jobs=n_jobs,
                               n_iter=n_iter,
                               cv=cv,
                               verbose=verbose,
                               scoring=scoring,
                               return_train_score=True,
                               random_state=random_state)

        data_frame = self.df
        if sample is not None:
            data_frame = data_frame.sample(n=sample, random_state=random_state)

        # NOTE(review): the feature frame still contains the self.target
        # column; presumably the estimator selects its own feature columns —
        # verify.
        x_train, x_test, y_train, y_test = train_test_split(
            data_frame, data_frame[self.target], test_size=test_size,
            random_state=random_state)

        if sampling is not None:
            logger.time_log('Starting Data Re-Sampling...')
            logger.log('Original Training Shape is %s' % Counter(y_train))
            x_new, y_new = sampling.fit_resample(x_train, y_train)
            logger.log('Balanced Training Shape is %s' % Counter(y_new))
            # Rebuild a DataFrame with the original column names.
            if hasattr(x_train, 'columns'):
                x_new = pd.DataFrame(x_new, columns=x_train.columns)
            x_train, y_train = x_new, y_new
            logger.time_log('Re-Sampling Complete.\n')
            logger.time_log('Shuffling Re-Sampled Data.\n')
            x_train, y_train = shuffle(x_train, y_train, random_state=random_state)
            logger.time_log('Shuffling Complete.\n')

        logger.time_log('Starting HyperParameter Search...')
        results = search.fit(x_train, y_train)
        logger.time_log('Search Complete.\n')

        logger.time_log('Testing Training Partition...')
        y_train_predict = batch_predict(results.best_estimator_, x_train)
        logger.time_log('Testing Complete.\n')

        train_evaluation_frame = EvaluationFrame(y_train, y_train_predict)

        logger.time_log('Testing Holdout Partition...')
        y_test_predict = batch_predict(results.best_estimator_, x_test)
        logger.time_log('Testing Complete.\n')

        test_evaluation_frame = EvaluationFrame(y_test, y_test_predict)
        test_evaluation_frame.save('%s_predict.p' % self.name)

        test_proba_evaluation_frame = None
        if record_predict_proba:
            logger.time_log('Testing Holdout Partition (probability)...')
            y_test_predict_proba = batch_predict_proba(results.best_estimator_, x_test)
            test_proba_evaluation_frame = EvaluationFrame(
                y_test, y_test_predict_proba)
            test_proba_evaluation_frame.save('%s_predict_proba.p' % self.name)
            logger.time_log('Testing Complete.\n')

        evaluator = Evaluator(logger)
        evaluator.evaluate_classifier_result(
            results,
            test_evaluation_frame,
            train=train_evaluation_frame,
            test_proba=test_proba_evaluation_frame,
            multiclass=multiclass)
    finally:
        logger.close()

    # Persist the best parameters found by the search.
    self.hyper_parameters.params = results.best_params_
    self.hyper_parameters.save('%s_params.p' % self.name)

    self.trained_estimator = results.best_estimator_