def main():
    """Prepare the Breast Cancer Wisconsin splits and run the depth tuner.

    Reads the complete/train/validate/test CSVs, one-hot encodes each split
    with ``pd.get_dummies``, removes the ``diagnosis_M`` indicator column
    from the train and test splits (the validation split keeps every
    column) and sends each split through its own freshly fitted
    Imputer -> Normalizer mapper.

    Module globals assigned: ``complete_frame``, ``train_frame``,
    ``validate_frame``, ``train_array``, ``test_array`` and
    ``validation_array``.  ``test_frame`` stays local.  The function ends
    by calling ``hidden_depth_tune(100, 10)``.
    """
    global complete_frame, train_frame, validate_frame
    global train_array, test_array, validation_array
    global input_dim, inter_dim

    def _show(*parts):
        # Writes the parts space-separated, like Python 2's ``print a, b``.
        print(" ".join(str(part) for part in parts))

    def _impute_and_normalize(frame):
        # Echo the column list, then fit a new mapper on ``frame`` alone and
        # return the transformed array.
        columns = list(frame.columns)
        _show(columns)
        pipeline = DataFrameMapper([(columns, [
            preprocessing.Imputer(missing_values='NaN', strategy='mean',
                                  axis=0),
            preprocessing.Normalizer(),
        ])])
        return pipeline.fit_transform(frame)

    data_dir = "/home/wso2123/My Work/Datasets/Breast cancer wisconsin/"

    # Load every CSV as a pandas data frame.
    complete_frame = pd.read_csv(data_dir + "data.csv")
    train_frame = pd.read_csv(data_dir + "uncorrected_train.csv")
    validate_frame = pd.read_csv(data_dir + "validate.csv")
    test_frame = pd.read_csv(data_dir + "test.csv")

    train_frame = pd.get_dummies(train_frame).drop('diagnosis_M', axis=1)
    train_array = _impute_and_normalize(train_frame)
    _show(train_array)

    test_frame = pd.get_dummies(test_frame).drop('diagnosis_M', axis=1)
    test_array = _impute_and_normalize(test_frame)

    # The validation split is encoded but no indicator column is dropped.
    validate_frame = pd.get_dummies(validate_frame)
    validation_array = _impute_and_normalize(validate_frame)

    # Other tuners that have been swapped in here:
    # batch_size_tune()
    # learning_rate_tune(100,10)
    # optimizer_tune(100,10)
    # activation_tune(100, 10)
    # init_mode_tune(100,10)
    # dropout_tune(100,10)
    hidden_depth_tune(100, 10)
def main():
    """Prepare the Webscope A3Benchmark splits and build three models.

    Reads the train/validate/test CSVs, one-hot encodes each one with
    ``pd.get_dummies`` and transforms each with its own freshly fitted
    Imputer -> Normalizer mapper.  The column list and its length are
    printed per split, followed by the three array shapes.

    Module globals assigned: ``train_frame``, ``validate_frame``,
    ``test_frame``, ``train_array``, ``test_array`` and
    ``validation_array``.  ``complete_frame`` is declared global but not
    assigned here.  Finally ``model_build(i)`` runs for ``i`` in 1..3.
    """
    global complete_frame, train_frame, validate_frame, test_frame
    global train_array, test_array, validation_array

    def _show(*parts):
        # Writes the parts space-separated, like Python 2's ``print a, b``.
        print(" ".join(str(part) for part in parts))

    def _impute_and_normalize(frame):
        # Echo the columns and their count, then fit a new mapper on
        # ``frame`` alone and return the transformed array.
        columns = list(frame.columns)
        _show(columns, len(columns))
        pipeline = DataFrameMapper([(columns, [
            preprocessing.Imputer(missing_values='NaN', strategy='mean',
                                  axis=0),
            preprocessing.Normalizer(),
        ])])
        return pipeline.fit_transform(frame)

    data_dir = "/home/wso2123/My Work/Datasets/Webscope/A3Benchmark/"
    # Not read anywhere in this function.
    one_class_data = data_dir + "train.csv"

    # Load every CSV as a pandas data frame.
    train_frame = pd.read_csv(data_dir + "uncorrected_train.csv")
    validate_frame = pd.read_csv(data_dir + "validate.csv")
    test_frame = pd.read_csv(data_dir + "test.csv")

    train_frame = pd.get_dummies(train_frame)
    train_array = _impute_and_normalize(train_frame)

    test_frame = pd.get_dummies(test_frame)
    test_array = _impute_and_normalize(test_frame)

    validate_frame = pd.get_dummies(validate_frame)
    validation_array = _impute_and_normalize(validate_frame)

    # Label text is kept verbatim; ``.shape`` itself lists rows first.
    for label, split in (
            ("Training set (n_col, n_rows)", train_array),
            ("Testing set (n_col, n_rows)", test_array),
            ("Validation set (n_col, n_rows)", validation_array)):
        _show(label, split.shape)

    # Not consulted by the loop below.
    li = [12, 15, 20, 25, 30]
    for setting in range(1, 4):
        _show(setting, "---------------")
        model_build(setting)
def main():
    """Prepare the Ionosphere splits and report the best of nine builds.

    Reads the complete/train/validate/test CSVs, one-hot encodes each split
    with ``pd.get_dummies``, removes the ``C35_b`` indicator column from
    the train and test splits (the validation split keeps every column) and
    transforms each split with its own freshly fitted
    Imputer -> Normalizer mapper.

    Module globals assigned: ``complete_frame``, ``train_frame``,
    ``validate_frame``, ``train_array``, ``test_array`` and
    ``validation_array``.  ``test_frame`` stays local.

    ``model_build(15)`` is then called nine times; the pass number with the
    largest return value, and that value, are printed at the end.
    """
    global complete_frame, train_frame, validate_frame
    global train_array, test_array, validation_array

    def _show(*parts):
        # Writes the parts space-separated, like Python 2's ``print a, b``.
        print(" ".join(str(part) for part in parts))

    def _impute_and_normalize(frame):
        # Echo the column list, then fit a new mapper on ``frame`` alone and
        # return the transformed array.
        columns = list(frame.columns)
        _show(columns)
        pipeline = DataFrameMapper([(columns, [
            preprocessing.Imputer(missing_values='NaN', strategy='mean',
                                  axis=0),
            preprocessing.Normalizer(),
        ])])
        return pipeline.fit_transform(frame)

    data_dir = "/home/wso2123/My Work/Datasets/Ionosphere/"

    # Load every CSV as a pandas data frame.
    complete_frame = pd.read_csv(data_dir + "ionosphere.csv")
    train_frame = pd.read_csv(data_dir + "uncorrected_train.csv")
    validate_frame = pd.read_csv(data_dir + "validate.csv")
    test_frame = pd.read_csv(data_dir + "test.csv")

    train_frame = pd.get_dummies(train_frame).drop('C35_b', axis=1)
    train_array = _impute_and_normalize(train_frame)

    test_frame = pd.get_dummies(test_frame).drop('C35_b', axis=1)
    test_array = _impute_and_normalize(test_frame)

    # The validation split is encoded but no indicator column is dropped.
    validate_frame = pd.get_dummies(validate_frame)
    validation_array = _impute_and_normalize(validate_frame)

    # Label text is kept verbatim; ``.shape`` itself lists rows first.
    for label, split in (
            ("Training set (n_col, n_rows)", train_array),
            ("Testing set (n_col, n_rows)", test_array),
            ("Validation set (n_col, n_rows)", validation_array)):
        _show(label, split.shape)

    best_recall = 0
    best_pass = 0
    for attempt in range(1, 10):
        _show(attempt, "---------------")
        # NOTE(review): the argument is always 15, so ``attempt`` only
        # numbers the pass -- confirm this repetition is intentional.
        recall = model_build(15)
        if recall > best_recall:
            best_pass, best_recall = attempt, recall
    _show(best_pass, best_recall)
def main():
    """Load the KDD Cup splits and report the best of 39 ``model_build`` runs.

    Reads the complete/train/validate/test CSVs.  The train and test frames
    are one-hot encoded with ``pd.get_dummies`` and each is passed through
    a freshly fitted mapper that applies ``OneHotEncoder`` to the last 20
    columns and mean imputation to the remaining ones; the transformed
    arrays are not stored.  Both encoded frames are printed.

    Module globals assigned: ``complete_frame``, ``train_frame`` and
    ``validate_frame``.  ``train_array`` and ``test_array`` are declared
    global but not assigned here.

    ``model_build(12)`` is then called 39 times under a ``tqdm`` progress
    bar, and the largest return value is printed with the pass number that
    produced it.
    """
    global complete_frame, train_frame, validate_frame
    global train_array, test_array

    def _show(*parts):
        # Writes the parts space-separated, like Python 2's ``print a, b``.
        print(" ".join(str(part) for part in parts))

    def _one_hot_and_impute(frame):
        # Fit a new mapper on ``frame`` alone: the trailing 20 columns are
        # one-hot encoded, all earlier columns are mean-imputed.
        columns = list(frame.columns)
        pipeline = DataFrameMapper([
            (columns[-20:], preprocessing.OneHotEncoder()),
            (columns[:-20], preprocessing.Imputer(
                missing_values='NaN', strategy='mean', axis=0)),
        ])
        return pipeline.fit_transform(frame)

    data_dir = "/home/wso2123/My Work/Datasets/KDD Cup/"

    # Load every CSV as a pandas data frame.
    complete_frame = pd.read_csv(data_dir + "train.csv")
    train_frame = pd.read_csv(data_dir + "uncorrected_train.csv")
    validate_frame = pd.read_csv(data_dir + "validate.csv")
    test_frame = pd.read_csv(data_dir + "test.csv")

    # NOTE(review): the transformed arrays were never stored or used; the
    # calls are kept so fitting still runs (and still fails on bad input).
    # Confirm whether they were meant to fill train_array / test_array.
    train_frame = pd.get_dummies(train_frame)
    _one_hot_and_impute(train_frame)

    test_frame = pd.get_dummies(test_frame)
    _one_hot_and_impute(test_frame)

    _show(test_frame)
    _show(train_frame)

    # Start from 0, not 1: with a starting value of 1 a recall in [0, 1]
    # could never replace it and the summary always showed the defaults.
    best_recall = 0
    best_index = 1
    for attempt in tqdm(range(1, 40)):
        _show(attempt, "---------------")
        # NOTE(review): the argument is always 12, so ``attempt`` only
        # numbers the pass -- confirm this repetition is intentional.
        recall = model_build(12)
        if recall > best_recall:
            best_recall, best_index = recall, attempt
    _show("Max recall: ", best_recall, " at index: ", best_index)
def main():
    """Prepare the Breast Cancer Wisconsin splits and build one model.

    Reads the complete/train/validate/test CSVs, one-hot encodes each split
    with ``pd.get_dummies``, removes the ``diagnosis_M`` indicator column
    from the train and test splits (the validation split keeps every
    column) and transforms each split with its own freshly fitted
    Imputer -> Normalizer mapper.

    Module globals assigned: ``complete_frame``, ``train_frame``,
    ``validate_frame``, ``train_array``, ``test_array`` and
    ``validation_array``.  ``test_frame`` stays local.  The function ends
    with a single ``model_build(12)`` call.
    """
    global complete_frame, train_frame, validate_frame
    global train_array, test_array, validation_array

    def _show(*parts):
        # Writes the parts space-separated, like Python 2's ``print a, b``.
        print(" ".join(str(part) for part in parts))

    def _impute_and_normalize(frame):
        # Echo the column list, then fit a new mapper on ``frame`` alone and
        # return the transformed array.
        columns = list(frame.columns)
        _show(columns)
        pipeline = DataFrameMapper([(columns, [
            preprocessing.Imputer(missing_values='NaN', strategy='mean',
                                  axis=0),
            preprocessing.Normalizer(),
        ])])
        return pipeline.fit_transform(frame)

    data_dir = "/home/wso2123/My Work/Datasets/Breast cancer wisconsin/"

    # Load every CSV as a pandas data frame.
    complete_frame = pd.read_csv(data_dir + "data.csv")
    train_frame = pd.read_csv(data_dir + "uncorrected_train.csv")
    validate_frame = pd.read_csv(data_dir + "validate.csv")
    test_frame = pd.read_csv(data_dir + "test.csv")

    train_frame = pd.get_dummies(train_frame).drop('diagnosis_M', axis=1)
    train_array = _impute_and_normalize(train_frame)

    test_frame = pd.get_dummies(test_frame).drop('diagnosis_M', axis=1)
    test_array = _impute_and_normalize(test_frame)

    # The validation split is encoded but no indicator column is dropped.
    validate_frame = pd.get_dummies(validate_frame)
    validation_array = _impute_and_normalize(validate_frame)

    # One pass only; the pass number printed is 0.
    for attempt in range(1):
        _show(attempt, "---------------")
        model_build(12)
def main():
    """Load the Ionosphere splits and build models for settings 1..31.

    Reads the complete/train/validate/test CSVs.  The train and test frames
    are one-hot encoded with ``pd.get_dummies`` and each is passed through
    a freshly fitted mapper that applies ``OneHotEncoder`` to the last two
    columns and mean imputation to the remaining ones; the transformed
    arrays are not stored.

    Module globals assigned: ``complete_frame``, ``train_frame`` and
    ``validate_frame``.  ``train_array`` and ``test_array`` are declared
    global but not assigned here.  Finally ``model_build(i)`` runs for
    ``i`` in 1..31.
    """
    global complete_frame, train_frame, validate_frame
    global train_array, test_array

    def _show(*parts):
        # Writes the parts space-separated, like Python 2's ``print a, b``.
        print(" ".join(str(part) for part in parts))

    def _one_hot_and_impute(frame):
        # Fit a new mapper on ``frame`` alone: the trailing two columns are
        # one-hot encoded, all earlier columns are mean-imputed.
        columns = list(frame.columns)
        pipeline = DataFrameMapper([
            (columns[-2:], preprocessing.OneHotEncoder()),
            (columns[:-2], preprocessing.Imputer(
                missing_values='NaN', strategy='mean', axis=0)),
        ])
        return pipeline.fit_transform(frame)

    data_dir = "/home/wso2123/My Work/Datasets/Ionosphere/"

    # Load every CSV as a pandas data frame.
    complete_frame = pd.read_csv(data_dir + "train.csv")
    train_frame = pd.read_csv(data_dir + "uncorrected_train.csv")
    validate_frame = pd.read_csv(data_dir + "validate.csv")
    test_frame = pd.read_csv(data_dir + "test.csv")

    # NOTE(review): the transformed arrays are discarded, as in the
    # original -- confirm whether train_array / test_array should be set.
    train_frame = pd.get_dummies(train_frame)
    _one_hot_and_impute(train_frame)

    test_frame = pd.get_dummies(test_frame)
    _one_hot_and_impute(test_frame)

    for setting in range(1, 32):
        _show(setting, "---------------")
        model_build(setting)
def main():
    """Set the hyperparameter globals, prepare KDD Cup data, build nine models.

    Assigns the module globals ``batch_size``, ``original_dim``,
    ``latent_dim``, ``intermediate_dim``, ``nb_epoch`` and ``epsilon_std``,
    then reads the complete/train/validate/test CSVs.  The train and test
    frames are one-hot encoded with ``pd.get_dummies``, the listed ``C42_*``
    indicator columns are dropped, and each frame is transformed with its
    own freshly fitted Imputer -> Normalizer mapper.  The validation array
    is the first 103744 rows of the transformed training array; the
    validation CSV is read but not transformed.

    Further module globals assigned: ``complete_frame``, ``train_frame``,
    ``validate_frame``, ``train_array``, ``test_array`` and
    ``validation_array``.  ``test_frame`` stays local.  Finally
    ``model_build(i)`` runs for ``i`` in 1..9.
    """
    global complete_frame, train_frame, validate_frame
    global train_array, test_array, validation_array
    global batch_size, original_dim, latent_dim
    global intermediate_dim, nb_epoch, epsilon_std

    def _show(*parts):
        # Writes the parts space-separated, like Python 2's ``print a, b``.
        print(" ".join(str(part) for part in parts))

    def _impute_and_normalize(frame):
        # Echo the columns and their count, then fit a new mapper on
        # ``frame`` alone and return the transformed array.
        columns = list(frame.columns)
        _show(columns, len(columns))
        pipeline = DataFrameMapper([(columns, [
            preprocessing.Imputer(missing_values='NaN', strategy='mean',
                                  axis=0),
            preprocessing.Normalizer(),
        ])])
        return pipeline.fit_transform(frame)

    batch_size = 100
    original_dim = 118
    latent_dim = 3
    intermediate_dim = 25
    nb_epoch = 10
    epsilon_std = 1.0

    data_dir = "/home/wso2123/My Work/Datasets/KDD Cup/"

    # Load every CSV as a pandas data frame.
    complete_frame = pd.read_csv(
        data_dir + "kddcup.data_10_percent_corrected")
    train_frame = pd.read_csv(data_dir + "uncorrected_train.csv")
    validate_frame = pd.read_csv(data_dir + "train.csv")
    test_frame = pd.read_csv(data_dir + "test.csv")

    # Indicator columns removed per split.  The two lists differ in a
    # single entry: 'C42_phf.' (train only) versus 'C42_spy.' (test only).
    train_label_columns = [
        'C42_back.', 'C42_buffer_overflow.', 'C42_ftp_write.',
        'C42_guess_passwd.', 'C42_imap.', 'C42_ipsweep.', 'C42_land.',
        'C42_loadmodule.', 'C42_multihop.', 'C42_neptune.', 'C42_nmap.',
        'C42_perl.', 'C42_phf.', 'C42_pod.', 'C42_portsweep.',
        'C42_rootkit.', 'C42_satan.', 'C42_smurf.', 'C42_teardrop.',
        'C42_warezclient.', 'C42_warezmaster.',
    ]
    test_label_columns = [
        'C42_back.', 'C42_buffer_overflow.', 'C42_ftp_write.',
        'C42_guess_passwd.', 'C42_imap.', 'C42_ipsweep.', 'C42_land.',
        'C42_loadmodule.', 'C42_multihop.', 'C42_neptune.', 'C42_nmap.',
        'C42_perl.', 'C42_pod.', 'C42_portsweep.', 'C42_rootkit.',
        'C42_satan.', 'C42_smurf.', 'C42_spy.', 'C42_teardrop.',
        'C42_warezclient.', 'C42_warezmaster.',
    ]

    train_frame = pd.get_dummies(train_frame).drop(
        train_label_columns, axis=1)
    train_array = _impute_and_normalize(train_frame)

    test_frame = pd.get_dummies(test_frame).drop(test_label_columns, axis=1)
    test_array = _impute_and_normalize(test_frame)

    # Validation data is carved out of the transformed training rows
    # instead of being derived from ``validate_frame``.
    validation_array = train_array[0:103744]

    # Label text is kept verbatim; ``.shape`` itself lists rows first.
    for label, split in (
            ("Training set (n_col, n_rows)", train_array),
            ("Testing set (n_col, n_rows)", test_array),
            ("Validation set (n_col, n_rows)", validation_array)):
        _show(label, split.shape)

    for setting in range(1, 10):
        _show(setting, "---------------")
        model_build(setting)
def main():
    """Prepare KDD Cup data as ``(n, 1, 118)`` arrays and build one model.

    Reads the complete/train/validate/test CSVs.  The train and test frames
    are one-hot encoded with ``pd.get_dummies``, the listed ``C42_*``
    indicator columns are dropped, and each frame is transformed with its
    own freshly fitted Imputer -> Normalizer mapper.  The validation array
    is the first 103744 rows of the transformed training array, taken
    before the train and test arrays are reshaped to ``(rows, 1, 118)``;
    the validation CSV is read but not transformed.

    Module globals assigned: ``complete_frame``, ``train_frame``,
    ``validate_frame``, ``train_array``, ``test_array`` and
    ``validation_array``.  ``test_frame`` stays local.  The function ends
    with one ``model_build`` call whose argument is the last entry of the
    local width list (80).
    """
    global complete_frame, train_frame, validate_frame
    global train_array, test_array, validation_array

    def _show(*parts):
        # Writes the parts space-separated, like Python 2's ``print a, b``.
        print(" ".join(str(part) for part in parts))

    def _impute_and_normalize(frame):
        # Echo the columns and their count, then fit a new mapper on
        # ``frame`` alone and return the transformed array.
        columns = list(frame.columns)
        _show(columns, len(columns))
        pipeline = DataFrameMapper([(columns, [
            preprocessing.Imputer(missing_values='NaN', strategy='mean',
                                  axis=0),
            preprocessing.Normalizer(),
        ])])
        return pipeline.fit_transform(frame)

    data_dir = "/home/wso2123/My Work/Datasets/KDD Cup/"

    # Load every CSV as a pandas data frame.
    complete_frame = pd.read_csv(
        data_dir + "kddcup.data_10_percent_corrected")
    train_frame = pd.read_csv(data_dir + "uncorrected_train.csv")
    validate_frame = pd.read_csv(data_dir + "train.csv")
    test_frame = pd.read_csv(data_dir + "test.csv")

    # Indicator columns removed per split.  The two lists differ in a
    # single entry: 'C42_phf.' (train only) versus 'C42_spy.' (test only).
    train_label_columns = [
        'C42_back.', 'C42_buffer_overflow.', 'C42_ftp_write.',
        'C42_guess_passwd.', 'C42_imap.', 'C42_ipsweep.', 'C42_land.',
        'C42_loadmodule.', 'C42_multihop.', 'C42_neptune.', 'C42_nmap.',
        'C42_perl.', 'C42_phf.', 'C42_pod.', 'C42_portsweep.',
        'C42_rootkit.', 'C42_satan.', 'C42_smurf.', 'C42_teardrop.',
        'C42_warezclient.', 'C42_warezmaster.',
    ]
    test_label_columns = [
        'C42_back.', 'C42_buffer_overflow.', 'C42_ftp_write.',
        'C42_guess_passwd.', 'C42_imap.', 'C42_ipsweep.', 'C42_land.',
        'C42_loadmodule.', 'C42_multihop.', 'C42_neptune.', 'C42_nmap.',
        'C42_perl.', 'C42_pod.', 'C42_portsweep.', 'C42_rootkit.',
        'C42_satan.', 'C42_smurf.', 'C42_spy.', 'C42_teardrop.',
        'C42_warezclient.', 'C42_warezmaster.',
    ]

    train_frame = pd.get_dummies(train_frame).drop(
        train_label_columns, axis=1)
    train_array = _impute_and_normalize(train_frame)

    test_frame = pd.get_dummies(test_frame).drop(test_label_columns, axis=1)
    test_array = _impute_and_normalize(test_frame)

    # Taken before the reshape below, so this slice keeps the mapper's
    # original (un-reshaped) layout.
    validation_array = train_array[0:103744]

    # Insert a length-1 middle axis: (rows, 1, 118).
    train_array = np.reshape(train_array, (len(train_array), 1, 118))
    test_array = np.reshape(test_array, (len(test_array), 1, 118))

    # Label text is kept verbatim; ``.shape`` itself lists rows first.
    for label, split in (
            ("Training set (n_col, n_rows)", train_array),
            ("Testing set (n_col, n_rows)", test_array),
            ("Validation set (n_col, n_rows)", validation_array)):
        _show(label, split.shape)
    _show(train_array[0])

    widths = [25, 40, 55, 75, 80]
    for step in range(1):
        _show(step, "---------------")
        # ``step`` is 0 here, so ``step - 1`` is -1 and selects the last
        # width.  NOTE(review): confirm this wrap-around is intended.
        model_build(widths[step - 1])
def main():
    """Set the hyperparameter globals, prepare Ionosphere data, build nine models.

    Assigns the module globals ``batch_size``, ``original_dim``,
    ``latent_dim``, ``intermediate_dim``, ``nb_epoch`` and ``epsilon_std``,
    then reads the complete/train/validate/test CSVs.  Each split is
    one-hot encoded with ``pd.get_dummies``; the ``C35_b`` indicator column
    is removed from the train and test splits (the validation split keeps
    every column); each split is transformed with its own freshly fitted
    Imputer -> Normalizer mapper.

    Further module globals assigned: ``complete_frame``, ``train_frame``,
    ``validate_frame``, ``train_array``, ``test_array`` and
    ``validation_array``.  ``test_frame`` stays local.  Finally
    ``model_build(i)`` runs for ``i`` in 1..9.
    """
    global complete_frame, train_frame, validate_frame
    global train_array, test_array, validation_array
    global batch_size, original_dim, latent_dim
    global intermediate_dim, nb_epoch, epsilon_std

    def _show(*parts):
        # Writes the parts space-separated, like Python 2's ``print a, b``.
        print(" ".join(str(part) for part in parts))

    def _impute_and_normalize(frame):
        # Echo the column list, then fit a new mapper on ``frame`` alone and
        # return the transformed array.
        columns = list(frame.columns)
        _show(columns)
        pipeline = DataFrameMapper([(columns, [
            preprocessing.Imputer(missing_values='NaN', strategy='mean',
                                  axis=0),
            preprocessing.Normalizer(),
        ])])
        return pipeline.fit_transform(frame)

    batch_size = 199
    original_dim = 35
    latent_dim = 3
    intermediate_dim = 15
    nb_epoch = 100
    epsilon_std = 1.0

    data_dir = "/home/wso2123/My Work/Datasets/Ionosphere/"

    # Load every CSV as a pandas data frame.
    complete_frame = pd.read_csv(data_dir + "ionosphere.csv")
    train_frame = pd.read_csv(data_dir + "uncorrected_train.csv")
    validate_frame = pd.read_csv(data_dir + "validate.csv")
    test_frame = pd.read_csv(data_dir + "test.csv")

    train_frame = pd.get_dummies(train_frame).drop('C35_b', axis=1)
    train_array = _impute_and_normalize(train_frame)

    test_frame = pd.get_dummies(test_frame).drop('C35_b', axis=1)
    test_array = _impute_and_normalize(test_frame)

    # The validation split is encoded but no indicator column is dropped.
    validate_frame = pd.get_dummies(validate_frame)
    validation_array = _impute_and_normalize(validate_frame)

    # Label text is kept verbatim; ``.shape`` itself lists rows first.
    for label, split in (
            ("Training set (n_col, n_rows)", train_array),
            ("Testing set (n_col, n_rows)", test_array),
            ("Validation set (n_col, n_rows)", validation_array)):
        _show(label, split.shape)

    for setting in range(1, 10):
        _show(setting, "---------------")
        model_build(setting)
def main():
    """Prepare the Creditcard splits as timestep windows and build one model.

    Assigns the module globals ``batch_size``, ``input_dim``,
    ``latent_dim``, ``timesteps``, ``nb_epoch`` and ``epsilon_std``, then
    reads the train/validate/test CSVs.  Each split is one-hot encoded with
    ``pd.get_dummies`` and transformed with its own freshly fitted
    Imputer -> Normalizer mapper.  The test array is cut to its first
    85440 rows, and all three arrays are reshaped to
    ``(rows // timesteps, timesteps, input_dim)``.

    Further module globals assigned: ``train_frame``, ``validate_frame``,
    ``test_frame``, ``train_array``, ``test_array`` and
    ``validation_array``.  ``complete_frame`` is declared global but not
    assigned here.  The function ends with ``model_build(0)``.

    Raises:
        ValueError: from ``np.reshape`` when an array's element count does
            not match the target shape (for example a row count that is
            not a multiple of ``timesteps``).
    """
    global complete_frame, train_frame, validate_frame, test_frame
    global train_array, test_array, validation_array
    global batch_size, input_dim, latent_dim
    global timesteps, nb_epoch, epsilon_std

    def _show(*parts):
        # Writes the parts space-separated, like Python 2's ``print a, b``.
        print(" ".join(str(part) for part in parts))

    def _impute_and_normalize(frame):
        # Echo the columns and their count, then fit a new mapper on
        # ``frame`` alone and return the transformed array.
        columns = list(frame.columns)
        _show(columns, len(columns))
        pipeline = DataFrameMapper([(columns, [
            preprocessing.Imputer(missing_values='NaN', strategy='mean',
                                  axis=0),
            preprocessing.Normalizer(),
        ])])
        return pipeline.fit_transform(frame)

    def _to_windows(rows):
        # Group consecutive rows into blocks of ``timesteps``.  Floor
        # division keeps the leading dimension an int even when ``/`` means
        # true division (Python 3 or ``from __future__ import division``).
        return np.reshape(
            rows, (len(rows) // timesteps, timesteps, input_dim))

    batch_size = 100
    input_dim = 31
    latent_dim = 12
    timesteps = 5
    nb_epoch = 10
    epsilon_std = 1.0

    data_dir = "/home/wso2123/My Work/Datasets/Creditcard/"

    # Load every CSV as a pandas data frame.
    train_frame = pd.read_csv(data_dir + "train.csv")
    validate_frame = pd.read_csv(data_dir + "validate.csv")
    test_frame = pd.read_csv(data_dir + "test.csv")

    train_frame = pd.get_dummies(train_frame)
    train_array = _impute_and_normalize(train_frame)

    test_frame = pd.get_dummies(test_frame)
    test_array = _impute_and_normalize(test_frame)
    # Hard-coded cut; 85440 is a multiple of ``timesteps``.
    # NOTE(review): presumably chosen so the reshape below succeeds.
    test_array = test_array[0:85440]

    validate_frame = pd.get_dummies(validate_frame)
    validation_array = _impute_and_normalize(validate_frame)

    train_array = _to_windows(train_array)
    test_array = _to_windows(test_array)
    validation_array = _to_windows(validation_array)

    # Label text is kept verbatim although the arrays are 3-D at this point.
    for label, split in (
            ("Training set (n_col, n_rows)", train_array),
            ("Testing set (n_col, n_rows)", test_array),
            ("Validation set (n_col, n_rows)", validation_array)):
        _show(label, split.shape)

    # One pass only, so ``model_build`` receives 0.
    for setting in range(1):
        _show(setting, "---------------")
        model_build(setting)