def Neural_Network_Selection(normal_count, anomaly_count, data_dir):
    # Grid-search over hidden-layer depth and width, keeping the
    # configuration with the best validation accuracy.
    undersample, _ = preprocessing.create_datasets(data_dir, normal_count, anomaly_count)
    X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = undersample

    hidden_layers = [1, 2, 3, 4, 5]
    hidden_layer_neurons = [50, 100, 200, 300, 500]
    results_matrix_validation = np.zeros((5, 5))

    for dense_index, neurons in enumerate(hidden_layer_neurons):
        for layer_index, layers in enumerate(hidden_layers):
            model = Sequential()
            model.add(Dense(neurons, input_dim=30, activation='relu'))
            for _ in range(layers):
                model.add(Dense(neurons, activation='relu'))
            model.add(Dense(1, activation='sigmoid'))
            # Compile model
            model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
            # Fit the model
            model.fit(X_train_undersample, y_train_undersample, epochs=200, batch_size=10, verbose=0)
            acc = model.evaluate(X_test_undersample, y_test_undersample)[1]
            results_matrix_validation[dense_index][layer_index] = acc

    result = np.where(results_matrix_validation == np.amax(results_matrix_validation))
    return (hidden_layer_neurons[result[0][0]], hidden_layers[result[1][0]])
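
# Hedged usage sketch (the counts and path below are placeholders, not values
# from this repo): pick the best architecture, then rebuild the final model
# the same way the search loop does.
best_neurons, best_layers = Neural_Network_Selection(492, 492, 'data/creditcard.csv')
final_model = Sequential()
final_model.add(Dense(best_neurons, input_dim=30, activation='relu'))
for _ in range(best_layers):
    final_model.add(Dense(best_neurons, activation='relu'))
final_model.add(Dense(1, activation='sigmoid'))
final_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])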
def create_nonlinear_SVC(data_dir, normal_count, anomaly_count):
    # Grid-search SVC hyperparameters, keeping the combination with the
    # highest recall on the held-out undersampled test split.
    undersample, _ = preprocessing.create_datasets(data_dir, normal_count, anomaly_count)
    train_x, test_x, train_y, test_y = undersample

    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    degree = [1, 3, 6, 12]
    C = [1, 10, 20]
    recall_val = 0
    opt_kernel = None
    opt_degree = None
    opt_C = None
    for kernel in kernels:
        for deg in degree:
            for c in C:
                clf = SVC(kernel=kernel, degree=deg, C=c)
                clf.fit(train_x, train_y)
                recall_state = recall_score(test_y, clf.predict(test_x), pos_label=1)
                if recall_val < recall_state:
                    recall_val = recall_state
                    opt_kernel, opt_degree, opt_C = (kernel, deg, c)
                #print('Recall Score: {} | kernel {}, degree {}, C {}'.format(recall_state, kernel, deg, c))
    # Return the best combination found, not the values left over from the
    # final loop iteration.
    return (opt_kernel, opt_degree, opt_C)
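
# Hedged usage sketch (placeholder path and counts): fit a final SVC with the
# selected kernel, degree, and C before applying it to new data.
kernel, deg, c = create_nonlinear_SVC('data/creditcard.csv', 492, 492)
final_clf = SVC(kernel=kernel, degree=deg, C=c)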
def PCA_(undersample_amount, data_dir, plot):
    train_x, test_x, train_y, test_y = preprocessing.create_datasets(
        data_dir, undersample_amount, undersample_amount)[0]
    pca = PCA(n_components=2)
    finalDf = plot_PCA(train_x, train_y, pca, plot)
    pca = PCA(n_components=10)
    principalComponents = pca.fit_transform(train_x)
    variance_explained = pca.explained_variance_ratio_
    #print('First 10 Principal Components Variance Explained As Follows Respectively: {}'.format(variance_explained))
    # The first two principal components consistently (on every stratified
    # split sample) explain about 75% or more of the variance, with the first
    # component alone accounting for about 65%.
    return (finalDf, test_x, test_y)
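
# Hedged usage sketch (the amount and path are placeholders, and finalDf is
# assumed to be a DataFrame, as its name suggests): project the undersampled
# training data to 2-D and keep the test split for downstream evaluation.
finalDf, test_x, test_y = PCA_(undersample_amount=492, data_dir='data/creditcard.csv', plot=True)
print(finalDf.head())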
import nmf1
import nmf2
from complement import *
from preprocessing import create_datasets
from metric import evaluate
import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':
    dataset1, dataset2, dataset3, dataset4, dataset5 = create_datasets()
    #print(dataset1)
    print('************datasets were created!!***************')
    R_train1 = np.array(dataset1[0])
    R_test1 = np.array(dataset1[1])
    R_train2 = np.array(dataset2[0])
    R_test2 = np.array(dataset2[1])
    R_train3 = np.array(dataset3[0])
    R_test3 = np.array(dataset3[1])
    R_train4 = np.array(dataset4[0])
    R_test4 = np.array(dataset4[1])
    R_train5 = np.array(dataset5[0])
    R_test5 = np.array(dataset5[1])
    #print(R_train1, R_test1)

    K = 19  # number of latent features
    # initialize P and Q with random values
    P = np.random.rand(R_train1.shape[0], K)
    Q = np.random.rand(R_train1.shape[1], K)
    print(R_test1.shape[0], R_test1.shape[1])
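    # Hedged sketch (an assumption, since nmf1/nmf2's actual APIs are not
    # shown here): with P and Q as latent factors, the reconstruction is
    # R ≈ P @ Q.T, so a baseline test error can be computed on the observed
    # test entries before any factorization updates run.
    R_hat = P @ Q.T                     # reconstructed matrix, shape of R_train1
    mask = R_test1 > 0                  # assume unobserved test entries are zero
    rmse = np.sqrt(np.mean((R_test1[mask] - R_hat[mask]) ** 2))
    print('baseline test RMSE with random P, Q:', rmse)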
def Logistic_Regression_Selection(sample_times, undersample_amount, data_dir):
    c_bank = [0.001, 0.01, 0.1, 1, 10]
    penalties = ['l1', 'l2']
    results_matrix_train = np.zeros((len(c_bank), len(penalties)))
    results_matrix_validation = np.zeros((len(c_bank), len(penalties)))
    results_matrix_large_test = np.zeros((len(c_bank), len(penalties)))

    for sample_count in range(sample_times):
        undersample, test_set = preprocessing.create_datasets(
            data_dir, undersample_amount, undersample_amount)
        X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = undersample
        X_test, y_test = test_set
        for c in c_bank:
            for regularizer in penalties:
                # liblinear supports both the 'l1' and 'l2' penalties
                log_reg = LogisticRegression(C=c, penalty=regularizer, solver='liblinear')
                log_reg.fit(X_train_undersample, y_train_undersample.values.ravel())
                # train set
                y_pred_undersample = log_reg.predict(X_train_undersample)
                recall_train = np.round(recall_score(
                    y_train_undersample.values, y_pred_undersample), decimals=4)
                # validation set
                y_pred_undersample = log_reg.predict(X_test_undersample)
                recall_test = np.round(recall_score(
                    y_test_undersample.values, y_pred_undersample), decimals=4)
                # large test set
                y_pred_undersample = log_reg.predict(X_test)
                recall_large_test = np.round(recall_score(
                    y_test.values, y_pred_undersample), decimals=4)
                #print('Sample Number {}: C-value {}, Regularizer {}, has Training Recall: {}'.format(sample_count, c, regularizer, recall_train))
                #print('Sample Number {}: C-value {}, Regularizer {}, has Validation Recall: {}'.format(sample_count, c, regularizer, recall_test))
                #print('Sample Number {}: C-value {}, Regularizer {}, has Large Test Recall: {}'.format(sample_count, c, regularizer, recall_large_test))
                results_matrix_train[c_bank.index(c)][penalties.index(regularizer)] += recall_train
                results_matrix_validation[c_bank.index(c)][penalties.index(regularizer)] += recall_test
                results_matrix_large_test[c_bank.index(c)][penalties.index(regularizer)] += recall_large_test

    # average recall over all undersampled runs
    results_matrix_train = results_matrix_train / sample_times
    results_matrix_validation = results_matrix_validation / sample_times
    results_matrix_large_test = results_matrix_large_test / sample_times

    final_c = []
    final_reg = []
    # best (C, penalty) by average training recall
    result = np.where(results_matrix_train == np.amax(results_matrix_train))
    final_c.append(c_bank[result[0][0]])
    final_reg.append(penalties[result[1][0]])
    # best (C, penalty) by average validation recall
    result = np.where(results_matrix_validation == np.amax(results_matrix_validation))
    final_c.append(c_bank[result[0][0]])
    final_reg.append(penalties[result[1][0]])
    # best (C, penalty) by average large-test recall
    result = np.where(results_matrix_large_test == np.amax(results_matrix_large_test))
    final_c.append(c_bank[result[0][0]])
    final_reg.append(penalties[result[1][0]])

    # majority vote across the three selection criteria
    final_c = Counter(final_c).most_common(1)[0]
    final_reg = Counter(final_reg).most_common(1)[0]
    ##print("Best Overall C value: {}, Best Overall Regularizer: {}".format(final_c[0], final_reg[0]))

    # refit with the winning hyperparameters and report final recalls
    log_reg = LogisticRegression(C=float(final_c[0]), penalty=str(final_reg[0]), solver='liblinear')
    log_reg.fit(X_train_undersample, y_train_undersample.values.ravel())
    # train set
    y_pred_undersample = log_reg.predict(X_train_undersample)
    recall_train = np.round(recall_score(y_train_undersample.values, y_pred_undersample), decimals=4)
    # validation set
    y_pred_undersample = log_reg.predict(X_test_undersample)
    recall_test = np.round(recall_score(y_test_undersample.values, y_pred_undersample), decimals=4)
    # large test set
    y_pred_undersample = log_reg.predict(X_test)
    recall_large_test = np.round(recall_score(y_test.values, y_pred_undersample), decimals=4)
    ##print('Logistic Regression Test Set Recall: {}'.format(recall_large_test))
    ##print('Logistic Regression Test Set Confusion Matrix:')
    ##print(pd.DataFrame(confusion_matrix(y_test.values, y_pred_undersample, labels=[0, 1]),
    ##                   index=['true:0', 'true:1'], columns=['pred:0', 'pred:1']))
    return (final_c[0], final_reg[0])
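
# Hedged usage sketch (placeholder values): select C and the penalty by
# repeated undersampling, then fit a final classifier with the winners.
best_c, best_penalty = Logistic_Regression_Selection(
    sample_times=10, undersample_amount=492, data_dir='data/creditcard.csv')
final_log_reg = LogisticRegression(C=best_c, penalty=best_penalty, solver='liblinear')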
import numpy as np
from preprocessing import create_datasets, create_generators
from extracting import extract_features
from model import create_model, get_conv_base
from visualizing import display_progress

base_dir: str = '/Users/Jan/developer/ML/dogs/dataset'
train, test, val = create_datasets('hello')

# Extract features from the pretrained convolutional base.
conv_base = get_conv_base()
train_features, train_labels = extract_features(conv_base, train, 2000)
val_features, val_labels = extract_features(conv_base, val, 1000)
test_features, test_labels = extract_features(conv_base, test, 1000)

# Flatten the (4, 4, 512) feature maps for the densely connected classifier.
train_features = np.reshape(train_features, (2000, 4 * 4 * 512))
val_features = np.reshape(val_features, (1000, 4 * 4 * 512))
test_features = np.reshape(test_features, (1000, 4 * 4 * 512))

model = create_model()
model.summary()

p: str = '/Users/Jan/Developer/ML/dogs/dataset'
train_generator, val_generator = create_generators(f'{p}/train', f'{p}/val', f'{p}/test')
history = model.fit_generator(train_generator,
                              steps_per_epoch=100,
                              epochs=30,
                              # the original call is truncated here; the
                              # validation arguments below are an assumed
                              # completion
                              validation_data=val_generator,
                              validation_steps=50)
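
# display_progress is imported above but never called; presumably it plots
# the training curves from the history object (assumed signature).
display_progress(history)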
iteration = args.iteration
settings = (f'dim{dim}--layer_hidden{layer_hidden}--layer_output{layer_output}'
            f'--lr{lr}--lr_decay{lr_decay}--decay_interval{decay_interval}'
            f'--batch{batch_train}')
print(settings)

# Slack messages (the webhook URL is a placeholder)
url = 'http://xxx.xxx.xxx'
message = {"text": "GNN train start"}
message_ = {"text": "GNN train end."}
requests.post(url, data=json.dumps(message))

# Preprocessing Datasets
print('Creating datasets from molecular graph.')
print('The training set is split and converted into subsets based on K-fold CV.')
print('Just a moment......')
datasets_train, datasets_valid, dataset_test, N_fingerprints, valid_indexes = pp.create_datasets(
    train_path, test_path, radius, task, device)

# Make a directory for saving results
os.mkdir(f'{args.date}')

### Training and Prediction ###
for a in range(5):
    dataset_train = datasets_train[a]
    dataset_valid = datasets_valid[a]
    print('-' * 100)
    print('The preprocessing has finished!')
    print('# of training data allocations:', a)
    print('# of training data samples:', len(dataset_train))
    print('# of development data samples:', len(dataset_valid))
    print('# of test data samples:', len(dataset_test))
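
# Hedged sketch: message_ is defined above but never sent; presumably the
# "train end" notification mirrors the opening post once all folds finish.
requests.post(url, data=json.dumps(message_))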