# NOTE: these functions appear to come from several experiment scripts
# (there are multiple run() definitions below); the imports collected here
# cover everything they reference. Module-level settings such as train_dir,
# test_dir, out_dir, maxlen, batch_size, n_components and perplexity are
# defined elsewhere.
import glob
import os
from time import time

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
from sklearn import manifold
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import auc, classification_report, confusion_matrix, roc_curve
from sklearn.externals import joblib  # on newer scikit-learn: import joblib
from keras.models import Sequential
from keras.layers import Bidirectional, Dense, Dropout, LSTM
from keras.preprocessing import sequence

import preprocessing
import postprocessing
import lstm_processing


def test_all_detections_class(fn):
    folder_name = 'test'
    test_detections = preprocessing.all_detections(folder_name)
    view_name = fn.split('/')[-1].split('.feat')[0]
    with open(fn) as f:
        lines = f.readlines()
    test_detections.update(lines, view_name)
    labels = test_detections.get_all_tp_fp()  # retrieved for inspection
    print(' ')
def test_save_to_txt_class(fn):
    out_dir = '//lyta/tomodev/DeepLearning/Playground_stage3/output_pick1mark/score_txt/'
    folder_name = 'test'
    test_detections = preprocessing.all_detections(folder_name)
    view_name = fn.split('/')[-1].split('_feat.txt')[0]
    with open(fn) as f:
        lines = f.readlines()
    test_detections.update(lines, view_name)
    labels = test_detections.get_all_tp_fp()
    postprocessing.pick_one_mark_one_group(test_detections)
    postprocessing.save_to_txt(test_detections, out_dir)
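
# A hypothetical driver for the helper above. The directory is the hard-coded
# test path used elsewhere in this file; invoking the helper this way is an
# assumption, not the original workflow.
def demo_run_test_helpers():
    for fn in glob.glob('C:/experiments/temp_feature_vector/test/*.txt'):
        test_save_to_txt_class(fn)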
def run_all_test_weighted_logit_save_to_txt():
    out_dir = '//lyta/tomodev/DeepLearning/Playground_stage3/output_pick1mark/score_txt/weighted_logit/'
    # hard-coded test folder to make the program run through
    for fn in glob.glob('C:/experiments/temp_feature_vector/test/*.txt'):
        folder_name = 'test'
        test_detections = preprocessing.all_detections(folder_name)
        view_name = fn.split('\\')[-1].split('_feat.txt')[0]
        with open(fn) as f:
            lines = f.readlines()
        test_detections.update(lines, view_name)
        postprocessing.pick_one_mark_one_group_use_logit(test_detections)
        postprocessing.save_to_txt(test_detections, out_dir)
# run() for the group-level bidirectional-LSTM experiment (one of several
# run() definitions in this file, presumably from separate scripts originally)
def run():
    # get training set
    os.chdir(train_dir)  # was test_dir; the training files live in train_dir
    folder_name = train_dir.split('/')[-2]
    # initiate train detections
    train_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt'):
        with open(file) as f:
            lines = f.readlines()
        view_name = file.split('_feat.txt')[0]
        # adding detections from one view to this instance
        train_detections.update(lines, view_name)

    # get testing set
    os.chdir(test_dir)
    folder_name = test_dir.split('/')[-2]
    # initiate test detections
    test_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt'):
        with open(file) as f:
            lines = f.readlines()
        view_name = file.split('_feat.txt')[0]
        # adding detections from one view to this instance
        test_detections.update(lines, view_name)

    # get train_x and train_y
    train_y = np.array([
        train_detections.groups[i].group_tp_fp
        for i in range(train_detections.get_total_group_count())
    ])
    train_x = lstm_processing.form_lstm_features_v3(train_detections)
    train_x = lstm_processing.repeat_and_pad(train_x, maxlen)
    train_x = sequence.pad_sequences(train_x, maxlen=maxlen, dtype='float')
    train_x = np.array(train_x)
    del train_detections

    # get test_x and test_y
    test_y = np.array([
        test_detections.groups[i].group_tp_fp
        for i in range(test_detections.get_total_group_count())
    ])
    test_x = lstm_processing.form_lstm_features_v3(test_detections)
    test_x = lstm_processing.repeat_and_pad(test_x, maxlen)
    test_x = sequence.pad_sequences(test_x, maxlen=maxlen, dtype='float')
    test_x = np.array(test_x)
    del test_detections

    model = Sequential()
    model.add(Bidirectional(LSTM(128), input_shape=(maxlen, 4)))
    model.add(Dropout(0.5))
    # model.add(Bidirectional(LSTM(512, return_sequences=True)))
    # model.add(Dropout(0.5))
    # model.add(Bidirectional(LSTM(128)))
    # model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    # try using different optimizers and different optimizer configs
    model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

    print('Train...')
    model.fit(train_x, train_y,
              batch_size=batch_size,
              epochs=20,
              validation_data=(test_x, test_y))

    # predictions on train set
    predictions_train = model.predict(train_x)
    loss_train = model.evaluate(train_x, train_y, verbose=1)
    print('This is the training set result: \n')
    # compute AUC
    fpr, tpr, thresholds = roc_curve(train_y, predictions_train, pos_label=1)
    az_train = auc(fpr, tpr)
    print('AUC for train set is ' + str(az_train))

    # predictions on test set
    predictions_test = model.predict(test_x)
    loss_test = model.evaluate(test_x, test_y, verbose=1)
    print('This is the testing set result: \n')  # was mislabeled "training set"
    # compute AUC
    fpr, tpr, thresholds = roc_curve(test_y, predictions_test, pos_label=1)
    az_test = auc(fpr, tpr)
    print('AUC for testing set is ' + str(az_test))

    # # record the predictions into the class
    # train_detections.record_group_lstm_scores(predictions_train[:, 0])
    # test_detections.record_group_lstm_scores(predictions_test[:, 0])

    # save model
    model.save_weights(out_dir + 'model.h5')

    # Now test on each view and dump txt for scoring
    for fn in glob.glob('C:/experiments/temp_feature_vector/test/*.txt'):
        folder_name = 'test'
        test_detections_one_view = preprocessing.all_detections(folder_name)
        view_name = fn.split('\\')[-1].split('_feat.txt')[0]
        with open(fn) as f:
            lines = f.readlines()
        test_detections_one_view.update(lines, view_name)
        # get the feature values
        test_x = lstm_processing.form_lstm_features_v3(test_detections_one_view)
        test_x = lstm_processing.repeat_and_pad(test_x, maxlen)
        test_x = sequence.pad_sequences(test_x, maxlen=maxlen, dtype='float')
        test_x = np.array(test_x)
        # predict
        predictions_test_one_view = model.predict(test_x)
        # record the predictions
        test_detections_one_view.record_group_lstm_scores(
            predictions_test_one_view[:, 0])
        lstm_processing.save_to_txt_no_pick_one_mark(test_detections_one_view, out_dir)
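
# A minimal sketch (hypothetical data) of the padding step used in run()
# above: variable-length per-group sequences of 4 detection features are
# turned into the fixed (n_groups, maxlen, 4) tensor that the Bidirectional
# LSTM with input_shape=(maxlen, 4) expects. repeat_and_pad is the project's
# own helper and is not reproduced here.
def demo_pad_sequences():
    demo_maxlen = 6
    groups = [np.random.rand(2, 4), np.random.rand(5, 4)]  # two groups of detections
    batch = sequence.pad_sequences(groups, maxlen=demo_maxlen, dtype='float')
    print(batch.shape)  # (2, 6, 4): shorter groups are zero-padded at the front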
# run() for the t-SNE + MLP experiment on the 1536-dim deep-learning features
def run():
    # get training set
    os.chdir(train_dir)
    folder_name = train_dir.split('/')[-2]
    # initiate train detections
    train_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt')[0:300]:
        with open(file) as f:
            lines = f.readlines()
        view_name = file.split('_feat.txt')[0]
        # adding detections from one view to this instance
        train_detections.update(lines, view_name)

    # get testing set
    os.chdir(test_dir)
    folder_name = test_dir.split('/')[-2]
    # initiate test detections
    test_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt')[0:1]:
        with open(file) as f:
            lines = f.readlines()
        view_name = file.split('_feat.txt')[0]
        # adding detections from one view to this instance
        test_detections.update(lines, view_name)

    # get train_x and train_y
    train_y = np.array(train_detections.get_all_tp_fp())
    train_x = np.array(train_detections.get_all_dl_feat_1536())
    del train_detections
    tsne = TSNE(n_components=n_components, init='random',
                random_state=0, perplexity=perplexity)
    train_x = tsne.fit_transform(train_x)

    # get test_x and test_y
    test_y = np.array(test_detections.get_all_tp_fp())
    test_x = np.array(test_detections.get_all_dl_feat_1536())
    del test_detections
    # NOTE: TSNE has no transform(); fit_transform() re-embeds the test set
    # independently, so train and test do not share an embedding space
    test_x = tsne.fit_transform(test_x)

    # scale the features
    scaler = StandardScaler()
    scaler.fit(train_x)
    train_x = scaler.transform(train_x)
    test_x = scaler.transform(test_x)

    # start the training
    mlp = MLPClassifier(hidden_layer_sizes=(30, 30, 30))
    mlp.fit(train_x, train_y)

    # predictions on train set
    predictions_train = mlp.predict(train_x)
    prob_train = mlp.predict_proba(train_x)[:, 1]
    print('This is the training set result: \n')
    print(confusion_matrix(train_y, predictions_train))
    print(classification_report(train_y, predictions_train))
    # compute AUC
    fpr, tpr, thresholds = roc_curve(train_y, prob_train, pos_label=1)
    az_train = auc(fpr, tpr)
    print('AUC for train set is ' + str(az_train))

    # predictions on test set
    predictions_test = mlp.predict(test_x)
    prob_test = mlp.predict_proba(test_x)[:, 1]
    print('This is the testing set result: \n')
    print(confusion_matrix(test_y, predictions_test))
    print(classification_report(test_y, predictions_test))
    # compute AUC
    fpr, tpr, thresholds = roc_curve(test_y, prob_test, pos_label=1)
    az_test = auc(fpr, tpr)
    print('AUC for testing set is ' + str(az_test))

    # record the predictions into the class
    # train_detections.record_scores(prob_train)
    # test_detections.record_scores(prob_test)

    # Now test on each view and dump txt for scoring
    for fn in glob.glob('C:/experiments/temp_feature_vector/test/*.txt'):
        folder_name = 'test'
        test_detections_one_view = preprocessing.all_detections(folder_name)
        view_name = fn.split('\\')[-1].split('_feat.txt')[0]
        with open(fn) as f:
            lines = f.readlines()
        test_detections_one_view.update(lines, view_name)
        # get the feature values
        test_x = np.array(test_detections_one_view.get_all_dl_feat_1536())
        test_x = tsne.fit_transform(test_x)  # again a fresh, independent embedding
        test_x = scaler.transform(test_x)
        predictions_test_one_view = mlp.predict_proba(test_x)[:, 1]
        # record the predictions
        test_detections_one_view.record_scores(predictions_test_one_view)
        postprocessing.pick_one_mark_one_group_use_prediction(
            test_detections_one_view)
        postprocessing.save_to_txt(test_detections_one_view, out_dir)
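
# Caveat on the t-SNE usage in run() above: sklearn's TSNE has no transform()
# method, so every fit_transform() call embeds its input into a fresh,
# unrelated 2-D space, and the MLP ends up scoring points from different
# coordinate systems. A sketch of a projection that can be fitted once and
# reused (PCA is a stand-in here, not the original method):
def demo_shared_projection(train_x_raw, test_x_raw):
    from sklearn.decomposition import PCA

    pca = PCA(n_components=2, random_state=0)
    train_emb = pca.fit_transform(train_x_raw)  # fit on the training set only
    test_emb = pca.transform(test_x_raw)        # reuse the same projection
    return train_emb, test_emb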
# run() for the t-SNE perplexity/iteration sweep (visualization only)
def run():
    # get training set
    os.chdir(train_dir)
    folder_name = train_dir.split('/')[-2]
    # initiate train detections
    train_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt')[0:10]:
        with open(file) as f:
            lines = f.readlines()
        view_name = file.split('_feat.txt')[0]
        # adding detections from one view to this instance
        train_detections.update(lines, view_name)

    # get testing set (loaded but unused in this sweep)
    os.chdir(test_dir)
    folder_name = test_dir.split('/')[-2]
    # initiate test detections
    test_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.txt')[0:1]:
        with open(file) as f:
            lines = f.readlines()
        view_name = file.split('_feat.txt')[0]
        # adding detections from one view to this instance
        test_detections.update(lines, view_name)

    # get X and y from the training set
    y = np.array(train_detections.get_all_tp_fp())
    X = np.array(train_detections.get_all_dl_feat_1536())
    del train_detections

    n_components = 2
    (fig, subplots) = plt.subplots(3, 5, figsize=(15, 8))
    perplexities = [5, 30, 50, 100, 200]
    n_iters = [500, 1000, 5000]

    red = y == 0
    green = y == 1

    # scatter of the first two raw feature dimensions
    # (NOTE: this panel is overwritten by the first t-SNE plot in the loop below)
    ax = subplots[0][0]
    ax.scatter(X[red, 0], X[red, 1], c="r")
    ax.scatter(X[green, 0], X[green, 1], c="g")
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')

    # sweep iteration count (rows) against perplexity (columns)
    for i, n_iter in enumerate(n_iters):
        for j, perplexity in enumerate(perplexities):
            ax = subplots[i][j]
            t0 = time()
            tsne = manifold.TSNE(n_components=n_components, init='random',
                                 random_state=0, perplexity=perplexity,
                                 n_iter=n_iter)
            Y = tsne.fit_transform(X)
            t1 = time()
            print("perplexity=%d in %.2g sec" % (perplexity, t1 - t0))
            ax.set_title("Perplexity=%d, iteration=%d" % (perplexity, n_iter))
            ax.scatter(Y[red, 0], Y[red, 1], c="r")
            ax.scatter(Y[green, 0], Y[green, 1], c="g")
            ax.xaxis.set_major_formatter(NullFormatter())
            ax.yaxis.set_major_formatter(NullFormatter())
            ax.axis('tight')

    plt.show()
    print(' ')
# run() for the pick-one-mark MLP on the (od, dl, logit, area) features
def run():
    # get training set
    os.chdir(train_dir)
    folder_name = train_dir.split('/')[-2]
    # initiate train detections
    train_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.feat'):
        with open(file) as f:
            lines = f.readlines()
        view_name = file.split('.feat')[0]
        # adding detections from one view to this instance
        train_detections.update(lines, view_name)  # , update_tp_group_only=True

    # get testing set
    os.chdir(test_dir)
    folder_name = test_dir.split('/')[-2]
    # initiate test detections
    test_detections = preprocessing.all_detections(folder_name)
    for file in glob.glob('*.feat'):
        with open(file) as f:
            lines = f.readlines()
        view_name = file.split('.feat')[0]
        # adding detections from one view to this instance
        test_detections.update(lines, view_name)

    # get train_x and train_y
    train_y = np.array(train_detections.get_all_tp_fp())
    train_x = np.array([train_detections.get_all_od(),
                        train_detections.get_all_dl(),
                        train_detections.get_all_logit(),
                        train_detections.get_all_area()]).transpose()

    # get test_x and test_y
    test_y = np.array(test_detections.get_all_tp_fp())
    test_x = np.array([test_detections.get_all_od(),
                       test_detections.get_all_dl(),
                       test_detections.get_all_logit(),
                       test_detections.get_all_area()]).transpose()

    # scale the features
    scaler = StandardScaler()
    scaler.fit(train_x)
    train_x = scaler.transform(train_x)
    test_x = scaler.transform(test_x)

    # start the training
    mlp = MLPClassifier(hidden_layer_sizes=(8, 4))
    mlp.fit(train_x, train_y)

    # save the model
    joblib.dump(mlp, out_dir + 'pick_one_mark_nn.pkl')
    joblib.dump(scaler, out_dir + 'pick_one_mark_scaler.pkl')

    # predictions on train set
    predictions_train = mlp.predict(train_x)
    prob_train = mlp.predict_proba(train_x)[:, 1]
    print('This is the training set result: \n')
    print(confusion_matrix(train_y, predictions_train))
    print(classification_report(train_y, predictions_train))
    # compute AUC
    fpr, tpr, thresholds = roc_curve(train_y, prob_train, pos_label=1)
    az_train = auc(fpr, tpr)
    print('AUC for train set is ' + str(az_train))

    # predictions on test set
    predictions_test = mlp.predict(test_x)
    prob_test = mlp.predict_proba(test_x)[:, 1]
    print('This is the testing set result: \n')
    print(confusion_matrix(test_y, predictions_test))
    print(classification_report(test_y, predictions_test))
    # compute AUC
    fpr, tpr, thresholds = roc_curve(test_y, prob_test, pos_label=1)
    az_test = auc(fpr, tpr)
    print('AUC for testing set is ' + str(az_test))

    # record the predictions into the class
    train_detections.record_scores(prob_train)
    test_detections.record_scores(prob_test)

    # Now test on each view and dump txt for scoring
    for fn in glob.glob(test_dir + '*.feat'):
        folder_name = 'test'
        view_name = fn.split('\\')[-1].split('.feat')[0]
        test_detections_one_view = preprocessing.all_detections_one_view(
            folder_name, view_name)
        with open(fn) as f:
            lines = f.readlines()
        test_detections_one_view.update(lines, view_name)
        # get the feature values; the area column was missing here in the
        # original, which would make scaler.transform() fail on 3-column input
        test_x = np.array([test_detections_one_view.get_all_od(),
                           test_detections_one_view.get_all_dl(),
                           test_detections_one_view.get_all_logit(),
                           test_detections_one_view.get_all_area()]).transpose()
        test_x = scaler.transform(test_x)
        predictions_test_one_view = mlp.predict_proba(test_x)[:, 1]
        # record the predictions
        test_detections_one_view.record_scores(predictions_test_one_view)
        postprocessing.pick_one_mark_one_group_use_prediction(test_detections_one_view)
        postprocessing.save_to_txt(test_detections_one_view, out_dir, view_name)
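
# A sketch of reloading the artifacts saved by run() above for standalone
# scoring. `features` is a hypothetical (n_detections, 4) array whose column
# order (od, dl, logit, area) must match training.
def demo_load_and_score(features):
    mlp = joblib.load(out_dir + 'pick_one_mark_nn.pkl')
    scaler = joblib.load(out_dir + 'pick_one_mark_scaler.pkl')
    return mlp.predict_proba(scaler.transform(features))[:, 1]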