# # best result cosine: threshold = 0.92, F1 = 0.708 (P = 0.709, R = 0.706) # # best result manhattan: threshold = 0.33, F1 = 0.537 (P = 0.474, R = 0.620) # y_pred = y_dists # # # best result: window = 4, threshold = 0.42, F1 = 0.419 (P = 0.339, R = 0.550) # # Better result for very high windows, window = 1000, threshold = 0.43, F1 = 0.493 (P = 0.422, R = 0.591) # # y_pred = slide_window(y_dists) # # # best result: epsilon = 2, threshold = 0.46, F1 = 0.515 (P = 0.468, R = 0.573) # # y_pred = neighbourhood_difference(y_dists) # # plot_thresholds(y_true, y_pred, False, 'binary') # Measurements on test data if __name__ == '__main__': data = config.get_seg_data('test') print("Loading the data") y = np.load(data['y']) y_true = np.load(data['y_true_lm']) T = 0.5 y_pred = compute_distance(y, euclidean_distances) > T P, R, F, S = prfs(y_true, y_pred, average='binary') print('euclidean distance: threshold = %.2f' % T) print_measurements(y_true, y_pred) y_dists = compute_distance(y, cosine_distances) T = 0.92 y_pred = y_dists > T
from segmentation.lstm.lstm_utils import split_to_time_steps
from utils import first_option, plot_thresholds

if __name__ == '__main__':
    # Ask which trained LSTM to evaluate. The `cosine` flag set here selects
    # the matching input preparation further down.
    # (presumably first_option() is truthy when the first key, 'c', is
    # chosen -- confirm in utils)
    if first_option(
            'Do you want to use the model trained on cosine distances [c] or on raw SVM predictions [r]?',
            'c', 'r'):
        cosine = True
        model = load_model(config.lstm_model_1)
    else:
        cosine = False
        model = load_model(config.lstm_model_577)

    # Second entry of the first layer's stored batch input shape.
    # NOTE(review): time_steps is not passed to split_to_time_steps() in the
    # visible code -- verify the helper's own sequence length agrees with it.
    time_steps = model.get_config()[0]['config']['batch_input_shape'][1]

    held_out = config.get_seg_data('held_out')
    # The array stored under 'y' is used as the model input here, and
    # 'y_true_lm' as the reference labels.
    X_held = np.load(held_out['y'])
    y_held = np.load(held_out['y_true_lm'])

    if cosine:
        print("Computing the distances")
        X_held = compute_distance(X_held, cosine_distances)
    else:
        # Prepend a single 0 to the labels.
        # NOTE(review): presumably this keeps labels aligned with the raw
        # inputs, which are not reduced by a distance computation -- confirm.
        y_held = np.append(0, y_held)

    # Cut inputs and labels into sequences with the same helper, then run
    # the model on the input sequences.
    X = split_to_time_steps(X_held)
    y_true = split_to_time_steps(y_held)

    y_pred = model.predict(X)
def get_next_data(data_names):
    """Lazily yield ``config.get_seg_data(name)`` for each given name.

    Args:
        data_names: Iterable of names accepted by ``config.get_seg_data``.

    Yields:
        The value returned by ``config.get_seg_data`` for each name, in
        input order. Each lookup runs only when the consumer asks for the
        next item.
    """
    yield from (config.get_seg_data(split_name) for split_name in data_names)
import numpy as np
from sklearn.svm import LinearSVC, SVC

import config
from segmentation.distance_based_methods import compute_distance
from utils import save_pickle, first_option, load_sparse_csr


def _main():
    """Train an SVM on distances computed from the 'train' data and pickle it.

    The user chooses between a linear and an RBF kernel; the choice also
    selects the path the fitted classifier is saved to.
    """
    data = config.get_seg_data('train')

    # Kernel choice decides both the estimator and its output location.
    use_linear = first_option('Do you want to use linear [l] or RBF [r] kernel?', 'l', 'r')
    if use_linear:
        path, classifier = config.classifier_linear, LinearSVC(random_state=0)
    else:
        path, classifier = config.classifier_rbf, SVC(random_state=0, kernel='rbf')

    labels = np.load(data['y_true_lm'])

    print("Loading x")
    features = load_sparse_csr(data['x'])

    # compute_distance() is called without an explicit metric; the log line
    # below says cosine.
    print("Computing cosine distance")
    distances = compute_distance(features)

    print("Classifier training")
    classifier.fit(distances, labels)

    print("Saving th classifier to: " + path)
    save_pickle(path, classifier)


if __name__ == '__main__':
    _main()
# Number of vectors in one sequence, input data structure [samples, time_steps, features]
time_steps = 200

# Resume from the saved model only when its file exists AND the user agrees;
# otherwise start from a freshly built network.
if Path(config.lstm_model_1).is_file() and first_option(
        'Do you want to continue training the saved model?', 'y', 'n'):
    # This branch loads the existing model from disk, so the message says
    # "saved" (it previously said "new", contradicting the prompt above).
    print("Loading saved model")
    model = load_model(config.lstm_model_1)
else:
    print("Building new model")
    model = build_model(time_steps, 1)

# Create dir for histories
create_dir(config.hist_dir)

print("Loading the data")
train = config.get_seg_data('train')
held_out = config.get_seg_data('held_out')

X_train_or = np.load(train['y'])
y_train_or = np.load(train['y_true_lm'])
X_held_out = np.load(held_out['y'])
y_held_out = np.load(held_out['y_true_lm'])

# NOTE(review): the message says "test data", but the array processed here
# comes from the 'held_out' split loaded above.
print("Computing the distances on test data")
X_held_out = compute_distance(X_held_out, cosine_distances)

# Split the 2D matrix to 3D matrix of dimensions [samples, time_steps, features]
X_held_out = split_to_time_steps(X_held_out)
# Split the 1D vector to 2D matrix of dimensions: [samples, time_steps]