def plot_features(subject, data_path, model_path, test_labels, dataset='test'):
    """Show a 2-D t-SNE scatter plot of the network's extracted features.

    The pickled state stored at ``model_path + '/' + subject + '.pickle'`` is
    used to rebuild a ``ConvNet`` and to recover the scalers. The selected
    dataset is scaled with those scalers, pushed through
    ``cnn.feature_extractor.output`` and the result is embedded in two
    dimensions with ``TSNE`` and plotted, coloured by label.

    Args:
        subject: Subject name; used as the model file stem and passed to the
            data loaders.
        data_path: Passed through to ``load_test_data`` / ``load_train_data``.
        model_path: Directory containing ``<subject>.pickle``.
        test_labels: Mapping whose ``'preictal'`` entry supplies the labels
            when ``dataset == 'test'``; not read for ``'train'``.
        dataset: Either ``'test'`` or ``'train'``.

    Raises:
        ValueError: If ``dataset`` is neither ``'test'`` nor ``'train'``.
    """
    # Validate first so a typo fails immediately instead of after the model
    # has been unpickled and rebuilt.
    if dataset not in ('test', 'train'):
        raise ValueError(
            "dataset must be 'test' or 'train', got %r" % (dataset,))

    # NOTE(review): unpickling can execute arbitrary code -- only load model
    # files that come from a trusted source.
    with open(model_path + '/' + subject + '.pickle', 'rb') as f:
        state_dict = cPickle.load(f)
    cnn = ConvNet(state_dict['params'])
    cnn.set_weights(state_dict['weights'])
    scalers = state_dict['scalers']

    if dataset == 'test':
        d = load_test_data(data_path, subject)
        x = d['x']
        y = test_labels['preictal']
    else:
        d = load_train_data(data_path, subject)
        x, y = d['x'], d['y']

    # The 'scale_time' flag stored with the model selects which scaling
    # routine is applied; the stored scalers are reused in both cases.
    if state_dict['params']['scale_time']:
        x, _ = scale_across_time(x, x_test=None, scalers=scalers)
    else:
        x, _ = scale_across_features(x, x_test=None, scalers=scalers)

    # Feed the whole dataset as a single batch.
    cnn.batch_size.set_value(x.shape[0])
    get_features = theano.function(
        [cnn.x, Param(cnn.training_mode, default=0)],
        cnn.feature_extractor.output,
        allow_input_downcast=True)
    features = get_features(x)

    model = TSNE(n_components=2, random_state=0)
    z = model.fit_transform(np.float64(features))
    plt.scatter(z[:, 0], z[:, 1], s=60, c=y)
    plt.show()
def plot_train_probs(subject, data_path, model_path):
    """Plot the model's predicted probabilities on the training data.

    Rebuilds the ``ConvNet`` from ``model_path + '/' + subject + '.pickle'``,
    scales the training data with the stored scalers and obtains
    ``cnn.get_test_proba`` for it. The ROC threshold(s) whose (fpr, tpr)
    point lies closest to (0, 1) are printed, then the probabilities are
    shown as a scatter plot with seeded horizontal jitter, coloured by label.

    Args:
        subject: Subject name; model file stem, loader argument, plot title.
        data_path: Passed through to ``load_train_data``.
        model_path: Directory containing ``<subject>.pickle``.
    """
    model_file = model_path + '/' + subject + '.pickle'
    with open(model_file, 'rb') as handle:
        state_dict = pickle.load(handle)

    params = state_dict['params']
    cnn = ConvNet(params)
    cnn.set_weights(state_dict['weights'])
    scalers = state_dict['scalers']

    train = load_train_data(data_path, subject)
    x, y = train['x'], train['y']

    # The stored 'scale_time' flag picks the scaling routine.
    if params['scale_time']:
        scaled = scale_across_time(x, x_test=None, scalers=scalers)
    else:
        scaled = scale_across_features(x, x_test=None, scalers=scalers)
    x, _ = scaled

    # Evaluate the whole training set as a single batch.
    cnn.batch_size.set_value(x.shape[0])
    probs = cnn.get_test_proba(x)

    fpr, tpr, threshold = roc_curve(y, probs)
    # Euclidean distance of every ROC point from the corner fpr=0, tpr=1.
    corner_dist = np.sqrt((1 - tpr)**2 + fpr**2)
    closest = np.where(corner_dist == np.min(corner_dist))[0]
    opt_threshold = threshold[closest]
    print(opt_threshold)

    # All points sit at x=0; seeded Gaussian jitter spreads them out.
    rng = np.random.RandomState(42)
    n_points = len(y)
    x_coords = np.zeros(n_points, dtype='float64')
    x_coords += rng.normal(0.0, 0.08, size=n_points)

    plt.scatter(x_coords, probs, c=y, s=60)
    plt.title(subject)
    plt.show()
def predict(subject, data_path, model_path, submission_path):
    """Write one prediction CSV per saved model that matches ``subject``.

    Every file in ``model_path`` whose name contains ``subject`` and ends
    with ``'.pickle'`` is treated as a saved model. For each one the test
    data is loaded and scaled with the model's stored scalers, the
    ``ConvNet`` is rebuilt, and ``cnn.get_test_proba`` is written next to the
    clip ids to ``submission_path + '/' + <model file stem> + '.csv'`` with
    the columns ``clip`` and ``preictal``.

    Args:
        subject: Subject name; substring used to select model files and
            argument to ``load_test_data``.
        data_path: Passed through to ``load_test_data``.
        model_path: Directory that is scanned for ``*.pickle`` model files.
        submission_path: Directory the CSV files are written to.
    """
    patient_filenames = [
        filename for filename in os.listdir(model_path)
        if subject in filename and filename.endswith('.pickle')
    ]
    for filename in patient_filenames:
        # Call form prints the same text on Python 2 and Python 3.
        print(filename)

        # Loaded inside the loop on purpose: ``x`` is rebound to its scaled
        # version below, so each model starts from freshly loaded data.
        d = load_test_data(data_path, subject)
        x, clip_ids = d['x'], d['id']

        # NOTE(review): unpickling can execute arbitrary code -- only load
        # model files that come from a trusted source.
        with open(model_path + '/' + filename, 'rb') as f:
            state_dict = cPickle.load(f)
        scalers = state_dict['scalers']

        if state_dict['params']['scale_time']:
            x, _ = scale_across_time(x, x_test=None, scalers=scalers)
        else:
            x, _ = scale_across_features(x, x_test=None, scalers=scalers)

        cnn = ConvNet(state_dict['params'])
        cnn.set_weights(state_dict['weights'])
        test_proba = cnn.get_test_proba(x)

        # Materialise the pairs: on Python 3 ``zip`` is a one-shot iterator.
        ans = list(zip(clip_ids, test_proba))
        df = DataFrame(data=ans, columns=['clip', 'preictal'])

        # Drop only the final extension; same result as joining all but the
        # last dot-separated piece.
        csv_name = filename.rsplit('.', 1)[0]
        df.to_csv(submission_path + '/' + csv_name + '.csv',
                  index=False, header=True)
def predict(subject, data_path, model_path, submission_path):
    """Produce a submission CSV for each stored model of ``subject``.

    Scans ``model_path`` for files whose name contains ``subject`` and ends
    in ``'.pickle'``. For each such model the test data is loaded, scaled
    with the scalers saved in the model state, and scored with the rebuilt
    ``ConvNet``. The (clip id, probability) pairs are written to
    ``submission_path + '/' + <model file stem> + '.csv'`` under the columns
    ``clip`` and ``preictal``.

    Args:
        subject: Subject name; selects model files and is handed to
            ``load_test_data``.
        data_path: Passed through to ``load_test_data``.
        model_path: Directory searched for pickled models.
        submission_path: Output directory for the CSV files.
    """
    for filename in os.listdir(model_path):
        # Skip anything that is not a pickled model of this subject.
        if subject not in filename or not filename.endswith('.pickle'):
            continue
        print(filename)

        test_data = load_test_data(data_path, subject)
        x = test_data['x']
        clip_ids = test_data['id']

        with open(model_path + '/' + filename, 'rb') as handle:
            state_dict = pickle.load(handle)
        params = state_dict['params']
        scalers = state_dict['scalers']

        # The stored 'scale_time' flag picks the scaling routine.
        if params['scale_time']:
            scaled = scale_across_time(x, x_test=None, scalers=scalers)
        else:
            scaled = scale_across_features(x, x_test=None, scalers=scalers)
        x, _ = scaled

        cnn = ConvNet(params)
        cnn.set_weights(state_dict['weights'])
        test_proba = cnn.get_test_proba(x)

        rows = list(zip(clip_ids, test_proba))
        df = DataFrame(data=rows, columns=['clip', 'preictal'])

        # File name without its last extension (unchanged if it has no dot).
        stem, dot, _ = filename.rpartition('.')
        csv_name = stem if dot else filename
        out_file = submission_path + '/' + csv_name + '.csv'
        df.to_csv(out_file, index=False, header=True)
def plot_train_probs(subject, data_path, model_path):
    """Scatter-plot predicted probabilities for the subject's training data.

    Loads the pickled model state from
    ``model_path + "/" + subject + ".pickle"``, rebuilds the ``ConvNet``,
    scales the training data with the stored scalers and evaluates
    ``cnn.get_test_proba`` on it. The ROC threshold(s) whose (fpr, tpr)
    point is nearest to (0, 1) are printed, and the probabilities are
    plotted against seeded horizontal jitter, coloured by label.

    Args:
        subject: Subject name; model file stem, loader argument, plot title.
        data_path: Passed through to ``load_train_data``.
        model_path: Directory containing ``<subject>.pickle``.
    """
    # NOTE(review): unpickling can execute arbitrary code -- only load model
    # files that come from a trusted source.
    with open(model_path + "/" + subject + ".pickle", "rb") as f:
        state_dict = cPickle.load(f)
    cnn = ConvNet(state_dict["params"])
    cnn.set_weights(state_dict["weights"])
    scalers = state_dict["scalers"]

    d = load_train_data(data_path, subject)
    x, y = d["x"], d["y"]

    # The stored "scale_time" flag picks the scaling routine.
    if state_dict["params"]["scale_time"]:
        x, _ = scale_across_time(x, x_test=None, scalers=scalers)
    else:
        x, _ = scale_across_features(x, x_test=None, scalers=scalers)

    # Evaluate the whole training set as a single batch.
    cnn.batch_size.set_value(x.shape[0])
    probs = cnn.get_test_proba(x)

    fpr, tpr, threshold = roc_curve(y, probs)
    # Distance of each ROC point from the corner fpr=0, tpr=1.
    c = np.sqrt((1 - tpr) ** 2 + fpr ** 2)
    opt_threshold = threshold[np.where(c == np.min(c))[0]]
    # Call form works on both Python 2 and Python 3; the print statement
    # is a syntax error on Python 3.
    print(opt_threshold)

    # Every point sits at x=0; seeded Gaussian jitter keeps them apart.
    x_coords = np.zeros(len(y), dtype="float64")
    rng = np.random.RandomState(42)
    x_coords += rng.normal(0.0, 0.08, size=len(x_coords))

    plt.scatter(x_coords, probs, c=y, s=60)
    plt.title(subject)
    plt.show()