def fit(self, X, y):
    """Split (X, y) by self.trainTestRatio, then train for self.nb_epoch epochs.

    Returns self so calls can be chained.
    """
    # NOTE(review): hard-coded epoch count overrides any previously configured value
    self.nb_epoch = 160
    # prepare an optimizer over the classifier parameters only
    self.optimizer = optim.Adam(self.model.conv_classifier.parameters(),
                                lr=self.lr)
    # number of training trials according to the configured split ratio
    n_train = int(np.floor(self.trainTestRatio * X.shape[0]))
    # split the dataset into train/test portions
    train_set = SignalAndTarget(X[:n_train], y=y[:n_train])
    test_set = SignalAndTarget(X[n_train:], y=y[n_train:])
    # random generator (unseeded)
    self.rng = RandomState(None)
    # per-epoch result trackers (two columns: train / test)
    self.loss_rec = np.zeros((self.nb_epoch, 2))
    self.accuracy_rec = np.zeros((self.nb_epoch, 2))
    # run all epochs
    for i_epoch in range(self.nb_epoch):
        self._batchTrain(i_epoch, train_set)
        self._evalTraining(i_epoch, train_set, test_set)
    return self
def _cwt_and_raw_all_channels(dataset, wavename, totalscal, sampling_rate):
    """CWT every channel of every trial, normalize the CWT maps, append the
    raw trial, and return a new float32 SignalAndTarget."""
    trial_data = []
    for t in dataset.X:
        channel_cwt = []
        for j in range(dataset.X.shape[1]):
            cwtmatr, frequencies = Continuous_Wavelt_Transform(
                np.squeeze(t[j, :]), wavename, totalscal, sampling_rate)
            # keep only the first 22 scales of the scalogram
            channel_cwt.append(cwtmatr[0:22, :].astype(np.float32))
        # normalize the wavelet-transformed data
        channel_cwt = nomal(np.array(channel_cwt)).tolist()
        # append the raw trial alongside the CWT features
        channel_cwt.append(t)
        trial_data.append(np.array(channel_cwt))
    signal = np.array(trial_data).astype(np.float32)
    return SignalAndTarget(signal, dataset.y)


def data_all_chan_cwtandraw(train_set, test_set):
    """
    :param train_set:
    :param test_set:
    :return: all-channel wavelet transform (plus raw data) for both sets
    """
    wavename = 'morl'
    totalscal = 64
    sampling_rate = 250
    # train and test were processed by duplicated code; share one helper
    train_cwt_dataset = _cwt_and_raw_all_channels(
        train_set, wavename, totalscal, sampling_rate)
    test_cwt_dataset = _cwt_and_raw_all_channels(
        test_set, wavename, totalscal, sampling_rate)
    return train_cwt_dataset, test_cwt_dataset
def fit(self, X, y):
    """Split (X, y) 7:1 into train/test, build a ShallowFBCSPNet, and train it.

    Returns self so calls can be chained.
    """
    # number of training trials: 7/8 of the data
    n_train = int(np.floor(7 / 8 * X.shape[0]))
    # split the dataset
    self.train_set = SignalAndTarget(X[:n_train], y=y[:n_train])
    self.test_set = SignalAndTarget(X[n_train:], y=y[n_train:])
    # number of classes and input channels
    n_classes = np.unique(y).size
    in_chans = self.train_set.X.shape[1]
    # final_conv_length='auto' ensures we only get a single output in the
    # time dimension
    self.model = ShallowFBCSPNet(
        in_chans=in_chans,
        n_classes=n_classes,
        input_time_length=self.train_set.X.shape[2],
        n_filters_time=self.n_filters_time,
        filter_time_length=self.filter_time_length,
        n_filters_spat=self.n_filters_spat,
        pool_time_length=self.pool_time_length,
        pool_time_stride=self.pool_time_stride,
        final_conv_length='auto',
    ).create_network()
    # move the model to GPU if requested
    if self.cuda:
        self.model.cuda()
    # setup optimizer
    self.optimizer = optim.Adam(self.model.parameters())
    # per-epoch result trackers (two columns: train / test)
    self.loss_rec = np.zeros((self.nb_epoch, 2))
    self.accuracy_rec = np.zeros((self.nb_epoch, 2))
    # run all epochs
    for i_epoch in range(self.nb_epoch):
        self._batchTrain(i_epoch, self.train_set)
        self._evalTraining(i_epoch, self.train_set, self.test_set)
    return self
def evaluate(self, X, y):
    """
    Evaluate, i.e., compute metrics on given inputs and targets.

    Parameters
    ----------
    X: ndarray
        Input data.
    y: 1darray
        Targets.

    Returns
    -------
    result: dict
        Dictionary with result metrics.
    """
    X = _ensure_float32(X)
    # a zero-epoch experiment: we only monitor, never train
    stop_criterion = MaxEpochs(0)
    train_set = SignalAndTarget(X, y)
    model_constraint = None
    valid_set = None
    test_set = None
    loss_function = self.loss
    if self.cropped:
        # average crop predictions over time before applying the loss
        loss_function = lambda outputs, targets: self.loss(
            th.mean(outputs, dim=2), targets
        )
    # reset runtime monitor if exists...
    for monitor in self.monitors:
        if hasattr(monitor, "last_call_time"):
            monitor.last_call_time = time.time()
    exp = Experiment(
        self.network,
        train_set,
        valid_set,
        test_set,
        iterator=self.iterator,
        loss_function=loss_function,
        optimizer=self.optimizer,
        model_constraint=model_constraint,
        monitors=self.monitors,
        stop_criterion=stop_criterion,
        remember_best_column=None,
        run_after_early_stop=False,
        cuda=self.is_cuda,
        log_0_epoch=True,
        do_early_stop=False,
    )
    exp.monitor_epoch({"train": train_set})
    # strip the "train_" prefix from monitored metric names
    return {
        key.replace("train_", ""): val
        for key, val in dict(exp.epochs_df.iloc[0]).items()
    }
def concatenate_channels(datasets):
    """Stack the channel axis of several datasets sharing identical targets."""
    merged_X = np.concatenate([d.X for d in datasets], axis=1)
    merged_y = datasets[0].y
    # every dataset must carry the same targets as the first
    for d in datasets:
        assert np.array_equal(d.y, merged_y)
    return SignalAndTarget(merged_X, merged_y)
def select_examples(dataset, indices):
    """
    Select examples from dataset.

    Parameters
    ----------
    dataset: :class:`.SignalAndTarget`
    indices: list of int, 1d-array of int
        Indices to select

    Returns
    -------
    reduced_set: :class:`.SignalAndTarget`
        Dataset with only examples selected.
    """
    # probably not necessary
    indices = np.array(indices)
    if hasattr(dataset.X, 'ndim'):
        # numpy array: fancy indexing in one shot
        picked_X = np.array(dataset.X)[indices]
    else:
        # plain list: pick element by element
        picked_X = [dataset.X[i] for i in indices]
    picked_y = np.asarray(dataset.y)[indices]
    return SignalAndTarget(picked_X, picked_y)
def test_crops_data_loader_regression():
    """Test CropsDataLoader against CropsFromTrialsIterator batch-for-batch."""
    # Pytorch expects float32 for input and int64 for labels.
    rng = np.random.RandomState(42)
    X = rng.randn(60, 64, 343).astype(np.float32)
    y = rng.randint(0, 2, size=len(X))
    train_set = SignalAndTarget(X, y=y)
    input_time_length = X.shape[2]
    n_preds_per_input = X.shape[2] // 4
    n_times_input = input_time_length  # size of signal passed to nn
    batch_size = 32
    iterator = CropsFromTrialsIterator(batch_size=batch_size,
                                       input_time_length=n_times_input,
                                       n_preds_per_input=n_preds_per_input)
    ds = EEGDataSet(train_set.X, train_set.y)
    loader = CropsDataLoader(ds, batch_size=batch_size,
                             input_time_length=input_time_length,
                             n_preds_per_input=n_preds_per_input,
                             num_workers=0)
    # both pipelines must yield identical batches in identical order
    paired = zip(iterator.get_batches(train_set, shuffle=False), loader)
    for (X_it, y_it), (X_dl, y_dl) in paired:
        np.testing.assert_array_equal(y_it, y_dl)
        np.testing.assert_array_equal(X_it, X_dl)
def predict(self, X, threshold_for_binary_case=None):
    """
    Predict the labels for given input data.

    Parameters
    ----------
    X: ndarray
        Input data.
    threshold_for_binary_case: float, optional
        In case of a model with single output, the threshold for assigning,
        label 0 or 1, e.g. 0.5.

    Returns
    -------
    pred_labels: 1darray
        Predicted labels per trial.
    """
    all_preds = []
    # targets are unused at prediction time; X doubles as a dummy y
    for b_X, _ in self.iterator.get_batches(SignalAndTarget(X, X), False):
        all_preds.append(var_to_np(self.network(np_to_var(b_X))))
    if self.cropped:
        return compute_trial_labels_from_crop_preds(
            all_preds, self.iterator.input_time_length, X)
    return compute_pred_labels_from_trial_preds(
        all_preds, threshold_for_binary_case)
def _c3c4_cwt_and_raw(dataset, wavename, totalscal, sampling_rate):
    """CWT channels 7 and 9 (C3/C4) of every trial and append the raw trial.

    NOTE: unlike the all-channel variant, no normalization is applied here,
    matching the original (commented-out) behavior.
    """
    trial_data = []
    for t in dataset.X:
        channel_cwt = []
        for j in [7, 9]:
            cwtmatr, frequencies = Continuous_Wavelt_Transform(
                np.squeeze(t[j, :]), wavename, totalscal, sampling_rate)
            # keep only the first 22 scales of the scalogram
            channel_cwt.append(cwtmatr[0:22, :].astype(np.float32))
        # t: the raw trial data, kept alongside the CWT features
        channel_cwt.append(t)
        trial_data.append(np.array(channel_cwt))
    return SignalAndTarget(np.array(trial_data), dataset.y)


def data_c3c4_cwt_and_rowdata(train_set, test_set):
    """
    :param train_set:
    :param test_set:
    :return: C3/C4 channel wavelet transform and raw data
    """
    wavename = 'morl'
    totalscal = 64
    sampling_rate = 250
    # train and test were processed by duplicated code; share one helper
    train_cwt_dataset = _c3c4_cwt_and_raw(
        train_set, wavename, totalscal, sampling_rate)
    test_cwt_dataset = _c3c4_cwt_and_raw(
        test_set, wavename, totalscal, sampling_rate)
    return train_cwt_dataset, test_cwt_dataset
def create_set(inds):
    """Load the cleaned recordings at `inds` and wrap them with their labels."""
    X = []
    for i in inds:
        log.info("Load {:s}".format(cleaned_file_names[i]))
        X.append(load_data(cleaned_file_names[i], preproc_functions))
    # labels come from the enclosing scope's cleaned_labels
    y = cleaned_labels[inds].astype(np.int64)
    return SignalAndTarget(X, y)
def hgm_data_c3c4_cwt(train_set, test_set):
    """
    :param train_set:
    :param test_set:
    :return: C3/C4 (channels 4 and 5) wavelet transform plus raw data,
        for the HGM montage (125 scales, first 44 kept)
    """
    wavename = 'morl'
    totalscal = 125
    sampling_rate = 250
    # ---- training set ----
    train_trials = []
    for trial in train_set.X:
        per_channel = []
        for ch in [4, 5]:
            cwtmatr, frequencies = Continuous_Wavelt_Transform(
                np.squeeze(trial[ch, :]), wavename, totalscal, sampling_rate)
            # keep only the first 44 scales of the scalogram
            per_channel.append(cwtmatr[0:44, :].astype(np.float32))
        # append the raw trial alongside the CWT features
        per_channel.append(trial)
        train_trials.append(np.array(per_channel))
    train_cwt_dataset = SignalAndTarget(np.array(train_trials), train_set.y)
    # ---- test set (same processing) ----
    test_trials = []
    for trial in test_set.X:
        per_channel = []
        for ch in [4, 5]:
            cwtmatr, frequencies = Continuous_Wavelt_Transform(
                np.squeeze(trial[ch, :]), wavename, totalscal, sampling_rate)
            per_channel.append(cwtmatr[0:44, :].astype(np.float32))
        per_channel.append(trial)
        test_trials.append(np.array(per_channel))
    test_cwt_dataset = SignalAndTarget(np.array(test_trials), test_set.y)
    return train_cwt_dataset, test_cwt_dataset
def select_trials(dataset, inds):
    """Return a new SignalAndTarget containing only the trials at `inds`."""
    if hasattr(dataset.X, 'ndim'):
        # ndarray input: fancy indexing
        picked_X = np.array(dataset.X)[inds]
    else:
        # list input: pick element by element
        picked_X = [dataset.X[i] for i in inds]
    picked_y = np.asarray(dataset.y)[inds]
    return SignalAndTarget(picked_X, picked_y)
def create_set(X, y, inds):
    """
    Build a SignalAndTarget from the entries of X (a list) and y (an ndarray)
    selected by `inds`.
    """
    picked_X = [X[i] for i in inds]
    picked_y = y[inds]
    return SignalAndTarget(picked_X, picked_y)
def _load_h5_data(file_path):
    """
    Load an HGD h5 file and create a SignalAndTarget object.

    :param file_path: path to the .h5 file
    :return: SignalAndTarget with X and y as lists
    """
    with h5py.File(file_path, "r") as h5file:
        # sorted key order: index 0 is X, index 1 is y
        keys = sorted(list(h5file.keys()))
        # convert to list for faster indexing later on
        X = list(h5file[keys[0]][()])
        y = list(h5file[keys[1]][()])
        return SignalAndTarget(X, y)
def predict(model, data, labels, n_channels=22, input_time_length=500):
    """Run cropped-decoding inference and return per-trial labels.

    Returns
    -------
    predicted_labels : 1darray
    label_cert : float
        NOTE(review): this is a single global max over -1/mean-preds, not a
        per-trial certainty — confirm this is intended.
    """
    # # # # # # # # CREATE CROPPED ITERATOR # # # # # # # # #
    val_set = SignalAndTarget(data, y=labels)
    # determine the network's output size with a dummy forward pass
    probe = np_to_var(
        np.ones((2, n_channels, input_time_length, 1), dtype=np.float32))
    if cuda:
        probe = probe.cuda()
    out = model(probe)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    iterator = CropsFromTrialsIterator(batch_size=32,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)
    model.eval()
    # Collect all predictions
    all_preds = []
    batch_sizes = []
    for batch_X, batch_y in iterator.get_batches(val_set, shuffle=False):
        net_in = np_to_var(batch_X)
        if cuda:
            net_in = net_in.cuda()
        outputs = model(net_in)
        all_preds.append(var_to_np(outputs))
        outputs = th.mean(outputs, dim=2, keepdim=False)
        batch_sizes.append(len(batch_X))
    # Assign crop predictions back to their trials
    preds_per_trial = compute_preds_per_trial_from_crops(
        all_preds, input_time_length, val_set.X)
    # preds per trial are trials x classes x timesteps/predictions;
    # mean across timesteps for per-trial predictions
    meaned_preds_per_trial = np.array(
        [np.mean(p, axis=1) for p in preds_per_trial])
    meaned_preds_per_trial_rec = -1 / meaned_preds_per_trial
    label_cert = np.max(meaned_preds_per_trial_rec)
    predicted_labels = np.argmax(meaned_preds_per_trial, axis=1)
    accuracy = np.mean(predicted_labels == val_set.y)
    print('predicted_labels shape')
    print(predicted_labels.shape)
    return predicted_labels, label_cert
def train_outer(self, trainsetlist, testsetlist, save_model):
    """Run the outer cross-validation loop over (train, test) fold pairs.

    Returns scores, per-fold models, accumulated predictions, per-fold
    probabilities, and the outer cross-entropy per fold.
    """
    scores = []
    all_preds = []
    probabilities_list = []
    outer_cross_entropy = []
    fold_models = []
    for train_set, test_set in zip(trainsetlist, testsetlist):
        # carve a stratified 20% validation split out of the training fold
        tr_X, va_X, tr_y, va_y = train_test_split(
            train_set.X, train_set.y, test_size=0.2, shuffle=True,
            random_state=42, stratify=train_set.y)
        train_set = SignalAndTarget(tr_X, tr_y)
        val_set = SignalAndTarget(va_X, va_y)
        print(
            f"train set: {train_set.y.shape} : val_set: {val_set.y.shape} : test_set: {test_set.y.shape}"
        )
        _, _, test_accuracy, optimised_model, predictions, probabilities = self.train_model(
            train_set, val_set, test_set, save_model)
        fold_models.append(optimised_model)
        # flatten the per-batch probabilities for this test set
        probs_array = [trial for lst in probabilities for trial in lst]
        print(f"/" * 20)
        scores.append(test_accuracy)
        self.concat_y_pred(predictions)
        # outer probabilities, later used for cross-entropy
        probabilities_list.append(probs_array)
        self.model_number += 1
    for y_true, y_probs in zip(testsetlist, probabilities_list):
        outer_cross_entropy.append(cross_entropy(y_true.y, y_probs))
    return scores, fold_models, self.y_pred, probabilities_list, outer_cross_entropy
def apply_csp_fast(epo, filt, columns=(0, -1)):
    """Apply the CSP filter.

    Apply the spatial CSP filter to the epoched data.

    Parameters
    ----------
    epo : epoched ``Data`` object
        this method relies on the ``epo`` to have three dimensions in
        the following order: class, time, channel
    filt : 2d array
        the CSP filter (i.e. the ``v`` return value from
        :func:`calculate_csp`)
    columns : sequence of ints, optional
        the columns of the filter to use. The default is the first and
        the last one.

    Returns
    -------
    epo : epoched ``Data`` object
        The channels from the original have been replaced with the new
        virtual CSP channels.

    Examples
    --------
    >> w, a, d = calculate_csp(epo)
    >> epo = apply_csp_fast(epo, w)

    See Also
    --------
    :func:`calculate_csp`
    :func:`apply_csp`
    """
    # default was a mutable list ([0, -1]); an immutable tuple avoids the
    # shared-mutable-default pitfall. list() keeps numpy fancy indexing
    # identical for tuple, list and array inputs.
    f = filt[:, list(columns)]
    # per trial: (time x channels) . (channels x filters), transposed back
    # to filters x time -- comprehension replaces the old append loop (TODO)
    filtered = [np.dot(trial.T, f).T for trial in epo.X]
    return SignalAndTarget(filtered, epo.y)
def select_pairs_from_paired_dataset(paired_dataset, indices):
    """Pick the pairs at `indices`; convert selections back to ndarrays."""
    def _take(seq):
        # gather the selected entries and re-pack as an ndarray
        return np.array([seq[i] for i in indices])

    new_X = {'source': _take(paired_dataset.X['source']),
             'target': _take(paired_dataset.X['target'])}
    new_y = {'source': _take(paired_dataset.y['source']),
             'target': _take(paired_dataset.y['target'])}
    return SignalAndTarget(new_X, new_y)
def create_pairs(source_data, target_data, source_indices, target_indices):
    """Build a paired SignalAndTarget from source/target datasets.

    NOTE(review): converts the inputs' X/y to lists in place (mutates the
    arguments) for faster indexing — callers see the converted datasets.
    """
    source_data.X = list(source_data.X)
    source_data.y = list(source_data.y)
    target_data.X = list(target_data.X)
    target_data.y = list(target_data.y)
    paired_X = {
        'source': [source_data.X[i] for i in source_indices],
        'target': [target_data.X[i] for i in target_indices],
    }
    paired_y = {
        'source': [source_data.y[i] for i in source_indices],
        'target': [target_data.y[i] for i in target_indices],
    }
    return SignalAndTarget(paired_X, paired_y)
def to_signal_target(train_inputs, test_inputs):
    """Convert per-class lists of variables into train/valid SignalAndTargets.

    Each element of the inputs is one class's examples; labels are assigned
    from the element's position.
    """
    sets = []
    for inputs in (train_inputs, test_inputs):
        X = np.concatenate(
            [var_to_np(ins) for ins in inputs]).astype(np.float32)
        # label each group with its class index
        y = np.concatenate(
            [np.ones(len(ins)) * i_class
             for i_class, ins in enumerate(inputs)]
        ).astype(np.int64)
        sets.append(SignalAndTarget(X, y))
    train_set, valid_set = sets
    return train_set, valid_set
def concatenate_two_sets(set_a, set_b):
    """
    Concatenate two sets together.

    Parameters
    ----------
    set_a, set_b: :class:`.SignalAndTarget`

    Returns
    -------
    concatenated_set: :class:`.SignalAndTarget`
    """
    merged_X = concatenate_np_array_or_add_lists(set_a.X, set_b.X)
    merged_y = concatenate_np_array_or_add_lists(set_a.y, set_b.y)
    return SignalAndTarget(merged_X, merged_y)
def predict(model, X_test, batch_size, iterator, threshold_for_binary_case=None):
    """
    Load torch model and make predictions on new data.
    """
    all_preds = []
    with th.no_grad():
        # targets are unused for prediction; X_test doubles as a dummy y
        for b_X, _ in iterator.get_batches(SignalAndTarget(X_test, X_test),
                                           False):
            all_preds.append(var_to_np(model(np_to_var(b_X))))
    return compute_pred_labels_from_trial_preds(
        all_preds, threshold_for_binary_case)
def get_tuh_train_val_test(data_folder):
    """Build TUH train/valid/test SignalAndTarget sets from global_vars config."""
    def _make_preproc(duration_key):
        # train and eval share the same preprocessing chain;
        # only the recording duration setting differs
        return create_preproc_functions(
            sec_to_cut_at_start=global_vars.get('sec_to_cut_at_start'),
            sec_to_cut_at_end=global_vars.get('sec_to_cut_at_end'),
            duration_recording_mins=global_vars.get(duration_key),
            max_abs_val=global_vars.get('max_abs_val'),
            clip_before_resample=global_vars.get('clip_before_resample'),
            sampling_freq=global_vars.get('sampling_freq'),
            divisor=global_vars.get('divisor'))

    preproc_functions = _make_preproc('duration_recording_mins')
    test_preproc_functions = _make_preproc('test_recording_mins')
    training_set = DiagnosisSet(
        n_recordings=global_vars.get('n_recordings'),
        max_recording_mins=global_vars.get('max_recording_mins'),
        preproc_functions=preproc_functions,
        train_or_eval='train',
        sensor_types=global_vars.get('sensor_types'))
    test_set = DiagnosisSet(
        n_recordings=global_vars.get('n_recordings'),
        max_recording_mins=None,
        preproc_functions=test_preproc_functions,
        train_or_eval='eval',
        sensor_types=global_vars.get('sensor_types'))
    X, y = training_set.load()
    global_vars.set('input_height', X[0].shape[1])
    splitter = TrainValidSplitter(10, i_valid_fold=0,
                                  shuffle=global_vars.get('shuffle'))
    train_set, valid_set = splitter.split(X, y)
    test_X, test_y = test_set.load()
    test_set = SignalAndTarget(test_X, test_y)
    # homogenize X containers to ndarrays
    train_set.X = np.array(train_set.X)
    valid_set.X = np.array(valid_set.X)
    test_set.X = np.array(test_set.X)
    return train_set, valid_set, test_set
def concatenate_two_sets(set_a, set_b):
    """
    Concatenate two sets together.

    Parameters
    ----------
    set_a, set_b: :class:`.SignalAndTarget`

    Returns
    -------
    concatenated_set: :class:`.SignalAndTarget`
    """
    # work on local references: the original converted set_a.X / set_b.X to
    # lists IN PLACE, mutating the caller's datasets as a side effect
    a_X, b_X = set_a.X, set_b.X
    if hasattr(a_X, 'ndim') and hasattr(b_X, 'ndim'):
        # both ndarrays: concatenate along the trial axis
        new_X = np.concatenate((a_X, b_X), axis=0)
    else:
        # mixed/list inputs: normalize both to lists and add
        if hasattr(a_X, 'ndim'):
            a_X = a_X.tolist()
        if hasattr(b_X, 'ndim'):
            b_X = b_X.tolist()
        new_X = a_X + b_X
    new_y = np.concatenate((set_a.y, set_b.y), axis=0)
    return SignalAndTarget(new_X, new_y)
def predict_outs(self, X, individual_crops=False):
    """
    Predict raw outputs of the network for given input.

    Parameters
    ----------
    X: ndarray
        Input data.
    individual_crops: bool
        If True (cropped decoding only), keep per-crop outputs instead of
        averaging them per trial.

    Returns
    -------
    outs_per_trial: 2darray or list of 2darrays
        Network outputs for each trial, optionally for each crop within trial.
    """
    if individual_crops:
        assert self.cropped, "Cropped labels only for cropped decoding"
    X = _ensure_float32(X)
    all_preds = []
    with th.no_grad():
        # the iterator requires targets; supply dummy ones
        dummy_y = np.ones(len(X), dtype=np.int64)
        for b_X, _ in self.iterator.get_batches(
                SignalAndTarget(X, dummy_y), False):
            b_X_var = np_to_var(b_X)
            if self.is_cuda:
                b_X_var = b_X_var.cuda()
            all_preds.append(var_to_np(self.network(b_X_var)))
    if not self.cropped:
        return np.concatenate(all_preds)
    outs_per_trial = compute_preds_per_trial_from_crops(
        all_preds, self.iterator.input_time_length, X)
    if not individual_crops:
        # average crop outputs over time within each trial
        outs_per_trial = np.array(
            [np.mean(o, axis=1) for o in outs_per_trial])
    return outs_per_trial
def _create_signal_target_from_start_and_ival(data, events, fs, name_to_codes,
                                              epoch_ival_ms):
    """Cut trials out of `data` around event onsets for the named marker codes."""
    ival_in_samples = ms_to_samples(np.array(epoch_ival_ms), fs)
    start_offset = np.int32(np.round(ival_in_samples[0]))
    # we will use ceil but exclusive...
    stop_offset = np.int32(np.ceil(ival_in_samples[1]))
    mrk_code_to_name_and_y = _to_mrk_code_to_name_and_y(name_to_codes)
    class_to_n_trials = Counter()
    X = []
    y = []
    for i_sample, mrk_code in zip(events[:, 0], events[:, 1]):
        # skip markers that are not mapped to a class
        if mrk_code not in mrk_code_to_name_and_y:
            continue
        start_sample = int(i_sample) + start_offset
        stop_sample = int(i_sample) + stop_offset
        name, this_y = mrk_code_to_name_and_y[mrk_code]
        X.append(data[:, start_sample:stop_sample].astype(np.float32))
        y.append(np.int64(this_y))
        class_to_n_trials[name] += 1
    log.info("Trial per class:\n{:s}".format(str(class_to_n_trials)))
    return SignalAndTarget(np.array(X), np.array(y))
def load_data_and_model(n_job):
    """Load the iEEG sessions and the pretrained CNN model for job `n_job`."""
    fileName = file_for_number(n_job)
    print("file = {:s}".format(fileName))
    # %% Load data: matlab cell array
    import h5py
    log.info("Loading data...")
    with h5py.File(dir_sourceData + '/' + fileName + '.mat', 'r') as h5file:
        sessions = [h5file[obj_ref] for obj_ref in h5file['D'][0]]
        Xs = [session['ieeg'][:] for session in sessions]
        ys = [session['traj'][0] for session in sessions]
        srates = [session['srate'][0, 0] for session in sessions]
    # %% create datasets
    from braindecode.datautil.signal_target import SignalAndTarget
    # Outer added axis is the trial axis (size one always...)
    datasets = [
        SignalAndTarget([X.astype(np.float32)], [y.astype(np.float32)])
        for X, y in zip(Xs, ys)
    ]
    from braindecode.datautil.splitters import concatenate_sets
    # only for allocation
    assert len(datasets) >= 4
    train_set = concatenate_sets(datasets[:-1])
    valid_set = datasets[-2]  # dummy variable, validation set is not used
    test_set = datasets[-1]
    log.info("Loading CNN model...")
    import torch
    model = torch.load(dir_outputData + '/models/' + fileName + '_model')
    # fix for new pytorch
    for m in model.modules():
        if m.__class__.__name__ == 'Conv2d':
            m.padding_mode = 'zeros'
    log.info("Loading done.")
    return train_set, valid_set, test_set, model
def evaluate(self, X, y, batch_size=32):
    """Compute monitored metrics on (X, y) via a zero-epoch dummy Experiment."""
    # Create a dummy experiment for the evaluation
    iterator = BalancedBatchSizeIterator(batch_size=batch_size,
                                         seed=0)  # seed irrelevant
    stop_criterion = MaxEpochs(0)
    train_set = SignalAndTarget(X, y)
    model_constraint = None
    valid_set = None
    test_set = None
    loss_function = self.loss
    if self.cropped:
        # average crop predictions over time before applying the loss
        loss_function = lambda outputs, targets: \
            self.loss(th.mean(outputs, dim=2), targets)
    exp = Experiment(self.network, train_set, valid_set, test_set,
                     iterator=iterator,
                     loss_function=loss_function,
                     optimizer=self.optimizer,
                     model_constraint=model_constraint,
                     monitors=self.monitors,
                     stop_criterion=stop_criterion,
                     remember_best_column=None,
                     run_after_early_stop=False,
                     cuda=self.cuda,
                     print_0_epoch=False,
                     do_early_stop=False)
    exp.monitor_epoch({'train': train_set})
    # strip the 'train_' prefix from monitored metric names
    return {key.replace('train_', ''): val
            for key, val in dict(exp.epochs_df.iloc[0]).items()}
def fit_transform(model, optimizer, data, labels, num_epochs=10, n_channels=22,
                  input_time_length=500):
    """Train `model` on (data, labels) with cropped decoding for `num_epochs`.

    Returns the trained model.
    """
    # # # # # # # # CREATE CROPPED ITERATOR # # # # # # # # #
    train_set = SignalAndTarget(data, y=labels)
    # determine output size with a dummy forward pass
    test_input = np_to_var(
        np.ones((2, n_channels, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    iterator = CropsFromTrialsIterator(batch_size=32,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)
    # # # # # # # # TRAINING LOOP # # # # # # #
    for i_epoch in range(num_epochs):
        # Set model to training mode
        model.train()
        for batch_X, batch_y in iterator.get_batches(train_set, shuffle=False):
            net_in = np_to_var(batch_X)
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(batch_y)
            if cuda:
                net_target = net_target.cuda()
            # Remove gradients of last backward pass from all parameters
            optimizer.zero_grad()
            outputs = model(net_in)
            # mean over crop predictions within the trial window
            outputs = th.mean(outputs, dim=2, keepdim=False)
            loss = F.nll_loss(outputs, net_target)
            loss.backward()
            optimizer.step()
        model.eval()
        # Collect all predictions and losses
        all_preds = []
        all_losses = []
        batch_sizes = []
        for batch_X, batch_y in iterator.get_batches(train_set, shuffle=False):
            net_in = np_to_var(batch_X)
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(batch_y)
            if cuda:
                net_target = net_target.cuda()
            outputs = model(net_in)
            all_preds.append(var_to_np(outputs))
            outputs = th.mean(outputs, dim=2, keepdim=False)
            loss = F.nll_loss(outputs, net_target)
            loss = float(var_to_np(loss))
            all_losses.append(loss)
            batch_sizes.append(len(batch_X))
        # Compute mean per-input loss
        loss = np.mean(
            np.array(all_losses) * np.array(batch_sizes) /
            np.mean(batch_sizes))
        # Assign the predictions to the trials.
        # BUG FIX: pass train_set.X (the trial array), not the SignalAndTarget
        # object itself — matches every other call site of this helper.
        preds_per_trial = compute_preds_per_trial_from_crops(
            all_preds, input_time_length, train_set.X)
        # preds per trial are now trials x classes x timesteps/predictions;
        # mean across timesteps for per-trial predictions
        meaned_preds_per_trial = np.array(
            [np.mean(p, axis=1) for p in preds_per_trial])
        predicted_labels = np.argmax(meaned_preds_per_trial, axis=1)
        accuracy = np.mean(predicted_labels == train_set.y)
    return model
def fit_transform_2(model, optimizer, train_data, y_train, test_data, y_test,
                    num_epochs=20, n_channels=22, input_time_length=500):
    """Train with cropped decoding, evaluating on train and test each epoch.

    Stops early when the test loss rises more than 10% above its minimum.
    Returns the model and the per-epoch test accuracies.
    """
    train_set = SignalAndTarget(train_data, y=y_train)
    test_set = SignalAndTarget(test_data, y=y_test)
    # # # # # # # # CREATE CROPPED ITERATOR # # # # # # # # #
    # determine output size with a dummy forward pass
    test_input = np_to_var(
        np.ones((2, n_channels, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    iterator = CropsFromTrialsIterator(batch_size=32,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)
    accuracy_out = []
    min_loss = 1000
    for i_epoch in range(num_epochs):
        # Set model to training mode
        model.train()
        for batch_X, batch_y in iterator.get_batches(train_set, shuffle=False):
            net_in = np_to_var(batch_X)
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(batch_y)
            if cuda:
                net_target = net_target.cuda()
            # Remove gradients of last backward pass from all parameters
            optimizer.zero_grad()
            outputs = model(net_in)
            # Mean predictions across trial. Note that this will give identical
            # gradients to computing a per-prediction loss (at least for the
            # combination of log softmax activation and negative log
            # likelihood loss which we are using here)
            outputs = th.mean(outputs, dim=2, keepdim=False)
            loss = F.nll_loss(outputs, net_target)
            loss.backward()
            optimizer.step()
        # Print some statistics each epoch
        model.eval()
        for setname, dataset in (('Train', train_set), ('Test', test_set)):
            # Collect all predictions and losses
            all_preds = []
            all_losses = []
            batch_sizes = []
            for batch_X, batch_y in iterator.get_batches(dataset,
                                                         shuffle=False):
                net_in = np_to_var(batch_X)
                if cuda:
                    net_in = net_in.cuda()
                net_target = np_to_var(batch_y)
                if cuda:
                    net_target = net_target.cuda()
                outputs = model(net_in)
                all_preds.append(var_to_np(outputs))
                outputs = th.mean(outputs, dim=2, keepdim=False)
                loss = F.nll_loss(outputs, net_target)
                all_losses.append(float(var_to_np(loss)))
                batch_sizes.append(len(batch_X))
            # Compute mean per-input loss
            loss = np.mean(
                np.array(all_losses) * np.array(batch_sizes) /
                np.mean(batch_sizes))
            # Assign the predictions to the trials
            preds_per_trial = compute_preds_per_trial_from_crops(
                all_preds, input_time_length, dataset.X)
            # preds per trial are now trials x classes x timesteps/predictions;
            # mean across timesteps for per-trial predictions
            meaned_preds_per_trial = np.array(
                [np.mean(p, axis=1) for p in preds_per_trial])
            predicted_labels = np.argmax(meaned_preds_per_trial, axis=1)
            accuracy = np.mean(predicted_labels == dataset.y)
            if setname == 'Test':
                accuracy_out.append(accuracy)
                # simple early stopping on the test loss
                if loss < min_loss:
                    min_loss = loss
                elif loss > min_loss * 1.1:
                    print("Training Stopping")
                    return model, np.asarray(accuracy_out)
    return model, np.asarray(accuracy_out)