def make_final_predictions(kwargs, exp):
    """Compute per-trial mean predictions for the train and test sets.

    For each set, every batch from ``exp.iterator`` is passed through
    ``exp.model`` (on the GPU when ``kwargs["cuda"]`` is truthy), the
    per-batch outputs are regrouped per trial, averaged over axes 0 and 2
    and handed to ``write_predictions``.

    Parameters
    ----------
    kwargs: dict
        Run configuration; only the ``"cuda"`` entry is read here, the
        whole dict is forwarded to ``write_predictions``.
    exp:
        Experiment object providing ``model``, ``datasets`` and ``iterator``.
    """
    exp.model.eval()
    for setname in ('train', 'test'):
        dataset = exp.datasets[setname]
        use_cuda = kwargs["cuda"]

        preds_per_batch = []
        for batch in exp.iterator.get_batches(dataset, shuffle=False):
            net_in = np_to_var(batch[0])
            if use_cuda:
                net_in = net_in.cuda()
            preds_per_batch.append(var_to_np(exp.model(net_in)))

        preds_per_trial = compute_preds_per_trial(
            preds_per_batch,
            dataset,
            input_time_length=exp.iterator.input_time_length,
            n_stride=exp.iterator.n_preds_per_input)

        # One mean prediction vector per trial.
        mean_preds_per_trial = np.array(
            [np.mean(trial_preds, axis=(0, 2))
             for trial_preds in preds_per_trial])
        write_predictions(dataset.y, mean_preds_per_trial, setname, kwargs,
                          exp)
def _evalTraining(self, i_epoch, train_set, test_set):
    """Evaluate the model on the train and test set and record the results.

    Prints loss and accuracy for both sets and stores them in
    ``self.loss_rec`` / ``self.accuracy_rec`` at row ``i_epoch``
    (column 0 for the train set, column 1 for the test set).
    """
    self.model.eval()
    print("Epoch {:d}".format(i_epoch))
    column_of_set = {'Train': 0, 'Test': 1}

    for setname, dataset in (('Train', train_set), ('Test', test_set)):
        column = column_of_set[setname]
        batches = get_balanced_batches(len(dataset.X), self.rng,
                                       batch_size=32, shuffle=False)
        outputs = []
        net_targets = []
        for i_trials in batches:
            # Append a trailing singleton axis to the inputs of this batch.
            net_in = np_to_var(dataset.X[i_trials][:, :, :, None])
            net_target = np_to_var(dataset.y[i_trials])
            if self.cuda:
                net_in = net_in.cuda()
                net_target = net_target.cuda()
            net_targets.append(var_to_np(net_target))
            outputs.append(var_to_np(self.model(net_in)))

        # Loss over the complete set, computed from the collected batches.
        all_targets = np_to_var(np.concatenate(net_targets))
        all_outputs = np_to_var(np.concatenate(outputs))
        loss_value = var_to_np(F.nll_loss(all_outputs, all_targets))
        print("{:6s} Loss: {:.5f}".format(setname, float(loss_value)))
        self.loss_rec[i_epoch, column] = loss_value

        predicted_labels = np.argmax(var_to_np(all_outputs), axis=1)
        accuracy = np.mean(dataset.y == predicted_labels)
        print("{:6s} Accuracy: {:.1f}%".format(setname, accuracy * 100))
        self.accuracy_rec[i_epoch, column] = accuracy
    return
def test_cosine_annealing_should_affect_update_in_sgd():
    """SGD wrapped in cosine annealing must apply cosine-scaled updates."""
    init_w = np.float32(3)
    lr = 0.1
    grad = -2
    n_epochs = 10

    x_var = np_to_var(2, dtype=np.float64)
    y_var = np_to_var(100, dtype=np.float64)
    weight = th.nn.Parameter(np_to_var(init_w, dtype=np.float64).data)

    optim = ScheduledOptimizer(
        CosineAnnealing(10),
        SGD([weight], lr=lr),
    )

    # Expected update of every epoch: gradient times cosine-scaled lr.
    cosine_factors = 0.5 * np.cos(np.pi * np.arange(n_epochs) / n_epochs) + 0.5
    grad_times_lr_per_epoch = grad * lr * cosine_factors

    for i_epoch in range(n_epochs):
        loss = th.abs(y_var - weight * x_var)
        optim.zero_grad()
        loss.backward()
        optim.step()

        expected_subtracted_gradient = np.sum(
            grad_times_lr_per_epoch[:i_epoch + 1])
        assert np.allclose(init_w - expected_subtracted_gradient,
                           var_to_np(weight))
def test_cosine_annealing_crashes_for_too_many_optimizer_steps():
    """Stepping past the last scheduled restart period must raise."""
    # restart crash
    init_w = np.float32(3)
    periods = [1, 2, 4, 8, 20]
    wd = 0.1
    lr = 0

    x_var = np_to_var(2, dtype=np.float64)
    y_var = np_to_var(100, dtype=np.float64)
    weight = th.nn.Parameter(np_to_var(init_w, dtype=np.float64).data)

    optim = ScheduledOptimizer(
        CosineAnnealing(periods),
        AdamW([weight], lr=lr, weight_decay=wd))

    decayed_w = init_w
    for n_epochs in periods:
        cosine_val_per_epoch = (
            0.5 * np.cos(np.pi * np.arange(n_epochs) / n_epochs) + 0.5)
        for cosine_val in cosine_val_per_epoch:
            # With lr == 0 only the (annealed) weight decay changes w.
            decayed_w = decayed_w * (1 - wd * cosine_val)
            loss = th.abs(y_var - weight * x_var)
            optim.zero_grad()
            loss.backward()
            optim.step()
            assert np.allclose(decayed_w, var_to_np(weight))

    # All scheduled periods are used up: one more step has to fail.
    with pytest.raises(AssertionError,
                       match=r'More updates \(35\) than expected \(34\)'):
        optim.step()
def predict(self, X, threshold_for_binary_case=None):
    """
    Predict the labels for given input data.

    Parameters
    ----------
    X: ndarray
        Input data.
    threshold_for_binary_case: float, optional
        Forwarded to ``compute_pred_labels_from_trial_preds`` when the
        model is not cropped; threshold for assigning label 0 or 1 in
        case of a model with a single output, e.g. 0.5.

    Returns
    -------
    pred_labels: 1darray
        Predicted labels per trial.
    """
    # The iterator needs targets; X itself serves as a placeholder.
    batches = self.iterator.get_batches(SignalAndTarget(X, X), False)
    all_preds = [var_to_np(self.network(np_to_var(b_X))) for b_X, _ in batches]

    if not self.cropped:
        return compute_pred_labels_from_trial_preds(
            all_preds, threshold_for_binary_case)
    return compute_trial_labels_from_crop_preds(
        all_preds, self.iterator.input_time_length, X)
def test_sanity_check_sgd():
    """Sanity check: one plain SGD step moves w from 3 to 3.2."""
    x_var = np_to_var(2, dtype=np.float64)
    y_var = np_to_var(100, dtype=np.float64)
    weight = th.nn.Parameter(np_to_var(3, dtype=np.float64).data)
    optim = SGD([weight], lr=0.1)

    # Conversion of the product back to numpy; the result is not used.
    var_to_np(weight * x_var)

    loss = th.abs(y_var - weight * x_var)
    optim.zero_grad()
    loss.backward()
    # the gradient magnitude is always 2 here, so one step adds 0.1 * 2
    optim.step()
    assert np.allclose(var_to_np(weight), 3.2)
def _eval_batch(self, inputs, targets):
    """Run the model on one batch and compute its loss.

    Returns
    -------
    outputs:
        Raw return value of ``self.model`` for the batch.
    loss: float
        Value of ``self.loss_function`` for the batch as a Python float.
    """
    input_tensor, target_tensor = (
        self.np_to_tensor(array) for array in (inputs, targets))
    outputs = self.model(input_tensor)
    loss_value = th_ext_util.var_to_np(
        self.loss_function(outputs, target_tensor))
    return outputs, float(loss_value)
def get_corr_coef(dataset, model):
    """Return the correlation between model predictions and targets.

    The whole ``dataset.X`` is passed through ``model`` on the GPU in one
    forward pass. Predictions and the matching last target entries of
    every row of ``dataset.y`` are concatenated before the Pearson
    correlation coefficient is computed.
    """
    net_in = np_to_var(dataset.X).unsqueeze(-1).cuda()
    with th.no_grad():
        preds = var_to_np(model(net_in))

    targets = np.array(dataset.y)
    n_preds = preds.shape[1]
    # Only the last n_preds targets of each row have a prediction.
    y_flat = np.concatenate(targets[:, -n_preds:])
    preds_flat = np.concatenate(preds)
    return np.corrcoef(y_flat, preds_flat)[0, 1]
def predict(model, data, labels, n_channels=22, input_time_length=500):
    """Predict one label per trial with a cropped-decoding model.

    Relies on a module-level ``cuda`` flag to decide whether inputs are
    moved to the GPU.

    Parameters
    ----------
    model:
        Network to evaluate; it is switched to evaluation mode.
    data:
        Trials passed to ``SignalAndTarget`` as signal.
    labels:
        Targets passed to ``SignalAndTarget``; only needed to build the
        dataset, they do not influence the returned values.
    n_channels: int
        Number of channels of the dummy input used to determine the
        number of predictions per input.
    input_time_length: int
        Number of time steps of every crop fed to the network.

    Returns
    -------
    predicted_labels: ndarray
        Argmax over the time-averaged predictions of every trial.
    label_cert:
        Maximum of ``-1 / mean_prediction`` over all trials and classes.
    """
    val_set = SignalAndTarget(data, y=labels)

    # Switch to evaluation mode *before* the first forward pass, so the
    # shape-probing dummy batch is not run in training mode.
    model.eval()

    # Pure inference: no autograd graph is needed.
    with th.no_grad():
        # Determine the number of predictions per input with a dummy batch.
        test_input = np_to_var(
            np.ones((2, n_channels, input_time_length, 1), dtype=np.float32))
        if cuda:
            test_input = test_input.cuda()
        n_preds_per_input = model(test_input).cpu().data.numpy().shape[2]

        iterator = CropsFromTrialsIterator(
            batch_size=32,
            input_time_length=input_time_length,
            n_preds_per_input=n_preds_per_input)

        all_preds = []
        for batch_X, _ in iterator.get_batches(val_set, shuffle=False):
            net_in = np_to_var(batch_X)
            if cuda:
                net_in = net_in.cuda()
            all_preds.append(var_to_np(model(net_in)))

    # Assign the crop predictions to the trials, then average every trial
    # over its second axis to get a single prediction vector per trial.
    preds_per_trial = compute_preds_per_trial_from_crops(
        all_preds, input_time_length, val_set.X)
    meaned_preds_per_trial = np.array(
        [np.mean(p, axis=1) for p in preds_per_trial])

    label_cert = np.max(-1 / meaned_preds_per_trial)
    predicted_labels = np.argmax(meaned_preds_per_trial, axis=1)

    print('predicted_labels shape')
    print(predicted_labels.shape)
    return predicted_labels, label_cert
def test_adam_and_adamw_identical_without_weight_decay():
    """Adam and AdamW must produce identical weights when weight_decay=0.

    Two parameters with the same start value are optimized on the same
    loss, one with ``Adam`` and one with ``AdamW``, and compared after
    every step.
    """
    init_w = np.float32(3)
    w_var_adam = np_to_var(init_w, dtype=np.float64)
    w_var_adamw = np_to_var(init_w, dtype=np.float64)
    x_var = np_to_var(2, dtype=np.float64)
    y_var = np_to_var(100, dtype=np.float64)
    w_var_adam = th.nn.Parameter(w_var_adam.data)
    w_var_adamw = th.nn.Parameter(w_var_adamw.data)
    lr = 0.1
    optim_adam = Adam([w_var_adam], lr=lr, weight_decay=0)
    # Must really be AdamW: building this one with Adam as well would
    # compare Adam against itself and make the test pass trivially.
    optim_adamw = AdamW([w_var_adamw], lr=lr, weight_decay=0)

    n_epochs = 10
    for _ in range(n_epochs):
        loss_adam = th.abs(y_var - w_var_adam * x_var)
        optim_adam.zero_grad()
        loss_adam.backward()
        optim_adam.step()

        loss_adamw = th.abs(y_var - w_var_adamw * x_var)
        optim_adamw.zero_grad()
        loss_adamw.backward()
        optim_adamw.step()

        assert np.allclose(var_to_np(w_var_adam), var_to_np(w_var_adamw))
def predict_with_model(self, data):
    """Return the model prediction for one block of data.

    ``data`` is time x channels; it is transposed and extended by a
    leading and a trailing singleton axis before it is fed to the model.
    Outputs with more than two dimensions are averaged over axis 2 and
    squeezed; if ``self.exponentiate_preds`` is set, the result is
    exponentiated.
    """
    self.model.eval()
    net_in = np_to_var(data.T[None, :, :, None], dtype=np.float32)
    if self.cuda:
        net_in = net_in.cuda()
    prediction = var_to_np(self.model(net_in))

    # possibly mean across time axis
    if prediction.ndim > 2:
        prediction = np.mean(prediction, axis=2).squeeze()
    return np.exp(prediction) if self.exponentiate_preds else prediction
def test_adam_should_not_decay_weights_with_lr_0():
    """With lr == 0, Adam's weight decay must leave the weight untouched."""
    init_w = np.float32(3)
    x_var = np_to_var(2, dtype=np.float64)
    y_var = np_to_var(100, dtype=np.float64)
    weight = th.nn.Parameter(np_to_var(init_w, dtype=np.float64).data)

    optim = Adam([weight], lr=0, weight_decay=1)

    for _ in range(10):
        loss = th.abs(y_var - weight * x_var)
        optim.zero_grad()
        loss.backward()
        optim.step()
        assert np.allclose(init_w, var_to_np(weight))
def _eval_epoch(self, setname_dataset_tuple):
    """Evaluate the model on every given dataset.

    Parameters
    ----------
    setname_dataset_tuple:
        Iterable of ``(setname, dataset)`` pairs; pairs whose dataset is
        ``None`` are skipped.

    Returns
    -------
    epoch_results: OrderedDict
        ``'<setname>_loss'`` and ``'<setname>_accuracy'`` for every
        evaluated set.
    """
    self.model.eval()
    epoch_results = OrderedDict()

    for setname, dataset in setname_dataset_tuple:
        if dataset is None:
            continue

        # Collect predictions, losses and sizes of all batches.
        all_preds, all_losses, batch_sizes = [], [], []
        batches = self.iterator.get_batches(dataset, shuffle=False)
        for batch_X, batch_y in batches:
            batch_size = len(batch_X)
            preds, loss = self._eval_batch(batch_X, batch_y)
            if self.siamese:
                preds = preds['cls']
                batch_size = len(batch_X['source'])
            all_preds.append(th_ext_util.var_to_np(preds))
            all_losses.append(loss)
            batch_sizes.append(batch_size)

        # Mean loss, weighting every batch by its share of all inputs.
        batch_weights = np.array(batch_sizes) / float(np.sum(batch_sizes))
        loss_per_batch = [np.mean(batch_loss) for batch_loss in all_losses]
        mean_loss = np.sum(batch_weights * loss_per_batch)

        predicted_labels = self.func_compute_pred_labels(all_preds, dataset)
        accuracy = np.mean(predicted_labels == dataset.y)
        if self.siamese:
            accuracy = np.mean(predicted_labels == dataset.y['source'])

        # The stop criterion is fed the validation loss together with the
        # current model.
        if setname == 'valid':
            self.stop_criterion(mean_loss, self.model)

        epoch_results[f'{setname}_loss'] = mean_loss
        epoch_results[f'{setname}_accuracy'] = accuracy

    return epoch_results
def predict(model, X_test, batch_size, iterator,
            threshold_for_binary_case=None):
    """Predict labels for new data with an already loaded torch model.

    ``batch_size`` is accepted but not used in this function; batching is
    done by ``iterator``. ``threshold_for_binary_case`` is forwarded to
    ``compute_pred_labels_from_trial_preds``.
    """
    # The iterator needs targets; X_test itself serves as a placeholder.
    dummy_set = SignalAndTarget(X_test, X_test)
    with th.no_grad():
        all_preds = [
            var_to_np(model(np_to_var(b_X)))
            for b_X, _ in iterator.get_batches(dummy_set, False)
        ]
    return compute_pred_labels_from_trial_preds(
        all_preds, threshold_for_binary_case)
def test_adamw_should_decay_weights_with_lr_0():
    """With lr == 0, AdamW must still shrink the weight by (1 - wd) per step."""
    init_w = np.float32(3)
    wd = 0.1
    x_var = np_to_var(2, dtype=np.float64)
    y_var = np_to_var(100, dtype=np.float64)
    weight = th.nn.Parameter(np_to_var(init_w, dtype=np.float64).data)

    optim = AdamW([weight], lr=0, weight_decay=wd)

    for n_steps in range(1, 11):
        loss = th.abs(y_var - weight * x_var)
        optim.zero_grad()
        loss.backward()
        optim.step()

        expected_w = init_w * ((1 - wd)**n_steps)
        assert np.allclose(expected_w, var_to_np(weight))
def predict(self, X):
    """Return the raw model outputs for ``X``, one array per batch.

    The trials are processed in unshuffled batches of (up to) 32 trials;
    every batch gets a trailing singleton axis appended before it is fed
    to the model.

    Parameters
    ----------
    X:
        Input trials; indexed as ``X[i_trials][:, :, :, None]``.

    Returns
    -------
    outputs: list
        Output of ``self.model`` for every batch, converted with
        ``var_to_np``.
    """
    self.model.eval()
    # Without this call the loop variable below was undefined; the batches
    # are built from the argument X, not from an outer ``dataset``.
    i_trials_in_batch = get_balanced_batches(len(X), self.rng,
                                             batch_size=32, shuffle=False)
    outputs = []
    for i_trials in i_trials_in_batch:
        batch_X = X[i_trials][:, :, :, None]
        net_in = np_to_var(batch_X)
        if self.cuda:
            net_in = net_in.cuda()
        outputs.append(var_to_np(self.model(net_in)))
    return outputs
def shallowCNN():
    """Evaluate the stored shallow CNN on the uploaded test data.

    Loads ``data/uploads/test_X.npy`` / ``test_y.npy`` and the pickled
    model ``data/models/shallowCNN.pth``, predicts all test trials in one
    forward pass and prints the predicted labels and the accuracy.

    Returns
    -------
    accuracy:
        Fraction of test trials whose predicted label equals ``test_y``.
    """
    cuda = torch.cuda.is_available()
    test_y = np.load('data/uploads/test_y.npy')
    test_X = np.load('data/uploads/test_X.npy')

    net_in = np_to_var(test_X[:, :, :, None])
    if cuda:
        net_in = net_in.cuda()

    # NOTE: torch.load unpickles arbitrary objects; only load model files
    # from a trusted source. Tensors are mapped to the CPU first.
    model = torch.load('data/models/shallowCNN.pth',
                       map_location=lambda storage, loc: storage)
    if cuda:
        model.cuda()

    # Inference only: evaluation mode, and no autograd graph.
    model.eval()
    with torch.no_grad():
        outputs = model(net_in)

    predicted_labels = np.argmax(var_to_np(outputs), axis=1)
    print(predicted_labels)
    accuracy = np.mean(test_y == predicted_labels)
    print('Accuracy is', accuracy)
    return accuracy
def test_cosine_annealing_should_affect_weight_decay_adamw():
    """Cosine annealing must scale AdamW's weight decay even with lr == 0."""
    init_w = np.float32(3)
    wd = 0.1
    n_epochs = 10

    x_var = np_to_var(2, dtype=np.float64)
    y_var = np_to_var(100, dtype=np.float64)
    weight = th.nn.Parameter(np_to_var(init_w, dtype=np.float64).data)

    optim = ScheduledOptimizer(
        CosineAnnealing(10),
        AdamW([weight], lr=0, weight_decay=wd))

    cosine_val_per_epoch = (
        0.5 * np.cos(np.pi * np.arange(n_epochs) / n_epochs) + 0.5)

    decayed_w = init_w
    for cosine_val in cosine_val_per_epoch:
        # Expected weight: decayed by the annealed weight decay only.
        decayed_w = decayed_w * (1 - wd * cosine_val)
        loss = th.abs(y_var - weight * x_var)
        optim.zero_grad()
        loss.backward()
        optim.step()
        assert np.allclose(decayed_w, var_to_np(weight))
def predict_outs(self, X, individual_crops=False):
    """
    Predict raw outputs of the network for given input.

    Parameters
    ----------
    X: ndarray
        Input data.
    individual_crops: bool
        If True, keep the outputs of every crop instead of averaging
        them per trial. Only allowed for cropped decoding.

    Returns
    -------
    outs_per_trial: 2darray or list of 2darrays
        Network outputs for each trial, optionally for each crop within
        trial.
    """
    if individual_crops:
        assert self.cropped, "Cropped labels only for cropped decoding"
    X = _ensure_float32(X)

    all_preds = []
    with th.no_grad():
        # The iterator needs targets; their values are never used.
        dummy_y = np.ones(len(X), dtype=np.int64)
        batches = self.iterator.get_batches(SignalAndTarget(X, dummy_y),
                                            False)
        for b_X, _ in batches:
            net_in = np_to_var(b_X)
            if self.is_cuda:
                net_in = net_in.cuda()
            all_preds.append(var_to_np(self.network(net_in)))

    if not self.cropped:
        return np.concatenate(all_preds)

    crop_outs_per_trial = compute_preds_per_trial_from_crops(
        all_preds, self.iterator.input_time_length, X)
    if individual_crops:
        return crop_outs_per_trial
    # Average the outputs of every trial over axis 1.
    return np.array([np.mean(outs, axis=1) for outs in crop_outs_per_trial])
def test_trialwise_decoding():
    """Regression test for trialwise decoding with a ShallowFBCSPNet.

    Trains for six epochs on the first 60 trials of the loaded EEGBCI
    recordings, evaluates on the train set and the remaining trials after
    every epoch, and compares the recorded losses and accuracies against
    pinned reference values. The result depends on the fixed seeds and on
    the exact order of the statements below.
    """
    import mne
    from mne.io import concatenate_raws

    # Runs 5, 6, 9, 10, 13, 14 of subject 1 are loaded
    # (presumably the executed and imagined hands/feet runs - TODO confirm)
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id, event_codes)

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(path, preload=True, stim_channel='auto',
                            verbose='WARNING')
        for path in physionet_paths
    ]

    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset
    events = mne.find_events(raw, shortest_event=0, stim_channel='STI 014')

    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info, meg=False, eeg=True,
                                      stim=False, eog=False, exclude='bads')

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(raw, events, dict(hands=2, feet=3), tmin=1,
                         tmax=4.1, proj=False, picks=eeg_channel_inds,
                         baseline=None, preload=True)
    import numpy as np

    # Scale the data by 1e6 (volt -> microvolt, assuming the epochs are
    # given in volt).
    # Pytorch expects float32 for input and int64 for labels.
    X = (epoched.get_data() * 1e6).astype(np.float32)
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2,3 -> 0,1

    from braindecode.datautil.signal_target import SignalAndTarget

    # First 60 trials for training, the rest for testing.
    train_set = SignalAndTarget(X[:60], y=y[:60])
    test_set = SignalAndTarget(X[60:], y=y[60:])

    from braindecode.models.shallow_fbcsp import ShallowFBCSPNet
    from torch import nn
    from braindecode.torch_ext.util import set_random_seeds

    # Set if you want to use GPU
    # You can also use torch.cuda.is_available() to determine if cuda is
    # available on your machine.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)
    n_classes = 2
    in_chans = train_set.X.shape[1]
    # final_conv_length = auto ensures we only get a single output in the
    # time dimension
    model = ShallowFBCSPNet(in_chans=in_chans, n_classes=n_classes,
                            input_time_length=train_set.X.shape[2],
                            final_conv_length='auto').create_network()
    if cuda:
        model.cuda()

    from torch import optim

    optimizer = optim.Adam(model.parameters())

    from braindecode.torch_ext.util import np_to_var, var_to_np
    from braindecode.datautil.iterators import get_balanced_batches
    import torch.nn.functional as F
    from numpy.random import RandomState

    rng = RandomState((2017, 6, 30))
    # Both lists get two entries per epoch: first train, then test.
    losses = []
    accuracies = []
    for i_epoch in range(6):
        i_trials_in_batch = get_balanced_batches(len(train_set.X), rng,
                                                 shuffle=True,
                                                 batch_size=30)
        # Set model to training mode
        model.train()
        for i_trials in i_trials_in_batch:
            # Have to add empty fourth dimension to X
            batch_X = train_set.X[i_trials][:, :, :, None]
            batch_y = train_set.y[i_trials]
            net_in = np_to_var(batch_X)
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(batch_y)
            if cuda:
                net_target = net_target.cuda()
            # Remove gradients of last backward pass from all parameters
            optimizer.zero_grad()
            # Compute outputs of the network
            outputs = model(net_in)
            # Compute the loss
            loss = F.nll_loss(outputs, net_target)
            # Do the backpropagation
            loss.backward()
            # Update parameters with the optimizer
            optimizer.step()

        # Print some statistics each epoch
        model.eval()
        print("Epoch {:d}".format(i_epoch))
        for setname, dataset in (('Train', train_set), ('Test', test_set)):
            # Here, we will use the entire dataset at once, which is still
            # possible for such smaller datasets. Otherwise we would have
            # to use batches.
            net_in = np_to_var(dataset.X[:, :, :, None])
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(dataset.y)
            if cuda:
                net_target = net_target.cuda()
            outputs = model(net_in)
            loss = F.nll_loss(outputs, net_target)
            losses.append(float(var_to_np(loss)))
            print("{:6s} Loss: {:.5f}".format(
                setname, float(var_to_np(loss))))
            predicted_labels = np.argmax(var_to_np(outputs), axis=1)
            accuracy = np.mean(dataset.y == predicted_labels)
            accuracies.append(accuracy * 100)
            print("{:6s} Accuracy: {:.1f}%".format(
                setname, accuracy * 100))

    # Pinned reference values: train/test loss of epochs 0..5, interleaved.
    np.testing.assert_allclose(
        np.array(losses),
        np.array([
            1.1775966882705688,
            1.2602351903915405,
            0.7068756818771362,
            0.9367912411689758,
            0.394258975982666,
            0.6598362326622009,
            0.3359280526638031,
            0.656258761882782,
            0.2790488004684448,
            0.6104397177696228,
            0.27319177985191345,
            0.5949864983558655
        ]),
        rtol=1e-4, atol=1e-5)

    # Pinned reference values: train/test accuracy (in percent) of epochs
    # 0..5, interleaved.
    np.testing.assert_allclose(
        np.array(accuracies),
        np.array([
            51.666666666666671,
            53.333333333333336,
            63.333333333333329,
            56.666666666666664,
            86.666666666666671,
            66.666666666666657,
            90.0,
            63.333333333333329,
            96.666666666666671,
            56.666666666666664,
            96.666666666666671,
            66.666666666666657
        ]),
        rtol=1e-4, atol=1e-5)
def run(ex, test_on_eval, sensor_types, n_chans, max_recording_mins,
        n_recordings, sec_to_cut_at_start, sec_to_cut_at_end,
        duration_recording_mins, test_recording_mins, max_abs_val,
        clip_before_resample, sampling_freq, divisor, n_folds, i_test_fold,
        shuffle, merge_train_valid, model_name, input_time_length,
        final_conv_length, stride_before_pool, n_start_chans, n_chan_factor,
        optimizer, learning_rate, weight_decay, scheduler, model_constraint,
        batch_size, max_epochs, save_predictions, save_crop_predictions,
        np_th_seed, only_return_exp):
    """Run one experiment and store its results on the experiment ``ex``.

    All arguments except ``ex``, ``save_predictions`` and
    ``save_crop_predictions`` are forwarded by name to ``run_exp``,
    together with ``log_dir`` (the directory of the first observer of
    ``ex``).

    Parameters
    ----------
    ex:
        Experiment object; ``ex.info`` receives the ``'finished'`` flag,
        the runtime and the values of the last epoch, and artifacts are
        attached to it.
    save_predictions: bool
        If True, labels and mean trial predictions of the train, valid
        and test set are saved as artifacts.
    save_crop_predictions: bool
        If True (and ``save_predictions`` is True), the per-crop
        predictions are saved as well.
    only_return_exp: bool
        If True, nothing is saved and the experiment object is returned.

    Returns
    -------
    exp or None
        The object returned by ``run_exp`` if ``only_return_exp`` is
        True, otherwise None.
    """
    log_dir = ex.observers[0].dir
    # Snapshot of all arguments plus log_dir. No other local may be
    # defined before this line, or it would be passed to run_exp as well.
    kwargs = locals()
    kwargs.pop('ex')
    kwargs.pop('save_predictions')
    kwargs.pop('save_crop_predictions')
    import sys
    logging.basicConfig(format='%(asctime)s %(levelname)s : %(message)s',
                        level=logging.DEBUG, stream=sys.stdout)
    start_time = time.time()
    ex.info['finished'] = False
    confirm_gpu_availability()

    exp = run_exp(**kwargs)
    end_time = time.time()
    run_time = end_time - start_time
    ex.info['finished'] = True

    if not only_return_exp:
        last_row = exp.epochs_df.iloc[-1]
        # Series.items() replaces iteritems(), which pandas 2.0 removed.
        for key, val in last_row.items():
            ex.info[key] = float(val)
    ex.info['runtime'] = run_time
    if not only_return_exp:
        save_pkl_artifact(ex, exp.epochs_df, 'epochs_df.pkl')
        save_pkl_artifact(ex, exp.before_stop_df, 'before_stop_df.pkl')
        save_torch_artifact(ex, exp.model.state_dict(), 'model_params.pkl')
        if save_predictions:
            exp.model.eval()
            for setname in ('train', 'valid', 'test'):
                log.info(
                    "Compute and save predictions for {:s}...".format(
                        setname))
                dataset = exp.datasets[setname]
                log.info("Save labels for {:s}...".format(setname))
                save_npy_artifact(ex, dataset.y,
                                  '{:s}_trial_labels.npy'.format(setname))
                # Inputs are always moved to the GPU here.
                preds_per_batch = [
                    var_to_np(exp.model(np_to_var(b[0]).cuda()))
                    for b in exp.iterator.get_batches(dataset,
                                                      shuffle=False)
                ]
                preds_per_trial = compute_preds_per_trial(
                    preds_per_batch, dataset,
                    input_time_length=exp.iterator.input_time_length,
                    n_stride=exp.iterator.n_preds_per_input)
                mean_preds_per_trial = [
                    np.mean(preds, axis=(0, 2))
                    for preds in preds_per_trial
                ]
                mean_preds_per_trial = np.array(mean_preds_per_trial)
                log.info(
                    "Save trial predictions for {:s}...".format(setname))
                save_npy_artifact(ex, mean_preds_per_trial,
                                  '{:s}_trial_preds.npy'.format(setname))
                if save_crop_predictions:
                    log.info(
                        "Save crop predictions for {:s}...".format(
                            setname))
                    save_npy_artifact(ex, preds_per_trial,
                                      '{:s}_crop_preds.npy'.format(setname))
    else:
        return exp
strategy='per_subject', csv_file=None, evolution_file=None) naiveNAS.train_and_evaluate_model(model) train_batches = list(iterator.get_batches(train_set[subject_id], shuffle=False)) train_X_batches = np.concatenate(list(zip(*train_batches))[0]) new_model = nn.Sequential() for name, module in model.named_children(): if 'softmax' in name: break new_model.add_module(name, module) new_model.eval() pred_fn = lambda x: var_to_np( th.mean(new_model(np_to_var(x).cuda())[:, :, :, 0], dim=2, keepdim=False)) from braindecode.visualization.perturbation import compute_amplitude_prediction_correlations amp_pred_corrs = compute_amplitude_prediction_correlations(pred_fn, train_X_batches, n_iterations=12, batch_size=30) freqs = np.fft.rfftfreq(train_X_batches.shape[2], d=1.0 / fs) alpha_band = {'start': 7, 'stop': 14} beta_band = {'start': 14, 'stop': 31} high_gamma_band = {'start': 71, 'stop': 91} bands = [alpha_band, beta_band, high_gamma_band] for band in bands:
def run_one_epoch(self, epoch, dataset, train_or_eval, return_last=True,
                  return_ordinal_z=False, use_gpu=False,
                  evaluate_loss=True):
    """Run the model once over ``dataset``, either training or evaluating.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source - confirm it against the original file.

    Parameters
    ----------
    epoch: int
        Epoch index; used to seed the stateful loader and in the
        progress message.
    dataset:
        Dataset whose batches are dicts with the keys ``'X'``, ``'y'``,
        ``'Z'`` and optionally ``'L'``.
    train_or_eval: str
        ``'train'`` runs optimization steps, any other value evaluates.
    return_last, return_ordinal_z:
        Forwarded to the model; ``return_last`` is forced to False for
        the forward pass when the batch has an ``'L'`` entry.
    use_gpu: bool
        Move ``X``, ``y`` and ``Z`` of every batch to the GPU.
    evaluate_loss: bool
        In evaluation mode, whether the loss is accumulated.

    Returns
    -------
    None in training mode. In evaluation mode the tuple
    ``(total_loss / N, total_outputs)`` with ``N`` the number of
    processed samples (1 if there were none) and ``total_outputs`` a
    list with one entry per recorded batch.
    """
    if train_or_eval == 'train':
        running_loss = 0.
        self.model.train()
        stateful = self.stateful
        subsample = self.subsample
        if stateful:
            gen = MyStateFullDataLoader(dataset, batch_size=self.batch_size,
                                        random_state=epoch + 2018)
        else:
            gen = DataLoader(dataset, batch_size=self.batch_size,
                             shuffle=True, num_workers=0, pin_memory=False)
        # number of batches between two progress messages
        verbosity = 100
    else:
        total_loss = 0.
        total_outputs = []
        self.model.eval()
        # evaluation never carries state over and never subsamples
        stateful = False
        subsample = None
        gen = DataLoader(dataset, batch_size=self.batch_size, shuffle=False,
                         num_workers=0, pin_memory=False)

    N = 0.  # number of samples seen so far
    last_state = None
    # time steps before mix_period are excluded when outputs are averaged
    mix_period = 20
    for bi, batch in enumerate(gen):
        # randomly skip batches; a batch is kept with probability subsample
        if subsample is not None and np.random.rand() > subsample:
            continue
        X = Variable(batch['X'])
        batch_size = len(X)
        N += batch_size

        y = Variable(batch['y'])
        if type(self.loss_function) == MyMultiClassLoss:
            y = y.long()
        # 2D targets: keep only the last column
        if len(y.shape) == 2:
            y = y[:, -1].contiguous()
        y = y.view(-1, 1)

        # Z is multiplied onto the per-sample loss below
        Z = Variable(batch['Z'])
        if len(Z.shape) == 2:
            Z = Z[:, -1].contiguous()
        Z = Z.view(-1, 1)

        exist_L = 'L' in batch
        if exist_L:  # lengths
            Lint = batch['L'].numpy()
            # with lengths, the model has to return all time steps; the
            # requested value is restored after the forward pass
            return_last2 = return_last
            return_last = False

        if use_gpu:
            X = X.cuda()
            y = y.cuda()
            Z = Z.cuda()

        if train_or_eval == 'train':
            self.optimizer.zero_grad()

        outputs = self.model(X, initial_state=last_state,
                             return_last=return_last,
                             return_ordinal_z=return_ordinal_z)
        if exist_L:
            return_last = return_last2

        if stateful:
            # the state is reset every dataset.shorten_amount batches,
            # otherwise the detached last output is carried over
            if (bi + 1) % dataset.shorten_amount == 0:
                last_state = None
            else:
                if type(outputs[-1]) == tuple:
                    last_state = tuple([xx.detach() for xx in outputs[-1]])
                else:
                    last_state = outputs[-1].detach()

        # decide y, output, Z (weight) for computing loss
        if outputs[0] is not None:
            if exist_L:
                # only samples with a positive length contribute
                nonzero_ids = np.where(Lint > 0)[0]
                if len(nonzero_ids) == 0:
                    continue
                y_loss = y[nonzero_ids]
                Z_loss = Z[nonzero_ids]
                Lint = Lint[nonzero_ids].tolist()
                output_loss = outputs[0][nonzero_ids]
                ll = len(y_loss)
                if return_last:
                    # output at the last valid time step of every sample
                    output_loss = th.cat([
                        output_loss[iii, Lint[iii] - 1].unsqueeze(0)
                        for iii in range(ll)
                    ], dim=0)
                else:
                    # mean output between mix_period and the sample length
                    output_loss = th.cat([
                        output_loss[iii, mix_period:Lint[iii]].mean(
                            0).unsqueeze(0) for iii in range(ll)
                    ], dim=0)
            else:
                if return_last:
                    y_loss = y
                    output_loss = outputs[0]
                    Z_loss = Z
                else:
                    y_loss = y
                    # mean output over all time steps after mix_period
                    output_loss = outputs[0][:, mix_period:].mean(dim=1)
                    Z_loss = Z

            # this is for autoencoder
            if self.loss_function == 'mse' and len(output_loss.shape) > 2:
                y_loss = X
                Z_loss = 1.

            if train_or_eval == 'train':
                loss = self.get_per_sample_loss(y_loss, output_loss)
                loss = loss * Z_loss
                loss = th.mean(loss) + self.get_weight_loss()
                running_loss += float(loss.data.cpu().numpy())
                loss.backward()
                if self.clip_weight > 0:
                    nn.utils.clip_grad_norm(self.model.parameters(),
                                            self.clip_weight)
                self.optimizer.step()
                # report the mean loss of the last `verbosity` batches
                if bi % verbosity == verbosity - 1:
                    print('[%d, %d %s] loss: %g' %
                          (epoch + 1, bi + 1, datetime.datetime.now(),
                           running_loss / verbosity))
                    running_loss = 0.
            else:
                if evaluate_loss:
                    # in evaluation the log of the outputs is passed on
                    loss = self.get_per_sample_loss(
                        y_loss, th.log(output_loss))
                    loss = loss * Z_loss
                    if not return_last:
                        loss = loss * batch_size / len(y_loss)
                    loss = th.sum(loss) + self.get_weight_loss()
                    total_loss += float(loss.data.cpu().numpy())
                # keep all outputs as numpy arrays; missing outputs and
                # tuples are recorded as empty lists
                outputs2 = []
                for ii in range(len(outputs)):
                    if outputs[ii] is None or type(outputs[ii]) == tuple:
                        outputs2.append([])
                    else:
                        outputs2.append(var_to_np(outputs[ii]))
                total_outputs.append(outputs2)
        del outputs

    if train_or_eval != 'train':
        # avoid a division by zero when no sample was processed
        if N == 0:
            N = 1
        return total_loss / N, total_outputs
config.max_epochs, config.cuda, ) end_time = time.time() run_time = end_time - start_time log.info("Experiment runtime: {:.2f} sec".format(run_time)) # In case you want to recompute predictions for further analysis: exp.model.eval() for setname in ('train', 'valid', 'test'): log.info("Compute predictions for {:s}...".format(setname)) dataset = exp.datasets[setname] if config.cuda: preds_per_batch = [ var_to_np(exp.model(np_to_var(b[0]).cuda())) for b in exp.iterator.get_batches(dataset, shuffle=False) ] else: preds_per_batch = [ var_to_np(exp.model(np_to_var(b[0]))) for b in exp.iterator.get_batches(dataset, shuffle=False) ] preds_per_trial = compute_preds_per_trial( preds_per_batch, dataset, input_time_length=exp.iterator.input_time_length, n_stride=exp.iterator.n_preds_per_input) mean_preds_per_trial = [ np.mean(preds, axis=(0, 2)) for preds in preds_per_trial ]
def fit_transform_2(model, optimizer, train_data, y_train, test_data,
                    y_test, num_epochs=20, n_channels=22,
                    input_time_length=500):
    """Train ``model`` with cropped decoding and track the test accuracy.

    Relies on a module-level ``cuda`` flag to decide whether tensors are
    moved to the GPU.

    NOTE(review): the block nesting below (in particular the early
    stopping inside the ``'Test'`` branch) was reconstructed from a
    whitespace-collapsed source - confirm it against the original file.

    Parameters
    ----------
    model, optimizer:
        Network to train and the optimizer that updates its parameters.
    train_data, y_train, test_data, y_test:
        Signals and targets wrapped into ``SignalAndTarget`` sets.
    num_epochs: int
        Maximum number of training epochs.
    n_channels: int
        Number of channels of the dummy input used to determine the
        number of predictions per input.
    input_time_length: int
        Number of time steps of every crop fed to the network.

    Returns
    -------
    model:
        The trained model.
    accuracy_out: ndarray
        Test accuracy after every completed epoch.
    """
    train_set = SignalAndTarget(train_data, y=y_train)
    test_set = SignalAndTarget(test_data, y=y_test)

    # # # # # # # # CREATE CROPPED ITERATOR # # # # # # # # #
    # determine output size with a dummy batch of ones
    test_input = np_to_var(
        np.ones((2, n_channels, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    iterator = CropsFromTrialsIterator(batch_size=32,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)

    accuracy_out = []
    # lowest test loss seen so far, used for early stopping
    min_loss = 1000
    for i_epoch in range(num_epochs):
        # Set model to training mode
        model.train()
        for batch_X, batch_y in iterator.get_batches(train_set,
                                                     shuffle=False):
            net_in = np_to_var(batch_X)
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(batch_y)
            if cuda:
                net_target = net_target.cuda()
            # Remove gradients of last backward pass from all parameters
            optimizer.zero_grad()
            outputs = model(net_in)
            # Mean predictions across trial
            # Note that this will give identical gradients to computing
            # a per-prediction loss (at least for the combination of log
            # softmax activation and negative log likelihood loss which we
            # are using here)
            outputs = th.mean(outputs, dim=2, keepdim=False)
            loss = F.nll_loss(outputs, net_target)
            loss.backward()
            optimizer.step()

        # Evaluate on both sets after each epoch
        model.eval()
        for setname, dataset in (('Train', train_set), ('Test', test_set)):
            # Collect all predictions and losses
            all_preds = []
            all_losses = []
            batch_sizes = []
            for batch_X, batch_y in iterator.get_batches(dataset,
                                                         shuffle=False):
                net_in = np_to_var(batch_X)
                if cuda:
                    net_in = net_in.cuda()
                net_target = np_to_var(batch_y)
                if cuda:
                    net_target = net_target.cuda()
                outputs = model(net_in)
                all_preds.append(var_to_np(outputs))
                outputs = th.mean(outputs, dim=2, keepdim=False)
                loss = F.nll_loss(outputs, net_target)
                loss = float(var_to_np(loss))
                all_losses.append(loss)
                batch_sizes.append(len(batch_X))
            # Compute mean per-input loss (batch losses weighted by size)
            loss = np.mean(
                np.array(all_losses) * np.array(batch_sizes) /
                np.mean(batch_sizes))
            # Assign the predictions to the trials
            preds_per_trial = compute_preds_per_trial_from_crops(
                all_preds, input_time_length, dataset.X)
            # preds per trial are now trials x classes x
            # timesteps/predictions
            # Now mean across timesteps for each trial to get per-trial
            # predictions
            meaned_preds_per_trial = np.array(
                [np.mean(p, axis=1) for p in preds_per_trial])
            predicted_labels = np.argmax(meaned_preds_per_trial, axis=1)
            accuracy = np.mean(predicted_labels == dataset.y)
            if setname == 'Test':
                accuracy_out.append(accuracy)
                # Early stopping: stop once the test loss exceeds the
                # best test loss so far by more than 10 percent.
                if loss < min_loss:
                    min_loss = loss
                elif loss > min_loss * 1.1:
                    print("Training Stopping")
                    return model, np.asarray(accuracy_out)
    return model, np.asarray(accuracy_out)
def test_trialwise_decoding():
    """Regression test: trial-wise decoding on PhysioNet EEGBCI subject 1.

    Trains a ShallowFBCSPNet for six epochs on the first 60 trials, evaluates
    on the remaining trials after every epoch, and compares the recorded
    losses and accuracies (train, test, train, test, ...) against frozen
    reference values.
    """
    # Runs 5, 6, 9, 10, 13, 14 are the executed and imagined hands/feet runs
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id, event_codes)

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(path, preload=True, stim_channel='auto',
                            verbose='WARNING')
        for path in physionet_paths
    ]

    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset (read from the annotations rather than
    # from a stim channel)
    events, _ = mne.events_from_annotations(raw)

    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info, meg=False, eeg=True,
                                      stim=False, eog=False, exclude='bads')

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(raw, events, dict(hands=2, feet=3), tmin=1,
                         tmax=4.1, proj=False, picks=eeg_channel_inds,
                         baseline=None, preload=True)

    # Scale the signal by 1e6 (volts -> microvolts, assuming MNE returns
    # volts).  PyTorch expects float32 for input and int64 for labels.
    # X: [90, 64, 497]
    X = (epoched.get_data() * 1e6).astype(np.float32)
    # y: [90]
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2,3 -> 0,1

    # X_train: [60, 64, 497], y_train: [60]
    train_set = SignalAndTarget(X[:60], y=y[:60])
    # X_test: [30, 64, 497], y_test: [30]
    test_set = SignalAndTarget(X[60:], y=y[60:])

    # Set if you want to use GPU.  You can also use
    # torch.cuda.is_available() to determine if cuda is available.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)
    n_classes = 2
    in_chans = train_set.X.shape[1]

    # final_conv_length='auto' ensures we only get a single output in the
    # time dimension.
    # Author's note (translated): create_network() feels like part of
    # __init__; the network is now reached through ``.model`` instead, which
    # still feels inelegant, mainly because forward is folded into
    # nn.Sequential.  The object actually constructed is an nn.Sequential
    # rather than a ShallowFBCSPNet; the original way of defining it was
    # nicer, since this way the intermediate outputs are not visible.
    # The previous form of this line ended in ``.create_network()``.
    model = ShallowFBCSPNet(in_chans=in_chans, n_classes=n_classes,
                            input_time_length=train_set.X.shape[2],
                            final_conv_length='auto').model
    if cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters())

    rng = RandomState((2017, 6, 30))
    losses = []
    accuracies = []
    for i_epoch in range(6):
        i_trials_in_batch = get_balanced_batches(len(train_set.X), rng,
                                                 shuffle=True, batch_size=10)
        # Set model to training mode
        model.train()
        for i_trials in i_trials_in_batch:
            # Have to add empty fourth dimension to X
            batch_X = train_set.X[i_trials][:, :, :, None]
            batch_y = train_set.y[i_trials]
            net_in = np_to_var(batch_X)
            net_target = np_to_var(batch_y)
            if cuda:
                net_in = net_in.cuda()
                net_target = net_target.cuda()
            # Remove gradients of last backward pass from all parameters
            optimizer.zero_grad()
            # Compute outputs of the network.
            # net_in: [10, 64, 497, 1] = [bsz, H_im, W_im, C_im]
            # model = Sequential(
            #   (dimshuffle): Expression(expression=_transpose_time_to_spat)
            #   (conv_time): Conv2d(1, 40, kernel_size=(25, 1), stride=(1, 1))
            #   (conv_spat): Conv2d(40, 40, kernel_size=(1, 64),
            #                       stride=(1, 1), bias=False)
            #   (bnorm): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True,
            #                        track_running_stats=True)
            #   (conv_nonlin): Expression(expression=square)
            #   (pool): AvgPool2d(kernel_size=(75, 1), stride=(15, 1),
            #                     padding=0)
            #   (pool_nonlin): Expression(expression=safe_log)
            #   (drop): Dropout(p=0.5)
            #   (conv_classifier): Conv2d(40, 2, kernel_size=(27, 1),
            #                             stride=(1, 1))
            #   (softmax): LogSoftmax()
            #   (squeeze): Expression(expression=_squeeze_final_output)
            # )
            outputs = model(net_in)
            # Compute the loss
            loss = F.nll_loss(outputs, net_target)
            # Do the backpropagation
            loss.backward()
            # Update parameters with the optimizer
            optimizer.step()

        # Print some statistics each epoch
        model.eval()
        print("Epoch {:d}".format(i_epoch))
        for setname, dataset in (('Train', train_set), ('Test', test_set)):
            # Here, we will use the entire dataset at once, which is still
            # possible for such smaller datasets.  Otherwise we would have to
            # use batches.
            net_in = np_to_var(dataset.X[:, :, :, None])
            net_target = np_to_var(dataset.y)
            if cuda:
                net_in = net_in.cuda()
                net_target = net_target.cuda()
            outputs = model(net_in)
            loss = F.nll_loss(outputs, net_target)
            losses.append(float(var_to_np(loss)))
            print("{:6s} Loss: {:.5f}".format(setname,
                                              float(var_to_np(loss))))
            predicted_labels = np.argmax(var_to_np(outputs), axis=1)
            accuracy = np.mean(dataset.y == predicted_labels)
            accuracies.append(accuracy * 100)
            print("{:6s} Accuracy: {:.1f}%".format(setname, accuracy * 100))

    np.testing.assert_allclose(
        np.array(losses),
        np.array([
            1.1775966882705688,
            1.2602351903915405,
            0.7068756818771362,
            0.9367912411689758,
            0.394258975982666,
            0.6598362326622009,
            0.3359280526638031,
            0.656258761882782,
            0.2790488004684448,
            0.6104397177696228,
            0.27319177985191345,
            0.5949864983558655,
        ]),
        rtol=1e-4, atol=1e-5)

    np.testing.assert_allclose(
        np.array(accuracies),
        np.array([
            51.666666666666671,
            53.333333333333336,
            63.333333333333329,
            56.666666666666664,
            86.666666666666671,
            66.666666666666657,
            90.0,
            63.333333333333329,
            96.666666666666671,
            56.666666666666664,
            96.666666666666671,
            66.666666666666657,
        ]),
        rtol=1e-4, atol=1e-5)
def test_cropped_decoding():
    """Regression test: cropped decoding on PhysioNet EEGBCI subject 1.

    Trains a dense-prediction ShallowFBCSPNet for four epochs on crops of the
    first 60 trials, evaluates both sets after every epoch, and compares the
    recorded losses and accuracies (train, test, train, test, ...) against
    frozen reference values.
    """
    import mne
    import numpy as np
    import torch as th
    import torch.nn.functional as F
    from mne.io import concatenate_raws
    from torch import optim

    from braindecode.datautil.iterators import CropsFromTrialsIterator
    from braindecode.datautil.signal_target import SignalAndTarget
    from braindecode.experiments.monitors import (
        compute_preds_per_trial_from_crops)
    from braindecode.models.shallow_fbcsp import ShallowFBCSPNet
    from braindecode.models.util import to_dense_prediction_model
    from braindecode.torch_ext.util import (
        np_to_var, set_random_seeds, var_to_np)

    # Runs 5, 6, 9, 10, 13, 14 are the executed and imagined hands/feet runs
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id, event_codes)

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(path, preload=True, stim_channel='auto',
                            verbose='WARNING')
        for path in physionet_paths
    ]

    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset
    events = mne.find_events(raw, shortest_event=0, stim_channel='STI 014')

    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info, meg=False, eeg=True,
                                      stim=False, eog=False, exclude='bads')

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(raw, events, dict(hands=2, feet=3), tmin=1,
                         tmax=4.1, proj=False, picks=eeg_channel_inds,
                         baseline=None, preload=True)

    # Scale the signal by 1e6 (volts -> microvolts, assuming MNE returns
    # volts).  PyTorch expects float32 for input and int64 for labels.
    X = (epoched.get_data() * 1e6).astype(np.float32)
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2,3 -> 0,1

    train_set = SignalAndTarget(X[:60], y=y[:60])
    test_set = SignalAndTarget(X[60:], y=y[60:])

    # Set if you want to use GPU.  You can also use
    # torch.cuda.is_available() to determine if cuda is available.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)

    # This will determine how many crops are processed in parallel
    input_time_length = 450
    n_classes = 2
    in_chans = train_set.X.shape[1]
    # final_conv_length determines the size of the receptive field
    model = ShallowFBCSPNet(in_chans=in_chans, n_classes=n_classes,
                            input_time_length=input_time_length,
                            final_conv_length=12).create_network()
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters())

    # Determine output size with a dummy forward pass
    test_input = np_to_var(
        np.ones((2, in_chans, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    print("{:d} predictions per input/trial".format(n_preds_per_input))

    iterator = CropsFromTrialsIterator(batch_size=32,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)

    losses = []
    accuracies = []
    for i_epoch in range(4):
        # Set model to training mode
        model.train()
        for batch_X, batch_y in iterator.get_batches(train_set, shuffle=False):
            net_in = np_to_var(batch_X)
            net_target = np_to_var(batch_y)
            if cuda:
                net_in = net_in.cuda()
                net_target = net_target.cuda()
            # Remove gradients of last backward pass from all parameters
            optimizer.zero_grad()
            outputs = model(net_in)
            # Mean predictions across trial.
            # Note that this will give identical gradients to computing a
            # per-prediction loss (at least for the combination of log
            # softmax activation and negative log likelihood loss which we
            # are using here).
            outputs = th.mean(outputs, dim=2, keepdim=False)
            loss = F.nll_loss(outputs, net_target)
            loss.backward()
            optimizer.step()

        # Print some statistics each epoch
        model.eval()
        print("Epoch {:d}".format(i_epoch))
        for setname, dataset in (('Train', train_set), ('Test', test_set)):
            # Collect all predictions and losses
            all_preds = []
            all_losses = []
            batch_sizes = []
            for batch_X, batch_y in iterator.get_batches(dataset,
                                                         shuffle=False):
                net_in = np_to_var(batch_X)
                net_target = np_to_var(batch_y)
                if cuda:
                    net_in = net_in.cuda()
                    net_target = net_target.cuda()
                outputs = model(net_in)
                all_preds.append(var_to_np(outputs))
                outputs = th.mean(outputs, dim=2, keepdim=False)
                loss = F.nll_loss(outputs, net_target)
                all_losses.append(float(var_to_np(loss)))
                batch_sizes.append(len(batch_X))

            # Compute mean per-input loss
            loss = np.mean(np.array(all_losses) * np.array(batch_sizes) /
                           np.mean(batch_sizes))
            print("{:6s} Loss: {:.5f}".format(setname, loss))
            losses.append(loss)

            # Assign the predictions to the trials
            preds_per_trial = compute_preds_per_trial_from_crops(
                all_preds, input_time_length, dataset.X)
            # preds per trial are now trials x classes x
            # timesteps/predictions.  Now mean across timesteps for each
            # trial to get per-trial predictions.
            meaned_preds_per_trial = np.array(
                [np.mean(p, axis=1) for p in preds_per_trial])
            predicted_labels = np.argmax(meaned_preds_per_trial, axis=1)
            accuracy = np.mean(predicted_labels == dataset.y)
            accuracies.append(accuracy * 100)
            print("{:6s} Accuracy: {:.1f}%".format(setname, accuracy * 100))

    np.testing.assert_allclose(
        np.array(losses),
        np.array([
            1.703004002571106,
            1.6295261979103088,
            0.71168938279151917,
            0.70825588703155518,
            0.58231228590011597,
            0.60176041722297668,
            0.46629951894283295,
            0.51184913516044617,
        ]),
        rtol=1e-4, atol=1e-5)

    np.testing.assert_allclose(
        np.array(accuracies),
        np.array([
            50.0,
            46.666666666666664,
            60.0,
            53.333333333333336,
            68.333333333333329,
            66.666666666666657,
            88.333333333333329,
            83.333333333333343,
        ]),
        rtol=1e-4, atol=1e-5)
def runModel(mode):
    """Load a saved model for ``mode`` and return its test-set accuracy.

    Loads ``data<mode[:3]>.npy`` / ``labels<mode[:3]>.npy``, splits them with
    ``splitDataRandom_Loaded``, builds the network selected by the local
    ``model_name`` setting, restores its weights from
    ``linear/<mode[0]-mode[1]-mode[2]-mode[3]>Model<mode[4]>.pt`` and computes
    crop-wise predictions for the train, valid and test sets.

    Args:
        mode: indexable sequence with at least five entries.  The first three
            select the data files, the first four (joined with ``-``) plus
            the fifth select the checkpoint.  ``mode`` is not modified.

    Returns:
        The mean of the per-class accuracies on the test set.

    Raises:
        ValueError: if ``model_name`` is not one of the known architectures.
        AssertionError: if the longest recording does not match the length
            implied by ``config``.
    """
    cudnn.benchmark = True

    data = np.load('data%s.npy' % mode[:3])
    labels = np.load('labels%s.npy' % mode[:3])
    X, y, test_X, test_y = splitDataRandom_Loaded(data, labels, mode)
    print('Mode - %s Total n: %d, Test n: %d' %
          (mode, len(y) + len(test_y), len(test_y)))

    max_shape = np.max([list(x.shape) for x in X], axis=0)
    assert max_shape[1] == int(config.duration_recording_mins *
                               config.sampling_freq * 60)

    # Only the settings below are consumed locally when building the
    # network; every other setting is read from ``config``.
    n_classes = 2
    n_chans = 19
    model_name = 'linear'  # 'shallow', 'deep', 'deep_smac', ... or 'linear'
    n_start_chans = 25
    n_chan_factor = 2  # relevant for deep model only
    input_time_length = 6000
    final_conv_length = 1

    if model_name == 'shallow':
        model = ShallowFBCSPNet(
            in_chans=n_chans, n_classes=n_classes,
            n_filters_time=n_start_chans,
            n_filters_spat=n_start_chans,
            input_time_length=input_time_length,
            final_conv_length=final_conv_length).create_network()
    elif model_name == 'deep':
        model = Deep4Net(
            n_chans, n_classes,
            n_filters_time=n_start_chans,
            n_filters_spat=n_start_chans,
            input_time_length=input_time_length,
            n_filters_2=int(n_start_chans * n_chan_factor),
            n_filters_3=int(n_start_chans * (n_chan_factor**2.0)),
            n_filters_4=int(n_start_chans * (n_chan_factor**3.0)),
            final_conv_length=final_conv_length,
            stride_before_pool=True).create_network()
    elif model_name in ('deep_smac', 'deep_smac_bnorm'):
        # Both variants share every hyperparameter except batch norm.
        do_batch_norm = model_name == 'deep_smac_bnorm'
        double_time_convs = False
        drop_prob = 0.244445
        filter_length_2 = 12
        filter_length_3 = 14
        filter_length_4 = 12
        filter_time_length = 21
        final_conv_length = 1
        first_nonlin = elu
        first_pool_mode = 'mean'
        first_pool_nonlin = identity
        later_nonlin = elu
        later_pool_mode = 'mean'
        later_pool_nonlin = identity
        n_filters_factor = 1.679066
        n_filters_start = 32
        pool_time_length = 1
        pool_time_stride = 2
        split_first_layer = True
        n_chan_factor = n_filters_factor
        n_start_chans = n_filters_start
        model = Deep4Net(
            n_chans, n_classes,
            n_filters_time=n_start_chans,
            n_filters_spat=n_start_chans,
            input_time_length=input_time_length,
            n_filters_2=int(n_start_chans * n_chan_factor),
            n_filters_3=int(n_start_chans * (n_chan_factor**2.0)),
            n_filters_4=int(n_start_chans * (n_chan_factor**3.0)),
            final_conv_length=final_conv_length,
            batch_norm=do_batch_norm,
            double_time_convs=double_time_convs,
            drop_prob=drop_prob,
            filter_length_2=filter_length_2,
            filter_length_3=filter_length_3,
            filter_length_4=filter_length_4,
            filter_time_length=filter_time_length,
            first_nonlin=first_nonlin,
            first_pool_mode=first_pool_mode,
            first_pool_nonlin=first_pool_nonlin,
            later_nonlin=later_nonlin,
            later_pool_mode=later_pool_mode,
            later_pool_nonlin=later_pool_nonlin,
            pool_time_length=pool_time_length,
            pool_time_stride=pool_time_stride,
            split_first_layer=split_first_layer,
            stride_before_pool=True).create_network()
    elif model_name == 'shallow_smac':
        conv_nonlin = identity
        do_batch_norm = True
        drop_prob = 0.328794
        filter_time_length = 56
        final_conv_length = 22
        n_filters_spat = 73
        n_filters_time = 24
        pool_mode = 'max'
        pool_nonlin = identity
        pool_time_length = 84
        pool_time_stride = 3
        split_first_layer = True
        model = ShallowFBCSPNet(
            in_chans=n_chans, n_classes=n_classes,
            n_filters_time=n_filters_time,
            n_filters_spat=n_filters_spat,
            input_time_length=input_time_length,
            final_conv_length=final_conv_length,
            conv_nonlin=conv_nonlin,
            batch_norm=do_batch_norm,
            drop_prob=drop_prob,
            filter_time_length=filter_time_length,
            pool_mode=pool_mode,
            pool_nonlin=pool_nonlin,
            pool_time_length=pool_time_length,
            pool_time_stride=pool_time_stride,
            split_first_layer=split_first_layer,
        ).create_network()
    elif model_name == 'linear':
        model = nn.Sequential()
        model.add_module("conv_classifier",
                         nn.Conv2d(n_chans, n_classes, (600, 1)))
        model.add_module('softmax', nn.LogSoftmax(dim=1))
        model.add_module('squeeze', Expression(lambda x: x.squeeze(3)))
    else:
        raise ValueError("unknown model name {:s}".format(model_name))

    to_dense_prediction_model(model)
    if config.cuda:
        model.cuda()

    # Dummy forward pass to find out how many predictions the network emits
    # per input window; the iterator needs that number.
    test_input = np_to_var(
        np.ones((2, config.n_chans, config.input_time_length, 1),
                dtype=np.float32))
    if config.cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    iterator = CropsFromTrialsIterator(
        batch_size=config.batch_size,
        input_time_length=config.input_time_length,
        n_preds_per_input=n_preds_per_input)

    model.eval()

    # Build the checkpoint name from stringified copies so the caller's
    # ``mode`` is left untouched and can be reused for another call.
    name_fields = [mode[0], mode[1], str(mode[2]), str(mode[3])]
    modelName = '-'.join(name_fields)
    params = th.load('linear/%sModel%s.pt' % (modelName, mode[4]))
    model.load_state_dict(params)

    if config.test_on_eval:
        max_shape = np.max([list(x.shape) for x in test_X], axis=0)
        assert max_shape[1] == int(config.test_recording_mins *
                                   config.sampling_freq * 60)
        splitter = TrainValidSplitter(config.n_folds,
                                      i_valid_fold=config.i_test_fold,
                                      shuffle=config.shuffle)
        train_set, valid_set = splitter.split(X, y)
        test_set = SignalAndTarget(test_X, test_y)
        del test_X, test_y
    else:
        splitter = TrainValidTestSplitter(config.n_folds, config.i_test_fold,
                                          shuffle=config.shuffle)
        train_set, valid_set, test_set = splitter.split(X, y)
    del X, y  # shouldn't be necessary, but just to make sure

    datasets = OrderedDict(
        (('train', train_set), ('valid', valid_set), ('test', test_set)))

    # NOTE(review): metrics are computed for all three sets but only the
    # test value is returned.
    for setname in ('train', 'valid', 'test'):
        dataset = datasets[setname]
        preds_per_batch = []
        for batch in iterator.get_batches(dataset, shuffle=False):
            net_in = np_to_var(batch[0])
            if config.cuda:
                net_in = net_in.cuda()
            preds_per_batch.append(var_to_np(model(net_in)))

        preds_per_trial = compute_preds_per_trial(
            preds_per_batch, dataset,
            input_time_length=iterator.input_time_length,
            n_stride=iterator.n_preds_per_input)
        mean_preds_per_trial = np.array(
            [np.mean(preds, axis=(0, 2)) for preds in preds_per_trial])

        all_pred_labels = np.argmax(mean_preds_per_trial, axis=1).squeeze()
        all_target_labels = dataset.y
        acc_per_class = []
        for i_class in range(n_classes):
            mask = all_target_labels == i_class
            acc = np.mean(all_pred_labels[mask] == all_target_labels[mask])
            acc_per_class.append(acc)

        if setname == 'test':
            testResult = np.mean(acc_per_class)

    return testResult
def fit_transform(model, optimizer, data, labels, num_epochs=10, n_channels=22,
                  input_time_length=500, verbose=False):
    """Train ``model`` on cropped inputs for ``num_epochs`` epochs.

    The arrays are wrapped in a ``SignalAndTarget`` set, the number of
    predictions the network emits per input window is read off a dummy
    forward pass, and training runs with a ``CropsFromTrialsIterator``
    (batch size 32, no shuffling).  The model is switched to eval mode at
    the end of every epoch.

    The function reads the module-level ``cuda`` flag to decide whether
    tensors are moved to the GPU; the model itself is not moved here.

    Args:
        model: network called as ``model(batch)``; its output must have the
            per-crop predictions on axis 2.
        optimizer: optimizer already bound to ``model``'s parameters.
        data, labels: training inputs and integer labels.
        num_epochs: number of training epochs.
        n_channels: channel count used for the dummy probe input.
        input_time_length: number of time samples per crop window.
        verbose: when true, evaluate on the training set after every epoch
            and print loss and accuracy.  The evaluation result is not used
            for anything else, so it is skipped otherwise.

    Returns:
        The trained ``model`` (the same object that was passed in).
    """
    train_set = SignalAndTarget(data, y=labels)

    # Probe the network once to learn how many predictions it produces for a
    # window of ``input_time_length`` samples; the iterator needs that number.
    test_input = np_to_var(
        np.ones((2, n_channels, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    iterator = CropsFromTrialsIterator(batch_size=32,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)

    for i_epoch in range(num_epochs):
        model.train()
        for batch_X, batch_y in iterator.get_batches(train_set, shuffle=False):
            net_in = np_to_var(batch_X)
            net_target = np_to_var(batch_y)
            if cuda:
                net_in = net_in.cuda()
                net_target = net_target.cuda()
            # Remove gradients of the last backward pass from all parameters
            optimizer.zero_grad()
            outputs = model(net_in)
            # Average the predictions across the crop before the loss.
            outputs = th.mean(outputs, dim=2, keepdim=False)
            loss = F.nll_loss(outputs, net_target)
            loss.backward()
            optimizer.step()

        model.eval()
        if not verbose:
            continue

        print("Epoch {:d}".format(i_epoch))
        all_preds = []
        all_losses = []
        batch_sizes = []
        # Evaluation needs no gradients; skip building the autograd graph.
        with th.no_grad():
            for batch_X, batch_y in iterator.get_batches(train_set,
                                                         shuffle=False):
                net_in = np_to_var(batch_X)
                net_target = np_to_var(batch_y)
                if cuda:
                    net_in = net_in.cuda()
                    net_target = net_target.cuda()
                outputs = model(net_in)
                all_preds.append(var_to_np(outputs))
                outputs = th.mean(outputs, dim=2, keepdim=False)
                batch_loss = F.nll_loss(outputs, net_target)
                all_losses.append(float(var_to_np(batch_loss)))
                batch_sizes.append(len(batch_X))

        # Mean per-input loss: weight each batch loss by its batch size.
        epoch_loss = np.mean(
            np.array(all_losses) * np.array(batch_sizes) /
            np.mean(batch_sizes))
        print("{:6s} Loss: {:.5f}".format('Train', epoch_loss))

        # Assign the crop predictions back to their trials.  The helper is
        # given the input array, not the SignalAndTarget wrapper.
        preds_per_trial = compute_preds_per_trial_from_crops(
            all_preds, input_time_length, train_set.X)
        # Average over the prediction axis to get one row per trial.
        meaned_preds_per_trial = np.array(
            [np.mean(p, axis=1) for p in preds_per_trial])
        predicted_labels = np.argmax(meaned_preds_per_trial, axis=1)
        accuracy = np.mean(predicted_labels == train_set.y)
        print("{:6s} Accuracy: {:.2f}%".format('Train', accuracy * 100))

    return model
model.eval() print("Epoch {:d}".format(i_epoch)) for setname, dataset in (('Train', train_set), ('Test', test_set)): # Collect all predictions and losses all_preds = [] all_losses = [] batch_sizes = [] for batch_X, batch_y in iterator.get_batches(dataset, shuffle=False): net_in = np_to_var(batch_X) if cuda: net_in = net_in.cuda() net_target = np_to_var(batch_y) if cuda: net_target = net_target.cuda() outputs = model(net_in) all_preds.append(var_to_np(outputs)) outputs = th.mean(outputs, dim=2, keepdim=False) loss = F.nll_loss(outputs, net_target) loss = float(var_to_np(loss)) all_losses.append(loss) batch_sizes.append(len(batch_X)) # Compute mean per-input loss loss = np.mean( np.array(all_losses) * np.array(batch_sizes) / np.mean(batch_sizes)) print("{:6s} Loss: {:.5f}".format(setname, loss)) # Assign the predictions to the trials preds_per_trial = compute_preds_per_trial_for_set( all_preds, input_time_length, dataset) # preds per trial are now trials x classes x timesteps/predictions # Now mean across timesteps for each trial to get per-trial predictions