def new_model_from_structure_pytorch(layer_collection, applyFix=False, check_model=False):
    model = nn.Sequential()
    if global_vars.get('channel_dim') != 'channels' or global_vars.get('exp_type') == 'target':
        model.add_module('dimshuffle', _transpose(shape=[0, 3, 2, 1]))
    if global_vars.get('time_factor') != -1:
        model.add_module('stack_by_time', Expression(_stack_input_by_time))
    activations = {'elu': nn.ELU, 'softmax': nn.Softmax, 'sigmoid': nn.Sigmoid}
    input_shape = (2, global_vars.get('eeg_chans'), global_vars.get('input_time_len'), 1)
    for i in range(len(layer_collection)):
        layer = layer_collection[i]
        if i > 0:
            # Shapes after the layers added so far, from a dummy forward pass
            out = model.forward(np_to_var(np.ones(input_shape, dtype=np.float32)))
            prev_channels = out.cpu().data.numpy().shape[1]
            prev_time = out.cpu().data.numpy().shape[2]
            prev_eeg_channels = out.cpu().data.numpy().shape[3]
        else:
            prev_eeg_channels = global_vars.get('eeg_chans')
            prev_time = global_vars.get('input_time_len')
            prev_channels = 1
            if global_vars.get('channel_dim') == 'channels':
                prev_channels = global_vars.get('eeg_chans')
                prev_eeg_channels = 1
        if isinstance(layer, PoolingLayer):
            # With applyFix, shrink pooling until at least one output sample remains
            while applyFix and (prev_time - layer.pool_time) / layer.stride_time < 1:
                if random.uniform(0, 1) < 0.5 and layer.pool_time > 1:
                    layer.pool_time -= 1
                elif layer.stride_time > 1:
                    layer.stride_time -= 1
                if layer.pool_time == 1 and layer.stride_time == 1:
                    break
            if global_vars.get('channel_dim') == 'channels':
                layer.pool_eeg_chan = 1
            model.add_module('%s_%d' % (type(layer).__name__, i),
                             nn.MaxPool2d(kernel_size=(int(layer.pool_time),
                                                       int(layer.pool_eeg_chan)),
                                          stride=(int(layer.stride_time), 1)))
        elif isinstance(layer, ConvLayer):
            if layer.kernel_time == 'down_to_one' or i >= global_vars.get('num_layers'):
                layer.kernel_time = prev_time
                layer.kernel_eeg_chan = prev_eeg_channels
                conv_name = 'conv_classifier'
            else:
                conv_name = '%s_%d' % (type(layer).__name__, i)
            if applyFix and layer.kernel_eeg_chan > prev_eeg_channels:
                layer.kernel_eeg_chan = prev_eeg_channels
            if applyFix and layer.kernel_time > prev_time:
                layer.kernel_time = prev_time
            if global_vars.get('channel_dim') == 'channels':
                layer.kernel_eeg_chan = 1
            model.add_module(conv_name,
                             nn.Conv2d(prev_channels, layer.filter_num,
                                       (layer.kernel_time, layer.kernel_eeg_chan),
                                       stride=1))
        elif isinstance(layer, BatchNormLayer):
            model.add_module('%s_%d' % (type(layer).__name__, i),
                             nn.BatchNorm2d(prev_channels,
                                            momentum=global_vars.get('batch_norm_alpha'),
                                            affine=True, eps=1e-5))
        elif isinstance(layer, ActivationLayer):
            model.add_module('%s_%d' % (layer.activation_type, i),
                             activations[layer.activation_type]())
        elif isinstance(layer, DropoutLayer):
            model.add_module('%s_%d' % (type(layer).__name__, i),
                             nn.Dropout(p=global_vars.get('dropout_p')))
        elif isinstance(layer, IdentityLayer):
            model.add_module('%s_%d' % (type(layer).__name__, i), IdentityModule())
        elif isinstance(layer, FlattenLayer):
            model.add_module('squeeze', _squeeze_final_output())
    if applyFix:
        return layer_collection
    if check_model:
        # only verifying that the architecture builds, no model returned
        return
    init.xavier_uniform_(list(model._modules.items())[-3][1].weight, gain=1)
    init.constant_(list(model._modules.items())[-3][1].bias, 0)
    return model
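# Hedged usage note (added for illustration, not part of the original source): with
# applyFix=True the function above only repairs layer hyperparameters that would make
# the sampled architecture invalid (e.g. pooling longer than the remaining time
# dimension) and returns the layer list; called again without flags it builds and
# weight-initializes the nn.Sequential model. The `layer_collection` here is assumed
# to come from the surrounding NAS code.
#
#   layer_collection = new_model_from_structure_pytorch(layer_collection, applyFix=True)
#   model = new_model_from_structure_pytorch(layer_collection)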
def run_exp(data_folders, n_recordings, sensor_types, n_chans, max_recording_mins,
            sec_to_cut, duration_recording_mins, test_recording_mins, max_abs_val,
            sampling_freq, divisor, test_on_eval, n_folds, i_test_fold, shuffle,
            model_name, n_start_chans, n_chan_factor, input_time_length,
            final_conv_length, model_constraint, init_lr, batch_size, max_epochs,
            cuda):
    import torch.backends.cudnn as cudnn
    cudnn.benchmark = True
    preproc_functions = []
    preproc_functions.append(lambda data, fs: (
        data[:, int(sec_to_cut * fs):-int(sec_to_cut * fs)], fs))
    preproc_functions.append(lambda data, fs: (
        data[:, :int(duration_recording_mins * 60 * fs)], fs))
    if max_abs_val is not None:
        preproc_functions.append(
            lambda data, fs: (np.clip(data, -max_abs_val, max_abs_val), fs))
    preproc_functions.append(lambda data, fs: (
        resampy.resample(data, fs, sampling_freq, axis=1, filter='kaiser_fast'),
        sampling_freq))
    if divisor is not None:
        preproc_functions.append(lambda data, fs: (data / divisor, fs))
    dataset = DiagnosisSet(n_recordings=n_recordings,
                           max_recording_mins=max_recording_mins,
                           preproc_functions=preproc_functions,
                           data_folders=data_folders,
                           train_or_eval='train',
                           sensor_types=sensor_types)
    if test_on_eval:
        if test_recording_mins is None:
            test_recording_mins = duration_recording_mins
        test_preproc_functions = copy(preproc_functions)
        test_preproc_functions[1] = lambda data, fs: (
            data[:, :int(test_recording_mins * 60 * fs)], fs)
        test_dataset = DiagnosisSet(n_recordings=n_recordings,
                                    max_recording_mins=None,
                                    preproc_functions=test_preproc_functions,
                                    data_folders=data_folders,
                                    train_or_eval='eval',
                                    sensor_types=sensor_types)
    X, y = dataset.load()
    max_shape = np.max([list(x.shape) for x in X], axis=0)
    assert max_shape[1] == int(duration_recording_mins * sampling_freq * 60)
    if test_on_eval:
        test_X, test_y = test_dataset.load()
        max_shape = np.max([list(x.shape) for x in test_X], axis=0)
        assert max_shape[1] == int(test_recording_mins * sampling_freq * 60)
    if not test_on_eval:
        splitter = TrainValidTestSplitter(n_folds, i_test_fold, shuffle=shuffle)
        train_set, valid_set, test_set = splitter.split(X, y)
    else:
        splitter = TrainValidSplitter(n_folds, i_valid_fold=i_test_fold,
                                      shuffle=shuffle)
        train_set, valid_set = splitter.split(X, y)
        test_set = SignalAndTarget(test_X, test_y)
        del test_X, test_y
    del X, y  # shouldn't be necessary, but just to make sure

    set_random_seeds(seed=20170629, cuda=cuda)
    n_classes = 2
    if model_name == 'shallow':
        model = ShallowFBCSPNet(in_chans=n_chans, n_classes=n_classes,
                                n_filters_time=n_start_chans,
                                n_filters_spat=n_start_chans,
                                input_time_length=input_time_length,
                                final_conv_length=final_conv_length).create_network()
    elif model_name == 'deep':
        model = Deep4Net(n_chans, n_classes,
                         n_filters_time=n_start_chans,
                         n_filters_spat=n_start_chans,
                         input_time_length=input_time_length,
                         n_filters_2=int(n_start_chans * n_chan_factor),
                         n_filters_3=int(n_start_chans * (n_chan_factor ** 2.0)),
                         n_filters_4=int(n_start_chans * (n_chan_factor ** 3.0)),
                         final_conv_length=final_conv_length,
                         stride_before_pool=True).create_network()
    elif (model_name == 'deep_smac') or (model_name == 'deep_smac_bnorm'):
        if model_name == 'deep_smac':
            do_batch_norm = False
        else:
            assert model_name == 'deep_smac_bnorm'
            do_batch_norm = True
        double_time_convs = False
        drop_prob = 0.244445
        filter_length_2 = 12
        filter_length_3 = 14
        filter_length_4 = 12
        filter_time_length = 21
        final_conv_length = 1
        first_nonlin = elu
        first_pool_mode = 'mean'
        first_pool_nonlin = identity
        later_nonlin = elu
        later_pool_mode = 'mean'
        later_pool_nonlin = identity
        n_filters_factor = 1.679066
        n_filters_start = 32
        pool_time_length = 1
        pool_time_stride = 2
        split_first_layer = True
        n_chan_factor = n_filters_factor
        n_start_chans = n_filters_start
        model = Deep4Net(n_chans, n_classes,
                         n_filters_time=n_start_chans,
                         n_filters_spat=n_start_chans,
                         input_time_length=input_time_length,
                         n_filters_2=int(n_start_chans * n_chan_factor),
                         n_filters_3=int(n_start_chans * (n_chan_factor ** 2.0)),
                         n_filters_4=int(n_start_chans * (n_chan_factor ** 3.0)),
                         final_conv_length=final_conv_length,
                         batch_norm=do_batch_norm,
                         double_time_convs=double_time_convs,
                         drop_prob=drop_prob,
                         filter_length_2=filter_length_2,
                         filter_length_3=filter_length_3,
                         filter_length_4=filter_length_4,
                         filter_time_length=filter_time_length,
                         first_nonlin=first_nonlin,
                         first_pool_mode=first_pool_mode,
                         first_pool_nonlin=first_pool_nonlin,
                         later_nonlin=later_nonlin,
                         later_pool_mode=later_pool_mode,
                         later_pool_nonlin=later_pool_nonlin,
                         pool_time_length=pool_time_length,
                         pool_time_stride=pool_time_stride,
                         split_first_layer=split_first_layer,
                         stride_before_pool=True).create_network()
    elif model_name == 'shallow_smac':
        conv_nonlin = identity
        do_batch_norm = True
        drop_prob = 0.328794
        filter_time_length = 56
        final_conv_length = 22
        n_filters_spat = 73
        n_filters_time = 24
        pool_mode = 'max'
        pool_nonlin = identity
        pool_time_length = 84
        pool_time_stride = 3
        split_first_layer = True
        model = ShallowFBCSPNet(in_chans=n_chans, n_classes=n_classes,
                                n_filters_time=n_filters_time,
                                n_filters_spat=n_filters_spat,
                                input_time_length=input_time_length,
                                final_conv_length=final_conv_length,
                                conv_nonlin=conv_nonlin,
                                batch_norm=do_batch_norm,
                                drop_prob=drop_prob,
                                filter_time_length=filter_time_length,
                                pool_mode=pool_mode,
                                pool_nonlin=pool_nonlin,
                                pool_time_length=pool_time_length,
                                pool_time_stride=pool_time_stride,
                                split_first_layer=split_first_layer).create_network()
    elif model_name == 'linear':
        model = nn.Sequential()
        model.add_module("conv_classifier", nn.Conv2d(n_chans, n_classes, (600, 1)))
        model.add_module('softmax', nn.LogSoftmax())
        model.add_module('squeeze', Expression(lambda x: x.squeeze(3)))
    else:
        assert False, "unknown model name {:s}".format(model_name)

    to_dense_prediction_model(model)
    log.info("Model:\n{:s}".format(str(model)))
    if cuda:
        model.cuda()
    # determine output size
    test_input = np_to_var(
        np.ones((2, n_chans, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    log.info("In shape: {:s}".format(str(test_input.cpu().data.numpy().shape)))
    out = model(test_input)
    log.info("Out shape: {:s}".format(str(out.cpu().data.numpy().shape)))
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    log.info("{:d} predictions per input/trial".format(n_preds_per_input))
    iterator = CropsFromTrialsIterator(batch_size=batch_size,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)
    optimizer = optim.Adam(model.parameters(), lr=init_lr)
    loss_function = lambda preds, targets: F.nll_loss(
        th.mean(preds, dim=2, keepdim=False), targets)
    if model_constraint is not None:
        assert model_constraint == 'defaultnorm'
        model_constraint = MaxNormDefaultConstraint()
    monitors = [LossMonitor(),
                MisclassMonitor(col_suffix='sample_misclass'),
                CroppedDiagnosisMonitor(input_time_length, n_preds_per_input),
                RuntimeMonitor()]
    stop_criterion = MaxEpochs(max_epochs)
    batch_modifier = None
    run_after_early_stop = True
    exp = Experiment(model, train_set, valid_set, test_set, iterator,
                     loss_function, optimizer, model_constraint, monitors,
                     stop_criterion, remember_best_column='valid_misclass',
                     run_after_early_stop=run_after_early_stop,
                     batch_modifier=batch_modifier, cuda=cuda)
    exp.run()
    return exp
                     csv_file=None, evolution_file=None)
naiveNAS.train_and_evaluate_model(model)
train_batches = list(iterator.get_batches(train_set[subject_id], shuffle=False))
train_X_batches = np.concatenate(list(zip(*train_batches))[0])
new_model = nn.Sequential()
for name, module in model.named_children():
    if 'softmax' in name:
        break
    new_model.add_module(name, module)
new_model.eval()
pred_fn = lambda x: var_to_np(
    th.mean(new_model(np_to_var(x).cuda())[:, :, :, 0], dim=2, keepdim=False))
from braindecode.visualization.perturbation import compute_amplitude_prediction_correlations
amp_pred_corrs = compute_amplitude_prediction_correlations(
    pred_fn, train_X_batches, n_iterations=12, batch_size=30)
freqs = np.fft.rfftfreq(train_X_batches.shape[2], d=1.0 / fs)
alpha_band = {'start': 7, 'stop': 14}
beta_band = {'start': 14, 'stop': 31}
high_gamma_band = {'start': 71, 'stop': 91}
bands = [alpha_band, beta_band, high_gamma_band]
for band in bands:
model = new_model
if cuda:
    model.cuda()
if not ResNet:
    to_dense_prediction_model(model)
start_param_values = deepcopy(new_model.state_dict())

# %% setup optimizer -> new for each x-val fold
from torch import optim

# %%
# determine output size
from braindecode.torch_ext.util import np_to_var
test_input = np_to_var(
    np.ones((2, in_chans, input_time_length, 1), dtype=np.float32))
if cuda:
    test_input = test_input.cuda()
out = model(test_input)
n_preds_per_input = out.cpu().data.numpy().shape[1]
log.info("predictor length = {:d} samples".format(n_preds_per_input))
log.info("predictor length = {:f} s".format(n_preds_per_input / samplingRate))
iterator = CropsFromTrialsIterator(batch_size=batch_size,
                                   input_time_length=input_time_length,
                                   n_preds_per_input=n_preds_per_input)

# %% Loss function takes predictions as they come out of the network
# and the targets and returns a loss
def create_network(self):
    if self.stride_before_pool:
        conv_stride = self.pool_time_stride
        pool_stride = 1
    else:
        conv_stride = 1
        pool_stride = self.pool_time_stride
    pool_class_dict = dict(max=nn.MaxPool2d, mean=AvgPool2dWithConv)
    first_pool_class = pool_class_dict[self.first_pool_mode]
    later_pool_class = pool_class_dict[self.later_pool_mode]
    model = nn.Sequential()
    if self.split_first_layer:
        model.add_module('dimshuffle', Expression(_transpose_time_to_spat))
        model.add_module('conv_time',
                         nn.Conv2d(1, self.n_filters_time,
                                   (self.filter_time_length, 1), stride=1))
        model.add_module('conv_spat',
                         nn.Conv2d(self.n_filters_time, self.n_filters_spat,
                                   (1, self.in_chans), stride=(conv_stride, 1),
                                   bias=not self.batch_norm))
        n_filters_conv = self.n_filters_spat
    else:
        model.add_module('conv_time',
                         nn.Conv2d(self.in_chans, self.n_filters_time,
                                   (self.filter_time_length, 1),
                                   stride=(conv_stride, 1),
                                   bias=not self.batch_norm))
        n_filters_conv = self.n_filters_time
    if self.batch_norm:
        model.add_module('bnorm',
                         nn.BatchNorm2d(n_filters_conv,
                                        momentum=self.batch_norm_alpha,
                                        affine=True, eps=1e-5))
    model.add_module('conv_nonlin', Expression(self.first_nonlin))
    model.add_module('pool',
                     first_pool_class(kernel_size=(self.pool_time_length, 1),
                                      stride=(pool_stride, 1)))
    model.add_module('pool_nonlin', Expression(self.first_pool_nonlin))

    def add_conv_pool_block(model, n_filters_before, n_filters, filter_length,
                            block_nr):
        suffix = '_{:d}'.format(block_nr)
        model.add_module('drop' + suffix, nn.Dropout(p=self.drop_prob))
        model.add_module('conv' + suffix,
                         nn.Conv2d(n_filters_before, n_filters,
                                   (filter_length, 1), stride=(conv_stride, 1),
                                   bias=not self.batch_norm))
        if self.batch_norm:
            model.add_module('bnorm' + suffix,
                             nn.BatchNorm2d(n_filters,
                                            momentum=self.batch_norm_alpha,
                                            affine=True, eps=1e-5))
        model.add_module('nonlin' + suffix, Expression(self.later_nonlin))
        model.add_module('pool' + suffix,
                         later_pool_class(kernel_size=(self.pool_time_length, 1),
                                          stride=(pool_stride, 1)))
        model.add_module('pool_nonlin' + suffix,
                         Expression(self.later_pool_nonlin))

    add_conv_pool_block(model, n_filters_conv, self.n_filters_2,
                        self.filter_length_2, 2)
    add_conv_pool_block(model, self.n_filters_2, self.n_filters_3,
                        self.filter_length_3, 3)
    add_conv_pool_block(model, self.n_filters_3, self.n_filters_4,
                        self.filter_length_4, 4)

    model.eval()
    if self.final_conv_length == 'auto':
        out = model(np_to_var(np.ones((1, self.in_chans,
                                       self.input_time_length, 1),
                                      dtype=np.float32)))
        n_out_time = out.cpu().data.numpy().shape[2]
        self.final_conv_length = n_out_time
    model.add_module('conv_classifier',
                     nn.Conv2d(self.n_filters_4, self.n_classes,
                               (self.final_conv_length, 1), bias=True))
    model.add_module('softmax', nn.LogSoftmax())
    model.add_module('squeeze', Expression(_squeeze_final_output))

    # Initialization, xavier is same as in our paper...
    # was default from lasagne
    init.xavier_uniform(model.conv_time.weight, gain=1)
    # maybe no bias in case of no split layer and batch norm
    if self.split_first_layer or (not self.batch_norm):
        init.constant(model.conv_time.bias, 0)
    if self.split_first_layer:
        init.xavier_uniform(model.conv_spat.weight, gain=1)
        if not self.batch_norm:
            init.constant(model.conv_spat.bias, 0)
    if self.batch_norm:
        init.constant(model.bnorm.weight, 1)
        init.constant(model.bnorm.bias, 0)
    param_dict = dict(list(model.named_parameters()))
    for block_nr in range(2, 5):
        conv_weight = param_dict['conv_{:d}.weight'.format(block_nr)]
        init.xavier_uniform(conv_weight, gain=1)
        if not self.batch_norm:
            conv_bias = param_dict['conv_{:d}.bias'.format(block_nr)]
            init.constant(conv_bias, 0)
        else:
            bnorm_weight = param_dict['bnorm_{:d}.weight'.format(block_nr)]
            bnorm_bias = param_dict['bnorm_{:d}.bias'.format(block_nr)]
            init.constant(bnorm_weight, 1)
            init.constant(bnorm_bias, 0)
    init.xavier_uniform(model.conv_classifier.weight, gain=1)
    init.constant(model.conv_classifier.bias, 0)
    # Start in eval mode
    model.eval()
    return model
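# Hedged usage sketch (added, not part of the original listing): constructing a
# Deep4Net the way the experiment scripts in this collection do and running a dummy
# forward pass to read off the output shape. The import paths follow braindecode
# 0.4.x and are an assumption here.
from braindecode.models.deep4 import Deep4Net
from braindecode.torch_ext.util import np_to_var
import numpy as np

model = Deep4Net(in_chans=22, n_classes=4, input_time_length=1000,
                 final_conv_length=2).create_network()
dummy = np_to_var(np.ones((2, 22, 1000, 1), dtype=np.float32))
out = model(dummy)
# e.g. (2, 4, n_out_time) before conversion to a dense (cropped) prediction model
print(out.cpu().data.numpy().shape)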
def test_cropped_decoding():
    import mne
    from mne.io import concatenate_raws

    # 5,6,7,10,13,14 are codes for executed and imagined hands/feet
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id, event_codes)

    # Load each of the files
    parts = [mne.io.read_raw_edf(path, preload=True, stim_channel='auto',
                                 verbose='WARNING')
             for path in physionet_paths]

    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset
    events = mne.find_events(raw, shortest_event=0, stim_channel='STI 014')

    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info, meg=False, eeg=True, stim=False,
                                      eog=False, exclude='bads')

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(raw, events, dict(hands=2, feet=3), tmin=1, tmax=4.1,
                         proj=False, picks=eeg_channel_inds, baseline=None,
                         preload=True)

    import numpy as np
    from braindecode.datautil.signal_target import SignalAndTarget

    # Convert data from volt to millivolt
    # Pytorch expects float32 for input and int64 for labels.
    X = (epoched.get_data() * 1e6).astype(np.float32)
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2,3 -> 0,1

    train_set = SignalAndTarget(X[:60], y=y[:60])
    test_set = SignalAndTarget(X[60:], y=y[60:])

    from braindecode.models.shallow_fbcsp import ShallowFBCSPNet
    from torch import nn
    from braindecode.torch_ext.util import set_random_seeds
    from braindecode.models.util import to_dense_prediction_model

    # Set if you want to use GPU
    # You can also use torch.cuda.is_available() to determine if cuda is
    # available on your machine.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)

    # This will determine how many crops are processed in parallel
    input_time_length = 450
    n_classes = 2
    in_chans = train_set.X.shape[1]
    # final_conv_length determines the size of the receptive field of the ConvNet
    model = ShallowFBCSPNet(in_chans=in_chans, n_classes=n_classes,
                            input_time_length=input_time_length,
                            final_conv_length=12).create_network()
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()

    from torch import optim
    optimizer = optim.Adam(model.parameters())

    from braindecode.torch_ext.util import np_to_var
    # determine output size
    test_input = np_to_var(
        np.ones((2, in_chans, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    print("{:d} predictions per input/trial".format(n_preds_per_input))

    from braindecode.datautil.iterators import CropsFromTrialsIterator
    iterator = CropsFromTrialsIterator(batch_size=32,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)

    from braindecode.torch_ext.util import np_to_var, var_to_np
    import torch.nn.functional as F
    from numpy.random import RandomState
    import torch as th
    from braindecode.experiments.monitors import compute_preds_per_trial_from_crops

    rng = RandomState((2017, 6, 30))
    losses = []
    accuracies = []
    for i_epoch in range(4):
        # Set model to training mode
        model.train()
        for batch_X, batch_y in iterator.get_batches(train_set, shuffle=False):
            net_in = np_to_var(batch_X)
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(batch_y)
            if cuda:
                net_target = net_target.cuda()
            # Remove gradients of last backward pass from all parameters
            optimizer.zero_grad()
            outputs = model(net_in)
            # Mean predictions across trial
            # Note that this will give identical gradients to computing
            # a per-prediction loss (at least for the combination of
            # log softmax activation and negative log likelihood loss
            # which we are using here)
            outputs = th.mean(outputs, dim=2, keepdim=False)
            loss = F.nll_loss(outputs, net_target)
            loss.backward()
            optimizer.step()

        # Print some statistics each epoch
        model.eval()
        print("Epoch {:d}".format(i_epoch))
        for setname, dataset in (('Train', train_set), ('Test', test_set)):
            # Collect all predictions and losses
            all_preds = []
            all_losses = []
            batch_sizes = []
            for batch_X, batch_y in iterator.get_batches(dataset, shuffle=False):
                net_in = np_to_var(batch_X)
                if cuda:
                    net_in = net_in.cuda()
                net_target = np_to_var(batch_y)
                if cuda:
                    net_target = net_target.cuda()
                outputs = model(net_in)
                all_preds.append(var_to_np(outputs))
                outputs = th.mean(outputs, dim=2, keepdim=False)
                loss = F.nll_loss(outputs, net_target)
                loss = float(var_to_np(loss))
                all_losses.append(loss)
                batch_sizes.append(len(batch_X))
            # Compute mean per-input loss
            loss = np.mean(np.array(all_losses) * np.array(batch_sizes) /
                           np.mean(batch_sizes))
            print("{:6s} Loss: {:.5f}".format(setname, loss))
            losses.append(loss)
            # Assign the predictions to the trials
            preds_per_trial = compute_preds_per_trial_from_crops(
                all_preds, input_time_length, dataset.X)
            # preds per trial are now trials x classes x timesteps/predictions
            # Now mean across timesteps for each trial to get per-trial predictions
            meaned_preds_per_trial = np.array(
                [np.mean(p, axis=1) for p in preds_per_trial])
            predicted_labels = np.argmax(meaned_preds_per_trial, axis=1)
            accuracy = np.mean(predicted_labels == dataset.y)
            accuracies.append(accuracy * 100)
            print("{:6s} Accuracy: {:.1f}%".format(setname, accuracy * 100))

    np.testing.assert_allclose(
        np.array(losses),
        np.array([1.703004002571106, 1.6295261979103088, 0.71168938279151917,
                  0.70825588703155518, 0.58231228590011597, 0.60176041722297668,
                  0.46629951894283295, 0.51184913516044617]),
        rtol=1e-4, atol=1e-5)
    np.testing.assert_allclose(
        np.array(accuracies),
        np.array([50.0, 46.666666666666664, 60.0, 53.333333333333336,
                  68.333333333333329, 66.666666666666657, 88.333333333333329,
                  83.333333333333343]),
        rtol=1e-4, atol=1e-5)
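# Hedged side note (added, not part of the original test): for log-softmax outputs and
# NLL loss, averaging the per-crop log-probabilities before the loss gives exactly the
# same value (and therefore the same gradients) as averaging the per-crop losses, which
# is what the comment inside the training loop above relies on. A minimal
# self-contained check:
import torch as th
import torch.nn.functional as F

log_probs = F.log_softmax(th.randn(8, 2, 5), dim=1)     # batch x classes x crops
targets = th.tensor([0, 1, 0, 1, 1, 0, 1, 0])            # one label per trial
loss_mean_first = F.nll_loss(th.mean(log_probs, dim=2), targets)
loss_per_crop = th.stack([F.nll_loss(log_probs[:, :, t], targets)
                          for t in range(log_probs.shape[2])]).mean()
assert th.allclose(loss_mean_first, loss_per_crop)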
def run_exp(max_recording_mins, n_recordings, sec_to_cut, duration_recording_mins,
            max_abs_val, max_min_threshold, max_min_expected, shrink_val,
            max_min_remove, batch_set_zero_val, batch_set_zero_test, sampling_freq,
            low_cut_hz, high_cut_hz, exp_demean, exp_standardize, moving_demean,
            moving_standardize, channel_demean, channel_standardize, divisor,
            n_folds, i_test_fold, input_time_length, final_conv_length, pool_stride,
            n_blocks_to_add, sigmoid, model_constraint, batch_size, max_epochs,
            only_return_exp):
    cuda = True
    preproc_functions = []
    preproc_functions.append(lambda data, fs: (
        data[:, int(sec_to_cut * fs):-int(sec_to_cut * fs)], fs))
    preproc_functions.append(lambda data, fs: (
        data[:, :int(duration_recording_mins * 60 * fs)], fs))
    if max_abs_val is not None:
        preproc_functions.append(
            lambda data, fs: (np.clip(data, -max_abs_val, max_abs_val), fs))
    if max_min_threshold is not None:
        preproc_functions.append(lambda data, fs: (
            clean_jumps(data, 200, max_min_threshold, max_min_expected, cuda), fs))
    if max_min_remove is not None:
        window_len = 200
        preproc_functions.append(lambda data, fs: (
            set_jumps_to_zero(data, window_len=window_len,
                              threshold=max_min_remove, cuda=cuda,
                              clip_min_max_to_zero=True), fs))
    if shrink_val is not None:
        preproc_functions.append(lambda data, fs: (
            shrink_spikes(data, shrink_val, 1, 9), fs))
    preproc_functions.append(lambda data, fs: (
        resampy.resample(data, fs, sampling_freq, axis=1, filter='kaiser_fast'),
        sampling_freq))
    preproc_functions.append(lambda data, fs: (
        bandpass_cnt(data, low_cut_hz, high_cut_hz, fs, filt_order=4, axis=1), fs))
    if exp_demean:
        preproc_functions.append(lambda data, fs: (
            exponential_running_demean(data.T, factor_new=0.001,
                                       init_block_size=100).T, fs))
    if exp_standardize:
        preproc_functions.append(lambda data, fs: (
            exponential_running_standardize(data.T, factor_new=0.001,
                                            init_block_size=100).T, fs))
    if moving_demean:
        preproc_functions.append(lambda data, fs: (
            padded_moving_demean(data, axis=1, n_window=201), fs))
    if moving_standardize:
        preproc_functions.append(lambda data, fs: (
            padded_moving_standardize(data, axis=1, n_window=201), fs))
    if channel_demean:
        preproc_functions.append(lambda data, fs: (demean(data, axis=1), fs))
    if channel_standardize:
        preproc_functions.append(lambda data, fs: (standardize(data, axis=1), fs))
    if divisor is not None:
        preproc_functions.append(lambda data, fs: (data / divisor, fs))

    dataset = DiagnosisSet(n_recordings=n_recordings,
                           max_recording_mins=max_recording_mins,
                           preproc_functions=preproc_functions)
    if not only_return_exp:
        X, y = dataset.load()
    splitter = Splitter(n_folds, i_test_fold)
    if not only_return_exp:
        train_set, valid_set, test_set = splitter.split(X, y)
        del X, y  # shouldn't be necessary, but just to make sure
    else:
        train_set = None
        valid_set = None
        test_set = None

    set_random_seeds(seed=20170629, cuda=cuda)
    if sigmoid:
        n_classes = 1
    else:
        n_classes = 2
    in_chans = 21
    net = Deep4Net(in_chans=in_chans, n_classes=n_classes,
                   input_time_length=input_time_length,
                   final_conv_length=final_conv_length,
                   pool_time_length=pool_stride,
                   pool_time_stride=pool_stride,
                   n_filters_2=50, n_filters_3=80, n_filters_4=120)
    model = net_with_more_layers(net, n_blocks_to_add, nn.MaxPool2d)
    if sigmoid:
        model = to_linear_plus_minus_net(model)
    optimizer = optim.Adam(model.parameters())
    to_dense_prediction_model(model)
    log.info("Model:\n{:s}".format(str(model)))
    if cuda:
        model.cuda()
    # determine output size
    test_input = np_to_var(
        np.ones((2, in_chans, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    log.info("{:d} predictions per input/trial".format(n_preds_per_input))
    iterator = CropsFromTrialsIterator(batch_size=batch_size,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)
    if sigmoid:
        loss_function = lambda preds, targets: binary_cross_entropy_with_logits(
            th.mean(preds, dim=2)[:, 1, 0], targets.type_as(preds))
    else:
        loss_function = lambda preds, targets: F.nll_loss(
            th.mean(preds, dim=2)[:, :, 0], targets)
    if model_constraint is not None:
        model_constraint = MaxNormDefaultConstraint()
    monitors = [LossMonitor(),
                MisclassMonitor(col_suffix='sample_misclass'),
                CroppedTrialMisclassMonitor(input_time_length),
                RuntimeMonitor()]
    stop_criterion = MaxEpochs(max_epochs)
    batch_modifier = None
    if batch_set_zero_val is not None:
        batch_modifier = RemoveMinMaxDiff(batch_set_zero_val, clip_max_abs=True,
                                          set_zero=True)
    if (batch_set_zero_val is not None) and (batch_set_zero_test == True):
        iterator = ModifiedIterator(iterator, batch_modifier)
        batch_modifier = None
    exp = Experiment(model, train_set, valid_set, test_set, iterator,
                     loss_function, optimizer, model_constraint, monitors,
                     stop_criterion, remember_best_column='valid_misclass',
                     run_after_early_stop=True, batch_modifier=batch_modifier,
                     cuda=cuda)
    if not only_return_exp:
        exp.run()
    else:
        exp.dataset = dataset
        exp.splitter = splitter
    return exp
def run_exp_on_high_gamma_dataset(train_filename, test_filename, low_cut_hz,
                                  model_name, max_epochs, max_increase_epochs,
                                  np_th_seed, debug):
    train_set, valid_set, test_set = load_train_valid_test(
        train_filename=train_filename, test_filename=test_filename,
        low_cut_hz=low_cut_hz, debug=debug)
    if debug:
        max_epochs = 4

    set_random_seeds(np_th_seed, cuda=True)
    # torch.backends.cudnn.benchmark = True  # sometimes crashes?
    n_classes = int(np.max(train_set.y) + 1)
    n_chans = int(train_set.X.shape[1])
    input_time_length = 1000
    if model_name == 'deep':
        model = Deep4Net(n_chans, n_classes,
                         input_time_length=input_time_length,
                         final_conv_length=2).create_network()
    elif model_name == 'shallow':
        model = ShallowFBCSPNet(n_chans, n_classes,
                                input_time_length=input_time_length,
                                final_conv_length=30).create_network()

    to_dense_prediction_model(model)
    model.cuda()
    model.eval()

    out = model(np_to_var(train_set.X[:1, :, :input_time_length, None]).cuda())
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    optimizer = optim.Adam(model.parameters(), weight_decay=0, lr=1e-3)
    iterator = CropsFromTrialsIterator(batch_size=60,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input,
                                       seed=np_th_seed)
    monitors = [LossMonitor(),
                MisclassMonitor(col_suffix='sample_misclass'),
                CroppedTrialMisclassMonitor(input_time_length=input_time_length),
                RuntimeMonitor()]
    model_constraint = MaxNormDefaultConstraint()
    loss_function = lambda preds, targets: F.nll_loss(th.mean(preds, dim=2),
                                                      targets)

    run_after_early_stop = True
    do_early_stop = True
    remember_best_column = 'valid_misclass'
    stop_criterion = Or([MaxEpochs(max_epochs),
                         NoDecrease('valid_misclass', max_increase_epochs)])

    exp = Experiment(model, train_set, valid_set, test_set, iterator=iterator,
                     loss_function=loss_function, optimizer=optimizer,
                     model_constraint=model_constraint, monitors=monitors,
                     stop_criterion=stop_criterion,
                     remember_best_column=remember_best_column,
                     run_after_early_stop=run_after_early_stop, cuda=True,
                     do_early_stop=do_early_stop)
    exp.run()
    return exp
def run_exp(data_folder, subject_id, low_cut_hz, model, cuda):
    train_filename = 'A{:01d}T.gdf'.format(subject_id)
    test_filename = 'A{:01d}E.gdf'.format(subject_id)
    train_filepath = os.path.join(data_folder, train_filename)
    test_filepath = os.path.join(data_folder, test_filename)
    train_label_filepath = train_filepath.replace('.gdf', '.mat')
    test_label_filepath = test_filepath.replace('.gdf', '.mat')

    train_loader = BCICompetition4Set2A(
        train_filepath, labels_filename=train_label_filepath)
    test_loader = BCICompetition4Set2A(
        test_filepath, labels_filename=test_label_filepath)
    # print(train_loader)
    train_cnt = train_loader.load()
    test_cnt = test_loader.load()

    # Preprocessing
    train_cnt = train_cnt.drop_channels(['STI 014', 'EOG-left',
                                         'EOG-central', 'EOG-right'])
    assert len(train_cnt.ch_names) == 22
    # lets convert to millivolt for numerical stability of next operations
    train_cnt = mne_apply(lambda a: a * 1e6, train_cnt)
    train_cnt = mne_apply(
        lambda a: bandpass_cnt(a, low_cut_hz, 38, train_cnt.info['sfreq'],
                               filt_order=3, axis=1), train_cnt)
    train_cnt = mne_apply(
        lambda a: exponential_running_standardize(a.T, factor_new=1e-3,
                                                  init_block_size=1000,
                                                  eps=1e-4).T, train_cnt)

    test_cnt = test_cnt.drop_channels(['STI 014', 'EOG-left',
                                       'EOG-central', 'EOG-right'])
    assert len(test_cnt.ch_names) == 22
    test_cnt = mne_apply(lambda a: a * 1e6, test_cnt)
    test_cnt = mne_apply(
        lambda a: bandpass_cnt(a, low_cut_hz, 38, test_cnt.info['sfreq'],
                               filt_order=3, axis=1), test_cnt)
    test_cnt = mne_apply(
        lambda a: exponential_running_standardize(a.T, factor_new=1e-3,
                                                  init_block_size=1000,
                                                  eps=1e-4).T, test_cnt)

    marker_def = OrderedDict([('Left Hand', [1]), ('Right Hand', [2]),
                              ('Foot', [3]), ('Tongue', [4])])
    ival = [-500, 4000]

    train_set = create_signal_target_from_raw_mne(train_cnt, marker_def, ival)
    test_set = create_signal_target_from_raw_mne(test_cnt, marker_def, ival)
    train_set, valid_set = split_into_two_sets(train_set, first_set_fraction=0.8)

    set_random_seeds(seed=20190706, cuda=cuda)

    n_classes = 4
    n_chans = int(train_set.X.shape[1])
    input_time_length = 1000
    if model == 'shallow':
        model = ShallowFBCSPNet(n_chans, n_classes,
                                input_time_length=input_time_length,
                                final_conv_length=30).create_network()
    elif model == 'deep':
        model = Deep4Net(n_chans, n_classes,
                         input_time_length=input_time_length,
                         final_conv_length=2).create_network()

    to_dense_prediction_model(model)
    if cuda:
        model.cuda()
    log.info("Model: \n{:s}".format(str(model)))

    dummy_input = np_to_var(train_set.X[:1, :, :, None])
    if cuda:
        dummy_input = dummy_input.cuda()
    out = model(dummy_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]

    optimizer = optim.Adam(model.parameters())
    iterator = CropsFromTrialsIterator(batch_size=60,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)
    stop_criterion = Or([MaxEpochs(800), NoDecrease('valid_misclass', 80)])
    monitors = [LossMonitor(),
                MisclassMonitor(col_suffix='sample_misclass'),
                CroppedTrialMisclassMonitor(input_time_length=input_time_length),
                RuntimeMonitor()]
    model_constraint = MaxNormDefaultConstraint()
    loss_function = lambda preds, targets: F.nll_loss(
        th.mean(preds, dim=2, keepdim=False), targets)

    exp = Experiment(model, train_set, valid_set, test_set, iterator=iterator,
                     loss_function=loss_function, optimizer=optimizer,
                     model_constraint=model_constraint, monitors=monitors,
                     stop_criterion=stop_criterion,
                     remember_best_column='valid_misclass',
                     run_after_early_stop=True, cuda=cuda)
    exp.run()
    return exp
def run_exp(max_recording_mins, n_recordings, sec_to_cut, duration_recording_mins,
            max_abs_val, shrink_val, sampling_freq, divisor, n_folds, i_test_fold,
            final_conv_length, model_constraint, batch_size, max_epochs,
            n_filters_time, n_filters_spat, filter_time_length, conv_nonlin,
            pool_time_length, pool_time_stride, pool_mode, pool_nonlin,
            split_first_layer, do_batch_norm, drop_prob, time_cut_off_sec,
            start_time, input_time_length, only_return_exp):
    kwargs = locals()
    for model_param in ['final_conv_length', 'n_filters_time', 'n_filters_spat',
                        'filter_time_length', 'conv_nonlin', 'pool_time_length',
                        'pool_time_stride', 'pool_mode', 'pool_nonlin',
                        'split_first_layer', 'do_batch_norm', 'drop_prob']:
        kwargs.pop(model_param)
    nonlin_dict = {
        'elu': elu,
        'relu': relu,
        'relu6': relu6,
        'tanh': tanh,
        'square': square,
        'identity': identity,
        'log': safe_log,
    }
    assert input_time_length == 6000
    # copy over from early seizure
    # make proper
    n_classes = 2
    in_chans = 21
    cuda = True
    set_random_seeds(seed=20170629, cuda=cuda)
    model = ShallowFBCSPNet(in_chans=in_chans, n_classes=n_classes,
                            input_time_length=input_time_length,
                            final_conv_length=final_conv_length,
                            n_filters_time=n_filters_time,
                            filter_time_length=filter_time_length,
                            n_filters_spat=n_filters_spat,
                            pool_time_length=pool_time_length,
                            pool_time_stride=pool_time_stride,
                            conv_nonlin=nonlin_dict[conv_nonlin],
                            pool_mode=pool_mode,
                            pool_nonlin=nonlin_dict[pool_nonlin],
                            split_first_layer=split_first_layer,
                            batch_norm=do_batch_norm,
                            batch_norm_alpha=0.1,
                            drop_prob=drop_prob).create_network()
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()
    model.eval()
    test_input = np_to_var(
        np.ones((2, in_chans, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    try:
        out = model(test_input)
    except RuntimeError:
        raise ValueError("Model receptive field too large...")
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    n_receptive_field = input_time_length - n_preds_per_input
    if n_receptive_field > 6000:
        raise ValueError("Model receptive field ({:d}) too large...".format(
            n_receptive_field))
    # For future, here optionally add input time length instead
    model = ShallowFBCSPNet(in_chans=in_chans, n_classes=n_classes,
                            input_time_length=input_time_length,
                            final_conv_length=final_conv_length,
                            n_filters_time=n_filters_time,
                            filter_time_length=filter_time_length,
                            n_filters_spat=n_filters_spat,
                            pool_time_length=pool_time_length,
                            pool_time_stride=pool_time_stride,
                            conv_nonlin=nonlin_dict[conv_nonlin],
                            pool_mode=pool_mode,
                            pool_nonlin=nonlin_dict[pool_nonlin],
                            split_first_layer=split_first_layer,
                            batch_norm=do_batch_norm,
                            batch_norm_alpha=0.1,
                            drop_prob=drop_prob).create_network()
    return common.run_exp(model=model, **kwargs)
def test_trialwise_decoding():
    import mne
    from mne.io import concatenate_raws

    # 5,6,7,10,13,14 are codes for executed and imagined hands/feet
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id, event_codes)

    # Load each of the files
    parts = [mne.io.read_raw_edf(path, preload=True, stim_channel='auto',
                                 verbose='WARNING')
             for path in physionet_paths]

    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset
    events = mne.find_events(raw, shortest_event=0, stim_channel='STI 014')

    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info, meg=False, eeg=True, stim=False,
                                      eog=False, exclude='bads')

    # Extract trials, only using EEG channels
    epoched = mne.Epochs(raw, events, dict(hands=2, feet=3), tmin=1, tmax=4.1,
                         proj=False, picks=eeg_channel_inds, baseline=None,
                         preload=True)

    import numpy as np
    # Convert data from volt to millivolt
    # Pytorch expects float32 for input and int64 for labels.
    X = (epoched.get_data() * 1e6).astype(np.float32)
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2,3 -> 0,1

    from braindecode.datautil.signal_target import SignalAndTarget
    train_set = SignalAndTarget(X[:60], y=y[:60])
    test_set = SignalAndTarget(X[60:], y=y[60:])

    from braindecode.models.shallow_fbcsp import ShallowFBCSPNet
    from torch import nn
    from braindecode.torch_ext.util import set_random_seeds

    # Set if you want to use GPU
    # You can also use torch.cuda.is_available() to determine if cuda is
    # available on your machine.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)

    n_classes = 2
    in_chans = train_set.X.shape[1]
    # final_conv_length = auto ensures we only get a single output in the time dimension
    model = ShallowFBCSPNet(in_chans=in_chans, n_classes=n_classes,
                            input_time_length=train_set.X.shape[2],
                            final_conv_length='auto').create_network()
    if cuda:
        model.cuda()

    from torch import optim
    optimizer = optim.Adam(model.parameters())

    from braindecode.torch_ext.util import np_to_var, var_to_np
    from braindecode.datautil.iterators import get_balanced_batches
    import torch.nn.functional as F
    from numpy.random import RandomState

    rng = RandomState((2017, 6, 30))
    losses = []
    accuracies = []
    for i_epoch in range(6):
        i_trials_in_batch = get_balanced_batches(len(train_set.X), rng,
                                                 shuffle=True, batch_size=30)
        # Set model to training mode
        model.train()
        for i_trials in i_trials_in_batch:
            # Have to add empty fourth dimension to X
            batch_X = train_set.X[i_trials][:, :, :, None]
            batch_y = train_set.y[i_trials]
            net_in = np_to_var(batch_X)
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(batch_y)
            if cuda:
                net_target = net_target.cuda()
            # Remove gradients of last backward pass from all parameters
            optimizer.zero_grad()
            # Compute outputs of the network
            outputs = model(net_in)
            # Compute the loss
            loss = F.nll_loss(outputs, net_target)
            # Do the backpropagation
            loss.backward()
            # Update parameters with the optimizer
            optimizer.step()

        # Print some statistics each epoch
        model.eval()
        print("Epoch {:d}".format(i_epoch))
        for setname, dataset in (('Train', train_set), ('Test', test_set)):
            # Here, we will use the entire dataset at once, which is still possible
            # for such smaller datasets. Otherwise we would have to use batches.
            net_in = np_to_var(dataset.X[:, :, :, None])
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(dataset.y)
            if cuda:
                net_target = net_target.cuda()
            outputs = model(net_in)
            loss = F.nll_loss(outputs, net_target)
            losses.append(float(var_to_np(loss)))
            print("{:6s} Loss: {:.5f}".format(setname, float(var_to_np(loss))))
            predicted_labels = np.argmax(var_to_np(outputs), axis=1)
            accuracy = np.mean(dataset.y == predicted_labels)
            accuracies.append(accuracy * 100)
            print("{:6s} Accuracy: {:.1f}%".format(setname, accuracy * 100))

    np.testing.assert_allclose(
        np.array(losses),
        np.array([1.1775966882705688, 1.2602351903915405, 0.7068756818771362,
                  0.9367912411689758, 0.394258975982666, 0.6598362326622009,
                  0.3359280526638031, 0.656258761882782, 0.2790488004684448,
                  0.6104397177696228, 0.27319177985191345, 0.5949864983558655]),
        rtol=1e-4, atol=1e-5)
    np.testing.assert_allclose(
        np.array(accuracies),
        np.array([51.666666666666671, 53.333333333333336, 63.333333333333329,
                  56.666666666666664, 86.666666666666671, 66.666666666666657,
                  90.0, 63.333333333333329, 96.666666666666671,
                  56.666666666666664, 96.666666666666671, 66.666666666666657]),
        rtol=1e-4, atol=1e-5)
def __init__(self, in_chans, n_classes, final_conv_length='auto',
             input_time_length=None, pool_mode='mean', f1=8, d=2,
             f2=16,  # usually set to F1*D (?)
             kernel_length=64, third_kernel_size=(8, 4), drop_prob=0.25,
             siamese=False,
             i_feature_alignment_layer=None  # 0-based index of modules
             ):
    super(EEGNet, self).__init__()

    if i_feature_alignment_layer is None:
        i_feature_alignment_layer = 2  # default alignment layer
    if final_conv_length == 'auto':
        assert input_time_length is not None

    # Assigns all parameters in init to self.param_name
    self.__dict__.update(locals())
    del self.self

    # Define kind of pooling used:
    pool_class = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)[self.pool_mode]

    # Convolution across temporal axis
    self.temporal_conv = nn.Sequential(
        # Rearrange dimensions, dimshuffle,
        # transform to shape required by pytorch:
        Expression(_transpose_to_b_1_c_0),
        # Temporal conv layer:
        nn.Conv2d(in_channels=1, out_channels=self.f1,
                  kernel_size=(1, self.kernel_length), stride=1, bias=False,
                  padding=(0, self.kernel_length // 2)),
        nn.BatchNorm2d(self.f1, momentum=0.01, affine=True, eps=1e-3)
    )

    self.spatial_conv = nn.Sequential(
        # Spatial conv layer:
        Conv2dWithConstraint(self.f1, self.f1 * self.d, (self.in_chans, 1),
                             max_norm=1, stride=1, bias=False,
                             groups=self.f1, padding=(0, 0)),
        nn.BatchNorm2d(self.f1 * self.d, momentum=0.01, affine=True, eps=1e-3),
        nn.ELU(),
        pool_class(kernel_size=(1, 4), stride=(1, 4))
    )

    self.separable_conv = nn.Sequential(
        nn.Dropout(p=self.drop_prob),
        # Separable conv layer:
        nn.Conv2d(self.f1 * self.d, self.f1 * self.d, (1, 16), stride=1,
                  bias=False, groups=self.f1 * self.d, padding=(0, 16 // 2)),
        nn.Conv2d(self.f1 * self.d, self.f2, (1, 1), stride=1, bias=False,
                  padding=(0, 0)),
        nn.BatchNorm2d(self.f2, momentum=0.01, affine=True, eps=1e-3),
        nn.ELU(),
        pool_class(kernel_size=(1, 8), stride=(1, 8))
    )

    out = np_to_var(
        np.ones((1, self.in_chans, self.input_time_length, 1), dtype=np.float32))
    out = self.forward_init(out)
    # out = self.separable_conv(self.spatial_conv(self.temporal_conv(out)))
    n_out_virtual_chans = out.cpu().data.numpy().shape[2]

    if self.final_conv_length == 'auto':
        n_out_time = out.cpu().data.numpy().shape[3]
        self.final_conv_length = n_out_time

    # Classifier part:
    self.cls = nn.Sequential(
        nn.Dropout(p=self.drop_prob),
        nn.Conv2d(self.f2, self.n_classes,
                  (n_out_virtual_chans, self.final_conv_length), bias=True),
        nn.LogSoftmax(dim=1),
        # Transpose back to the logic of braindecode,
        # so time in third dimension (axis=2).
        # Transform back to original shape and
        # squeeze to (batch_size, n_classes) size
        Expression(_transpose_1_0),
        Expression(_squeeze_final_output)
    )

    # Initialize weights of the network
    self.apply(glorot_weight_zero_bias)

    # Set feature space alignment layer, used in siamese training/testing
    assert 0 <= self.i_feature_alignment_layer < len(self._modules), \
        "Given feature space alignment layer does not exist for current model"
    self.feature_alignment_layer = \
        list(self._modules.items())[self.i_feature_alignment_layer][0]
        return x, hidden

    def create_network(self):
        # model = nn.Sequential()
        # model.add_module('1', self.inception_block_1)
        # model.add_module('2', self.inception_block_2)
        # model.add_module('3', self.inception_block_3)
        #
        # model.add_module('5', self.gru_1)
        # model.add_module('6', self.gru_2)
        # model.add_module('7', self.gru_3)
        # model.add_module('8', self.gru_4)
        # return model
        return self

    def offset_size(self, sequence_size):
        assert sequence_size % 8 == 0, \
            "For this model it is better if sequence size is divisible by 8"
        return sequence_size - sequence_size // 8

    def init_hidden(self):
        weight = next(self.parameters()).data
        return Variable(weight.new(1, 1, hidden_size).zero_())


model = ChronoNet().create_network()
out = model.forward(
    np_to_var(np.ones((2, 1125, 22, 1), dtype=np.float32)),
    Variable(model.init_hidden().data),
)
pass
exp.model.eval()
train_X_batches = []
train_y_batches = []
log.info("Create batches...")
for batch in exp.iterator.get_batches(train_set, shuffle=False):
    train_X_batches.append(batch[0].astype(np.float32))
    train_y_batches.append(batch[1])
log.info("Delete unnecessary variables...")
del X, y
del train_set
log.info("Concatenates batches to array...")
train_X_batches = np.concatenate(train_X_batches, axis=0)
train_y_batches = np.concatenate(train_y_batches)
model_without_softmax = nn.Sequential()
for name, module in exp.model.named_children():
    if name == 'softmax':
        break
    model_without_softmax.add_module(name, module)
pred_fn = lambda x: var_to_np(
    th.mean(model_without_softmax(np_to_var(x).cuda()), dim=2)[:, :, 0, 0])
log.info("Gaussian perturbation...")
run_save_for_gaussian(pred_fn, train_X_batches, n_iterations, train_y_batches,
                      folder)
# log.info("Scaled (gaussian) perturbation...")
# run_save_for_scaled(
#     pred_fn, train_X_batches, n_iterations, train_y_batches, folder)
def create_network(self):
    pool_class = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)[self.pool_mode]
    model = nn.Sequential()
    n_filters_1 = 16
    model.add_module('conv_1',
                     nn.Conv2d(self.in_chans, n_filters_1, (1, 1), stride=1,
                               bias=True))
    model.add_module('bnorm_1',
                     nn.BatchNorm2d(n_filters_1, momentum=0.01, affine=True,
                                    eps=1e-3))
    model.add_module('elu_1', Expression(elu))
    # transpose to examples x 1 x (virtual, not EEG) channels x time
    model.add_module('permute_1', Expression(lambda x: x.permute(0, 3, 1, 2)))
    model.add_module('drop_1', nn.Dropout(p=self.drop_prob))

    n_filters_2 = 4
    # keras pads unequal padding more in front, so padding
    # too large should be ok.
    # Not padding in time so that cropped training makes sense
    # https://stackoverflow.com/questions/43994604/padding-with-even-kernel-size-in-a-convolutional-layer-in-keras-theano
    model.add_module('conv_2',
                     nn.Conv2d(1, n_filters_2, self.second_kernel_size,
                               stride=1,
                               padding=(self.second_kernel_size[0] // 2, 0),
                               bias=True))
    model.add_module('bnorm_2',
                     nn.BatchNorm2d(n_filters_2, momentum=0.01, affine=True,
                                    eps=1e-3))
    model.add_module('elu_2', Expression(elu))
    model.add_module('pool_2', pool_class(kernel_size=(2, 4), stride=(2, 4)))
    model.add_module('drop_2', nn.Dropout(p=self.drop_prob))

    n_filters_3 = 4
    model.add_module('conv_3',
                     nn.Conv2d(n_filters_2, n_filters_3, self.third_kernel_size,
                               stride=1,
                               padding=(self.third_kernel_size[0] // 2, 0),
                               bias=True))
    model.add_module('bnorm_3',
                     nn.BatchNorm2d(n_filters_3, momentum=0.01, affine=True,
                                    eps=1e-3))
    model.add_module('elu_3', Expression(elu))
    model.add_module('pool_3', pool_class(kernel_size=(2, 4), stride=(2, 4)))
    model.add_module('drop_3', nn.Dropout(p=self.drop_prob))

    out = model(np_to_var(np.ones((1, self.in_chans, self.input_time_length, 1),
                                  dtype=np.float32)))
    n_out_virtual_chans = out.cpu().data.numpy().shape[2]
    if self.final_conv_length == 'auto':
        n_out_time = out.cpu().data.numpy().shape[3]
        self.final_conv_length = n_out_time
    model.add_module('conv_classifier',
                     nn.Conv2d(n_filters_3, self.n_classes,
                               (n_out_virtual_chans, self.final_conv_length),
                               bias=True))
    model.add_module('softmax', nn.LogSoftmax())
    # Transpose back to the logic of braindecode,
    # so time in third dimension (axis=2)
    model.add_module('permute_2', Expression(lambda x: x.permute(0, 1, 3, 2)))
    model.add_module('squeeze', Expression(_squeeze_final_output))
    glorot_weight_zero_bias(model)
    return model
def create_network(self):
    pool_class = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)[self.pool_mode]
    model = nn.Sequential()
    if self.split_first_layer:
        model.add_module('dimshuffle', Expression(_transpose_time_to_spat))
        model.add_module('conv_time',
                         nn.Conv2d(1, self.n_filters_time,
                                   (self.filter_time_length, 1), stride=1))
        model.add_module('conv_spat',
                         nn.Conv2d(self.n_filters_time, self.n_filters_spat,
                                   (1, self.in_chans), stride=1,
                                   bias=not self.batch_norm))
        n_filters_conv = self.n_filters_spat
    else:
        model.add_module('conv_time',
                         nn.Conv2d(self.in_chans, self.n_filters_time,
                                   (self.filter_time_length, 1), stride=1,
                                   bias=not self.batch_norm))
        n_filters_conv = self.n_filters_time
    if self.batch_norm:
        model.add_module('bnorm',
                         nn.BatchNorm2d(n_filters_conv,
                                        momentum=self.batch_norm_alpha,
                                        affine=True))
    model.add_module('conv_nonlin', Expression(self.conv_nonlin))
    model.add_module('pool',
                     pool_class(kernel_size=(self.pool_time_length, 1),
                                stride=(self.pool_time_stride, 1)))
    model.add_module('pool_nonlin', Expression(self.pool_nonlin))
    model.add_module('drop', nn.Dropout(p=self.drop_prob))
    if self.final_conv_length == 'auto':
        out = model(np_to_var(np.ones((1, self.in_chans,
                                       self.input_time_length, 1),
                                      dtype=np.float32)))
        n_out_time = out.cpu().data.numpy().shape[2]
        self.final_conv_length = n_out_time
    model.add_module('conv_classifier',
                     nn.Conv2d(n_filters_conv, self.n_classes,
                               (self.final_conv_length, 1), bias=True))
    model.add_module('softmax', nn.LogSoftmax(dim=1))
    model.add_module('squeeze', Expression(_squeeze_final_output))

    # Initialization, xavier is same as in paper...
    init.xavier_uniform_(model.conv_time.weight, gain=1)
    # maybe no bias in case of no split layer and batch norm
    if self.split_first_layer or (not self.batch_norm):
        init.constant_(model.conv_time.bias, 0)
    if self.split_first_layer:
        init.xavier_uniform_(model.conv_spat.weight, gain=1)
        if not self.batch_norm:
            init.constant_(model.conv_spat.bias, 0)
    if self.batch_norm:
        init.constant_(model.bnorm.weight, 1)
        init.constant_(model.bnorm.bias, 0)
    init.xavier_uniform_(model.conv_classifier.weight, gain=1)
    init.constant_(model.conv_classifier.bias, 0)
    return model
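# Hedged usage sketch (added for illustration, not part of the original listing):
# trialwise ShallowFBCSPNet setup as in the decoding tests above; final_conv_length
# 'auto' collapses the time dimension so the network emits one prediction per trial.
# The 64-channel, 497-sample shapes mirror the Physionet example elsewhere in this
# collection; braindecode 0.4.x import paths are assumed.
import numpy as np
from braindecode.models.shallow_fbcsp import ShallowFBCSPNet
from braindecode.torch_ext.util import np_to_var

model = ShallowFBCSPNet(in_chans=64, n_classes=2, input_time_length=497,
                        final_conv_length='auto').create_network()
dummy = np_to_var(np.ones((3, 64, 497, 1), dtype=np.float32))
out = model(dummy)
print(out.cpu().data.numpy().shape)  # expected: (3, 2) log-probabilities per trial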
def create_network(self):
    pool_class = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)[self.pool_mode]
    model = nn.Sequential()
    # b c 0 1
    # now to b 1 0 c
    model.add_module('dimshuffle', Expression(_transpose_to_b_1_c_0))
    model.add_module('conv_temporal',
                     nn.Conv2d(1, self.F1, (1, self.kernel_length), stride=1,
                               bias=False,
                               padding=(0, self.kernel_length // 2)))
    model.add_module('bnorm_temporal',
                     nn.BatchNorm2d(self.F1, momentum=0.01, affine=True,
                                    eps=1e-3))
    model.add_module('conv_spatial',
                     Conv2dWithConstraint(self.F1, self.F1 * self.D,
                                          (self.in_chans, 1), max_norm=1,
                                          stride=1, bias=False, groups=self.F1,
                                          padding=(0, 0)))
    model.add_module('bnorm_1',
                     nn.BatchNorm2d(self.F1 * self.D, momentum=0.01,
                                    affine=True, eps=1e-3))
    model.add_module('elu_1', Expression(elu))
    model.add_module('pool_1', pool_class(kernel_size=(1, 4), stride=(1, 4)))
    model.add_module('drop_1', nn.Dropout(p=self.drop_prob))

    # https://discuss.pytorch.org/t/how-to-modify-a-conv2d-to-depthwise-separable-convolution/15843/7
    model.add_module('conv_separable_depth',
                     nn.Conv2d(self.F1 * self.D, self.F1 * self.D, (1, 16),
                               stride=1, bias=False, groups=self.F1 * self.D,
                               padding=(0, 16 // 2)))
    model.add_module('conv_separable_point',
                     nn.Conv2d(self.F1 * self.D, self.F2, (1, 1), stride=1,
                               bias=False, padding=(0, 0)))
    model.add_module('bnorm_2',
                     nn.BatchNorm2d(self.F2, momentum=0.01, affine=True,
                                    eps=1e-3))
    model.add_module('elu_2', Expression(elu))
    model.add_module('pool_2', pool_class(kernel_size=(1, 8), stride=(1, 8)))
    model.add_module('drop_2', nn.Dropout(p=self.drop_prob))

    out = model(np_to_var(np.ones((1, self.in_chans, self.input_time_length, 1),
                                  dtype=np.float32)))
    n_out_virtual_chans = out.cpu().data.numpy().shape[2]
    if self.final_conv_length == 'auto':
        n_out_time = out.cpu().data.numpy().shape[3]
        self.final_conv_length = n_out_time
    model.add_module('conv_classifier',
                     nn.Conv2d(self.F2, self.n_classes,
                               (n_out_virtual_chans, self.final_conv_length),
                               bias=True))
    model.add_module('softmax', nn.LogSoftmax())
    # Transpose back to the logic of braindecode,
    # so time in third dimension (axis=2)
    model.add_module('permute_back', Expression(_transpose_1_0))
    model.add_module('squeeze', Expression(_squeeze_final_output))
    glorot_weight_zero_bias(model)
    return model
def run_exp(test_on_eval, sensor_types, n_chans, max_recording_mins, test_recording_mins, n_recordings, sec_to_cut_at_start, sec_to_cut_at_end, duration_recording_mins, max_abs_val, clip_before_resample, sampling_freq, divisor, n_folds, i_test_fold, shuffle, merge_train_valid, model_name, n_start_chans, n_chan_factor, input_time_length, final_conv_length, stride_before_pool, optimizer, learning_rate, weight_decay, scheduler, model_constraint, batch_size, max_epochs, log_dir, only_return_exp, np_th_seed): cuda = True if ('smac' in model_name) and (input_time_length is None): input_time_length = 12000 fix_input_length_for_smac = True else: fix_input_length_for_smac = False set_random_seeds(seed=np_th_seed, cuda=cuda) n_classes = 2 if model_name == 'shallow': model = ShallowFBCSPNet( in_chans=n_chans, n_classes=n_classes, n_filters_time=n_start_chans, n_filters_spat=n_start_chans, input_time_length=input_time_length, final_conv_length=final_conv_length).create_network() elif model_name == 'deep': model = Deep4Net( n_chans, n_classes, n_filters_time=n_start_chans, n_filters_spat=n_start_chans, input_time_length=input_time_length, n_filters_2=int(n_start_chans * n_chan_factor), n_filters_3=int(n_start_chans * (n_chan_factor**2.0)), n_filters_4=int(n_start_chans * (n_chan_factor**3.0)), final_conv_length=final_conv_length, stride_before_pool=stride_before_pool).create_network() elif (model_name == 'deep_smac') or (model_name == 'deep_smac_bnorm'): if model_name == 'deep_smac': do_batch_norm = False else: assert model_name == 'deep_smac_bnorm' do_batch_norm = True double_time_convs = False drop_prob = 0.244445 filter_length_2 = 12 filter_length_3 = 14 filter_length_4 = 12 filter_time_length = 21 final_conv_length = 1 first_nonlin = elu first_pool_mode = 'mean' first_pool_nonlin = identity later_nonlin = elu later_pool_mode = 'mean' later_pool_nonlin = identity n_filters_factor = 1.679066 n_filters_start = 32 pool_time_length = 1 pool_time_stride = 2 split_first_layer = True n_chan_factor = n_filters_factor n_start_chans = n_filters_start model = Deep4Net(n_chans, n_classes, n_filters_time=n_start_chans, n_filters_spat=n_start_chans, input_time_length=input_time_length, n_filters_2=int(n_start_chans * n_chan_factor), n_filters_3=int(n_start_chans * (n_chan_factor**2.0)), n_filters_4=int(n_start_chans * (n_chan_factor**3.0)), final_conv_length=final_conv_length, batch_norm=do_batch_norm, double_time_convs=double_time_convs, drop_prob=drop_prob, filter_length_2=filter_length_2, filter_length_3=filter_length_3, filter_length_4=filter_length_4, filter_time_length=filter_time_length, first_nonlin=first_nonlin, first_pool_mode=first_pool_mode, first_pool_nonlin=first_pool_nonlin, later_nonlin=later_nonlin, later_pool_mode=later_pool_mode, later_pool_nonlin=later_pool_nonlin, pool_time_length=pool_time_length, pool_time_stride=pool_time_stride, split_first_layer=split_first_layer, stride_before_pool=True).create_network() elif model_name == 'shallow_smac': conv_nonlin = identity do_batch_norm = True drop_prob = 0.328794 filter_time_length = 56 final_conv_length = 22 n_filters_spat = 73 n_filters_time = 24 pool_mode = 'max' pool_nonlin = identity pool_time_length = 84 pool_time_stride = 3 split_first_layer = True model = ShallowFBCSPNet( in_chans=n_chans, n_classes=n_classes, n_filters_time=n_filters_time, n_filters_spat=n_filters_spat, input_time_length=input_time_length, final_conv_length=final_conv_length, conv_nonlin=conv_nonlin, batch_norm=do_batch_norm, drop_prob=drop_prob, 
filter_time_length=filter_time_length, pool_mode=pool_mode, pool_nonlin=pool_nonlin, pool_time_length=pool_time_length, pool_time_stride=pool_time_stride, split_first_layer=split_first_layer, ).create_network() elif model_name == 'deep_smac_new': from torch.nn.functional import elu, relu, relu6, tanh from braindecode.torch_ext.functions import identity, square, safe_log n_filters_factor = 1.9532637176784269 n_filters_start = 61 deep_kwargs = { "batch_norm": False, "double_time_convs": False, "drop_prob": 0.3622676569047184, "filter_length_2": 9, "filter_length_3": 6, "filter_length_4": 10, "filter_time_length": 17, "final_conv_length": 5, "first_nonlin": elu, "first_pool_mode": "max", "first_pool_nonlin": identity, "later_nonlin": relu6, "later_pool_mode": "max", "later_pool_nonlin": identity, "n_filters_time": n_filters_start, "n_filters_spat": n_filters_start, "n_filters_2": int(n_filters_start * n_filters_factor), "n_filters_3": int(n_filters_start * (n_filters_factor**2.0)), "n_filters_4": int(n_filters_start * (n_filters_factor**3.0)), "pool_time_length": 1, "pool_time_stride": 4, "split_first_layer": True, "stride_before_pool": True, } model = Deep4Net(n_chans, n_classes, input_time_length=input_time_length, **deep_kwargs).create_network() elif model_name == 'shallow_smac_new': from torch.nn.functional import elu, relu, relu6, tanh from braindecode.torch_ext.functions import identity, square, safe_log shallow_kwargs = { "conv_nonlin": square, "batch_norm": True, "drop_prob": 0.10198630723385381, "filter_time_length": 51, "final_conv_length": 1, "n_filters_spat": 200, "n_filters_time": 76, "pool_mode": "max", "pool_nonlin": safe_log, "pool_time_length": 139, "pool_time_stride": 49, "split_first_layer": True, } model = ShallowFBCSPNet(in_chans=n_chans, n_classes=n_classes, input_time_length=input_time_length, **shallow_kwargs).create_network() elif model_name == 'linear': model = nn.Sequential() model.add_module("conv_classifier", nn.Conv2d(n_chans, n_classes, (600, 1))) model.add_module('softmax', nn.LogSoftmax()) model.add_module('squeeze', Expression(lambda x: x.squeeze(3))) elif model_name == '3path': virtual_chan_1x1_conv = True mean_across_features = False drop_prob = 0.5 n_start_filters = 10 early_bnorm = False n_classifier_filters = 100 later_kernel_len = 5 extra_conv_stride = 4 # dont forget to reset n_preds_per_blabla model = create_multi_start_path_net( in_chans=n_chans, virtual_chan_1x1_conv=virtual_chan_1x1_conv, n_start_filters=n_start_filters, early_bnorm=early_bnorm, later_kernel_len=later_kernel_len, extra_conv_stride=extra_conv_stride, mean_across_features=mean_across_features, n_classifier_filters=n_classifier_filters, drop_prob=drop_prob) else: assert False, "unknown model name {:s}".format(model_name) if not model_name == '3path': to_dense_prediction_model(model) log.info("Model:\n{:s}".format(str(model))) time_cut_off_sec = np.inf start_time = time.time() # fix input time length in case of smac models if fix_input_length_for_smac: assert ('smac' in model_name) and (input_time_length == 12000) if cuda: model.cuda() test_input = np_to_var( np.ones((2, n_chans, input_time_length, 1), dtype=np.float32)) if cuda: test_input = test_input.cuda() try: out = model(test_input) except: raise ValueError("Model receptive field too large...") n_preds_per_input = out.cpu().data.numpy().shape[2] n_receptive_field = input_time_length - n_preds_per_input input_time_length = 2 * n_receptive_field exp = common.run_exp( max_recording_mins, n_recordings, sec_to_cut_at_start, 
sec_to_cut_at_end, duration_recording_mins, max_abs_val, clip_before_resample, sampling_freq, divisor, n_folds, i_test_fold, shuffle, merge_train_valid, model, input_time_length, optimizer, learning_rate, weight_decay, scheduler, model_constraint, batch_size, max_epochs, only_return_exp, time_cut_off_sec, start_time, test_on_eval, test_recording_mins, sensor_types, log_dir, np_th_seed, ) return exp
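# The run_exp() above sizes the SMAC models by probing the network once with a dummy
# batch: the number of crop predictions that come out determines the receptive field,
# and input_time_length is then set to twice that receptive field. A minimal,
# self-contained sketch of the same arithmetic with a toy dense-prediction model
# (plain PyTorch; the toy layers are assumptions for illustration, not the models above).
import torch
import torch.nn as nn

toy_model = nn.Sequential(
    nn.Conv2d(21, 8, kernel_size=(25, 1)),   # temporal conv over a (time, 1) layout
    nn.Conv2d(8, 2, kernel_size=(10, 1)),    # "classifier" conv, still dense over time
)
probe_len = 12000  # same probe length the SMAC branch uses
test_input = torch.ones(2, 21, probe_len, 1, dtype=torch.float32)
with torch.no_grad():
    out = toy_model(test_input)
n_preds_per_input = out.shape[2]                    # 12000 - 24 - 9 = 11967
n_receptive_field = probe_len - n_preds_per_input   # 33 time steps
input_time_length = 2 * n_receptive_field           # 66, the value passed on to common.run_exp
print(n_preds_per_input, n_receptive_field, input_time_length)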
def test_trialwise_decoding():
    # 5, 6, 9, 10, 13, 14 are codes for executed and imagined hands/feet
    subject_id = 1
    event_codes = [5, 6, 9, 10, 13, 14]
    # event_codes = [6]

    # This will download the files if you don't have them yet,
    # and then return the paths to the files.
    physionet_paths = mne.datasets.eegbci.load_data(subject_id, event_codes)

    # Load each of the files
    parts = [
        mne.io.read_raw_edf(path, preload=True, stim_channel='auto',
                            verbose='WARNING') for path in physionet_paths
    ]
    # Concatenate them
    raw = concatenate_raws(parts)

    # Find the events in this dataset
    # events = mne.find_events(raw, shortest_event=0, stim_channel='STI 014')
    events, _ = mne.events_from_annotations(raw)

    # Use only EEG channels
    eeg_channel_inds = mne.pick_types(raw.info, meg=False, eeg=True, stim=False,
                                      eog=False, exclude='bads')
    # Extract trials
    epoched = mne.Epochs(raw, events, dict(hands=2, feet=3), tmin=1, tmax=4.1,
                         proj=False, picks=eeg_channel_inds, baseline=None,
                         preload=True)

    # Convert data from volt to microvolt.
    # Pytorch expects float32 for input and int64 for labels.
    # X: [90, 64, 497]
    X = (epoched.get_data() * 1e6).astype(np.float32)
    # y: [90]
    y = (epoched.events[:, 2] - 2).astype(np.int64)  # 2, 3 -> 0, 1

    # X_train: [60, 64, 497], y_train: [60]
    train_set = SignalAndTarget(X[:60], y=y[:60])
    # X_test: [30, 64, 497], y_test: [30]
    test_set = SignalAndTarget(X[60:], y=y[60:])

    # Set if you want to use GPU.
    # You can also use torch.cuda.is_available() to determine if cuda is
    # available on your machine.
    cuda = False
    set_random_seeds(seed=20170629, cuda=cuda)
    n_classes = 2
    in_chans = train_set.X.shape[1]

    # final_conv_length = 'auto' ensures we only get a single output in the
    # time dimension. For reference, the constructor signature is:
    # def __init__(self, in_chans=64, n_classes=2, input_time_length=497,
    #              n_filters_time=40, filter_time_length=25, n_filters_spat=40,
    #              pool_time_length=75, pool_time_stride=15,
    #              final_conv_length='auto', conv_nonlin=square,
    #              pool_mode="mean", pool_nonlin=safe_log,
    #              split_first_layer=True, batch_norm=True,
    #              batch_norm_alpha=0.1, drop_prob=0.5):
    # It feels like create_network() was really part of __init__; it has now been
    # changed so the network is reached through self.model, which still feels
    # inelegant, mainly because forward is baked into nn.Sequential.
    # Also, what actually gets built is not a ShallowFBCSPNet but an nn.Sequential;
    # I prefer the original way of defining it, since with this approach you
    # cannot inspect the intermediate outputs.
    # model = ShallowFBCSPNet(in_chans=in_chans, n_classes=n_classes,
    #                         input_time_length=train_set.X.shape[2],
    #                         final_conv_length='auto').create_network()  # original version
    model = ShallowFBCSPNet(in_chans=in_chans, n_classes=n_classes,
                            input_time_length=train_set.X.shape[2],
                            final_conv_length='auto').model
    if cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters())

    rng = RandomState((2017, 6, 30))
    losses = []
    accuracies = []
    for i_epoch in range(6):
        i_trials_in_batch = get_balanced_batches(len(train_set.X), rng,
                                                 shuffle=True, batch_size=10)
        # Set model to training mode
        model.train()
        for i_trials in i_trials_in_batch:
            # Have to add empty fourth dimension to X
            batch_X = train_set.X[i_trials][:, :, :, None]
            batch_y = train_set.y[i_trials]
            net_in = np_to_var(batch_X)
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(batch_y)
            if cuda:
                net_target = net_target.cuda()
            # Remove gradients of last backward pass from all parameters
            optimizer.zero_grad()
            # Compute outputs of the network
            # net_in: [10, 64, 497, 1] = [bsz, H_im, W_im, C_im]
            outputs = model.forward(net_in)
            # model = Sequential(
            #   (dimshuffle): Expression(expression=_transpose_time_to_spat)
            #   (conv_time): Conv2d(1, 40, kernel_size=(25, 1), stride=(1, 1))
            #   (conv_spat): Conv2d(40, 40, kernel_size=(1, 64), stride=(1, 1), bias=False)
            #   (bnorm): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            #   (conv_nonlin): Expression(expression=square)
            #   (pool): AvgPool2d(kernel_size=(75, 1), stride=(15, 1), padding=0)
            #   (pool_nonlin): Expression(expression=safe_log)
            #   (drop): Dropout(p=0.5)
            #   (conv_classifier): Conv2d(40, 2, kernel_size=(27, 1), stride=(1, 1))
            #   (softmax): LogSoftmax()
            #   (squeeze): Expression(expression=_squeeze_final_output)
            # )
            # Compute the loss
            loss = F.nll_loss(outputs, net_target)
            # Do the backpropagation
            loss.backward()
            # Update parameters with the optimizer
            optimizer.step()

        # Print some statistics each epoch
        model.eval()
        print("Epoch {:d}".format(i_epoch))
        for setname, dataset in (('Train', train_set), ('Test', test_set)):
            # Here, we will use the entire dataset at once, which is still possible
            # for such smaller datasets. Otherwise we would have to use batches.
            net_in = np_to_var(dataset.X[:, :, :, None])
            if cuda:
                net_in = net_in.cuda()
            net_target = np_to_var(dataset.y)
            if cuda:
                net_target = net_target.cuda()
            outputs = model(net_in)
            loss = F.nll_loss(outputs, net_target)
            losses.append(float(var_to_np(loss)))
            print("{:6s} Loss: {:.5f}".format(setname, float(var_to_np(loss))))
            predicted_labels = np.argmax(var_to_np(outputs), axis=1)
            accuracy = np.mean(dataset.y == predicted_labels)
            accuracies.append(accuracy * 100)
            print("{:6s} Accuracy: {:.1f}%".format(setname, accuracy * 100))

    np.testing.assert_allclose(np.array(losses), np.array([
        1.1775966882705688, 1.2602351903915405, 0.7068756818771362,
        0.9367912411689758, 0.394258975982666, 0.6598362326622009,
        0.3359280526638031, 0.656258761882782, 0.2790488004684448,
        0.6104397177696228, 0.27319177985191345, 0.5949864983558655
    ]), rtol=1e-4, atol=1e-5)
    np.testing.assert_allclose(np.array(accuracies), np.array([
        51.666666666666671, 53.333333333333336, 63.333333333333329,
        56.666666666666664, 86.666666666666671, 66.666666666666657,
        90.0, 63.333333333333329, 96.666666666666671, 56.666666666666664,
        96.666666666666671, 66.666666666666657
    ]), rtol=1e-4, atol=1e-5)
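# The trialwise training loop above only needs get_balanced_batches to return a list of
# index arrays, one per mini-batch. A hedged NumPy sketch of that interface; this is an
# illustration, not braindecode's implementation.
import numpy as np

def simple_balanced_batches(n_trials, rng, batch_size=10, shuffle=True):
    # Shuffle trial indices once, then cut them into roughly equal batches.
    idx = np.arange(n_trials)
    if shuffle:
        rng.shuffle(idx)
    n_batches = int(np.ceil(n_trials / float(batch_size)))
    return np.array_split(idx, n_batches)

rng = np.random.RandomState(20170629)
for i_trials in simple_balanced_batches(60, rng, batch_size=10):
    pass  # index into train_set.X[i_trials] / train_set.y[i_trials] as in the loop above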
def run(ex, test_on_eval, sensor_types, n_chans, max_recording_mins, n_recordings, sec_to_cut_at_start, sec_to_cut_at_end, duration_recording_mins, test_recording_mins, max_abs_val, clip_before_resample, sampling_freq, divisor, n_folds, i_test_fold, shuffle, merge_train_valid, model_name, input_time_length, final_conv_length, stride_before_pool, n_start_chans, n_chan_factor, optimizer, learning_rate, weight_decay, scheduler, model_constraint, batch_size, max_epochs, save_predictions, save_crop_predictions, np_th_seed, only_return_exp): log_dir = ex.observers[0].dir kwargs = locals() kwargs.pop('ex') kwargs.pop('save_predictions') kwargs.pop('save_crop_predictions') import sys logging.basicConfig(format='%(asctime)s %(levelname)s : %(message)s', level=logging.DEBUG, stream=sys.stdout) start_time = time.time() ex.info['finished'] = False confirm_gpu_availability() exp = run_exp(**kwargs) end_time = time.time() run_time = end_time - start_time ex.info['finished'] = True if not only_return_exp: last_row = exp.epochs_df.iloc[-1] for key, val in last_row.iteritems(): ex.info[key] = float(val) ex.info['runtime'] = run_time if not only_return_exp: save_pkl_artifact(ex, exp.epochs_df, 'epochs_df.pkl') save_pkl_artifact(ex, exp.before_stop_df, 'before_stop_df.pkl') save_torch_artifact(ex, exp.model.state_dict(), 'model_params.pkl') if save_predictions: exp.model.eval() for setname in ('train', 'valid', 'test'): log.info( "Compute and save predictions for {:s}...".format(setname)) dataset = exp.datasets[setname] log.info("Save labels for {:s}...".format(setname)) save_npy_artifact(ex, dataset.y, '{:s}_trial_labels.npy'.format(setname)) preds_per_batch = [ var_to_np(exp.model(np_to_var(b[0]).cuda())) for b in exp.iterator.get_batches(dataset, shuffle=False) ] preds_per_trial = compute_preds_per_trial( preds_per_batch, dataset, input_time_length=exp.iterator.input_time_length, n_stride=exp.iterator.n_preds_per_input) mean_preds_per_trial = [ np.mean(preds, axis=(0, 2)) for preds in preds_per_trial ] mean_preds_per_trial = np.array(mean_preds_per_trial) log.info("Save trial predictions for {:s}...".format(setname)) save_npy_artifact(ex, mean_preds_per_trial, '{:s}_trial_preds.npy'.format(setname)) if save_crop_predictions: log.info( "Save crop predictions for {:s}...".format(setname)) save_npy_artifact(ex, preds_per_trial, '{:s}_crop_preds.npy'.format(setname)) else: return exp
def create_network(self): pool_class = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)[self.pool_mode] if self.split_first_layer: self.add_module("dimshuffle", Expression(_transpose_time_to_spat)) self.add_module( "conv_time", nn.Conv2d( 1, self.n_filters_time, (self.filter_time_length, 1), stride=1, ), ) self.add_module( "conv_spat", nn.Conv2d( self.n_filters_time, self.n_filters_spat, (1, self.in_chans), stride=1, bias=not self.batch_norm, ), ) n_filters_conv = self.n_filters_spat else: self.add_module( "conv_time", nn.Conv2d( self.in_chans, self.n_filters_time, (self.filter_time_length, 1), stride=1, bias=not self.batch_norm, ), ) n_filters_conv = self.n_filters_time if self.batch_norm: self.add_module( "bnorm", nn.BatchNorm2d( n_filters_conv, momentum=self.batch_norm_alpha, affine=True ), ) self.add_module("conv_nonlin", Expression(self.conv_nonlin_func)) self.add_module( "pool", pool_class( kernel_size=(self.pool_time_length, 1), stride=(self.pool_time_stride, 1), ), ) self.add_module("pool_nonlin", Expression(self.pool_nonlin_func)) self.add_module("drop", nn.Dropout(p=self.drop_prob)) if self.final_conv_length == "auto": out = self( np_to_var( np.ones( (1, self.in_chans, self.input_time_length, 1), dtype=np.float32 ) ) ) n_out_time = out.cpu().data.numpy().shape[2] self.final_conv_length = n_out_time self.add_module( "conv_classifier", nn.Conv2d( n_filters_conv, self.n_classes, (self.final_conv_length, 1), bias=True ), ) self.add_module("softmax", nn.LogSoftmax(dim=1)) self.add_module("squeeze", Expression(_squeeze_final_output)) # Initialization, xavier is same as in paper... init.xavier_uniform_(self.conv_time.weight, gain=1) # maybe no bias in case of no split layer and batch norm if self.split_first_layer or (not self.batch_norm): init.constant_(self.conv_time.bias, 0) if self.split_first_layer: init.xavier_uniform_(self.conv_spat.weight, gain=1) if not self.batch_norm: init.constant_(self.conv_spat.bias, 0) if self.batch_norm: init.constant_(self.bnorm.weight, 1) init.constant_(self.bnorm.bias, 0) init.xavier_uniform_(self.conv_classifier.weight, gain=1) init.constant_(self.conv_classifier.bias, 0)
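# The final_conv_length == "auto" branch above runs a dummy batch through the partially
# built network and reads off how many time steps remain; the classifier kernel is then
# made to cover exactly that many steps, so each trial yields a single prediction.
# The same trick in isolation (plain PyTorch; the layer sizes are illustrative and the
# input already has the post-dimshuffle (batch, 1, time, channels) layout).
import torch
import torch.nn as nn

feature_net = nn.Sequential(
    nn.Conv2d(1, 40, kernel_size=(25, 1)),               # conv_time
    nn.Conv2d(40, 40, kernel_size=(1, 64), bias=False),  # conv_spat over 64 EEG channels
    nn.AvgPool2d(kernel_size=(75, 1), stride=(15, 1)),
)
dummy = torch.ones(1, 1, 497, 64)
with torch.no_grad():
    n_out_time = feature_net(dummy).shape[2]              # 27 time steps remain
conv_classifier = nn.Conv2d(40, 2, kernel_size=(n_out_time, 1))  # collapses time to 1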
def __init__(self, in_chans, n_classes, input_time_length=None, n_filters_time=40, filter_time_length=25, n_filters_spat=40, pool_time_length=75, pool_time_stride=15, final_conv_length=30, conv_nonlin=square, pool_mode='mean', pool_nonlin=safe_log, split_first_layer=True, batch_norm=True, batch_norm_alpha=0.1, drop_prob=0.5, siamese=False, i_feature_alignment_layer=None): super(ShallowConvNet, self).__init__() if i_feature_alignment_layer is None: i_feature_alignment_layer = 1 # default alignment layer if final_conv_length == 'auto': assert input_time_length is not None self.__dict__.update(locals()) del self.self pool_class = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)[self.pool_mode] n_filters_conv = self.n_filters_spat self.temporal_conv = nn.Sequential( Expression(_transpose_time_to_spat), nn.Conv2d(1, self.n_filters_time, (self.filter_time_length, 1), stride=1)) self.spatial_conv = nn.Sequential( nn.Conv2d(self.n_filters_time, self.n_filters_spat, (1, self.in_chans), stride=1, bias=not self.batch_norm), nn.BatchNorm2d(n_filters_conv, momentum=self.batch_norm_alpha, affine=True), Expression(self.conv_nonlin), pool_class(kernel_size=(self.pool_time_length, 1), stride=(self.pool_time_stride, 1)), Expression(self.pool_nonlin)) if self.final_conv_length == 'auto': out = np_to_var( np.ones((1, self.in_chans, self.input_time_length, 1), dtype=np.float32)) out = self.forward_once(out) n_out_time = out.cpu().data.numpy().shape[2] self.final_conv_length = n_out_time self.conv_cls = nn.Sequential( nn.Dropout(p=self.drop_prob), nn.Conv2d(n_filters_conv, self.n_classes, (self.final_conv_length, 1), bias=True), nn.LogSoftmax(dim=1), Expression(_squeeze_final_output)) # Initialize weights of the network self.apply(glorot_weight_zero_bias) # Set feature space alignment layer, used in siamese training/testing assert 0 <= self.i_feature_alignment_layer < len(self._modules), \ "Given feature space alignment layer does not " \ "exist for current model" self.feature_alignment_layer = \ list(self._modules.items())[self.i_feature_alignment_layer][0]
def create_network(self): print('creating ResNet!') model = nn.Sequential() if self.split_first_layer: model.add_module('dimshuffle', Expression(_transpose_time_to_spat)) model.add_module( 'conv_time', nn.Conv2d(1, self.n_first_filters, (self.first_filter_length, 1), stride=1, padding=(self.first_filter_length // 2, 0))) model.add_module( 'conv_spat', nn.Conv2d(self.n_first_filters, self.n_first_filters, (1, self.in_chans), stride=(1, 1), bias=False)) else: model.add_module( 'conv_time', nn.Conv2d( self.in_chans, self.n_first_filters, (self.first_filter_length, 1), stride=(1, 1), padding=(self.first_filter_length // 2, 0), bias=False, )) n_filters_conv = self.n_first_filters model.add_module( 'bnorm', nn.BatchNorm2d(n_filters_conv, momentum=self.batch_norm_alpha, affine=True, eps=1e-5), ) model.add_module('conv_nonlin', Expression(self.nonlinearity)) cur_dilation = np.array([1, 1]) n_cur_filters = n_filters_conv i_block = 1 for i_layer in range(self.n_layers_per_block): model.add_module( 'res_{:d}_{:d}'.format(i_block, i_layer), ResidualBlock(n_cur_filters, n_cur_filters, dilation=cur_dilation)) i_block += 1 cur_dilation[0] *= 2 n_out_filters = int(2 * n_cur_filters) model.add_module( 'res_{:d}_{:d}'.format(i_block, 0), ResidualBlock( n_cur_filters, n_out_filters, dilation=cur_dilation, )) n_cur_filters = n_out_filters for i_layer in range(1, self.n_layers_per_block): model.add_module( 'res_{:d}_{:d}'.format(i_block, i_layer), ResidualBlock(n_cur_filters, n_cur_filters, dilation=cur_dilation)) i_block += 1 cur_dilation[0] *= 2 n_out_filters = int(1.5 * n_cur_filters) model.add_module( 'res_{:d}_{:d}'.format(i_block, 0), ResidualBlock( n_cur_filters, n_out_filters, dilation=cur_dilation, )) n_cur_filters = n_out_filters for i_layer in range(1, self.n_layers_per_block): model.add_module( 'res_{:d}_{:d}'.format(i_block, i_layer), ResidualBlock(n_cur_filters, n_cur_filters, dilation=cur_dilation)) i_block += 1 cur_dilation[0] *= 2 model.add_module( 'res_{:d}_{:d}'.format(i_block, 0), ResidualBlock( n_cur_filters, n_cur_filters, dilation=cur_dilation, )) for i_layer in range(1, self.n_layers_per_block): model.add_module( 'res_{:d}_{:d}'.format(i_block, i_layer), ResidualBlock(n_cur_filters, n_cur_filters, dilation=cur_dilation)) i_block += 1 cur_dilation[0] *= 2 model.add_module( 'res_{:d}_{:d}'.format(i_block, 0), ResidualBlock( n_cur_filters, n_cur_filters, dilation=cur_dilation, )) for i_layer in range(1, self.n_layers_per_block): model.add_module( 'res_{:d}_{:d}'.format(i_block, i_layer), ResidualBlock(n_cur_filters, n_cur_filters, dilation=cur_dilation)) i_block += 1 cur_dilation[0] *= 2 model.add_module( 'res_{:d}_{:d}'.format(i_block, 0), ResidualBlock( n_cur_filters, n_cur_filters, dilation=cur_dilation, )) for i_layer in range(1, self.n_layers_per_block): model.add_module( 'res_{:d}_{:d}'.format(i_block, i_layer), ResidualBlock(n_cur_filters, n_cur_filters, dilation=cur_dilation)) i_block += 1 cur_dilation[0] *= 2 model.add_module( 'res_{:d}_{:d}'.format(i_block, 0), ResidualBlock( n_cur_filters, n_cur_filters, dilation=cur_dilation, )) model.eval() if self.final_pool_length == 'auto': print('Final Pool length is auto!') out = model( np_to_var( np.ones((1, self.in_chans, self.input_time_length, 1), dtype=np.float32))) n_out_time = out.cpu().data.numpy().shape[2] self.final_pool_length = n_out_time # model.add_module('mean_pool', AvgPool2dWithConv( # (self.final_pool_length, 1), (1,1), dilation=(int(cur_dilation[0]), # int(cur_dilation[1])))) # model.add_module('conv_classifier', # 
nn.Conv2d(n_cur_filters, self.n_classes, # (1, 1), bias=True)) # start added code martin model.add_module( 'conv_classifier', nn.Conv2d(n_cur_filters, self.n_classes, (self.final_pool_length, 1), bias=True)) #end added code martin model.add_module('softmax', nn.LogSoftmax()) model.add_module('squeeze', Expression(_squeeze_final_output)) # Initialize all weights model.apply( lambda module: weights_init(module, self.conv_weight_init_fn)) # Start in eval mode model.eval() return model
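# In the ResNet above, the temporal dilation doubles after every block group, so later
# residual blocks see exponentially more temporal context without losing time resolution.
# A small arithmetic sketch of the resulting receptive field; kernel length 3 and two
# convolutions per ResidualBlock are assumptions made only for this illustration.
def dilated_receptive_field(kernel_len, dilations, convs_per_block=2):
    # rf = 1 + sum over convolutions of (kernel_len - 1) * dilation
    rf = 1
    for dilation in dilations:
        rf += convs_per_block * (kernel_len - 1) * dilation
    return rf

dilations = [1, 2, 4, 8, 16, 32]   # one entry per block group, doubling as above
print(dilated_receptive_field(3, dilations))  # 1 + 2 * 2 * (1 + 2 + 4 + 8 + 16 + 32) = 253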
input_time_length=input_time_length, final_conv_length=12).create_network() model # model = Deep4Net(in_chans=in_chans, n_classes=n_classes, input_time_length=input_time_length, # final_conv_length=12).create_network() to_dense_prediction_model(model) if cuda: model.cuda() optimizer = optim.Adam(model.parameters()) # determine output size test_input = np_to_var( np.ones((2, in_chans, input_time_length, 1), dtype=np.float32)) if cuda: test_input = test_input.cuda() out = model(test_input) n_preds_per_input = out.cpu().data.numpy().shape[2] print("{:d} predictions per input/trial".format(n_preds_per_input)) iterator = CropsFromTrialsIterator(batch_size=16, input_time_length=input_time_length, n_preds_per_input=n_preds_per_input) rng = RandomState((2017, 6, 30)) for i_epoch in range(20): # Set model to training mode model.train() for batch_X, batch_y in iterator.get_batches(train_set, shuffle=False):
config.max_epochs, config.cuda, ) end_time = time.time() run_time = end_time - start_time log.info("Experiment runtime: {:.2f} sec".format(run_time)) # In case you want to recompute predictions for further analysis: exp.model.eval() for setname in ('train', 'valid', 'test'): log.info("Compute predictions for {:s}...".format(setname)) dataset = exp.datasets[setname] if config.cuda: preds_per_batch = [ var_to_np(exp.model(np_to_var(b[0]).cuda())) for b in exp.iterator.get_batches(dataset, shuffle=False) ] else: preds_per_batch = [ var_to_np(exp.model(np_to_var(b[0]))) for b in exp.iterator.get_batches(dataset, shuffle=False) ] preds_per_trial = compute_preds_per_trial( preds_per_batch, dataset, input_time_length=exp.iterator.input_time_length, n_stride=exp.iterator.n_preds_per_input) mean_preds_per_trial = [ np.mean(preds, axis=(0, 2)) for preds in preds_per_trial ]
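# The snippet above collapses crop predictions into one prediction per trial by averaging.
# In isolation the shapes look like this (NumPy only; 5 trials, 4 crops, 2 classes and
# 30 predictions per crop are illustrative numbers, not values from the experiment).
import numpy as np

preds_per_trial = [np.random.randn(4, 2, 30) for _ in range(5)]   # (crops, classes, preds) per trial
mean_preds_per_trial = np.array([np.mean(p, axis=(0, 2)) for p in preds_per_trial])  # shape (5, 2)
pred_labels = np.argmax(mean_preds_per_trial, axis=1)              # one predicted label per trial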
def fit( self, train_X, train_y, epochs, batch_size, input_time_length=None, validation_data=None, model_constraint=None, remember_best_column=None, scheduler=None, log_0_epoch=True, ): """ Fit the model using the given training data. Will set `epochs_df` variable with a pandas dataframe to the history of the training process. Parameters ---------- train_X: ndarray Training input data train_y: 1darray Training labels epochs: int Number of epochs to train batch_size: int input_time_length: int, optional Super crop size, what temporal size is pushed forward through the network, see cropped decoding tuturial. validation_data: (ndarray, 1darray), optional X and y for validation set if wanted model_constraint: object, optional You can supply :class:`.MaxNormDefaultConstraint` if wanted. remember_best_column: string, optional In case you want to do an early stopping/reset parameters to some "best" epoch, define here the monitored value whose minimum determines the best epoch. scheduler: 'cosine' or None, optional Whether to use cosine annealing (:class:`.CosineAnnealing`). log_0_epoch: bool Whether to compute the metrics once before training as well. Returns ------- exp: Underlying braindecode :class:`.Experiment` """ if (not hasattr(self, "compiled")) or (not self.compiled): raise ValueError( "Compile the model first by calling model.compile(loss, optimizer, metrics)" ) if self.cropped and input_time_length is None: raise ValueError( "In cropped mode, need to specify input_time_length," "which is the number of timesteps that will be pushed through" "the network in a single pass.") train_X = _ensure_float32(train_X) if self.cropped: self.network.eval() test_input = np_to_var( np.ones( (1, train_X[0].shape[0], input_time_length) + train_X[0].shape[2:], dtype=np.float32, )) while len(test_input.size()) < 4: test_input = test_input.unsqueeze(-1) if self.is_cuda: test_input = test_input.cuda() out = self.network(test_input) n_preds_per_input = out.cpu().data.numpy().shape[2] self.iterator = CropsFromTrialsIterator( batch_size=batch_size, input_time_length=input_time_length, n_preds_per_input=n_preds_per_input, seed=self.seed_rng.randint(0, np.iinfo(np.int32).max - 1), ) else: self.iterator = BalancedBatchSizeIterator( batch_size=batch_size, seed=self.seed_rng.randint(0, np.iinfo(np.int32).max - 1), ) if log_0_epoch: stop_criterion = MaxEpochs(epochs) else: stop_criterion = MaxEpochs(epochs - 1) train_set = SignalAndTarget(train_X, train_y) optimizer = self.optimizer if scheduler is not None: assert (scheduler == "cosine" ), "Supply either 'cosine' or None as scheduler." 
n_updates_per_epoch = sum([ 1 for _ in self.iterator.get_batches(train_set, shuffle=True) ]) n_updates_per_period = n_updates_per_epoch * epochs if scheduler == "cosine": scheduler = CosineAnnealing(n_updates_per_period) schedule_weight_decay = False if optimizer.__class__.__name__ == "AdamW": schedule_weight_decay = True optimizer = ScheduledOptimizer( scheduler, self.optimizer, schedule_weight_decay=schedule_weight_decay, ) loss_function = self.loss if self.cropped: loss_function = lambda outputs, targets: self.loss( th.mean(outputs, dim=2), targets) if validation_data is not None: valid_X = _ensure_float32(validation_data[0]) valid_y = validation_data[1] valid_set = SignalAndTarget(valid_X, valid_y) else: valid_set = None test_set = None self.monitors = [LossMonitor()] if self.cropped: self.monitors.append( CroppedTrialMisclassMonitor(input_time_length)) else: self.monitors.append(MisclassMonitor()) if self.extra_monitors is not None: self.monitors.extend(self.extra_monitors) self.monitors.append(RuntimeMonitor()) exp = Experiment( self.network, train_set, valid_set, test_set, iterator=self.iterator, loss_function=loss_function, optimizer=optimizer, model_constraint=model_constraint, monitors=self.monitors, stop_criterion=stop_criterion, remember_best_column=remember_best_column, run_after_early_stop=False, cuda=self.is_cuda, log_0_epoch=log_0_epoch, do_early_stop=(remember_best_column is not None), ) exp.run() self.epochs_df = exp.epochs_df return exp
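# In cropped mode, the loss_function defined above averages the per-crop log-probabilities
# over the time dimension before applying the loss; as noted elsewhere in this file, this
# is gradient-equivalent to a per-prediction loss for the log-softmax + NLL combination.
# Minimal illustration with plain PyTorch (random numbers stand in for network outputs).
import torch as th
import torch.nn.functional as F

outputs = th.log_softmax(th.randn(4, 2, 100), dim=1)   # batch x classes x crop predictions
targets = th.tensor([0, 1, 0, 1])
cropped_loss = F.nll_loss(th.mean(outputs, dim=2), targets)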
def run_exp(max_recording_mins, n_recordings, sec_to_cut_at_start, sec_to_cut_at_end, duration_recording_mins, max_abs_val, clip_before_resample, sampling_freq, divisor, n_folds, i_test_fold, shuffle, merge_train_valid, model, input_time_length, optimizer, learning_rate, weight_decay, scheduler, model_constraint, batch_size, max_epochs, only_return_exp, time_cut_off_sec, start_time, test_on_eval, test_recording_mins, sensor_types, log_dir, np_th_seed, cuda=True): import torch.backends.cudnn as cudnn cudnn.benchmark = True if optimizer == 'adam': assert merge_train_valid == False else: assert optimizer == 'adamw' assert merge_train_valid == True preproc_functions = create_preproc_functions( sec_to_cut_at_start=sec_to_cut_at_start, sec_to_cut_at_end=sec_to_cut_at_end, duration_recording_mins=duration_recording_mins, max_abs_val=max_abs_val, clip_before_resample=clip_before_resample, sampling_freq=sampling_freq, divisor=divisor) dataset = DiagnosisSet(n_recordings=n_recordings, max_recording_mins=max_recording_mins, preproc_functions=preproc_functions, train_or_eval='train', sensor_types=sensor_types) if test_on_eval: if test_recording_mins is None: test_recording_mins = duration_recording_mins test_preproc_functions = create_preproc_functions( sec_to_cut_at_start=sec_to_cut_at_start, sec_to_cut_at_end=sec_to_cut_at_end, duration_recording_mins=test_recording_mins, max_abs_val=max_abs_val, clip_before_resample=clip_before_resample, sampling_freq=sampling_freq, divisor=divisor) test_dataset = DiagnosisSet(n_recordings=n_recordings, max_recording_mins=None, preproc_functions=test_preproc_functions, train_or_eval='eval', sensor_types=sensor_types) if not only_return_exp: X, y = dataset.load() max_shape = np.max([list(x.shape) for x in X], axis=0) assert max_shape[1] == int(duration_recording_mins * sampling_freq * 60) if test_on_eval: test_X, test_y = test_dataset.load() max_shape = np.max([list(x.shape) for x in test_X], axis=0) assert max_shape[1] == int(test_recording_mins * sampling_freq * 60) if not test_on_eval: splitter = TrainValidTestSplitter(n_folds, i_test_fold, shuffle=shuffle) else: splitter = TrainValidSplitter(n_folds, i_valid_fold=i_test_fold, shuffle=shuffle) if not only_return_exp: if not test_on_eval: train_set, valid_set, test_set = splitter.split(X, y) else: train_set, valid_set = splitter.split(X, y) test_set = SignalAndTarget(test_X, test_y) del test_X, test_y del X, y # shouldn't be necessary, but just to make sure if merge_train_valid: train_set = concatenate_sets([train_set, valid_set]) # just reduce valid for faster computations valid_set.X = valid_set.X[:8] valid_set.y = valid_set.y[:8] # np.save('/data/schirrmr/schirrmr/auto-diag/lukasrepr/compare/mne-0-16-2/train_X.npy', train_set.X) # np.save('/data/schirrmr/schirrmr/auto-diag/lukasrepr/compare/mne-0-16-2/train_y.npy', train_set.y) # np.save('/data/schirrmr/schirrmr/auto-diag/lukasrepr/compare/mne-0-16-2/valid_X.npy', valid_set.X) # np.save('/data/schirrmr/schirrmr/auto-diag/lukasrepr/compare/mne-0-16-2/valid_y.npy', valid_set.y) # np.save('/data/schirrmr/schirrmr/auto-diag/lukasrepr/compare/mne-0-16-2/test_X.npy', test_set.X) # np.save('/data/schirrmr/schirrmr/auto-diag/lukasrepr/compare/mne-0-16-2/test_y.npy', test_set.y) else: train_set = None valid_set = None test_set = None log.info("Model:\n{:s}".format(str(model))) if cuda: model.cuda() model.eval() in_chans = 21 # determine output size test_input = np_to_var( np.ones((2, in_chans, input_time_length, 1), dtype=np.float32)) if cuda: test_input = 
test_input.cuda() out = model(test_input) n_preds_per_input = out.cpu().data.numpy().shape[2] log.info("{:d} predictions per input/trial".format(n_preds_per_input)) iterator = CropsFromTrialsIterator(batch_size=batch_size, input_time_length=input_time_length, n_preds_per_input=n_preds_per_input, seed=np_th_seed) assert optimizer in ['adam', 'adamw'], ("Expect optimizer to be either " "adam or adamw") schedule_weight_decay = optimizer == 'adamw' if optimizer == 'adam': optim_class = optim.Adam assert schedule_weight_decay == False assert merge_train_valid == False else: optim_class = AdamW assert schedule_weight_decay == True assert merge_train_valid == True optimizer = optim_class(model.parameters(), lr=learning_rate, weight_decay=weight_decay) if scheduler is not None: assert scheduler == 'cosine' n_updates_per_epoch = sum( [1 for _ in iterator.get_batches(train_set, shuffle=True)]) # Adapt if you have a different number of epochs n_updates_per_period = n_updates_per_epoch * max_epochs scheduler = CosineAnnealing(n_updates_per_period) optimizer = ScheduledOptimizer( scheduler, optimizer, schedule_weight_decay=schedule_weight_decay) loss_function = nll_loss_on_mean if model_constraint is not None: assert model_constraint == 'defaultnorm' model_constraint = MaxNormDefaultConstraint() monitors = [ LossMonitor(), MisclassMonitor(col_suffix='sample_misclass'), CroppedDiagnosisMonitor(input_time_length, n_preds_per_input), RuntimeMonitor(), ] stop_criterion = MaxEpochs(max_epochs) loggers = [Printer(), TensorboardWriter(log_dir)] batch_modifier = None exp = Experiment(model, train_set, valid_set, test_set, iterator, loss_function, optimizer, model_constraint, monitors, stop_criterion, remember_best_column='valid_misclass', run_after_early_stop=True, batch_modifier=batch_modifier, cuda=cuda, loggers=loggers) if not only_return_exp: # Until first stop exp.setup_training() exp.monitor_epoch(exp.datasets) exp.log_epoch() exp.rememberer.remember_epoch(exp.epochs_df, exp.model, exp.optimizer) exp.iterator.reset_rng() while not exp.stop_criterion.should_stop(exp.epochs_df): if (time.time() - start_time) > time_cut_off_sec: log.info( "Ran out of time after {:.2f} sec.".format(time.time() - start_time)) return exp log.info("Still in time after {:.2f} sec.".format(time.time() - start_time)) exp.run_one_epoch(exp.datasets, remember_best=True) if (time.time() - start_time) > time_cut_off_sec: log.info("Ran out of time after {:.2f} sec.".format(time.time() - start_time)) return exp if not merge_train_valid: exp.setup_after_stop_training() # Run until second stop datasets = exp.datasets datasets['train'] = concatenate_sets( [datasets['train'], datasets['valid']]) exp.monitor_epoch(datasets) exp.log_epoch() exp.iterator.reset_rng() while not exp.stop_criterion.should_stop(exp.epochs_df): if (time.time() - start_time) > time_cut_off_sec: log.info("Ran out of time after {:.2f} sec.".format( time.time() - start_time)) return exp log.info("Still in time after {:.2f} sec.".format(time.time() - start_time)) exp.run_one_epoch(datasets, remember_best=False) else: exp.dataset = dataset exp.splitter = splitter if test_on_eval: exp.test_dataset = test_dataset return exp
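# The ScheduledOptimizer above anneals the learning rate per update over
# n_updates_per_epoch * max_epochs steps. For reference, the usual cosine-annealing
# formula is sketched below; whether braindecode's CosineAnnealing matches it in every
# detail (e.g. restarts, final value) is not checked here.
import numpy as np

def cosine_lr(i_update, initial_lr, n_updates_per_period):
    # lr_t = 0.5 * lr_0 * (1 + cos(pi * t / T))
    return 0.5 * initial_lr * (1 + np.cos(np.pi * i_update / n_updates_per_period))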
def runModel(mode): cudnn.benchmark = True start = time.time() #mode = str(sys.argv[1]) #X,y,test_X,test_y = loadSubNormData(mode='all') #X,y,test_X,test_y = loadNEDCdata(mode=mode) #data = np.load('sessionsData/data%s-sessions.npy'%mode[:3]) #labels = np.load('sessionsData/labels%s-sessions.npy'%mode[:3]) data = np.load('data%s.npy' % mode[:3]) labels = np.load('labels%s.npy' % mode[:3]) X, y, test_X, test_y = splitDataRandom_Loaded(data, labels, mode) print('Mode - %s Total n: %d, Test n: %d' % (mode, len(y) + len(test_y), len(test_y))) #return 0 #X = addDataNoise(X,band=[1,4]) #test_X = addDataNoise(test_X,band=[1,4]) max_shape = np.max([list(x.shape) for x in X], axis=0) assert max_shape[1] == int(config.duration_recording_mins * config.sampling_freq * 60) n_classes = 2 n_recordings = None # set to an integer, if you want to restrict the set size sensor_types = ["EEG"] n_chans = 19 #21 max_recording_mins = 35 # exclude larger recordings from training set sec_to_cut = 60 # cut away at start of each recording duration_recording_mins = 5 #20 # how many minutes to use per recording test_recording_mins = 5 #20 max_abs_val = 800 # for clipping sampling_freq = 100 divisor = 10 # divide signal by this test_on_eval = True # teston evaluation set or on training set # in case of test on eval, n_folds and i_testfold determine # validation fold in training set for training until first stop n_folds = 10 i_test_fold = 9 shuffle = True model_name = 'linear' #'deep'#'shallow' 'linear' n_start_chans = 25 n_chan_factor = 2 # relevant for deep model only input_time_length = 6000 final_conv_length = 1 model_constraint = 'defaultnorm' init_lr = 1e-3 batch_size = 64 max_epochs = 35 # until first stop, the continue train on train+valid cuda = True # False if model_name == 'shallow': model = ShallowFBCSPNet( in_chans=n_chans, n_classes=n_classes, n_filters_time=n_start_chans, n_filters_spat=n_start_chans, input_time_length=input_time_length, final_conv_length=final_conv_length).create_network() elif model_name == 'deep': model = Deep4Net(n_chans, n_classes, n_filters_time=n_start_chans, n_filters_spat=n_start_chans, input_time_length=input_time_length, n_filters_2=int(n_start_chans * n_chan_factor), n_filters_3=int(n_start_chans * (n_chan_factor**2.0)), n_filters_4=int(n_start_chans * (n_chan_factor**3.0)), final_conv_length=final_conv_length, stride_before_pool=True).create_network() elif (model_name == 'deep_smac'): if model_name == 'deep_smac': do_batch_norm = False else: assert model_name == 'deep_smac_bnorm' do_batch_norm = True double_time_convs = False drop_prob = 0.244445 filter_length_2 = 12 filter_length_3 = 14 filter_length_4 = 12 filter_time_length = 21 final_conv_length = 1 first_nonlin = elu first_pool_mode = 'mean' first_pool_nonlin = identity later_nonlin = elu later_pool_mode = 'mean' later_pool_nonlin = identity n_filters_factor = 1.679066 n_filters_start = 32 pool_time_length = 1 pool_time_stride = 2 split_first_layer = True n_chan_factor = n_filters_factor n_start_chans = n_filters_start model = Deep4Net(n_chans, n_classes, n_filters_time=n_start_chans, n_filters_spat=n_start_chans, input_time_length=input_time_length, n_filters_2=int(n_start_chans * n_chan_factor), n_filters_3=int(n_start_chans * (n_chan_factor**2.0)), n_filters_4=int(n_start_chans * (n_chan_factor**3.0)), final_conv_length=final_conv_length, batch_norm=do_batch_norm, double_time_convs=double_time_convs, drop_prob=drop_prob, filter_length_2=filter_length_2, filter_length_3=filter_length_3, filter_length_4=filter_length_4, 
filter_time_length=filter_time_length, first_nonlin=first_nonlin, first_pool_mode=first_pool_mode, first_pool_nonlin=first_pool_nonlin, later_nonlin=later_nonlin, later_pool_mode=later_pool_mode, later_pool_nonlin=later_pool_nonlin, pool_time_length=pool_time_length, pool_time_stride=pool_time_stride, split_first_layer=split_first_layer, stride_before_pool=True).create_network() elif model_name == 'shallow_smac': conv_nonlin = identity do_batch_norm = True drop_prob = 0.328794 filter_time_length = 56 final_conv_length = 22 n_filters_spat = 73 n_filters_time = 24 pool_mode = 'max' pool_nonlin = identity pool_time_length = 84 pool_time_stride = 3 split_first_layer = True model = ShallowFBCSPNet( in_chans=n_chans, n_classes=n_classes, n_filters_time=n_filters_time, n_filters_spat=n_filters_spat, input_time_length=input_time_length, final_conv_length=final_conv_length, conv_nonlin=conv_nonlin, batch_norm=do_batch_norm, drop_prob=drop_prob, filter_time_length=filter_time_length, pool_mode=pool_mode, pool_nonlin=pool_nonlin, pool_time_length=pool_time_length, pool_time_stride=pool_time_stride, split_first_layer=split_first_layer, ).create_network() elif model_name == 'linear': model = nn.Sequential() model.add_module("conv_classifier", nn.Conv2d(n_chans, n_classes, (600, 1))) model.add_module('softmax', nn.LogSoftmax(dim=1)) model.add_module('squeeze', Expression(lambda x: x.squeeze(3))) else: assert False, "unknown model name {:s}".format(model_name) to_dense_prediction_model(model) if config.cuda: model.cuda() test_input = np_to_var( np.ones((2, config.n_chans, config.input_time_length, 1), dtype=np.float32)) if config.cuda: test_input = test_input.cuda() out = model(test_input) n_preds_per_input = out.cpu().data.numpy().shape[2] iterator = CropsFromTrialsIterator( batch_size=config.batch_size, input_time_length=config.input_time_length, n_preds_per_input=n_preds_per_input) #model.add_module('softmax', nn.LogSoftmax(dim=1)) model.eval() mode[2] = str(mode[2]) mode[3] = str(mode[3]) modelName = '-'.join(mode[:4]) #params = th.load('sessionsData/%sModel%s-sessions.pt'%(modelName,mode[4])) #params = th.load('%sModel%s.pt'%(modelName,mode[4])) params = th.load('linear/%sModel%s.pt' % (modelName, mode[4])) model.load_state_dict(params) if config.test_on_eval: #test_X, test_y = test_dataset.load() #test_X, test_y = loadNEDCdata(mode='eval') max_shape = np.max([list(x.shape) for x in test_X], axis=0) assert max_shape[1] == int(config.test_recording_mins * config.sampling_freq * 60) if not config.test_on_eval: splitter = TrainValidTestSplitter(config.n_folds, config.i_test_fold, shuffle=config.shuffle) train_set, valid_set, test_set = splitter.split(X, y) else: splitter = TrainValidSplitter(config.n_folds, i_valid_fold=config.i_test_fold, shuffle=config.shuffle) train_set, valid_set = splitter.split(X, y) test_set = SignalAndTarget(test_X, test_y) del test_X, test_y del X, y # shouldn't be necessary, but just to make sure datasets = OrderedDict( (('train', train_set), ('valid', valid_set), ('test', test_set))) for setname in ('train', 'valid', 'test'): #setname = 'test' #print("Compute predictions for {:s}...".format(setname)) dataset = datasets[setname] if config.cuda: preds_per_batch = [ var_to_np(model(np_to_var(b[0]).cuda())) for b in iterator.get_batches(dataset, shuffle=False) ] else: preds_per_batch = [ var_to_np(model(np_to_var(b[0]))) for b in iterator.get_batches(dataset, shuffle=False) ] preds_per_trial = compute_preds_per_trial( preds_per_batch, dataset, 
input_time_length=iterator.input_time_length, n_stride=iterator.n_preds_per_input) mean_preds_per_trial = [ np.mean(preds, axis=(0, 2)) for preds in preds_per_trial ] mean_preds_per_trial = np.array(mean_preds_per_trial) all_pred_labels = np.argmax(mean_preds_per_trial, axis=1).squeeze() all_target_labels = dataset.y acc_per_class = [] for i_class in range(n_classes): mask = all_target_labels == i_class acc = np.mean(all_pred_labels[mask] == all_target_labels[mask]) acc_per_class.append(acc) misclass = 1 - np.mean(acc_per_class) #print('Acc:{}, Class 0:{}, Class 1:{}'.format(np.mean(acc_per_class),acc_per_class[0],acc_per_class[1])) if setname == 'test': testResult = np.mean(acc_per_class) return testResult
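# runModel() above reports accuracy per class and then averages the two, i.e. a balanced
# accuracy, so the normal/abnormal class imbalance cannot dominate the score. A tiny
# NumPy check of that arithmetic with made-up labels:
import numpy as np

y_true = np.array([0, 0, 0, 0, 1, 1])
y_pred = np.array([0, 0, 1, 0, 1, 0])
acc_per_class = [np.mean(y_pred[y_true == c] == c) for c in (0, 1)]
print(np.mean(acc_per_class))  # (0.75 + 0.5) / 2 = 0.625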
def fit_transform_2(model, optimizer, train_data, y_train, test_data, y_test, num_epochs=20, n_channels=22, input_time_length=500): train_set = SignalAndTarget(train_data, y=y_train) test_set = SignalAndTarget(test_data, y=y_test) # # # # # # # # CREATE CROPPED ITERATOR # # # # # # # # # # determine output size test_input = np_to_var( np.ones((2, n_channels, input_time_length, 1), dtype=np.float32)) if cuda: test_input = test_input.cuda() out = model(test_input) n_preds_per_input = out.cpu().data.numpy().shape[2] iterator = CropsFromTrialsIterator(batch_size=32, input_time_length=input_time_length, n_preds_per_input=n_preds_per_input) accuracy_out = [] min_loss = 1000 for i_epoch in range(num_epochs): # Set model to training mode model.train() for batch_X, batch_y in iterator.get_batches(train_set, shuffle=False): net_in = np_to_var(batch_X) if cuda: net_in = net_in.cuda() # print(batch_y) net_target = np_to_var(batch_y) if cuda: net_target = net_target.cuda() # Remove gradients of last backward pass from all parameters optimizer.zero_grad() outputs = model(net_in) # Mean predictions across trial # Note that this will give identical gradients to computing # a per-prediction loss (at least for the combination of log softmax activation # and negative log likelihood loss which we are using here) outputs = th.mean(outputs, dim=2, keepdim=False) loss = F.nll_loss(outputs, net_target) loss.backward() optimizer.step() # Print some statistics each epoch model.eval() # print("Epoch {:d}".format(i_epoch)) for setname, dataset in (('Train', train_set), ('Test', test_set)): # Collect all predictions and losses all_preds = [] all_losses = [] batch_sizes = [] for batch_X, batch_y in iterator.get_batches(dataset, shuffle=False): net_in = np_to_var(batch_X) if cuda: net_in = net_in.cuda() net_target = np_to_var(batch_y) if cuda: net_target = net_target.cuda() outputs = model(net_in) all_preds.append(var_to_np(outputs)) outputs = th.mean(outputs, dim=2, keepdim=False) loss = F.nll_loss(outputs, net_target) loss = float(var_to_np(loss)) all_losses.append(loss) batch_sizes.append(len(batch_X)) # Compute mean per-input loss loss = np.mean( np.array(all_losses) * np.array(batch_sizes) / np.mean(batch_sizes)) # print("{:6s} Loss: {:.5f}".format(setname, loss)) # Assign the predictions to the trials preds_per_trial = compute_preds_per_trial_from_crops( all_preds, input_time_length, dataset.X) # preds per trial are now trials x classes x timesteps/predictions # Now mean across timesteps for each trial to get per-trial predictions meaned_preds_per_trial = np.array( [np.mean(p, axis=1) for p in preds_per_trial]) predicted_labels = np.argmax(meaned_preds_per_trial, axis=1) accuracy = np.mean(predicted_labels == dataset.y) # print("{:6s} Accuracy: {:.2f}%".format(setname, accuracy * 100)) if setname == 'Test': accuracy_out.append(accuracy) if loss < min_loss: min_loss = loss elif loss > min_loss * 1.1: print("Training Stopping") return model, np.asarray(accuracy_out) return model, np.asarray(accuracy_out)
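# fit_transform_2() above stops training once the loss on the held-out set is more than
# 10% worse than the best (lowest) loss seen so far. The same rule in isolation:
def should_stop(current_loss, best_loss, tolerance=1.1):
    return current_loss > best_loss * tolerance

best_loss = min([0.9, 0.7, 0.65])
print(should_stop(0.73, best_loss))  # True: 0.73 > 0.65 * 1.1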
def get_dummy_input():
    input_shape = (2, global_vars.get('eeg_chans'),
                   global_vars.get('input_time_len'), 1)
    return np_to_var(np.random.random(input_shape).astype(np.float32))
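# get_dummy_input() above produces a random batch in the (2, eeg_chans, input_time_len, 1)
# layout used throughout this file, typically to probe intermediate output shapes while a
# model is being assembled. A self-contained sketch of that probing pattern with plain
# PyTorch stand-ins (global_vars is not touched and the sizes are illustrative):
import numpy as np
import torch
import torch.nn as nn

eeg_chans, input_time_len = 22, 1125
dummy = torch.from_numpy(
    np.random.random((2, eeg_chans, input_time_len, 1)).astype(np.float32))
partial_model = nn.Sequential(nn.Conv2d(eeg_chans, 40, kernel_size=(25, 1)))
with torch.no_grad():
    out = partial_model(dummy)
prev_channels, prev_time = out.shape[1], out.shape[2]   # 40, 1101 -> sizes the next layer
print(prev_channels, prev_time)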