def __init__(self, conv_nonlin=square, pool_nonlin=safe_log):
    """Create the layers of the network.

    Parameters
    ----------
    conv_nonlin : callable
        Wrapped in an ``Expression`` and applied after the batch norm that
        follows the second convolution.
    pool_nonlin : callable
        Wrapped in an ``Expression`` and applied after the average pooling.
    """
    super(SelfShallow_loss12, self).__init__()
    # Keep the hyper-parameters as attributes.  They are assigned explicitly
    # rather than through ``self.__dict__.update(locals())``: that idiom also
    # stored ``self.self`` and so created a reference cycle on every
    # instance.
    self.conv_nonlin = conv_nonlin
    self.pool_nonlin = pool_nonlin

    self.transpose = Expression(_transpose_time_to_spat)
    # First convolution: kernel (25, 1), 1 -> 40 feature maps.
    self.conv1_1 = nn.Sequential(
        nn.Conv2d(1, 40, kernel_size=(25, 1), stride=(1, 1)),
    )
    # Second convolution: kernel (1, 22).  The bias is disabled because a
    # batch norm with affine parameters follows directly.
    self.conv1_2 = nn.Sequential(
        nn.Conv2d(40, 40, kernel_size=(1, 22), stride=(1, 1), bias=False),
        nn.BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True,
                       track_running_stats=True),
        Expression(conv_nonlin),
    )
    self.pool1_1 = nn.Sequential(
        nn.AvgPool2d(kernel_size=(75, 1), stride=(15, 1), padding=0),
        Expression(pool_nonlin),
        nn.Dropout(p=0.5),
    )
    # The attribute name ``classfier`` (sic) is kept unchanged because it
    # is part of the parameter names in the state dict.
    # NOTE(review): kernel sizes 22 and 69 and the 4 output maps are
    # hard-coded -- presumably tied to one specific dataset; confirm.
    self.classfier = nn.Sequential(
        nn.Conv2d(40, 4, kernel_size=(69, 1), stride=(1, 1)),
        nn.LogSoftmax(dim=1),
        Expression(_squeeze_final_output),
    )
    self.guide = Waveguide()
def create_network(self):
    """Assemble the shallow ConvNet as an ``nn.Sequential``.

    Layer order: (dimshuffle ->) temporal conv (-> spatial conv when
    ``split_first_layer``) -> optional batch norm -> ``conv_nonlin`` ->
    pooling -> ``pool_nonlin`` -> dropout -> ``conv_classifier`` ->
    log-softmax over axis 1 -> squeeze.

    Side effect: when ``self.final_conv_length == 'auto'`` the attribute is
    overwritten with the size of axis 2 of the feature extractor's output
    for an all-ones input of ``self.input_time_length`` samples.

    Returns
    -------
    nn.Sequential
        The network with Xavier-uniform conv weights, zero biases and
        batch-norm weight 1 / bias 0.
    """
    pool_class = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)[self.pool_mode]
    model = nn.Sequential()
    if self.split_first_layer:
        model.add_module('dimshuffle', Expression(_transpose_time_to_spat))
        model.add_module(
            'conv_time',
            nn.Conv2d(1, self.n_filters_time,
                      (self.filter_time_length, 1), stride=1))
        # No bias when a batch norm (which has its own bias) follows.
        model.add_module(
            'conv_spat',
            nn.Conv2d(self.n_filters_time, self.n_filters_spat,
                      (1, self.in_chans), stride=1,
                      bias=not self.batch_norm))
        n_filters_conv = self.n_filters_spat
    else:
        model.add_module(
            'conv_time',
            nn.Conv2d(self.in_chans, self.n_filters_time,
                      (self.filter_time_length, 1), stride=1,
                      bias=not self.batch_norm))
        n_filters_conv = self.n_filters_time
    if self.batch_norm:
        model.add_module(
            'bnorm',
            nn.BatchNorm2d(n_filters_conv,
                           momentum=self.batch_norm_alpha, affine=True))
    model.add_module('conv_nonlin', Expression(self.conv_nonlin))
    model.add_module(
        'pool',
        pool_class(kernel_size=(self.pool_time_length, 1),
                   stride=(self.pool_time_stride, 1)))
    model.add_module('pool_nonlin', Expression(self.pool_nonlin))
    model.add_module('drop', nn.Dropout(p=self.drop_prob))
    if self.final_conv_length == 'auto':
        # Probe the partial network once to find how many time steps
        # remain for the final convolution to cover.
        out = model(np_to_var(np.ones(
            (1, self.in_chans, self.input_time_length, 1),
            dtype=np.float32)))
        n_out_time = out.cpu().data.numpy().shape[2]
        self.final_conv_length = n_out_time
    model.add_module(
        'conv_classifier',
        nn.Conv2d(n_filters_conv, self.n_classes,
                  (self.final_conv_length, 1), bias=True))
    # Explicit class axis: for the 4-D conv output the implicit
    # (deprecated) choice was axis 1 as well, so results are unchanged.
    model.add_module('softmax', nn.LogSoftmax(dim=1))
    model.add_module('squeeze', Expression(_squeeze_final_output))

    # Initialization, xavier is same as in paper...
    # The in-place ``*_`` functions replace the deprecated
    # ``init.xavier_uniform`` / ``init.constant`` aliases.
    init.xavier_uniform_(model.conv_time.weight, gain=1)
    # maybe no bias in case of no split layer and batch norm
    if self.split_first_layer or (not self.batch_norm):
        init.constant_(model.conv_time.bias, 0)
    if self.split_first_layer:
        init.xavier_uniform_(model.conv_spat.weight, gain=1)
        if not self.batch_norm:
            init.constant_(model.conv_spat.bias, 0)
    if self.batch_norm:
        init.constant_(model.bnorm.weight, 1)
        init.constant_(model.bnorm.bias, 0)
    init.xavier_uniform_(model.conv_classifier.weight, gain=1)
    init.constant_(model.conv_classifier.bias, 0)
    return model
def add_conv_pool_block(model, n_filters_before, n_filters, filter_length,
                        block_nr):
    """Append a conv (-> batch norm) -> nonlin -> dropout -> pool stage.

    The function is a closure: ``self``, ``conv_stride`` and
    ``first_pool_class`` are taken from the enclosing scope.

    Parameters
    ----------
    model : nn.Sequential
        Container the modules are appended to (modified in place).
    n_filters_before, n_filters : int
        Input / output channel counts of the convolution.
    filter_length : int
        Kernel length of the convolution along axis 2.
    block_nr : int
        Used as suffix for the conv / bnorm / nonlin / drop module names.
    """
    stages = [
        (f"conv_{block_nr}",
         nn.Conv2d(n_filters_before, n_filters, (filter_length, 1),
                   stride=(conv_stride, 1), bias=not self.batch_norm)),
    ]
    if self.batch_norm:
        stages.append(
            (f"bnorm_{block_nr}",
             nn.BatchNorm2d(n_filters, momentum=self.batch_norm_alpha,
                            affine=True, eps=1e-5)))
    stages.append((f"nonlin_{block_nr}", Expression(self.conv_nonlin)))
    stages.append((f"drop_{block_nr}", nn.Dropout(p=self.drop_prob)))
    # NOTE(review): the two pooling entries carry no block suffix, so a
    # second call on the same model would register them under the same
    # names again -- confirm this helper is only called once per model.
    stages.append(
        ("pool",
         first_pool_class(kernel_size=(self.pool_length_2, 1),
                          stride=(self.pool_stride_2, 1))))
    stages.append(("pool_nonlin", Expression(self.pool_nonlin)))
    for module_name, module in stages:
        model.add_module(module_name, module)
def add_conv_pool_block(model, n_filters_before, n_filters, filter_length,
                        block_nr):
    """Append dropout -> conv (-> batch norm) -> nonlin -> pool -> nonlin.

    Closure over ``self``, ``conv_stride``, ``pool_stride`` and
    ``later_pool_class`` of the enclosing scope.  Every module name gets
    the suffix ``_<block_nr>``; ``model`` is modified in place.
    """
    suffix = "_{:d}".format(block_nr)
    # (name prefix, module) pairs in the order they are appended.
    stages = [
        ("drop", nn.Dropout(p=self.drop_prob)),
        ("conv", nn.Conv2d(n_filters_before, n_filters, (filter_length, 1),
                           stride=(conv_stride, 1),
                           bias=not self.batch_norm)),
    ]
    if self.batch_norm:
        stages.append(
            ("bnorm", nn.BatchNorm2d(n_filters,
                                     momentum=self.batch_norm_alpha,
                                     affine=True, eps=1e-5)))
    stages.append(("nonlin", Expression(self.later_nonlin)))
    stages.append(
        ("pool", later_pool_class(kernel_size=(self.pool_time_length, 1),
                                  stride=(pool_stride, 1))))
    stages.append(("pool_nonlin", Expression(self.later_pool_nonlin)))
    for prefix, module in stages:
        model.add_module(prefix + suffix, module)
def get_sleep_classifier():
    """Build the sleep-stage classifier as an ``nn.Sequential``.

    Layer order: permute -> conv over the EEG channels -> permute ->
    two (conv, max-pool) stages with kernels (1, 64) / (1, 16) ->
    flatten -> dropout -> dense -> softmax.

    The channel count, input length and number of classes are read from
    ``global_vars`` ('eeg_chans', 'input_time_len', 'n_classes').

    Returns
    -------
    nn.Sequential
    """
    n_eeg_chans = global_vars.get('eeg_chans')
    model = nn.Sequential()
    model.add_module('permute_1',
                     Expression(MyModel._transpose_shift_and_swap))
    model.add_module(
        'conv_1',
        nn.Conv2d(1, n_eeg_chans, kernel_size=(n_eeg_chans, 1)))
    model.add_module('permute_2',
                     Expression(MyModel._transpose_channels_with_length))
    model.add_module('conv_2', nn.Conv2d(1, 8, kernel_size=(1, 64), stride=1))
    model.add_module('pool_1',
                     nn.MaxPool2d(kernel_size=(1, 16), stride=(1, 1)))
    model.add_module('conv_3', nn.Conv2d(8, 8, kernel_size=(1, 64), stride=1))
    model.add_module('pool_2',
                     nn.MaxPool2d(kernel_size=(1, 16), stride=(1, 1)))
    model.add_module('flatten', Flatten())
    model.add_module('dropout', nn.Dropout(p=0.5))

    # Probe the feature extractor with an all-ones batch of two examples
    # to learn how many features the dense layer receives per example.
    input_shape = (2, n_eeg_chans, global_vars.get('input_time_len'), 1)
    out = model.forward(np_to_var(np.ones(input_shape, dtype=np.float32)))
    n_features = int(np.prod(out.shape[1:]))
    model.add_module(
        'dense',
        nn.Linear(in_features=n_features,
                  out_features=global_vars.get('n_classes')))
    # The dense output is 2-D (batch, classes); naming the axis avoids the
    # deprecated implicit-dim behaviour and yields the same result.
    model.add_module('softmax', nn.Softmax(dim=1))
    return model
def create_network(self):
    """Assemble the temporal / depthwise / separable ConvNet.

    Layer order: dimshuffle -> temporal conv + batch norm -> grouped
    spatial conv (max-norm constrained) + batch norm -> nonlin -> pool
    (1, 4) -> dropout -> depthwise + pointwise conv + batch norm ->
    nonlin -> pool (1, 8) -> dropout -> ``conv_classifier`` ->
    log-softmax over axis 1 -> permute back -> squeeze.

    Side effect: when ``self.final_conv_length == 'auto'`` the attribute is
    overwritten with the size of axis 3 of the feature extractor's output
    for an all-ones input.

    Returns
    -------
    nn.Sequential
        Network initialised by ``glorot_weight_zero_bias``.
    """
    pool_class = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)[self.pool_mode]
    model = nn.Sequential()
    # b c 0 1
    # now to b 1 0 c
    model.add_module('dimshuffle', Expression(_transpose_to_b_1_c_0))

    model.add_module(
        'conv_temporal',
        nn.Conv2d(1, self.F1, (1, self.kernel_length), stride=1, bias=False,
                  padding=(0, self.kernel_length // 2)))
    model.add_module(
        'bnorm_temporal',
        nn.BatchNorm2d(self.F1, momentum=0.01, affine=True, eps=1e-3))
    # groups=F1: every temporal feature map gets its own D spatial filters.
    model.add_module(
        'conv_spatial',
        Conv2dWithConstraint(self.F1, self.F1 * self.D, (self.in_chans, 1),
                             max_norm=1, stride=1, bias=False,
                             groups=self.F1, padding=(0, 0)))
    model.add_module(
        'bnorm_1',
        nn.BatchNorm2d(self.F1 * self.D, momentum=0.01, affine=True,
                       eps=1e-3))
    model.add_module('elu_1', Expression(self.conv_nonlin))
    model.add_module('pool_1', pool_class(kernel_size=(1, 4), stride=(1, 4)))
    model.add_module('drop_1', nn.Dropout(p=self.drop_prob))

    # Separable convolution = depthwise conv followed by 1x1 pointwise conv.
    # https://discuss.pytorch.org/t/how-to-modify-a-conv2d-to-depthwise-separable-convolution/15843/7
    model.add_module(
        'conv_separable_depth',
        nn.Conv2d(self.F1 * self.D, self.F1 * self.D, (1, 16), stride=1,
                  bias=False, groups=self.F1 * self.D, padding=(0, 16 // 2)))
    model.add_module(
        'conv_separable_point',
        nn.Conv2d(self.F1 * self.D, self.F2, (1, 1), stride=1, bias=False,
                  padding=(0, 0)))
    model.add_module(
        'bnorm_2',
        nn.BatchNorm2d(self.F2, momentum=0.01, affine=True, eps=1e-3))
    model.add_module('elu_2', Expression(self.conv_nonlin))
    model.add_module('pool_2', pool_class(kernel_size=(1, 8), stride=(1, 8)))
    model.add_module('drop_2', nn.Dropout(p=self.drop_prob))

    # Probe the feature extractor to size the classifier kernel.
    out = model(np_to_var(np.ones(
        (1, self.in_chans, self.input_time_length, 1), dtype=np.float32)))
    out_shape = out.cpu().data.numpy().shape
    n_out_virtual_chans = out_shape[2]
    if self.final_conv_length == 'auto':
        n_out_time = out_shape[3]
        self.final_conv_length = n_out_time

    model.add_module(
        'conv_classifier',
        nn.Conv2d(self.F2, self.n_classes,
                  (n_out_virtual_chans, self.final_conv_length), bias=True))
    # Explicit class axis; for the 4-D conv output the implicit
    # (deprecated) choice was axis 1 too, so results are unchanged.
    model.add_module('softmax', nn.LogSoftmax(dim=1))
    # Transpose back to the logic of braindecode,
    # so time in third dimension (axis=2)
    model.add_module('permute_back', Expression(_transpose_1_0))
    model.add_module('squeeze', Expression(_squeeze_final_output))

    glorot_weight_zero_bias(model)
    return model
def add_conv_pool_block(model, n_filters_before, n_filters, filter_length,
                        block_nr):
    """Append one later block (dropout, conv, optional batch norm,
    nonlinearity, pooling, pooling nonlinearity) to ``model`` in place.

    ``self``, ``conv_stride``, ``pool_stride`` and ``later_pool_class``
    come from the enclosing scope.  All module names end in
    ``_<block_nr>``.
    """
    suffix = '_{:d}'.format(block_nr)

    def register(prefix, module):
        # ``suffix`` is already fully formatted, so it is appended as is.
        model.add_module(prefix + suffix, module)

    register('drop', nn.Dropout(p=self.drop_prob))
    register(
        'conv',
        nn.Conv2d(n_filters_before, n_filters, (filter_length, 1),
                  stride=(conv_stride, 1), bias=not self.batch_norm))
    if self.batch_norm:
        register(
            'bnorm',
            nn.BatchNorm2d(n_filters, momentum=self.batch_norm_alpha,
                           affine=True, eps=1e-5))
    register('nonlin', Expression(self.later_nonlin))
    register(
        'pool',
        later_pool_class(kernel_size=(self.pool_time_length, 1),
                         stride=(pool_stride, 1)))
    register('pool_nonlin', Expression(self.later_pool_nonlin))
def add_conv_pool_block(
    self,
    n_filters_before,
    n_filters,
    filter_length,
    conv_stride,
    pool_stride,
    later_pool_class,
):
    """Build one dropout -> conv (-> batch norm) -> nonlin -> pool block.

    Parameters
    ----------
    n_filters_before, n_filters : int
        Input / output channel counts of the convolution.
    filter_length : int
        Kernel length of the convolution along axis 2.
    conv_stride, pool_stride : int
        Strides of the convolution / pooling along axis 2.
    later_pool_class : callable
        Pooling layer factory accepting ``kernel_size`` and ``stride``.

    Returns
    -------
    nn.Sequential
        The block.  The batch norm layer is only included when
        ``self.batch_norm`` is true; previously it was always added even
        though the convolution bias was already keyed on that flag.
    """
    layers = [
        nn.Dropout(p=self.drop_prob),
        # The conv bias is redundant when a batch norm follows.
        nn.Conv2d(
            n_filters_before,
            n_filters,
            (filter_length, 1),
            stride=(conv_stride, 1),
            bias=not self.batch_norm,
        ),
    ]
    if self.batch_norm:
        layers.append(
            nn.BatchNorm2d(
                n_filters,
                momentum=self.batch_norm_alpha,
                affine=True,
                eps=1e-5,
            )
        )
    layers.extend(
        [
            Expression(self.later_nonlin),
            later_pool_class(
                kernel_size=(self.pool_time_length, 1),
                stride=(pool_stride, 1),
            ),
            Expression(self.later_pool_nonlin),
        ]
    )
    return nn.Sequential(*layers)
def create_two_path_net(in_chans, n_start_filters, later_strides):
    """Build the two-path network as an ``nn.Sequential``.

    Layer order: ``TwoPathStartBlock`` -> three (conv, batch norm, elu)
    stages with kernel (12, 1) -> 1x1 ``classifier`` conv with two output
    maps -> mean over axis 2 -> log-softmax over axis 1 -> squeeze axis 3.

    Parameters
    ----------
    in_chans : int
        Passed to ``TwoPathStartBlock``.
    n_start_filters : int
        Base filter count; ``conv_3`` expects ``3 * n_start_filters``
        input maps and all later convs use ``2 * n_start_filters``.
    later_strides : int
        Stride along axis 2 of ``conv_3`` .. ``conv_5``.

    Returns
    -------
    nn.Sequential
    """
    n_later_filters = n_start_filters * 2
    model = nn.Sequential()
    model.add_module('start_block',
                     TwoPathStartBlock(in_chans, n_start_filters))
    model.add_module(
        'conv_3',
        nn.Conv2d(n_start_filters * 3, n_later_filters, (12, 1),
                  stride=(later_strides, 1)))
    model.add_module('bnorm_3', nn.BatchNorm2d(n_later_filters))
    model.add_module('nonlin_3', Expression(elu))
    model.add_module(
        'conv_4',
        nn.Conv2d(n_later_filters, n_later_filters, (12, 1),
                  stride=(later_strides, 1)))
    model.add_module('bnorm_4', nn.BatchNorm2d(n_later_filters))
    model.add_module('nonlin_4', Expression(elu))
    model.add_module(
        'conv_5',
        nn.Conv2d(n_later_filters, n_later_filters, (12, 1),
                  stride=(later_strides, 1)))
    model.add_module('bnorm_5', nn.BatchNorm2d(n_later_filters))
    model.add_module('nonlin_5', Expression(elu))
    model.add_module('classifier', nn.Conv2d(n_later_filters, 2, (1, 1)))
    # keepdim=True keeps the tensor 4-D.  Without it the averaged axis is
    # dropped, the implicit log-softmax axis for a 3-D tensor is the batch
    # axis, and the final squeeze(3) has no axis 3 left to remove.
    model.add_module(
        'mean_class',
        Expression(lambda x: th.mean(x, dim=2, keepdim=True)))
    # Normalise over the class axis explicitly.
    model.add_module('softmax', nn.LogSoftmax(dim=1))
    model.add_module('squeeze', Expression(lambda x: x.squeeze(3)))
    return model
def to_mean_after_softmax(model):
    """Return a copy of ``model``'s layer list with a mean after the softmax.

    The children of ``model`` are re-registered (same objects, same names,
    same order) in a new ``nn.Sequential``.  Directly after the child named
    ``'softmax'`` a ``'mean_outputs'`` expression averaging over axis 2 is
    inserted.  ``model`` itself is left untouched.
    """
    model_meaned = nn.Sequential()
    for child_name, child in model.named_children():
        model_meaned.add_module(child_name, child)
        if child_name != 'softmax':
            continue
        model_meaned.add_module(
            'mean_outputs', Expression(lambda x: th.mean(x, dim=2)))
    return model_meaned
def to_linear_plus_minus_net(model):
    """Replace the softmax of ``model`` by a ``[-x, x]`` two-class output.

    Every child of ``model`` except the one named ``'softmax'`` is
    re-registered in a new ``nn.Sequential``; afterwards a
    ``'to_two_class'`` expression is appended that concatenates the negated
    and the original input along axis 1.
    """
    kept_children = [
        (child_name, child)
        for child_name, child in model.named_children()
        if child_name != 'softmax'
    ]
    model_sigmoid = nn.Sequential()
    for child_name, child in kept_children:
        model_sigmoid.add_module(child_name, child)
    model_sigmoid.add_module(
        'to_two_class', Expression(lambda x: th.cat([-x, x], dim=1)))
    return model_sigmoid
def conv_pool_block():
    """Return a conv -> nonlin -> max-pool block.

    Closure over ``n_filters``, ``filter_length``, ``nonlin``,
    ``pool_length`` and ``pool_stride`` of the enclosing scope.  The
    convolution maps ``n_filters // 2`` channels to the same number and
    both conv and pooling pad axis 2 by half their kernel length.
    """
    half_filters = n_filters // 2
    conv = nn.Conv2d(
        half_filters, half_filters, (filter_length, 1),
        stride=(1, 1), padding=(filter_length // 2, 0))
    activation = Expression(nonlin)
    pool = nn.MaxPool2d(
        kernel_size=(pool_length, 1), stride=(pool_stride, 1),
        padding=(pool_length // 2, 0))
    return nn.Sequential(conv, activation, pool)
def __init__(
    self,
    in_chans,
    n_start_filters,
):
    """Create the layers of the two start paths.

    Path "a": conv (32, 1) -> batch norm -> square -> average pool
    (42, 1) with stride (8, 1) -> safe_log.
    Path "b": conv (32, 1) with stride (4, 1) -> batch norm -> elu ->
    conv (12, 1) with stride (2, 1) -> batch norm -> elu.

    Parameters
    ----------
    in_chans : int
        Number of input channels of the first convolution of each path.
    n_start_filters : int
        Number of filters of the first convolutions; ``conv_2b`` doubles it.
    """
    super(TwoPathStartBlock, self).__init__()
    self.conv_1a = nn.Conv2d(in_chans, n_start_filters, (32, 1))
    self.bnorm_1a = nn.BatchNorm2d(n_start_filters)
    self.nonlin_1a = Expression(square)
    self.pool_1a = nn.AvgPool2d((42, 1), (8, 1))
    self.pool_nonlin_1a = Expression(safe_log)
    # Was hard-coded to 21 input channels while conv_1a used in_chans;
    # identical for in_chans == 21.
    # NOTE(review): assumes both paths receive the same input -- the
    # forward pass is not visible here, please confirm.
    self.conv_1b = nn.Conv2d(in_chans, n_start_filters, (32, 1),
                             stride=(4, 1))
    self.bnorm_1b = nn.BatchNorm2d(n_start_filters)
    self.nonlin_1b = Expression(elu)
    self.conv_2b = nn.Conv2d(n_start_filters, n_start_filters * 2, (12, 1),
                             stride=(2, 1))
    self.bnorm_2b = nn.BatchNorm2d(n_start_filters * 2)
    self.nonlin_2b = Expression(elu)
def create_network(self):
    """Wrap ``larger_model`` into a two-class log-softmax classifier.

    The feature model's output is cut to its first two entries along
    axis 1 and passed through a log-softmax over that axis.  When
    ``self.add_bnorm`` is set, ``add_bnorm_before_relu`` is applied to the
    assembled model.  The model is returned in eval mode.
    """
    stages = [
        larger_model(
            self.n_chans,
            self.n_time,
            final_fft=self.final_fft,
            constant_memory=False,
        ),
        Expression(lambda x: x[:,:2]),
        nn.LogSoftmax(dim=1),
    ]
    model = nn.Sequential(*stages)
    if self.add_bnorm:
        add_bnorm_before_relu(model)
    model.eval()
    return model
def __init__(self, n_start_filters, early_bnorm):
    """Create the convolution, normalisation and pooling layers.

    Parameters
    ----------
    n_start_filters : int
        Number of filters of every convolution in this block.
    early_bnorm : bool
        When true each ``bnorm_*`` attribute is a ``BatchNorm2d``;
        otherwise it is the plain ``identity`` function.
    """
    super(SplitStartBlock, self).__init__()

    def make_norm():
        # One fresh BatchNorm2d per call, or the shared identity function.
        if early_bnorm:
            return nn.BatchNorm2d(n_start_filters, )
        return identity

    self.conv_1a = nn.Conv2d(1, n_start_filters, (25, 1),
                             padding=(12, 0), stride=(4, 1))
    self.conv_1b = nn.Conv2d(1, n_start_filters, (25, 1), padding=(12, 0))
    self.early_bnorm = early_bnorm
    self.bnorm_1a = make_norm()
    self.bnorm_1b = make_norm()

    self.nonlin_1b = Expression(square)
    self.pool_1b = nn.AvgPool2d((41, 1), stride=(8, 1), padding=(20, 0))
    self.poolnonlin_1b = Expression(safe_log)

    self.conv_2c = nn.Conv2d(n_start_filters, n_start_filters, (25, 1),
                             padding=(12, 0))
    self.bnorm_2c = make_norm()
    self.nonlin_2c = Expression(square)
    self.pool_2c = nn.AvgPool2d((11, 1), stride=(2, 1), padding=(5, 0))
    self.poolnonlin_2c = Expression(safe_log)

    self.conv_2a = nn.Conv2d(n_start_filters, n_start_filters, (11, 1),
                             stride=(2, 1), padding=(5, 0))
    self.bnorm_2a = make_norm()
def __init__(self, n_filters_time, filter_time_length, n_filters_spat,
             pool_time_length, pool_time_stride, conv_nonlin, pool_mode,
             pool_nonlin, batch_norm, batch_norm_alpha, drop_prob,
             final_conv_length, **kwargs):
    """Build ``self.sequential``: split conv -> (batch norm) -> square ->
    average pool -> square -> dropout -> final 1-D convolution.

    ``conv_nonlin``, ``pool_mode``, ``pool_nonlin`` and
    ``batch_norm_alpha`` are accepted but not read by this constructor.
    Remaining keyword arguments go to the parent constructor, which is
    expected to provide ``self.input_size`` and ``self.output_size``.
    """
    super().__init__(**kwargs)
    self.sequential = Sequential()
    seq = self.sequential

    seq.add_module(
        'split_cnn',
        SplitConv(in_size=self.input_size,
                  middle_size=n_filters_time,
                  out_size=n_filters_spat,
                  time_kernel_size=filter_time_length,
                  input_in_rnn_format=False))
    if batch_norm:
        seq.add_module('batch_norm', BatchNorm1d(n_filters_spat))

    square_nonlin = Expression(square)
    seq.add_module('non_lin_0', square_nonlin)
    seq.add_module(
        'pool_1',
        AvgPool1d(kernel_size=pool_time_length, stride=pool_time_stride))
    # NOTE(review): the very same ``Expression(square)`` instance is
    # registered again after the pooling; a ``safe_log`` variant was left
    # commented out in the original -- confirm this is intended.
    seq.add_module('non_lin_1', square_nonlin)
    seq.add_module('dropout', Dropout(p=drop_prob))
    seq.add_module(
        'conv',
        nn.Conv1d(in_channels=n_filters_spat,
                  out_channels=self.output_size,
                  kernel_size=final_conv_length,
                  bias=True))
def create_network(self):
    """Build the ``ShallowFBCSPNet`` network without its last two modules.

    The parent network is created, its final two children are cut off and
    a ``"squeeze"`` expression calling ``th.squeeze`` is appended instead.
    Afterwards conv weights are (re-)initialised Xavier-uniform, biases
    with 0 and the batch norm with weight 1 / bias 0.
    """
    parent_model = ShallowFBCSPNet.create_network(self)
    model = parent_model[:-2]

    def final_output(x):
        # NOTE(review): squeezes every size-1 axis, which would include the
        # batch axis for a batch of one -- confirm this is acceptable.
        return th.squeeze(x)

    model.add_module("squeeze", Expression(final_output))

    use_bnorm = self.batch_norm
    split = self.split_first_layer

    init.xavier_uniform_(model.conv_time.weight, gain=1)
    # conv_time has no bias only for "no split layer + batch norm"
    if split or not use_bnorm:
        init.constant_(model.conv_time.bias, 0)
    if split:
        init.xavier_uniform_(model.conv_spat.weight, gain=1)
        if not use_bnorm:
            init.constant_(model.conv_spat.bias, 0)
    if use_bnorm:
        init.constant_(model.bnorm.weight, 1)
        init.constant_(model.bnorm.bias, 0)
    classifier = model.conv_classifier
    init.xavier_uniform_(classifier.weight, gain=1)
    init.constant_(classifier.bias, 0)
    return model
def run_exp(
    data_folders,
    n_recordings,
    sensor_types,
    n_chans,
    max_recording_mins,
    sec_to_cut,
    duration_recording_mins,
    test_recording_mins,
    max_abs_val,
    sampling_freq,
    divisor,
    test_on_eval,
    n_folds,
    i_test_fold,
    shuffle,
    model_name,
    n_start_chans,
    n_chan_factor,
    input_time_length,
    final_conv_length,
    model_constraint,
    init_lr,
    batch_size,
    max_epochs,
    cuda,
):
    """Load the data, build the requested model and run one experiment.

    Steps: assemble the preprocessing chain (cut edges, cut to duration,
    optional clipping, resampling, optional division), load and split the
    data, create the model selected by ``model_name`` ('shallow', 'deep',
    'deep_smac', 'deep_smac_bnorm', 'shallow_smac' or 'linear'), convert it
    to a dense prediction model, and train it with Adam on crops.

    Parameters
    ----------
    sec_to_cut : number
        Seconds removed from the start and from the end of each recording.
    test_on_eval : bool
        If true the 'eval' set is loaded as test set and the training data
        is only split into train / valid; otherwise the training data is
        split into train / valid / test.
    model_constraint : str or None
        ``None`` or ``'defaultnorm'`` (-> ``MaxNormDefaultConstraint``).

    The remaining parameters are forwarded to the dataset, splitter, model,
    iterator, optimizer and stop criterion as named.

    Returns
    -------
    Experiment
        The experiment object after ``run()`` has finished.
    """
    import torch.backends.cudnn as cudnn
    cudnn.benchmark = True

    def cut_start_and_end(data, fs):
        # An explicit end index keeps the full signal for a zero-length
        # cut; ``data[:, n:-n]`` would be empty for n == 0.
        n_cut = int(sec_to_cut * fs)
        return data[:, n_cut:data.shape[1] - n_cut], fs

    preproc_functions = []
    preproc_functions.append(cut_start_and_end)
    preproc_functions.append(lambda data, fs: (
        data[:, :int(duration_recording_mins * 60 * fs)], fs))
    if max_abs_val is not None:
        preproc_functions.append(
            lambda data, fs: (np.clip(data, -max_abs_val, max_abs_val), fs))
    preproc_functions.append(lambda data, fs: (
        resampy.resample(data, fs, sampling_freq, axis=1,
                         filter='kaiser_fast'),
        sampling_freq))
    if divisor is not None:
        preproc_functions.append(lambda data, fs: (data / divisor, fs))

    dataset = DiagnosisSet(n_recordings=n_recordings,
                           max_recording_mins=max_recording_mins,
                           preproc_functions=preproc_functions,
                           data_folders=data_folders,
                           train_or_eval='train',
                           sensor_types=sensor_types)
    if test_on_eval:
        if test_recording_mins is None:
            test_recording_mins = duration_recording_mins
        test_preproc_functions = copy(preproc_functions)
        # Index 1 is the "cut to duration" step appended above.
        test_preproc_functions[1] = lambda data, fs: (
            data[:, :int(test_recording_mins * 60 * fs)], fs)
        test_dataset = DiagnosisSet(n_recordings=n_recordings,
                                    max_recording_mins=None,
                                    preproc_functions=test_preproc_functions,
                                    data_folders=data_folders,
                                    train_or_eval='eval',
                                    sensor_types=sensor_types)
    X, y = dataset.load()
    max_shape = np.max([list(x.shape) for x in X], axis=0)
    assert max_shape[1] == int(duration_recording_mins * sampling_freq * 60)
    if test_on_eval:
        test_X, test_y = test_dataset.load()
        max_shape = np.max([list(x.shape) for x in test_X], axis=0)
        assert max_shape[1] == int(test_recording_mins * sampling_freq * 60)
    if not test_on_eval:
        splitter = TrainValidTestSplitter(n_folds, i_test_fold,
                                          shuffle=shuffle)
        train_set, valid_set, test_set = splitter.split(X, y)
    else:
        splitter = TrainValidSplitter(n_folds, i_valid_fold=i_test_fold,
                                      shuffle=shuffle)
        train_set, valid_set = splitter.split(X, y)
        test_set = SignalAndTarget(test_X, test_y)
        del test_X, test_y
    del X, y  # shouldn't be necessary, but just to make sure

    set_random_seeds(seed=20170629, cuda=cuda)
    n_classes = 2
    if model_name == 'shallow':
        model = ShallowFBCSPNet(
            in_chans=n_chans, n_classes=n_classes,
            n_filters_time=n_start_chans,
            n_filters_spat=n_start_chans,
            input_time_length=input_time_length,
            final_conv_length=final_conv_length).create_network()
    elif model_name == 'deep':
        model = Deep4Net(
            n_chans, n_classes,
            n_filters_time=n_start_chans,
            n_filters_spat=n_start_chans,
            input_time_length=input_time_length,
            n_filters_2=int(n_start_chans * n_chan_factor),
            n_filters_3=int(n_start_chans * (n_chan_factor**2.0)),
            n_filters_4=int(n_start_chans * (n_chan_factor**3.0)),
            final_conv_length=final_conv_length,
            stride_before_pool=True).create_network()
    elif model_name in ('deep_smac', 'deep_smac_bnorm'):
        # Both names share the fixed hyper-parameters below and differ only
        # in batch norm.  Previously only 'deep_smac' reached this branch,
        # so 'deep_smac_bnorm' was rejected as an unknown model name.
        if model_name == 'deep_smac':
            do_batch_norm = False
        else:
            assert model_name == 'deep_smac_bnorm'
            do_batch_norm = True
        double_time_convs = False
        drop_prob = 0.244445
        filter_length_2 = 12
        filter_length_3 = 14
        filter_length_4 = 12
        filter_time_length = 21
        final_conv_length = 1
        first_nonlin = elu
        first_pool_mode = 'mean'
        first_pool_nonlin = identity
        later_nonlin = elu
        later_pool_mode = 'mean'
        later_pool_nonlin = identity
        n_filters_factor = 1.679066
        n_filters_start = 32
        pool_time_length = 1
        pool_time_stride = 2
        split_first_layer = True
        # These fixed values deliberately override the function arguments.
        n_chan_factor = n_filters_factor
        n_start_chans = n_filters_start
        model = Deep4Net(
            n_chans, n_classes,
            n_filters_time=n_start_chans,
            n_filters_spat=n_start_chans,
            input_time_length=input_time_length,
            n_filters_2=int(n_start_chans * n_chan_factor),
            n_filters_3=int(n_start_chans * (n_chan_factor**2.0)),
            n_filters_4=int(n_start_chans * (n_chan_factor**3.0)),
            final_conv_length=final_conv_length,
            batch_norm=do_batch_norm,
            double_time_convs=double_time_convs,
            drop_prob=drop_prob,
            filter_length_2=filter_length_2,
            filter_length_3=filter_length_3,
            filter_length_4=filter_length_4,
            filter_time_length=filter_time_length,
            first_nonlin=first_nonlin,
            first_pool_mode=first_pool_mode,
            first_pool_nonlin=first_pool_nonlin,
            later_nonlin=later_nonlin,
            later_pool_mode=later_pool_mode,
            later_pool_nonlin=later_pool_nonlin,
            pool_time_length=pool_time_length,
            pool_time_stride=pool_time_stride,
            split_first_layer=split_first_layer,
            stride_before_pool=True).create_network()
    elif model_name == 'shallow_smac':
        conv_nonlin = identity
        do_batch_norm = True
        drop_prob = 0.328794
        filter_time_length = 56
        final_conv_length = 22
        n_filters_spat = 73
        n_filters_time = 24
        pool_mode = 'max'
        pool_nonlin = identity
        pool_time_length = 84
        pool_time_stride = 3
        split_first_layer = True
        model = ShallowFBCSPNet(
            in_chans=n_chans, n_classes=n_classes,
            n_filters_time=n_filters_time,
            n_filters_spat=n_filters_spat,
            input_time_length=input_time_length,
            final_conv_length=final_conv_length,
            conv_nonlin=conv_nonlin,
            batch_norm=do_batch_norm,
            drop_prob=drop_prob,
            filter_time_length=filter_time_length,
            pool_mode=pool_mode,
            pool_nonlin=pool_nonlin,
            pool_time_length=pool_time_length,
            pool_time_stride=pool_time_stride,
            split_first_layer=split_first_layer,
        ).create_network()
    elif model_name == 'linear':
        model = nn.Sequential()
        model.add_module("conv_classifier",
                         nn.Conv2d(n_chans, n_classes, (600, 1)))
        # Explicit class axis; the conv output is 4-D, for which the
        # implicit (deprecated) choice was axis 1 as well.
        model.add_module('softmax', nn.LogSoftmax(dim=1))
        model.add_module('squeeze', Expression(lambda x: x.squeeze(3)))
    else:
        assert False, "unknown model name {:s}".format(model_name)
    to_dense_prediction_model(model)
    log.info("Model:\n{:s}".format(str(model)))
    if cuda:
        model.cuda()
    # determine output size
    test_input = np_to_var(
        np.ones((2, n_chans, input_time_length, 1), dtype=np.float32))
    if cuda:
        test_input = test_input.cuda()
    log.info("In shape: {:s}".format(
        str(test_input.cpu().data.numpy().shape)))

    out = model(test_input)
    out_shape = out.cpu().data.numpy().shape
    log.info("Out shape: {:s}".format(str(out_shape)))
    n_preds_per_input = out_shape[2]
    log.info("{:d} predictions per input/trial".format(n_preds_per_input))
    iterator = CropsFromTrialsIterator(batch_size=batch_size,
                                       input_time_length=input_time_length,
                                       n_preds_per_input=n_preds_per_input)
    optimizer = optim.Adam(model.parameters(), lr=init_lr)

    # The loss is computed on the predictions averaged over axis 2.
    loss_function = lambda preds, targets: F.nll_loss(
        th.mean(preds, dim=2, keepdim=False), targets)

    if model_constraint is not None:
        assert model_constraint == 'defaultnorm'
        model_constraint = MaxNormDefaultConstraint()
    monitors = [
        LossMonitor(),
        MisclassMonitor(col_suffix='sample_misclass'),
        CroppedDiagnosisMonitor(input_time_length, n_preds_per_input),
        RuntimeMonitor(),
    ]
    stop_criterion = MaxEpochs(max_epochs)
    batch_modifier = None
    run_after_early_stop = True
    exp = Experiment(model, train_set, valid_set, test_set, iterator,
                     loss_function, optimizer, model_constraint,
                     monitors, stop_criterion,
                     remember_best_column='valid_misclass',
                     run_after_early_stop=run_after_early_stop,
                     batch_modifier=batch_modifier,
                     cuda=cuda)
    exp.run()
    return exp
def create_network(self):
    """Assemble the deep four-block ConvNet as an ``nn.Sequential``.

    Layer order: first block (temporal conv, optional spatial conv,
    optional batch norm, nonlin, pool, pool nonlin) -> blocks 2-4 (dropout,
    conv, optional batch norm, nonlin, pool, pool nonlin) ->
    ``conv_classifier`` -> log-softmax over axis 1 -> squeeze.

    With ``stride_before_pool`` the stride ``pool_time_stride`` is applied
    by the convolutions and the pooling uses stride 1; otherwise the other
    way round.

    Side effect: when ``self.final_conv_length == 'auto'`` the attribute is
    overwritten with the size of axis 2 of the feature extractor's output
    for an all-ones input.

    Returns
    -------
    nn.Sequential
        Initialised network, in eval mode.
    """
    if self.stride_before_pool:
        conv_stride = self.pool_time_stride
        pool_stride = 1
    else:
        conv_stride = 1
        pool_stride = self.pool_time_stride
    pool_class_dict = dict(max=nn.MaxPool2d, mean=AvgPool2dWithConv)
    first_pool_class = pool_class_dict[self.first_pool_mode]
    later_pool_class = pool_class_dict[self.later_pool_mode]
    model = nn.Sequential()
    if self.split_first_layer:
        model.add_module('dimshuffle', Expression(_transpose_time_to_spat))
        model.add_module(
            'conv_time',
            nn.Conv2d(1, self.n_filters_time,
                      (self.filter_time_length, 1), stride=1))
        model.add_module(
            'conv_spat',
            nn.Conv2d(self.n_filters_time, self.n_filters_spat,
                      (1, self.in_chans), stride=(conv_stride, 1),
                      bias=not self.batch_norm))
        n_filters_conv = self.n_filters_spat
    else:
        model.add_module(
            'conv_time',
            nn.Conv2d(self.in_chans, self.n_filters_time,
                      (self.filter_time_length, 1),
                      stride=(conv_stride, 1),
                      bias=not self.batch_norm))
        n_filters_conv = self.n_filters_time
    if self.batch_norm:
        model.add_module(
            'bnorm',
            nn.BatchNorm2d(n_filters_conv, momentum=self.batch_norm_alpha,
                           affine=True, eps=1e-5))
    model.add_module('conv_nonlin', Expression(self.first_nonlin))
    model.add_module(
        'pool',
        first_pool_class(kernel_size=(self.pool_time_length, 1),
                         stride=(pool_stride, 1)))
    model.add_module('pool_nonlin', Expression(self.first_pool_nonlin))

    def add_conv_pool_block(model, n_filters_before, n_filters,
                            filter_length, block_nr):
        """Append one later block; module names end in ``_<block_nr>``."""
        suffix = '_{:d}'.format(block_nr)
        model.add_module('drop' + suffix, nn.Dropout(p=self.drop_prob))
        model.add_module(
            'conv' + suffix,
            nn.Conv2d(n_filters_before, n_filters, (filter_length, 1),
                      stride=(conv_stride, 1), bias=not self.batch_norm))
        if self.batch_norm:
            model.add_module(
                'bnorm' + suffix,
                nn.BatchNorm2d(n_filters, momentum=self.batch_norm_alpha,
                               affine=True, eps=1e-5))
        model.add_module('nonlin' + suffix, Expression(self.later_nonlin))
        model.add_module(
            'pool' + suffix,
            later_pool_class(kernel_size=(self.pool_time_length, 1),
                             stride=(pool_stride, 1)))
        model.add_module('pool_nonlin' + suffix,
                         Expression(self.later_pool_nonlin))

    add_conv_pool_block(model, n_filters_conv, self.n_filters_2,
                        self.filter_length_2, 2)
    add_conv_pool_block(model, self.n_filters_2, self.n_filters_3,
                        self.filter_length_3, 3)
    add_conv_pool_block(model, self.n_filters_3, self.n_filters_4,
                        self.filter_length_4, 4)

    # Eval mode for the shape probe below.
    model.eval()
    if self.final_conv_length == 'auto':
        out = model(np_to_var(np.ones(
            (1, self.in_chans, self.input_time_length, 1),
            dtype=np.float32)))
        n_out_time = out.cpu().data.numpy().shape[2]
        self.final_conv_length = n_out_time
    model.add_module(
        'conv_classifier',
        nn.Conv2d(self.n_filters_4, self.n_classes,
                  (self.final_conv_length, 1), bias=True))
    # Explicit class axis; for the 4-D conv output the implicit
    # (deprecated) choice was axis 1 too, so results are unchanged.
    model.add_module('softmax', nn.LogSoftmax(dim=1))
    model.add_module('squeeze', Expression(_squeeze_final_output))

    # Initialization, xavier is same as in our paper...
    # was default from lasagne
    # The in-place ``*_`` functions replace the deprecated
    # ``init.xavier_uniform`` / ``init.constant`` aliases.
    init.xavier_uniform_(model.conv_time.weight, gain=1)
    # maybe no bias in case of no split layer and batch norm
    if self.split_first_layer or (not self.batch_norm):
        init.constant_(model.conv_time.bias, 0)
    if self.split_first_layer:
        init.xavier_uniform_(model.conv_spat.weight, gain=1)
        if not self.batch_norm:
            init.constant_(model.conv_spat.bias, 0)
    if self.batch_norm:
        init.constant_(model.bnorm.weight, 1)
        init.constant_(model.bnorm.bias, 0)
    param_dict = dict(model.named_parameters())
    for block_nr in range(2, 5):
        conv_weight = param_dict['conv_{:d}.weight'.format(block_nr)]
        init.xavier_uniform_(conv_weight, gain=1)
        if not self.batch_norm:
            conv_bias = param_dict['conv_{:d}.bias'.format(block_nr)]
            init.constant_(conv_bias, 0)
        else:
            bnorm_weight = param_dict['bnorm_{:d}.weight'.format(block_nr)]
            bnorm_bias = param_dict['bnorm_{:d}.bias'.format(block_nr)]
            init.constant_(bnorm_weight, 1)
            init.constant_(bnorm_bias, 0)

    init.xavier_uniform_(model.conv_classifier.weight, gain=1)
    init.constant_(model.conv_classifier.bias, 0)

    # Start in eval mode
    model.eval()
    return model
def create_model(in_channels, num_classes, cuda=True):
    """Create model, optimizer, scheduler, loss and metric from ``cfg``.

    The architecture is selected by ``cfg.TRAINING.MODEL`` (case
    insensitive): 'rnn', any name containing 'deep4' (optionally with
    'wide' / 'narrow' for the pooling size), 'deep5', 'shallow' or
    'hybrid'.  For the convolutional models the softmax and everything
    after it is removed, a squeeze / transpose expression is appended and
    the model is converted to a dense prediction model.

    Parameters
    ----------
    in_channels : int
        Number of input channels.
    num_classes : int
        Number of output maps of the convolutional models.
    cuda : bool
        Move the model to the GPU when true.

    Returns
    -------
    tuple
        ``(model, optimizer, scheduler, loss_fun, metric)``; the model is
        in eval mode.

    Raises
    ------
    NotImplementedError
        For the model name 'tcn'.
    AssertionError
        For any other unknown model name.
    """
    def squeeze_out(x):
        # Drop the empty last axis and swap axes 1 and 2.
        assert x.size()[1] == num_classes and x.size()[3] == 1
        return x.squeeze(3).transpose(1, 2)

    def to_dense_regression_model(network):
        """Strip the softmax (and later modules), squeeze, make dense."""
        stripped = nn.Sequential()
        for name, module in network.named_children():
            if name == 'softmax':
                break
            stripped.add_module(name, module)
        # remove empty final dimension and permute output shape
        stripped.add_module('squeeze', Expression(squeeze_out))
        to_dense_prediction_model(stripped)
        return stripped

    model_name = cfg.TRAINING.MODEL.lower()
    if model_name == 'rnn':
        model = RNNs(in_channels=in_channels)
    elif 'deep4' in model_name:
        if 'wide' in model_name:
            pool_length = 4
            pool_stride = 4
        elif 'narrow' in model_name:
            pool_length = 2
            pool_stride = 2
        else:
            pool_length = 3
            pool_stride = 3
        model = to_dense_regression_model(
            Deep4Net(in_chans=in_channels, n_classes=num_classes,
                     input_time_length=cfg.TRAINING.CROP_LEN,
                     pool_time_length=pool_length,
                     pool_time_stride=pool_stride,
                     final_conv_length=2,
                     stride_before_pool=True).create_network())
    elif model_name == 'deep5':
        model = to_dense_regression_model(
            Deep5Net(in_chans=in_channels, n_classes=num_classes,
                     input_time_length=cfg.TRAINING.CROP_LEN,
                     final_conv_length=2,
                     stride_before_pool=True).create_network())
    elif model_name == 'shallow':
        # The stripped network is now actually used.  Previously it was
        # built but never assigned, so the softmax stayed in place and the
        # squeeze / transpose step was lost for the shallow model.
        model = to_dense_regression_model(
            Shallow(in_chans=in_channels, n_classes=num_classes,
                    input_time_length=cfg.TRAINING.CROP_LEN,
                    final_conv_length=2).create_network())
    elif model_name == 'hybrid':
        model = Hybrid(in_channels=in_channels)
    elif model_name == 'tcn':
        raise NotImplementedError
    else:
        assert False, f"Unknown Model {cfg.TRAINING.MODEL}"

    optimizer = optim.Adam(model.parameters(),
                           lr=cfg.OPTIMIZATION.BASE_LR,
                           weight_decay=cfg.OPTIMIZATION.WEIGHT_DECAY)
    scheduler = CosineAnnealingLR(optimizer, T_max=cfg.TRAINING.MAX_EPOCHS)
    if cuda:
        model.cuda()
    model.eval()

    # Pearson correlation between targets and predictions.
    metric = lambda targets, predictions: np.corrcoef(targets, predictions)[0, 1]
    loss_fun = mse_loss
    logger.info(model)
    return model, optimizer, scheduler, loss_fun, metric
def create_network(self):
    """Assemble the sub-network: conv front end -> reshape -> ``fc_1``.

    Layer order: (dimshuffle ->) temporal conv (-> spatial conv when
    ``split_first_layer``) -> optional batch norm -> ``conv_nonlin`` ->
    dropout -> (second conv block when ``self.structure == "deep"``) ->
    reshape -> linear layer with ``self.fc1_out_features`` outputs.

    The first layers are initialised Xavier-uniform, the optional second
    conv with Kaiming-normal and ``fc_1`` with Kaiming-uniform.  The model
    is returned without switching it to eval mode.
    """
    conv_stride = self.pool_time_stride if self.stride_before_pool else 1
    bias_in_conv = not self.batch_norm

    model = nn.Sequential()
    if self.split_first_layer:
        model.add_module("dimshuffle", Expression(_transpose_time_to_spat))
        model.add_module(
            "conv_time",
            nn.Conv2d(1, self.n_filters_time,
                      (self.filter_time_length, 1), stride=1),
        )
        model.add_module(
            "conv_spat",
            nn.Conv2d(self.n_filters_time, self.n_filters_spat,
                      (1, self.in_chans), stride=1, bias=bias_in_conv),
        )
        n_filters_conv = self.n_filters_spat
    else:
        model.add_module(
            "conv_time",
            nn.Conv2d(self.in_chans, self.n_filters_time,
                      (self.filter_time_length, 1), stride=1,
                      bias=bias_in_conv),
        )
        n_filters_conv = self.n_filters_time
    # Input size of the linear layer; semi-hardcoded at the moment.
    # NOTE(review): the "- 4" presumably matches one specific
    # filter_time_length -- confirm before changing kernel sizes.
    n_filters_op = n_filters_conv * (self.input_time_length - 4)

    if self.batch_norm:
        model.add_module(
            "bnorm",
            nn.BatchNorm2d(n_filters_conv, momentum=self.batch_norm_alpha,
                           affine=True),
        )
    model.add_module("conv_nonlin", Expression(self.conv_nonlin))
    model.add_module("drop", nn.Dropout(p=self.drop_prob))

    def add_conv_pool_block(model, n_filters_before, n_filters,
                            filter_length, block_nr):
        """Append conv (-> batch norm) -> nonlin -> dropout for a block."""
        model.add_module(
            f"conv_{block_nr}",
            nn.Conv2d(n_filters_before, n_filters, (filter_length, 1),
                      stride=(conv_stride, 1), bias=not self.batch_norm))
        if self.batch_norm:
            model.add_module(
                f"bnorm_{block_nr}",
                nn.BatchNorm2d(n_filters, momentum=self.batch_norm_alpha,
                               affine=True, eps=1e-5))
        model.add_module(f"nonlin_{block_nr}", Expression(self.conv_nonlin))
        model.add_module(f"drop_{block_nr}", nn.Dropout(p=self.drop_prob))

    is_deep = self.structure == "deep"
    if is_deep:
        add_conv_pool_block(model, n_filters_conv, self.n_filters_2,
                            self.filter_length_2, 2)
        # semi-hardcoded at the moment
        n_filters_op = self.n_filters_2 * (self.input_time_length - 23)

    model.add_module('reshape', Expression(reshape_tensor))
    model.add_module(
        'fc_1', nn.Linear(n_filters_op, self.fc1_out_features, bias=True))

    # Initialization is xavier for initial layers
    init.xavier_uniform_(model.conv_time.weight, gain=1)
    # maybe no bias in case of no split layer and batch norm
    if self.split_first_layer or bias_in_conv:
        init.constant_(model.conv_time.bias, 0)
    if self.split_first_layer:
        init.xavier_uniform_(model.conv_spat.weight, gain=1)
        if bias_in_conv:
            init.constant_(model.conv_spat.bias, 0)
    if self.batch_norm:
        init.constant_(model.bnorm.weight, 1)
        init.constant_(model.bnorm.bias, 0)

    named_params = dict(model.named_parameters())
    if is_deep:
        init.kaiming_normal_(named_params['conv_2.weight'])  # He init
        if self.batch_norm:
            init.constant_(named_params['bnorm_2.weight'], 1)
            init.constant_(named_params['bnorm_2.bias'], 0)
        else:
            init.constant_(named_params['conv_2.bias'], 0)

    init.kaiming_uniform_(named_params['fc_1.weight'])
    return model
def __init__(self,
             n_classes,
             in_chans_1,
             input_time_1,
             SubNet_1_params,
             in_chans_2,
             input_time_2,
             SubNet_2_params,
             linear_dims,
             drop_prob,
             nonlin,
             fc1_out_features,
             fc2_out_features,
             gru_hidden_size,
             gru_n_layers=1):
    """
    BiModal CNN network receiving 2 different data types corresponding to
    a single ground truth (e.g. EEG and fNIRS).

    Two SubNets are initialised here, together with the layers used after
    them: a GRU whose input size is the sum of both SubNet output sizes,
    dropout, a linear layer mapping the GRU hidden state to the classes,
    and a log softmax.

    Parameters
    :param: n_classes (int) number of classes in classification task
    :param: in_chans_1 (int) number of channels in data of modality 1
    :param: input_time_1 (int) number of time samples in data of modality 1
    :param: SubNet_1_params (dict) parameters for initiating subnet 1;
            every key is stored on the instance as ``SN1_<key>``
    :param: in_chans_2 (int) number of channels in data of modality 2
    :param: input_time_2 (int) number of time samples in data of modality 2
    :param: SubNet_2_params (dict) parameters for initiating subnet 2;
            every key is stored on the instance as ``SN2_<key>``
    :param: linear_dims (int) dimension of linear layer
    :param: drop_prob (float) dropout probability
    :param: nonlin (th.nn.functional) activation function
    :param: fc1_out_features (int) output dimension of subnet 1 linear layer
    :param: fc2_out_features (int) output dimension of subnet 2 linear layer
    :param: gru_hidden_size (int) size of GRU hidden layer
    :param: gru_n_layers (int) number of GRU hidden layers
    """
    # Initialise the parent class before any attribute is assigned, as
    # required for nn.Module subclasses.
    super(BiModalNet, self).__init__()

    self.n_classes = n_classes

    self.in_chans_1 = in_chans_1
    self.input_time_1 = input_time_1
    for key in SubNet_1_params:
        setattr(self, f"SN1_{key}", SubNet_1_params[key])

    self.in_chans_2 = in_chans_2
    self.input_time_2 = input_time_2
    for key in SubNet_2_params:
        setattr(self, f"SN2_{key}", SubNet_2_params[key])

    self.linear_dims = linear_dims
    self.drop_prob = drop_prob
    self.fc1_out_features = fc1_out_features
    self.fc2_out_features = fc2_out_features
    # Both SubNet outputs are concatenated before the GRU.
    self.fused_dimension = fc1_out_features + fc2_out_features
    self.gru_hidden_size = gru_hidden_size
    self.gru_n_layers = gru_n_layers

    self.subnet_1 = SubNet(
        in_chans=self.in_chans_1,
        n_classes=self.n_classes,
        input_time_length=self.input_time_1,
        n_filters_time=self.SN1_n_filters_time,
        filter_time_length=self.SN1_filter_time_length,
        n_filters_spat=self.SN1_n_filters_spat,
        n_filters_2=self.SN1_n_filters_2,
        filter_length_2=self.SN1_filter_length_2,
        pool_time_length=self.SN1_pool_time_length,
        pool_time_stride=self.SN1_pool_time_stride,
        final_conv_length='auto',
        conv_nonlin=self.SN1_conv_nonlin,
        pool_mode=self.SN1_pool_mode,
        pool_nonlin=self.SN1_pool_nonlin,
        split_first_layer=self.SN1_split_first_layer,
        batch_norm=self.SN1_batch_norm,
        batch_norm_alpha=self.SN1_batch_norm_alpha,
        drop_prob=self.SN1_drop_prob,
        structure=self.SN1_structure,
        fc1_out_features=self.fc1_out_features).create_network()

    # NOTE(review): this call passes `fc2_out_features=` whereas subnet 1
    # passes `fc1_out_features=`. Confirm that SubNet.__init__ accepts
    # both keywords and which one sizes its final linear layer.
    self.subnet_2 = SubNet(
        in_chans=self.in_chans_2,
        n_classes=self.n_classes,
        input_time_length=self.input_time_2,
        n_filters_time=self.SN2_n_filters_time,
        filter_time_length=self.SN2_filter_time_length,
        n_filters_spat=self.SN2_n_filters_spat,
        n_filters_2=self.SN2_n_filters_2,
        filter_length_2=self.SN2_filter_length_2,
        pool_time_length=self.SN2_pool_time_length,
        pool_time_stride=self.SN2_pool_time_stride,
        final_conv_length='auto',
        conv_nonlin=self.SN2_conv_nonlin,
        pool_mode=self.SN2_pool_mode,
        pool_nonlin=self.SN2_pool_nonlin,
        split_first_layer=self.SN2_split_first_layer,
        batch_norm=self.SN2_batch_norm,
        batch_norm_alpha=self.SN2_batch_norm_alpha,
        drop_prob=self.SN2_drop_prob,
        structure=self.SN2_structure,
        fc2_out_features=self.fc2_out_features).create_network()

    self.reshape_tensor = reshape_4_lstm  # works for GRU also
    self.gru = nn.GRU(input_size=self.fused_dimension,
                      hidden_size=self.gru_hidden_size,
                      num_layers=self.gru_n_layers,
                      batch_first=True)
    self.nonlin = nonlin
    self.fused_dp = nn.Dropout(p=self.drop_prob)
    self.fused_linear = nn.Linear(self.gru_hidden_size,
                                  self.n_classes,
                                  bias=True)
    self.softmax = nn.LogSoftmax(dim=1)
    # useful for debugging tensor/kernel dimension mismatches
    self.size = Expression(tensor_size)
def create_network(self):
    """Build the residual network and return it as an ``nn.Sequential``.

    Structure: first convolution (optionally split into temporal and
    spatial convolutions), batch norm, non-linearity, seven stages of
    ``ResidualBlock`` modules named ``res_<stage>_<layer>``, a
    convolutional classifier, log softmax over the class dimension and a
    final squeeze expression.

    The temporal dilation doubles from one stage to the next. Stage 2
    doubles the number of filters, stage 3 multiplies it by 1.5, and the
    remaining stages keep it. Stage 7 consists of a single block.

    If ``self.final_pool_length == 'auto'`` it is replaced by the time
    length the residual stack produces for a dummy input of shape
    ``(1, in_chans, input_time_length, 1)``.

    Returns:
        nn.Sequential: the initialised model, in eval mode.
    """
    print('creating ResNet!')
    model = nn.Sequential()
    if self.split_first_layer:
        model.add_module('dimshuffle', Expression(_transpose_time_to_spat))
        model.add_module(
            'conv_time',
            nn.Conv2d(1,
                      self.n_first_filters, (self.first_filter_length, 1),
                      stride=1,
                      padding=(self.first_filter_length // 2, 0)))
        model.add_module(
            'conv_spat',
            nn.Conv2d(self.n_first_filters,
                      self.n_first_filters, (1, self.in_chans),
                      stride=(1, 1),
                      bias=False))
    else:
        model.add_module(
            'conv_time',
            nn.Conv2d(
                self.in_chans,
                self.n_first_filters,
                (self.first_filter_length, 1),
                stride=(1, 1),
                padding=(self.first_filter_length // 2, 0),
                bias=False,
            ))
    n_filters_conv = self.n_first_filters
    model.add_module(
        'bnorm',
        nn.BatchNorm2d(n_filters_conv,
                       momentum=self.batch_norm_alpha,
                       affine=True,
                       eps=1e-5),
    )
    model.add_module('conv_nonlin', Expression(self.nonlinearity))

    # The same array object is mutated in place between stages, exactly as
    # in the hand-unrolled version this replaces.
    cur_dilation = np.array([1, 1])
    n_cur_filters = n_filters_conv

    # Stage 1: all layers keep the number of filters.
    i_block = 1
    for i_layer in range(self.n_layers_per_block):
        model.add_module(
            'res_{:d}_{:d}'.format(i_block, i_layer),
            ResidualBlock(n_cur_filters,
                          n_cur_filters,
                          dilation=cur_dilation))

    # Stages 2-7 as (filter growth factor or None, has follow-up layers).
    later_stages = (
        (2, True),
        (1.5, True),
        (None, True),
        (None, True),
        (None, True),
        (None, False),
    )
    for growth, has_followup_layers in later_stages:
        i_block += 1
        cur_dilation[0] *= 2
        if growth is None:
            n_out_filters = n_cur_filters
        else:
            n_out_filters = int(growth * n_cur_filters)
        model.add_module(
            'res_{:d}_{:d}'.format(i_block, 0),
            ResidualBlock(
                n_cur_filters,
                n_out_filters,
                dilation=cur_dilation,
            ))
        n_cur_filters = n_out_filters
        if has_followup_layers:
            for i_layer in range(1, self.n_layers_per_block):
                model.add_module(
                    'res_{:d}_{:d}'.format(i_block, i_layer),
                    ResidualBlock(n_cur_filters,
                                  n_cur_filters,
                                  dilation=cur_dilation))

    # Eval mode for the dummy forward pass below.
    model.eval()
    if self.final_pool_length == 'auto':
        print('Final Pool length is auto!')
        out = model(
            np_to_var(
                np.ones((1, self.in_chans, self.input_time_length, 1),
                        dtype=np.float32)))
        n_out_time = out.cpu().data.numpy().shape[2]
        self.final_pool_length = n_out_time

    # The classifier convolution spans the whole remaining time axis.
    model.add_module(
        'conv_classifier',
        nn.Conv2d(n_cur_filters,
                  self.n_classes, (self.final_pool_length, 1),
                  bias=True))
    # Explicit class dimension; the implicit-dim form is deprecated and
    # resolves to dim=1 for 4-D input anyway.
    model.add_module('softmax', nn.LogSoftmax(dim=1))
    model.add_module('squeeze', Expression(_squeeze_final_output))

    # Initialize all weights
    model.apply(
        lambda module: weights_init(module, self.conv_weight_init_fn))

    # Start in eval mode
    model.eval()
    return model
input_time_length=input_time_length).create_network() # remove softmax new_model = nn.Sequential() for name, module in model.named_children(): if name == 'softmax': continue new_model.add_module(name, module) # lets remove empty final dimension def squeeze_out(x): # Remove single "class" dimension assert x.size()[1] == 1 return x[:, 0] new_model.add_module('squeeze_again', Expression(squeeze_out)) model = new_model if cuda: model.cuda() if not ResNet: to_dense_prediction_model(model) start_param_values = deepcopy(new_model.state_dict()) # %% setup optimizer -> new for each x-val fold from torch import optim # %% # determine output size from braindecode.torch_ext.util import np_to_var
def create_network(self):
    """Register all layers of the shallow network on ``self``.

    Adds, in order: the first convolution (optionally split into a
    temporal and a spatial convolution), optional batch norm, the
    convolution non-linearity, pooling, the pooling non-linearity,
    dropout, the classifier convolution, log softmax over dim 1 and a
    final squeeze expression.

    If ``self.final_conv_length == "auto"`` it is replaced by the time
    length that the layers registered so far produce for a dummy input of
    shape ``(1, in_chans, input_time_length, 1)``.

    Convolution weights are Xavier-initialised, biases set to zero, and
    the batch norm scale/shift set to one/zero.
    """
    pooling_types = {"max": nn.MaxPool2d, "mean": nn.AvgPool2d}
    pooling_cls = pooling_types[self.pool_mode]

    if self.split_first_layer:
        self.add_module("dimshuffle", Expression(_transpose_time_to_spat))
        temporal_conv = nn.Conv2d(
            1, self.n_filters_time, (self.filter_time_length, 1), stride=1
        )
        self.add_module("conv_time", temporal_conv)
        spatial_conv = nn.Conv2d(
            self.n_filters_time,
            self.n_filters_spat,
            (1, self.in_chans),
            stride=1,
            bias=not self.batch_norm,
        )
        self.add_module("conv_spat", spatial_conv)
        n_filters_conv = self.n_filters_spat
    else:
        temporal_conv = nn.Conv2d(
            self.in_chans,
            self.n_filters_time,
            (self.filter_time_length, 1),
            stride=1,
            bias=not self.batch_norm,
        )
        self.add_module("conv_time", temporal_conv)
        n_filters_conv = self.n_filters_time

    if self.batch_norm:
        self.add_module(
            "bnorm",
            nn.BatchNorm2d(
                n_filters_conv, momentum=self.batch_norm_alpha, affine=True
            ),
        )
    self.add_module("conv_nonlin", Expression(self.conv_nonlin_func))
    self.add_module(
        "pool",
        pooling_cls(
            kernel_size=(self.pool_time_length, 1),
            stride=(self.pool_time_stride, 1),
        ),
    )
    self.add_module("pool_nonlin", Expression(self.pool_nonlin_func))
    self.add_module("drop", nn.Dropout(p=self.drop_prob))

    if self.final_conv_length == "auto":
        # Probe the layers registered so far with a dummy input.
        dummy_shape = (1, self.in_chans, self.input_time_length, 1)
        dummy_input = np_to_var(np.ones(dummy_shape, dtype=np.float32))
        features = self(dummy_input)
        self.final_conv_length = features.cpu().data.numpy().shape[2]

    classifier = nn.Conv2d(
        n_filters_conv, self.n_classes, (self.final_conv_length, 1), bias=True
    )
    self.add_module("conv_classifier", classifier)
    self.add_module("softmax", nn.LogSoftmax(dim=1))
    self.add_module("squeeze", Expression(_squeeze_final_output))

    # Initialization, xavier is same as in paper...
    init.xavier_uniform_(self.conv_time.weight, gain=1)
    # conv_time has no bias when it is directly followed by batch norm,
    # i.e. without a split first layer and with batch norm enabled.
    if self.split_first_layer:
        init.constant_(self.conv_time.bias, 0)
        init.xavier_uniform_(self.conv_spat.weight, gain=1)
        if not self.batch_norm:
            init.constant_(self.conv_spat.bias, 0)
    elif not self.batch_norm:
        init.constant_(self.conv_time.bias, 0)
    if self.batch_norm:
        init.constant_(self.bnorm.weight, 1)
        init.constant_(self.bnorm.bias, 0)
    init.xavier_uniform_(self.conv_classifier.weight, gain=1)
    init.constant_(self.conv_classifier.bias, 0)
def run_exp(test_on_eval, sensor_types, n_chans, max_recording_mins,
            test_recording_mins, n_recordings, sec_to_cut_at_start,
            sec_to_cut_at_end, duration_recording_mins, max_abs_val,
            clip_before_resample, sampling_freq, divisor, n_folds,
            i_test_fold, shuffle, merge_train_valid, model_name,
            n_start_chans, n_chan_factor, input_time_length,
            final_conv_length, stride_before_pool, optimizer, learning_rate,
            weight_decay, scheduler, model_constraint, batch_size, max_epochs,
            log_dir, only_return_exp, np_th_seed):
    """Build the model selected by ``model_name`` and run the experiment.

    Supported ``model_name`` values: ``'shallow'``, ``'deep'``,
    ``'deep_smac'``, ``'deep_smac_bnorm'``, ``'shallow_smac'``,
    ``'deep_smac_new'``, ``'shallow_smac_new'``, ``'linear'`` and
    ``'3path'``. The ``*smac*`` variants use fixed hyperparameters defined
    in this function and ignore ``n_start_chans``, ``n_chan_factor`` and
    ``final_conv_length``.

    For ``*smac*`` models called with ``input_time_length=None``, the input
    length is first set to 12000 samples, the model's receptive field is
    measured with a dummy forward pass, and the input length is then set
    to twice the receptive field.

    All models except ``'3path'`` are converted with
    ``to_dense_prediction_model``. The remaining arguments are forwarded
    unchanged to ``common.run_exp``.

    Returns:
        The object returned by ``common.run_exp``.

    Raises:
        ValueError: if the dummy forward pass of a smac model fails.
        AssertionError: if ``model_name`` is unknown.
    """
    # These imports used to live inside the '*_smac_new' branches. A
    # function-scope import makes the imported names local to the whole
    # function, so the other branches that use `elu` / `identity` failed
    # with UnboundLocalError. Importing up front binds them for every branch.
    from torch.nn.functional import elu, relu6
    from braindecode.torch_ext.functions import identity, square, safe_log

    cuda = True
    if ('smac' in model_name) and (input_time_length is None):
        input_time_length = 12000
        fix_input_length_for_smac = True
    else:
        fix_input_length_for_smac = False
    set_random_seeds(seed=np_th_seed, cuda=cuda)
    n_classes = 2

    if model_name == 'shallow':
        model = ShallowFBCSPNet(
            in_chans=n_chans,
            n_classes=n_classes,
            n_filters_time=n_start_chans,
            n_filters_spat=n_start_chans,
            input_time_length=input_time_length,
            final_conv_length=final_conv_length).create_network()
    elif model_name == 'deep':
        model = Deep4Net(
            n_chans,
            n_classes,
            n_filters_time=n_start_chans,
            n_filters_spat=n_start_chans,
            input_time_length=input_time_length,
            n_filters_2=int(n_start_chans * n_chan_factor),
            n_filters_3=int(n_start_chans * (n_chan_factor**2.0)),
            n_filters_4=int(n_start_chans * (n_chan_factor**3.0)),
            final_conv_length=final_conv_length,
            stride_before_pool=stride_before_pool).create_network()
    elif model_name in ('deep_smac', 'deep_smac_bnorm'):
        # Fixed hyperparameters; the two names differ only in batch norm.
        n_filters_start = 32
        n_filters_factor = 1.679066
        model = Deep4Net(
            n_chans,
            n_classes,
            n_filters_time=n_filters_start,
            n_filters_spat=n_filters_start,
            input_time_length=input_time_length,
            n_filters_2=int(n_filters_start * n_filters_factor),
            n_filters_3=int(n_filters_start * (n_filters_factor**2.0)),
            n_filters_4=int(n_filters_start * (n_filters_factor**3.0)),
            final_conv_length=1,
            batch_norm=(model_name == 'deep_smac_bnorm'),
            double_time_convs=False,
            drop_prob=0.244445,
            filter_length_2=12,
            filter_length_3=14,
            filter_length_4=12,
            filter_time_length=21,
            first_nonlin=elu,
            first_pool_mode='mean',
            first_pool_nonlin=identity,
            later_nonlin=elu,
            later_pool_mode='mean',
            later_pool_nonlin=identity,
            pool_time_length=1,
            pool_time_stride=2,
            split_first_layer=True,
            stride_before_pool=True).create_network()
    elif model_name == 'shallow_smac':
        model = ShallowFBCSPNet(
            in_chans=n_chans,
            n_classes=n_classes,
            n_filters_time=24,
            n_filters_spat=73,
            input_time_length=input_time_length,
            final_conv_length=22,
            conv_nonlin=identity,
            batch_norm=True,
            drop_prob=0.328794,
            filter_time_length=56,
            pool_mode='max',
            pool_nonlin=identity,
            pool_time_length=84,
            pool_time_stride=3,
            split_first_layer=True,
        ).create_network()
    elif model_name == 'deep_smac_new':
        n_filters_factor = 1.9532637176784269
        n_filters_start = 61
        deep_kwargs = {
            "batch_norm": False,
            "double_time_convs": False,
            "drop_prob": 0.3622676569047184,
            "filter_length_2": 9,
            "filter_length_3": 6,
            "filter_length_4": 10,
            "filter_time_length": 17,
            "final_conv_length": 5,
            "first_nonlin": elu,
            "first_pool_mode": "max",
            "first_pool_nonlin": identity,
            "later_nonlin": relu6,
            "later_pool_mode": "max",
            "later_pool_nonlin": identity,
            "n_filters_time": n_filters_start,
            "n_filters_spat": n_filters_start,
            "n_filters_2": int(n_filters_start * n_filters_factor),
            "n_filters_3": int(n_filters_start * (n_filters_factor**2.0)),
            "n_filters_4": int(n_filters_start * (n_filters_factor**3.0)),
            "pool_time_length": 1,
            "pool_time_stride": 4,
            "split_first_layer": True,
            "stride_before_pool": True,
        }
        model = Deep4Net(n_chans,
                         n_classes,
                         input_time_length=input_time_length,
                         **deep_kwargs).create_network()
    elif model_name == 'shallow_smac_new':
        shallow_kwargs = {
            "conv_nonlin": square,
            "batch_norm": True,
            "drop_prob": 0.10198630723385381,
            "filter_time_length": 51,
            "final_conv_length": 1,
            "n_filters_spat": 200,
            "n_filters_time": 76,
            "pool_mode": "max",
            "pool_nonlin": safe_log,
            "pool_time_length": 139,
            "pool_time_stride": 49,
            "split_first_layer": True,
        }
        model = ShallowFBCSPNet(in_chans=n_chans,
                                n_classes=n_classes,
                                input_time_length=input_time_length,
                                **shallow_kwargs).create_network()
    elif model_name == 'linear':
        model = nn.Sequential()
        model.add_module("conv_classifier",
                         nn.Conv2d(n_chans, n_classes, (600, 1)))
        # Explicit class dimension; the implicit-dim form is deprecated
        # and resolves to dim=1 for 4-D input anyway.
        model.add_module('softmax', nn.LogSoftmax(dim=1))
        model.add_module('squeeze', Expression(lambda x: x.squeeze(3)))
    elif model_name == '3path':
        # dont forget to reset n_preds_per_blabla
        model = create_multi_start_path_net(
            in_chans=n_chans,
            virtual_chan_1x1_conv=True,
            n_start_filters=10,
            early_bnorm=False,
            later_kernel_len=5,
            extra_conv_stride=4,
            mean_across_features=False,
            n_classifier_filters=100,
            drop_prob=0.5)
    else:
        assert False, "unknown model name {:s}".format(model_name)

    if not model_name == '3path':
        to_dense_prediction_model(model)
    log.info("Model:\n{:s}".format(str(model)))
    time_cut_off_sec = np.inf
    start_time = time.time()

    # fix input time length in case of smac models
    if fix_input_length_for_smac:
        assert ('smac' in model_name) and (input_time_length == 12000)
        if cuda:
            model.cuda()
        test_input = np_to_var(
            np.ones((2, n_chans, input_time_length, 1), dtype=np.float32))
        if cuda:
            test_input = test_input.cuda()
        try:
            out = model(test_input)
        except Exception as exc:
            # Keep the original error as the cause and let
            # KeyboardInterrupt / SystemExit propagate untouched.
            raise ValueError("Model receptive field too large...") from exc
        n_preds_per_input = out.cpu().data.numpy().shape[2]
        n_receptive_field = input_time_length - n_preds_per_input
        input_time_length = 2 * n_receptive_field

    exp = common.run_exp(
        max_recording_mins,
        n_recordings,
        sec_to_cut_at_start,
        sec_to_cut_at_end,
        duration_recording_mins,
        max_abs_val,
        clip_before_resample,
        sampling_freq,
        divisor,
        n_folds,
        i_test_fold,
        shuffle,
        merge_train_valid,
        model,
        input_time_length,
        optimizer,
        learning_rate,
        weight_decay,
        scheduler,
        model_constraint,
        batch_size,
        max_epochs,
        only_return_exp,
        time_cut_off_sec,
        start_time,
        test_on_eval,
        test_recording_mins,
        sensor_types,
        log_dir,
        np_th_seed,
    )
    return exp
def __init__(self,
             in_chans,
             n_classes,
             input_time_length=None,
             n_filters_time=40,
             filter_time_length=25,
             n_filters_spat=40,
             pool_time_length=75,
             pool_time_stride=15,
             final_conv_length=30,
             conv_nonlin=square,
             pool_mode='mean',
             pool_nonlin=safe_log,
             split_first_layer=True,
             batch_norm=True,
             batch_norm_alpha=0.1,
             drop_prob=0.5,
             siamese=False,
             i_feature_alignment_layer=None):
    """Create the shallow network as three sub-modules.

    ``temporal_conv`` holds the input transpose and the temporal
    convolution, ``spatial_conv`` the spatial convolution, batch norm,
    non-linearity, pooling and pooling non-linearity, and ``conv_cls``
    dropout, the classifier convolution, log softmax and the final
    squeeze.

    All constructor arguments are stored as instance attributes of the
    same name. ``final_conv_length='auto'`` requires ``input_time_length``
    and is replaced by the time length ``forward_once`` yields for a dummy
    input. ``i_feature_alignment_layer`` (default 1) indexes the
    sub-modules; the name of the selected one is stored in
    ``feature_alignment_layer``.
    """
    super(ShallowConvNet, self).__init__()
    if i_feature_alignment_layer is None:
        # default alignment layer
        i_feature_alignment_layer = 1
    assert final_conv_length != 'auto' or input_time_length is not None
    # Snapshot of the arguments as attributes. No extra local variable may
    # exist at this point, otherwise it would be stored on the instance too.
    self.__dict__.update(locals())
    del self.self

    pooling_types = {'max': nn.MaxPool2d, 'mean': nn.AvgPool2d}
    pooling_cls = pooling_types[self.pool_mode]
    n_spatial_filters = self.n_filters_spat

    self.temporal_conv = nn.Sequential(
        Expression(_transpose_time_to_spat),
        nn.Conv2d(1,
                  self.n_filters_time, (self.filter_time_length, 1),
                  stride=1),
    )

    spatial_layers = [
        nn.Conv2d(self.n_filters_time,
                  self.n_filters_spat, (1, self.in_chans),
                  stride=1,
                  bias=not self.batch_norm),
        nn.BatchNorm2d(n_spatial_filters,
                       momentum=self.batch_norm_alpha,
                       affine=True),
        Expression(self.conv_nonlin),
        pooling_cls(kernel_size=(self.pool_time_length, 1),
                    stride=(self.pool_time_stride, 1)),
        Expression(self.pool_nonlin),
    ]
    self.spatial_conv = nn.Sequential(*spatial_layers)

    if self.final_conv_length == 'auto':
        # Probe the feature extractor with a dummy input.
        dummy_shape = (1, self.in_chans, self.input_time_length, 1)
        dummy_input = np_to_var(np.ones(dummy_shape, dtype=np.float32))
        features = self.forward_once(dummy_input)
        self.final_conv_length = features.cpu().data.numpy().shape[2]

    classifier_layers = [
        nn.Dropout(p=self.drop_prob),
        nn.Conv2d(n_spatial_filters,
                  self.n_classes, (self.final_conv_length, 1),
                  bias=True),
        nn.LogSoftmax(dim=1),
        Expression(_squeeze_final_output),
    ]
    self.conv_cls = nn.Sequential(*classifier_layers)

    # Initialize weights of the network
    self.apply(glorot_weight_zero_bias)

    # Set feature space alignment layer, used in siamese training/testing
    module_names = list(self._modules.keys())
    assert 0 <= self.i_feature_alignment_layer < len(module_names), \
        "Given feature space alignment layer does not " \
        "exist for current model"
    self.feature_alignment_layer = \
        module_names[self.i_feature_alignment_layer]
def create_network(self):
    """Build the three-convolution network and return it as ``nn.Sequential``.

    Layers: a 1x1 convolution to 16 filters, batch norm, ELU, a permute
    that moves those filters into the third dimension, dropout, two
    blocks of convolution / batch norm / ELU / pooling / dropout with 4
    filters each, a classifier convolution covering the remaining feature
    map, log softmax over the class dimension, a permute back to the
    braindecode layout and a final squeeze.

    The size of the feature map in front of the classifier is measured
    with a dummy input of shape ``(1, in_chans, input_time_length, 1)``.
    If ``self.final_conv_length == 'auto'`` it is replaced by the measured
    size of the last dimension.

    Returns:
        nn.Sequential: the model after ``glorot_weight_zero_bias``.
    """
    pool_class = dict(max=nn.MaxPool2d, mean=nn.AvgPool2d)[self.pool_mode]
    model = nn.Sequential()

    n_filters_1 = 16
    model.add_module(
        'conv_1',
        nn.Conv2d(self.in_chans, n_filters_1, (1, 1), stride=1, bias=True))
    model.add_module(
        'bnorm_1',
        nn.BatchNorm2d(n_filters_1, momentum=0.01, affine=True, eps=1e-3),
    )
    model.add_module('elu_1', Expression(elu))
    # transpose to examples x 1 x (virtual, not EEG) channels x time
    model.add_module('permute_1',
                     Expression(lambda x: x.permute(0, 3, 1, 2)))
    model.add_module('drop_1', nn.Dropout(p=self.drop_prob))

    n_filters_2 = 4
    # keras pads unequal padding more in front, so padding
    # too large should be ok.
    # Not padding in time so that cropped training makes sense
    # https://stackoverflow.com/questions/43994604/padding-with-even-kernel-size-in-a-convolutional-layer-in-keras-theano
    model.add_module(
        'conv_2',
        nn.Conv2d(1,
                  n_filters_2,
                  self.second_kernel_size,
                  stride=1,
                  padding=(self.second_kernel_size[0] // 2, 0),
                  bias=True))
    model.add_module(
        'bnorm_2',
        nn.BatchNorm2d(n_filters_2, momentum=0.01, affine=True, eps=1e-3),
    )
    model.add_module('elu_2', Expression(elu))
    model.add_module('pool_2',
                     pool_class(kernel_size=(2, 4), stride=(2, 4)))
    model.add_module('drop_2', nn.Dropout(p=self.drop_prob))

    n_filters_3 = 4
    model.add_module(
        'conv_3',
        nn.Conv2d(n_filters_2,
                  n_filters_3,
                  self.third_kernel_size,
                  stride=1,
                  padding=(self.third_kernel_size[0] // 2, 0),
                  bias=True))
    model.add_module(
        'bnorm_3',
        nn.BatchNorm2d(n_filters_3, momentum=0.01, affine=True, eps=1e-3),
    )
    model.add_module('elu_3', Expression(elu))
    model.add_module('pool_3',
                     pool_class(kernel_size=(2, 4), stride=(2, 4)))
    model.add_module('drop_3', nn.Dropout(p=self.drop_prob))

    # Measure the feature map in front of the classifier.
    out = model(
        np_to_var(
            np.ones((1, self.in_chans, self.input_time_length, 1),
                    dtype=np.float32)))
    out_shape = out.cpu().data.numpy().shape
    n_out_virtual_chans = out_shape[2]

    if self.final_conv_length == 'auto':
        n_out_time = out_shape[3]
        self.final_conv_length = n_out_time

    model.add_module(
        'conv_classifier',
        nn.Conv2d(n_filters_3,
                  self.n_classes, (
                      n_out_virtual_chans,
                      self.final_conv_length,
                  ),
                  bias=True))
    # Explicit class dimension; the implicit-dim form is deprecated and
    # resolves to dim=1 for 4-D input anyway.
    model.add_module('softmax', nn.LogSoftmax(dim=1))
    # Transpose back to the logic of braindecode,
    # so time in third dimension (axis=2)
    model.add_module('permute_2',
                     Expression(lambda x: x.permute(0, 1, 3, 2)))
    model.add_module('squeeze', Expression(_squeeze_final_output))
    glorot_weight_zero_bias(model)
    return model
def create_multi_start_path_net(in_chans,
                                virtual_chan_1x1_conv,
                                n_start_filters,
                                early_bnorm,
                                later_kernel_len,
                                extra_conv_stride,
                                mean_across_features,
                                n_classifier_filters,
                                drop_prob,
                                n_classes=2):
    """Build the multi-start-path network as an ``nn.Sequential``.

    Args:
        in_chans: number of input channels.
        virtual_chan_1x1_conv: if true, prepend a 1x1 convolution that
            maps the input channels to ``in_chans`` new channels.
        n_start_filters: passed to ``SplitStartBlock``; ``conv_3`` expects
            ``3 * n_start_filters`` input filters.
        early_bnorm: passed to ``SplitStartBlock``.
        later_kernel_len: kernel length of ``conv_4`` and ``conv_5``.
        extra_conv_stride: if not None, add a strided convolution with this
            kernel length and stride after block 3 and after block 4.
        mean_across_features: if true, average over dim 2 (kept as a
            singleton dimension) before the classifier.
        n_classifier_filters: if not None, add a 1x1 feature convolution
            block with this many filters before the classifier.
        drop_prob: dropout probability; dropout layers are only added
            when it is greater than zero.
        n_classes: number of output classes of the classifier convolution
            (default 2, the previously hard-coded value).

    Returns:
        nn.Sequential: the assembled model, ending in log softmax over the
        class dimension and a squeeze of dim 3.
    """
    model = nn.Sequential()
    if virtual_chan_1x1_conv:
        model.add_module('virtual_chan_filter',
                         nn.Conv2d(in_chans, in_chans, (1, 1)))
        # maybe problem that there is no batch norm?
        # for the gradients etc.?

    model.add_module('dimshuffle',
                     Expression(lambda x: x.permute(0, 3, 2, 1)))
    model.add_module('start_block',
                     SplitStartBlock(n_start_filters, early_bnorm))

    model.add_module('conv_3',
                     nn.Conv2d(n_start_filters * 3, 48, (1, in_chans)))
    model.add_module('bnorm_3', nn.BatchNorm2d(48))
    model.add_module('nonlin_3',
                     Expression(lambda x: sat_elu(x, threshold=7)))
    if extra_conv_stride is not None:
        model.add_module(
            'conv_3_extra',
            nn.Conv2d(48,
                      48, (extra_conv_stride, 1),
                      stride=(extra_conv_stride, 1)))

    if drop_prob > 0:
        model.add_module('conv_4_drop', nn.Dropout(p=drop_prob))
    model.add_module('conv_4', nn.Conv2d(48, 48, (later_kernel_len, 1)))
    model.add_module('bnorm_4', nn.BatchNorm2d(48))
    model.add_module('nonlin_4', Expression(elu))
    if extra_conv_stride is not None:
        model.add_module(
            'conv_4_extra',
            nn.Conv2d(48,
                      48, (extra_conv_stride, 1),
                      stride=(extra_conv_stride, 1)))

    if drop_prob > 0:
        model.add_module('conv_5_drop', nn.Dropout(p=drop_prob))
    model.add_module('conv_5', nn.Conv2d(48, 64, (later_kernel_len, 1)))
    model.add_module('bnorm_5', nn.BatchNorm2d(64))
    model.add_module('nonlin_5', Expression(elu))

    if n_classifier_filters is not None:
        if drop_prob > 0:
            model.add_module('conv_features_drop', nn.Dropout(p=drop_prob))
        model.add_module('conv_features',
                         nn.Conv2d(64, n_classifier_filters, (1, 1)))
        model.add_module('bnorm_features',
                         nn.BatchNorm2d(n_classifier_filters))
        model.add_module('nonlin_features', Expression(elu))

    if drop_prob > 0:
        model.add_module('classifier_drop', nn.Dropout(p=drop_prob))
    if mean_across_features:
        model.add_module(
            'feature_mean',
            Expression(lambda x: th.mean(x, dim=2, keepdim=True)))

    # Without the feature block the classifier sees conv_5's 64 filters.
    n_features_now = n_classifier_filters
    if n_features_now is None:
        n_features_now = 64
    model.add_module('classifier',
                     nn.Conv2d(n_features_now, n_classes, (1, 1)))
    # Explicit class dimension; the implicit-dim form is deprecated and
    # resolves to dim=1 for 4-D input anyway.
    model.add_module('softmax', nn.LogSoftmax(dim=1))
    model.add_module('squeeze', Expression(lambda x: x.squeeze(3)))
    return model
def runModel(mode):
    """Load a trained model and return its class-balanced test accuracy.

    Steps performed:
      1. Load ``data<mode[:3]>.npy`` / ``labels<mode[:3]>.npy`` and split
         them with ``splitDataRandom_Loaded``.
      2. Build the model selected by the local ``model_name`` (hard-coded
         to ``'linear'``) and convert it with ``to_dense_prediction_model``.
      3. Load the weights from ``linear/<modelName>Model<mode[4]>.pt``,
         where ``modelName`` joins ``mode[:4]`` with ``-``.
      4. Predict on the train, valid and test sets with cropped inputs,
         average the predictions per trial and compute per-class accuracy.

    Args:
        mode: indexable, mutable sequence with at least five entries;
            ``mode[2]`` and ``mode[3]`` are converted to ``str`` in place.
            NOTE(review): item assignment rules out a plain string --
            presumably a list; confirm against the caller.

    Returns:
        Mean of the per-class accuracies on the test set.

    NOTE(review): many hyperparameters are defined as locals here, but the
    evaluation part reads ``config.*`` (cuda, n_chans, input_time_length,
    batch_size, ...). Confirm the two sources agree.
    """
    cudnn.benchmark = True
    start = time.time()

    #mode = str(sys.argv[1])
    #X,y,test_X,test_y = loadSubNormData(mode='all')
    #X,y,test_X,test_y = loadNEDCdata(mode=mode)

    #data = np.load('sessionsData/data%s-sessions.npy'%mode[:3])
    #labels = np.load('sessionsData/labels%s-sessions.npy'%mode[:3])

    data = np.load('data%s.npy' % mode[:3])
    labels = np.load('labels%s.npy' % mode[:3])

    X, y, test_X, test_y = splitDataRandom_Loaded(data, labels, mode)

    print('Mode - %s Total n: %d, Test n: %d' %
          (mode, len(y) + len(test_y), len(test_y)))
    #return 0

    #X = addDataNoise(X,band=[1,4])
    #test_X = addDataNoise(test_X,band=[1,4])

    # Every recording must have the configured number of samples.
    max_shape = np.max([list(x.shape) for x in X], axis=0)
    assert max_shape[1] == int(config.duration_recording_mins *
                               config.sampling_freq * 60)

    n_classes = 2
    n_recordings = None  # set to an integer, if you want to restrict the set size
    sensor_types = ["EEG"]
    n_chans = 19  #21
    max_recording_mins = 35  # exclude larger recordings from training set
    sec_to_cut = 60  # cut away at start of each recording
    duration_recording_mins = 5  #20 # how many minutes to use per recording
    test_recording_mins = 5  #20
    max_abs_val = 800  # for clipping
    sampling_freq = 100
    divisor = 10  # divide signal by this
    test_on_eval = True  # test on evaluation set or on training set
    # in case of test on eval, n_folds and i_testfold determine
    # validation fold in training set for training until first stop
    n_folds = 10
    i_test_fold = 9
    shuffle = True
    model_name = 'linear'  #'deep'#'shallow' 'linear'
    n_start_chans = 25
    n_chan_factor = 2  # relevant for deep model only
    input_time_length = 6000
    final_conv_length = 1
    model_constraint = 'defaultnorm'
    init_lr = 1e-3
    batch_size = 64
    max_epochs = 35  # until first stop, then continue training on train+valid
    cuda = True  # False

    if model_name == 'shallow':
        model = ShallowFBCSPNet(
            in_chans=n_chans,
            n_classes=n_classes,
            n_filters_time=n_start_chans,
            n_filters_spat=n_start_chans,
            input_time_length=input_time_length,
            final_conv_length=final_conv_length).create_network()
    elif model_name == 'deep':
        model = Deep4Net(n_chans,
                         n_classes,
                         n_filters_time=n_start_chans,
                         n_filters_spat=n_start_chans,
                         input_time_length=input_time_length,
                         n_filters_2=int(n_start_chans * n_chan_factor),
                         n_filters_3=int(n_start_chans *
                                         (n_chan_factor**2.0)),
                         n_filters_4=int(n_start_chans *
                                         (n_chan_factor**3.0)),
                         final_conv_length=final_conv_length,
                         stride_before_pool=True).create_network()
    elif (model_name == 'deep_smac'):
        # NOTE(review): the else branch below cannot be reached, because
        # this elif is only entered for model_name == 'deep_smac'.
        if model_name == 'deep_smac':
            do_batch_norm = False
        else:
            assert model_name == 'deep_smac_bnorm'
            do_batch_norm = True
        double_time_convs = False
        drop_prob = 0.244445
        filter_length_2 = 12
        filter_length_3 = 14
        filter_length_4 = 12
        filter_time_length = 21
        final_conv_length = 1
        first_nonlin = elu
        first_pool_mode = 'mean'
        first_pool_nonlin = identity
        later_nonlin = elu
        later_pool_mode = 'mean'
        later_pool_nonlin = identity
        n_filters_factor = 1.679066
        n_filters_start = 32
        pool_time_length = 1
        pool_time_stride = 2
        split_first_layer = True
        n_chan_factor = n_filters_factor
        n_start_chans = n_filters_start
        model = Deep4Net(n_chans,
                         n_classes,
                         n_filters_time=n_start_chans,
                         n_filters_spat=n_start_chans,
                         input_time_length=input_time_length,
                         n_filters_2=int(n_start_chans * n_chan_factor),
                         n_filters_3=int(n_start_chans *
                                         (n_chan_factor**2.0)),
                         n_filters_4=int(n_start_chans *
                                         (n_chan_factor**3.0)),
                         final_conv_length=final_conv_length,
                         batch_norm=do_batch_norm,
                         double_time_convs=double_time_convs,
                         drop_prob=drop_prob,
                         filter_length_2=filter_length_2,
                         filter_length_3=filter_length_3,
                         filter_length_4=filter_length_4,
                         filter_time_length=filter_time_length,
                         first_nonlin=first_nonlin,
                         first_pool_mode=first_pool_mode,
                         first_pool_nonlin=first_pool_nonlin,
                         later_nonlin=later_nonlin,
                         later_pool_mode=later_pool_mode,
                         later_pool_nonlin=later_pool_nonlin,
                         pool_time_length=pool_time_length,
                         pool_time_stride=pool_time_stride,
                         split_first_layer=split_first_layer,
                         stride_before_pool=True).create_network()
    elif model_name == 'shallow_smac':
        conv_nonlin = identity
        do_batch_norm = True
        drop_prob = 0.328794
        filter_time_length = 56
        final_conv_length = 22
        n_filters_spat = 73
        n_filters_time = 24
        pool_mode = 'max'
        pool_nonlin = identity
        pool_time_length = 84
        pool_time_stride = 3
        split_first_layer = True
        model = ShallowFBCSPNet(
            in_chans=n_chans,
            n_classes=n_classes,
            n_filters_time=n_filters_time,
            n_filters_spat=n_filters_spat,
            input_time_length=input_time_length,
            final_conv_length=final_conv_length,
            conv_nonlin=conv_nonlin,
            batch_norm=do_batch_norm,
            drop_prob=drop_prob,
            filter_time_length=filter_time_length,
            pool_mode=pool_mode,
            pool_nonlin=pool_nonlin,
            pool_time_length=pool_time_length,
            pool_time_stride=pool_time_stride,
            split_first_layer=split_first_layer,
        ).create_network()
    elif model_name == 'linear':
        model = nn.Sequential()
        model.add_module("conv_classifier",
                         nn.Conv2d(n_chans, n_classes, (600, 1)))
        model.add_module('softmax', nn.LogSoftmax(dim=1))
        model.add_module('squeeze', Expression(lambda x: x.squeeze(3)))
    else:
        assert False, "unknown model name {:s}".format(model_name)

    to_dense_prediction_model(model)

    if config.cuda:
        model.cuda()
    # Dummy forward pass to find out how many predictions the model makes
    # per input window.
    test_input = np_to_var(
        np.ones((2, config.n_chans, config.input_time_length, 1),
                dtype=np.float32))
    if config.cuda:
        test_input = test_input.cuda()
    out = model(test_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]
    iterator = CropsFromTrialsIterator(
        batch_size=config.batch_size,
        input_time_length=config.input_time_length,
        n_preds_per_input=n_preds_per_input)

    #model.add_module('softmax', nn.LogSoftmax(dim=1))

    model.eval()

    # Entries 2 and 3 are stringified so that '-'.join works on mode[:4].
    mode[2] = str(mode[2])
    mode[3] = str(mode[3])
    modelName = '-'.join(mode[:4])

    #params = th.load('sessionsData/%sModel%s-sessions.pt'%(modelName,mode[4]))
    #params = th.load('%sModel%s.pt'%(modelName,mode[4]))
    params = th.load('linear/%sModel%s.pt' % (modelName, mode[4]))

    model.load_state_dict(params)

    if config.test_on_eval:
        #test_X, test_y = test_dataset.load()
        #test_X, test_y = loadNEDCdata(mode='eval')
        max_shape = np.max([list(x.shape) for x in test_X], axis=0)
        assert max_shape[1] == int(config.test_recording_mins *
                                   config.sampling_freq * 60)
    if not config.test_on_eval:
        splitter = TrainValidTestSplitter(config.n_folds,
                                          config.i_test_fold,
                                          shuffle=config.shuffle)
        train_set, valid_set, test_set = splitter.split(X, y)
    else:
        splitter = TrainValidSplitter(config.n_folds,
                                      i_valid_fold=config.i_test_fold,
                                      shuffle=config.shuffle)
        train_set, valid_set = splitter.split(X, y)
        test_set = SignalAndTarget(test_X, test_y)
        del test_X, test_y
    del X, y  # shouldn't be necessary, but just to make sure

    datasets = OrderedDict(
        (('train', train_set), ('valid', valid_set), ('test', test_set)))

    for setname in ('train', 'valid', 'test'):
        #setname = 'test'
        #print("Compute predictions for {:s}...".format(setname))
        dataset = datasets[setname]
        if config.cuda:
            preds_per_batch = [
                var_to_np(model(np_to_var(b[0]).cuda()))
                for b in iterator.get_batches(dataset, shuffle=False)
            ]
        else:
            preds_per_batch = [
                var_to_np(model(np_to_var(b[0])))
                for b in iterator.get_batches(dataset, shuffle=False)
            ]
        preds_per_trial = compute_preds_per_trial(
            preds_per_batch,
            dataset,
            input_time_length=iterator.input_time_length,
            n_stride=iterator.n_preds_per_input)
        # Average each trial's predictions over axes 0 and 2, leaving one
        # value per class.
        mean_preds_per_trial = [
            np.mean(preds, axis=(0, 2)) for preds in preds_per_trial
        ]
        mean_preds_per_trial = np.array(mean_preds_per_trial)

        all_pred_labels = np.argmax(mean_preds_per_trial, axis=1).squeeze()
        all_target_labels = dataset.y
        # Accuracy is computed per class and then averaged.
        acc_per_class = []
        for i_class in range(n_classes):
            mask = all_target_labels == i_class
            acc = np.mean(all_pred_labels[mask] == all_target_labels[mask])
            acc_per_class.append(acc)
        misclass = 1 - np.mean(acc_per_class)
        #print('Acc:{}, Class 0:{}, Class 1:{}'.format(np.mean(acc_per_class),acc_per_class[0],acc_per_class[1]))

        # Only the test-set result is returned; 'test' is iterated last.
        if setname == 'test':
            testResult = np.mean(acc_per_class)

    return testResult