def replace_dense_softmax_by_dense_linear(all_layers, n_features,
                                          nonlin_before_merge,
                                          batch_norm_before_merge):
    """Replace dense/conv (n_classes) -> reshape -> softmax
    by dense/conv (n_features) -> reshape."""
    reshape_layer = [l for l in all_layers
                     if l.__class__.__name__ == 'FinalReshapeLayer']
    assert len(reshape_layer) == 1
    reshape_layer = reshape_layer[0]
    input_to_reshape = reshape_layer.input_layer
    # We expect a linear conv2d as "final dense" before the reshape...
    assert input_to_reshape.__class__.__name__ == 'Conv2DLayer', (
        "expect conv before reshape")
    assert input_to_reshape.nonlinearity.__name__ == 'linear'
    assert input_to_reshape.stride == (1, 1)
    # recreate with different number of filters
    new_input_to_reshape = Conv2DLayer(input_to_reshape.input_layer,
                                       num_filters=n_features,
                                       filter_size=input_to_reshape.filter_size,
                                       nonlinearity=nonlin_before_merge,
                                       name='final_dense')
    if batch_norm_before_merge:
        new_input_to_reshape = batch_norm(new_input_to_reshape,
                                          alpha=0.1, epsilon=0.01)
    new_reshape_l = FinalReshapeLayer(new_input_to_reshape)
    return lasagne.layers.get_all_layers(new_reshape_l)
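# Usage sketch (hypothetical): swap the classification head of an existing
# network ending in a FinalReshapeLayer for a 64-dimensional linear feature
# head, e.g. before merging several networks. `trained_final_layer`, the
# feature dimensionality and the other argument values are illustrative
# assumptions, not values used elsewhere in this module.
example_all_layers = lasagne.layers.get_all_layers(trained_final_layer)
example_feature_layers = replace_dense_softmax_by_dense_linear(
    example_all_layers,
    n_features=64,
    nonlin_before_merge=lasagne.nonlinearities.identity,
    batch_norm_before_merge=True)
example_feature_out = example_feature_layers[-1]  # new FinalReshapeLayer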
def get_layers(self):
    def resnet_residual_block(model,
                              increase_units_factor=None,
                              half_time=False):
        """Call `residual_block` with the correct attributes from this object.

        Parameters
        ----------
        model : Lasagne layer
            Input layer for the residual block.
        increase_units_factor : float, optional
            Factor by which to increase the number of filters
            (Default value = None).
        half_time : bool, optional
            Whether to halve the temporal resolution
            (Default value = False).

        Returns
        -------
        Final layer of the created residual block.
        """
        return residual_block(
            model,
            batch_norm_epsilon=self.batch_norm_epsilon,
            batch_norm_alpha=self.batch_norm_alpha,
            increase_units_factor=increase_units_factor,
            half_time=half_time,
            nonlinearity=self.nonlinearity,
            projection=self.projection,
            survival_prob=self.survival_prob,
            add_after_nonlin=self.add_after_nonlin,
            reduction_method=self.reduction_method,
            reduction_pool_mode=self.reduction_pool_mode)

    model = InputLayer([None, self.in_chans, self.input_time_length, 1])
    if self.split_first_layer:
        # shift channel dim out
        model = DimshuffleLayer(model, (0, 3, 2, 1))
        # first timeconv
        model = Conv2DLayer(model,
                            num_filters=self.n_first_filters,
                            filter_size=(self.first_filter_length, 1),
                            stride=(1, 1),
                            nonlinearity=identity,
                            pad='same',
                            W=lasagne.init.HeNormal(gain='relu'))
        # now spatconv
        model = batch_norm(
            Conv2DLayer(model,
                        num_filters=self.n_first_filters,
                        filter_size=(1, self.in_chans),
                        stride=(1, 1),
                        nonlinearity=self.nonlinearity,
                        pad=0,
                        W=lasagne.init.HeNormal(gain='relu')),
            epsilon=self.batch_norm_epsilon,
            alpha=self.batch_norm_alpha)
    else:
        model = batch_norm(
            Conv2DLayer(model,
                        num_filters=self.n_first_filters,
                        filter_size=(self.first_filter_length, 1),
                        stride=(1, 1),
                        nonlinearity=self.nonlinearity,
                        pad='same',
                        W=lasagne.init.HeNormal(gain='relu')),
            epsilon=self.batch_norm_epsilon,
            alpha=self.batch_norm_alpha)

    # Residual block groups: after the first group, each group starts with a
    # block that halves the temporal resolution; the first two of these also
    # increase the number of filters (x2, then x1.5).
    for _ in range(self.n_layers_per_block):
        model = resnet_residual_block(model)
    model = resnet_residual_block(model, increase_units_factor=2,
                                  half_time=True)
    for _ in range(1, self.n_layers_per_block):
        model = resnet_residual_block(model)
    model = resnet_residual_block(model, increase_units_factor=1.5,
                                  half_time=True)
    for _ in range(1, self.n_layers_per_block):
        model = resnet_residual_block(model)
    model = resnet_residual_block(model, half_time=True)
    for _ in range(1, self.n_layers_per_block):
        model = resnet_residual_block(model)
    model = resnet_residual_block(model, half_time=True)
    for _ in range(1, self.n_layers_per_block):
        model = resnet_residual_block(model)
    model = resnet_residual_block(model, half_time=True)
    for _ in range(1, self.n_layers_per_block):
        model = resnet_residual_block(model)
    model = resnet_residual_block(model, half_time=True)
    for _ in range(1, self.n_layers_per_block):
        model = resnet_residual_block(model)

    if self.drop_before_pool:
        model = DropoutLayer(model, p=0.5)
    # Replacement for global mean pooling
    if self.final_aggregator == 'pool':
        model = Pool2DLayer(model,
                            pool_size=(self.final_pool_length, 1),
                            stride=(1, 1),
                            mode='average_exc_pad')
        model = Conv2DLayer(model,
                            filter_size=(1, 1),
                            num_filters=4,  # 4 output units (classes)
                            W=lasagne.init.HeNormal(),
                            nonlinearity=identity)
    elif self.final_aggregator == 'conv':
        model = Conv2DLayer(model,
                            filter_size=(self.final_pool_length, 1),
                            num_filters=4,  # 4 output units (classes)
                            W=lasagne.init.HeNormal(),
                            nonlinearity=identity)
    else:
        raise ValueError("Unknown final aggregator {:s}".format(
            self.final_aggregator))
    model = FinalReshapeLayer(model)
    model = NonlinearityLayer(model, nonlinearity=self.final_nonlin)
    model = set_survival_probs_to_linear_decay(model, self.survival_prob)
    return lasagne.layers.get_all_layers(model)
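# Sketch of assumed usage: `get_layers` above is a method of a ResNet
# configuration object; `model_config` below is a hypothetical placeholder for
# such an object. The snippet turns the returned layer list into a
# deterministic prediction function using standard Lasagne/Theano calls.
import theano

example_layers = model_config.get_layers()
example_preds = lasagne.layers.get_output(example_layers[-1],
                                          deterministic=True)
example_pred_fn = theano.function([example_layers[0].input_var],
                                  example_preds)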
def residual_block(l, batch_norm_alpha, batch_norm_epsilon, nonlinearity,
                   survival_prob, add_after_nonlin, reduction_method,
                   reduction_pool_mode, increase_units_factor=None,
                   half_time=False, projection=False):
    """Residual block of batch-normalized convolutions, with an optional
    increase in the number of filters, optional halving of the temporal
    resolution, and optional stochastic depth: for `survival_prob` < 1 the
    block output is randomly replaced by its shortcut."""
    assert 0 <= survival_prob <= 1
    input_num_filters = l.output_shape[1]
    if increase_units_factor is not None:
        out_num_filters = int(input_num_filters * increase_units_factor)
        assert (out_num_filters - input_num_filters) % 2 == 0, (
            "Need even number of extra channels in order to be able "
            "to pad correctly")
    else:
        out_num_filters = input_num_filters

    if (not half_time) or (reduction_method == 'conv'):
        stack_1 = batch_norm(
            Conv2DLayer(l,
                        num_filters=out_num_filters,
                        filter_size=(3, 3),
                        stride=(1, 1),
                        nonlinearity=nonlinearity,
                        pad='same',
                        W=lasagne.init.HeNormal(gain='relu')),
            epsilon=batch_norm_epsilon,
            alpha=batch_norm_alpha)
    else:
        assert half_time and reduction_method == 'pool'
        stack_1 = Pool2DLayer(l,
                              pool_size=(3, 1),
                              stride=(1, 1),
                              pad=(1, 0),
                              mode=reduction_pool_mode)
        # 1x1 conv here, therefore can do stride later without problems.
        # Otherwise we would have to do the stride here already
        # and add an extra if condition later (only reshape with stride
        # in case of reduction method conv)...
        stack_1 = batch_norm(
            Conv2DLayer(stack_1,
                        num_filters=out_num_filters,
                        filter_size=(1, 1),
                        stride=(1, 1),
                        nonlinearity=nonlinearity,
                        pad='same',
                        W=lasagne.init.HeNormal(gain='relu')),
            epsilon=batch_norm_epsilon,
            alpha=batch_norm_alpha)
    if half_time:
        stack_1 = StrideReshapeLayer(stack_1, n_stride=2)
    stack_2 = batch_norm(
        Conv2DLayer(stack_1,
                    num_filters=out_num_filters,
                    filter_size=(3, 3),
                    stride=(1, 1),
                    nonlinearity=None,
                    pad='same',
                    W=lasagne.init.HeNormal(gain='relu')),
        epsilon=batch_norm_epsilon,
        alpha=batch_norm_alpha)

    # add shortcut connections
    shortcut = l
    if half_time:
        # Note: since we are only reshaping, this is ok both for a later
        # identity and a later projection shortcut. The 1x1 conv of the
        # projection is the same whether we do it before or after this
        # reshape (would not be true for anything but a 1x1 conv!).
        shortcut = StrideReshapeLayer(shortcut, n_stride=2)
    if increase_units_factor is not None:
        if projection:
            # projection shortcut, as option B in paper
            shortcut = batch_norm(
                Conv2DLayer(shortcut,
                            num_filters=out_num_filters,
                            filter_size=(1, 1),
                            stride=(1, 1),
                            nonlinearity=None,
                            pad='same',
                            b=None),
                epsilon=batch_norm_epsilon,
                alpha=batch_norm_alpha)
        else:
            # identity shortcut, as option A in paper
            n_extra_chans = out_num_filters - input_num_filters
            shortcut = PadLayer(shortcut,
                                [n_extra_chans // 2, 0, 0],
                                batch_ndim=1)

    if add_after_nonlin:
        # apply nonlinearity (default rectify) before the addition
        stack_2 = NonlinearityLayer(stack_2)
        block = ElemwiseSumLayer([stack_2, shortcut])
    else:
        block = NonlinearityLayer(ElemwiseSumLayer([stack_2, shortcut]),
                                  nonlinearity=nonlinearity)

    if survival_prob != 1:
        # Hack to make both be broadcastable along empty third dim.
        # Otherwise I get an error that they are of different type:
        # shortcut: TensorType(False,False,False,True)
        # block: TensorType4d(32) or sth
        shortcut = ExpressionLayer(shortcut, lambda x: T.addbroadcast(x, 3))
        block = ExpressionLayer(block, lambda x: T.addbroadcast(x, 3))
        block = RandomSwitchLayer(block, shortcut, survival_prob)
    return block
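# Minimal sketch of building a single block directly, assuming the custom
# layers used above (StrideReshapeLayer, RandomSwitchLayer, ...) are available
# as in the rest of this module. Shapes and hyperparameters are illustrative
# assumptions only.
example_block_in = InputLayer([None, 32, 600, 1])  # (batch, chans, time, 1)
example_block = residual_block(
    example_block_in,
    batch_norm_alpha=0.1,
    batch_norm_epsilon=1e-4,
    nonlinearity=lasagne.nonlinearities.rectify,
    survival_prob=0.8,           # < 1 enables the stochastic-depth switch
    add_after_nonlin=False,
    reduction_method='conv',
    reduction_pool_mode='average_exc_pad',
    increase_units_factor=2,     # 32 -> 64 filters, padded identity shortcut
    half_time=True)              # halve the temporal resolution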