def maxoutDense(layer, num_units, ds):
    layer = layers.DenseLayer(layer,
                              num_units=num_units,
                              W=init.GlorotUniform(),
                              b=init.Constant(0.),
                              nonlinearity=nonlinearities.rectify)
    layer = layers.FeaturePoolLayer(layer,
                                    pool_size=ds,
                                    axis=1,
                                    pool_function=T.max)
    return layer
def maxoutConv(layer, num_filters, ds, filter_size, stride, pad):
    layer = layers.Conv2DLayer(layer,
                               num_filters=num_filters,
                               filter_size=filter_size,
                               stride=stride,
                               pad=pad,
                               untie_biases=False,
                               W=init.GlorotUniform(),
                               b=init.Constant(0.),
                               nonlinearity=nonlinearities.rectify)
    layer = layers.FeaturePoolLayer(layer,
                                    pool_size=ds,
                                    axis=1,
                                    pool_function=T.max)
    return layer
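# A minimal usage sketch (not from the original source). It assumes the
# module-level imports the helpers rely on and stacks them on a toy input;
# the layer sizes below are made up. The pooled dimension must be divisible
# by ds, since FeaturePoolLayer groups features along axis 1.
import theano.tensor as T
from lasagne import layers, init, nonlinearities

x = T.tensor4('x')
net = layers.InputLayer((None, 3, 32, 32), input_var=x)
# 32 filters pooled in groups of 2 -> 16 feature maps
net = maxoutConv(net, num_filters=32, ds=2, filter_size=(3, 3), stride=(1, 1), pad='same')
# 64 dense units pooled in groups of 4 -> 16 outputs
net = maxoutDense(layers.FlattenLayer(net), num_units=64, ds=4)
output = layers.get_output(net)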
def __init__(self, image_shape, filter_shape, num_class, conv_type,
             kernel_size, kernel_pool_size, dropout_rate):
    """ """
    self.filter_shape = filter_shape
    self.n_visible = numpy.prod(image_shape)
    self.n_layers = len(filter_shape)
    self.rng = RandomStreams(123)
    self.x = T.matrix()
    self.y = T.ivector()
    self.conv_layers = []

    NoiseLayer = layers.DropoutLayer
    dropout_rate = float(dropout_rate)

    self.l_input = layers.InputLayer((None, self.n_visible), self.x)
    this_layer = layers.ReshapeLayer(self.l_input, ([0], ) + image_shape)

    for l in range(self.n_layers):
        activation = lasagne.nonlinearities.rectify
        if len(filter_shape[l]) == 3:
            if conv_type == 'double' and filter_shape[l][1] > kernel_size:
                this_layer = DoubleConvLayer(this_layer,
                                             filter_shape[l][0],
                                             filter_shape[l][1:],
                                             pad='same',
                                             nonlinearity=activation,
                                             kernel_size=kernel_size,
                                             kernel_pool_size=kernel_pool_size)
                this_layer = layers.batch_norm(this_layer)
            elif conv_type == 'maxout':
                this_layer = layers.Conv2DLayer(this_layer,
                                                filter_shape[l][0],
                                                filter_shape[l][1:],
                                                b=None,
                                                pad='same',
                                                nonlinearity=None)
                this_layer = layers.FeaturePoolLayer(
                    this_layer, pool_size=kernel_pool_size**2)
                this_layer = layers.BatchNormLayer(this_layer)
                this_layer = layers.NonlinearityLayer(this_layer, activation)
            elif conv_type == 'cyclic':
                this_layers = []
                this_layers.append(
                    layers.Conv2DLayer(this_layer,
                                       filter_shape[l][0],
                                       filter_shape[l][1:],
                                       b=None,
                                       pad='same',
                                       nonlinearity=None))
                for _ in range(3):
                    W = this_layers[-1].W.dimshuffle(0, 1, 3, 2)[:, :, :, ::-1]
                    this_layers.append(
                        layers.Conv2DLayer(this_layer,
                                           filter_shape[l][0],
                                           filter_shape[l][1:],
                                           W=W,
                                           b=None,
                                           pad='same',
                                           nonlinearity=None))
                this_layer = layers.ElemwiseMergeLayer(this_layers, T.maximum)
                this_layer = layers.BatchNormLayer(this_layer)
                this_layer = layers.NonlinearityLayer(this_layer, activation)
            elif conv_type == 'standard' \
                    or (conv_type == 'double' and filter_shape[l][1] <= kernel_size):
                this_layer = layers.Conv2DLayer(this_layer,
                                                filter_shape[l][0],
                                                filter_shape[l][1:],
                                                pad='same',
                                                nonlinearity=activation)
                this_layer = layers.batch_norm(this_layer)
            else:
                raise NotImplementedError
            self.conv_layers.append(this_layer)
        elif len(filter_shape[l]) == 2:
            this_layer = layers.MaxPool2DLayer(this_layer, filter_shape[l])
            this_layer = NoiseLayer(this_layer, dropout_rate)
        elif len(filter_shape[l]) == 1:
            raise NotImplementedError

    self.top_conv_layer = this_layer
    this_layer = layers.GlobalPoolLayer(this_layer, T.mean)
    self.clf_layer = layers.DenseLayer(this_layer,
                                       num_class,
                                       W=lasagne.init.Constant(0.),
                                       nonlinearity=T.nnet.softmax)

    self.params = layers.get_all_params(self.clf_layer, trainable=True)
    self.params_all = layers.get_all_params(self.clf_layer)
                             nonlinearity=leaky_relu,
                             border_mode='same')
pool4 = dnn.MaxPool2DDNNLayer(conv4, (3, 3), stride=(2, 2))
#                            W=lasagne.init.Orthogonal(gain='relu'), border_mode='same', nonlinearity=leaky_relu)

conv5_dropout = lasagne.layers.DropoutLayer(pool4, p=0.4)
conv5a = dnn.Conv2DDNNLayer(conv5_dropout,
                            num_filters=128,
                            filter_size=(3, 3),
                            W=lasagne.init.Orthogonal(gain='relu'),
                            nonlinearity=leaky_relu,
                            border_mode='same')
conv5 = layers.FeaturePoolLayer(conv5a, 2)

# conv6_dropout = lasagne.layers.DropoutLayer(conv5, p=0.1)
# conv6 = layers.Conv2DLayer(conv6_dropout,
#                            num_filters=128,
#                            filter_size=(3, 3),
#                            W=lasagne.init.Orthogonal(gain='relu'),
#                            nonlinearity=leaky_relu,
#                            border_mode='same')
pool6 = dnn.MaxPool2DDNNLayer(conv5, (3, 3), stride=(2, 2))

pool3 = dnn.MaxPool2DDNNLayer(conv3, (3, 3), stride=(2, 2))
merge = RotateMergeLayer(pool3)
merge = RotateMergeLayer(pool6)

dense1_dropout = lasagne.layers.DropoutLayer(merge, p=0.5)
def build_1Dregression_v1(input_var=None,
                          input_width=None,
                          nin_units=12,
                          h_num_units=[64, 64],
                          h_grad_clip=1.0,
                          output_width=1):
    """
    A stacked bidirectional RNN network for regression, alternating with
    dense layers and merging of the two directions, followed by a feature
    mean pooling in the time direction, with a linear dim-reduction layer
    at the start.

    Args:
        input_var (theano 3-tensor): minibatch of input sequence vectors
        input_width (int): length of input sequences
        nin_units (int): number of NIN features
        h_num_units (int list): no. of units in hidden layer in each stack,
            from bottom to top
        h_grad_clip (float): gradient clipping maximum value
        output_width (int): size of output layer (e.g. =1 for 1D regression)

    Returns:
        output layer (Lasagne layer object)
    """
    # Non-linearity hyperparameter
    nonlin = lasagne.nonlinearities.LeakyRectify(leakiness=0.15)

    # Input layer
    l_in = LL.InputLayer(shape=(None, 22, input_width), input_var=input_var)
    batchsize = l_in.input_var.shape[0]

    # NIN-layer
    l_in = LL.NINLayer(l_in, num_units=nin_units,
                       nonlinearity=lasagne.nonlinearities.linear)

    l_in_1 = LL.DimshuffleLayer(l_in, (0, 2, 1))

    # RNN layers
    for h in h_num_units:
        # Forward layers
        l_forward_0 = LL.RecurrentLayer(l_in_1,
                                        nonlinearity=nonlin,
                                        num_units=h,
                                        backwards=False,
                                        learn_init=True,
                                        grad_clipping=h_grad_clip,
                                        unroll_scan=True,
                                        precompute_input=True)
        l_forward_0a = LL.ReshapeLayer(l_forward_0, (-1, h))
        l_forward_0b = LL.DenseLayer(l_forward_0a,
                                     num_units=h,
                                     nonlinearity=nonlin)
        l_forward_0c = LL.ReshapeLayer(l_forward_0b,
                                       (batchsize, input_width, h))

        # Backward layers
        l_backward_0 = LL.RecurrentLayer(l_in_1,
                                         nonlinearity=nonlin,
                                         num_units=h,
                                         backwards=True,
                                         learn_init=True,
                                         grad_clipping=h_grad_clip,
                                         unroll_scan=True,
                                         precompute_input=True)
        l_backward_0a = LL.ReshapeLayer(l_backward_0, (-1, h))
        l_backward_0b = LL.DenseLayer(l_backward_0a,
                                      num_units=h,
                                      nonlinearity=nonlin)
        l_backward_0c = LL.ReshapeLayer(l_backward_0b,
                                        (batchsize, input_width, h))

        l_in_1 = LL.ElemwiseSumLayer([l_forward_0c, l_backward_0c])

    # Output layers
    network_0a = LL.ReshapeLayer(l_in_1, (-1, h_num_units[-1]))
    network_0b = LL.DenseLayer(network_0a,
                               num_units=output_width,
                               nonlinearity=nonlin)
    network_0c = LL.ReshapeLayer(network_0b,
                                 (batchsize, input_width, output_width))

    output_net_1 = LL.FlattenLayer(network_0c, outdim=2)
    output_net_2 = LL.FeaturePoolLayer(output_net_1,
                                       pool_size=input_width,
                                       pool_function=T.mean)

    return output_net_2
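# A hedged usage sketch, not part of the original file: build the network for
# 22-channel sequences of (say) 100 time steps and compile a deterministic
# prediction function. The variable names here are illustrative only.
import theano
import theano.tensor as T
import lasagne
import lasagne.layers as LL

X = T.tensor3('X')  # (batch, 22 channels, time)
net = build_1Dregression_v1(input_var=X, input_width=100)
prediction = LL.get_output(net, deterministic=True)
predict_fn = theano.function([X], prediction)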
def _setup_model(self, num_features, num_rows):
    if self.fit_intercept:
        b = lasagne.init.Constant(0.)
    else:
        b = None

    X_sym = T.matrix()
    y_sym = T.ivector()
    bag_labels = T.ivector()

    input_layer = layers.InputLayer(shape=(num_rows, num_features),
                                    input_var=X_sym)
    if self.hidden_units <= 1:
        instance_log_odds = layers.DenseLayer(
            input_layer,
            num_units=1,
            W=lasagne.init.Constant(0.),
            b=b,
            nonlinearity=lasagne.nonlinearities.linear)
    else:
        instance_log_odds = layers.DenseLayer(
            input_layer,
            num_units=self.hidden_units,
            W=lasagne.init.GlorotUniform(1.0),
            b=b,
            nonlinearity=lasagne.nonlinearities.linear)
        instance_log_odds = layers.FeaturePoolLayer(
            instance_log_odds,
            pool_size=self.hidden_units,
            pool_function=T.max)
    instance_log_odds = layers.FlattenLayer(instance_log_odds, outdim=1)

    instance_log_odds_output = layers.get_output(instance_log_odds, X_sym)
    instance_probs_output = T.nnet.sigmoid(instance_log_odds_output)

    self.all_params = layers.get_all_params(instance_log_odds, trainable=True)

    bag_mapper = T.transpose(T.extra_ops.to_one_hot(bag_labels,
                                                    T.max(bag_labels) + 1))
    # if previous layers were probabilities:
    # bag_probs = 1 - T.exp(T.dot(bag_mapper, T.log(1 - instance_probs_output)))
    # if previous layers were log odds:
    bag_probs = 1 - T.exp(T.dot(bag_mapper,
                                -T.nnet.softplus(instance_log_odds_output)))

    if self.C is None:
        regularization = 0
    else:
        # I scale the penalty by num_rows since the likelihood term is the
        # average over instances, instead of the sum (like sklearn). This is
        # to make the learning rate not depend on the dataset (or minibatch)
        # size, but it means we have to know the minibatch size here in order
        # for C to be the same as for sklearn.
        #
        # Note: this applies the same regularization to all "regularizable"
        # parameters in the whole network (everything but the bias terms).
        # I need to think more about whether this makes sense for the deep
        # networks, though it's probably a reasonable starting point.
        regularization = (1.0 / self.C / num_rows *
                          lasagne.regularization.regularize_network_params(
                              instance_log_odds, self.penalty))

    # This chunk is a bit repetitive and could be simplified:
    bag_loss = T.mean(lasagne.objectives.binary_crossentropy(
        bag_probs, y_sym)) + regularization
    self.f_train_bag = theano.function(
        [X_sym, y_sym, bag_labels],
        [bag_loss],
        updates=self.updater(bag_loss, self.all_params,
                             learning_rate=self.learning_rate))

    nobag_loss = T.mean(lasagne.objectives.binary_crossentropy(
        instance_probs_output, y_sym)) + regularization
    self.f_train_nobag = theano.function(
        [X_sym, y_sym],
        [nobag_loss],
        updates=self.updater(nobag_loss, self.all_params,
                             learning_rate=self.learning_rate))

    self.f_bag_logprobs = theano.function([X_sym, bag_labels],
                                          T.log(bag_probs))
    self.f_logprobs = theano.function([X_sym], T.log(instance_probs_output))
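# The bag-probability line above relies on the identity
# log(1 - sigmoid(z)) = -softplus(z), so summing -softplus over a bag's
# instance log odds and exponentiating yields prod_i (1 - p_i), i.e. a
# noisy-OR over instance probabilities. A small NumPy check of that
# identity (illustrative only, not from the original code):
import numpy as np

z = np.array([-1.3, 0.2, 2.5])        # instance log odds within one bag
p = 1.0 / (1.0 + np.exp(-z))          # instance probabilities
softplus = np.log1p(np.exp(z))        # softplus(z) = log(1 + exp(z))
# the two ways of computing the noisy-OR bag probability agree
assert np.isclose(1 - np.prod(1 - p), 1 - np.exp(-softplus.sum()))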
#                            num_filters=128,
#                            filter_size=(3, 3),
#                            W=lasagne.init.Orthogonal(gain='relu'),
#                            nonlinearity=leaky_relu,
#                            border_mode='same')
pool6 = dnn.MaxPool2DDNNLayer(conv5, (3, 3), stride=(2, 2))
merge = RotateMergeLayer(pool6)

dense1_dropout = lasagne.layers.DropoutLayer(merge, p=0.5)
dense1a = layers.DenseLayer(dense1_dropout,
                            num_units=512,
                            W=lasagne.init.Normal(),
                            nonlinearity=None)
dense1 = layers.FeaturePoolLayer(dense1a, 2)

dense2_dropout = lasagne.layers.DropoutLayer(dense1, p=0.5)
dense2a = layers.DenseLayer(dense2_dropout,
                            num_units=512,
                            W=lasagne.init.Normal(),
                            nonlinearity=None)
dense2 = layers.FeaturePoolLayer(dense2a, 2)

out_dropout = lasagne.layers.DropoutLayer(dense2, p=0.5)
output = layers.DenseLayer(out_dropout,
                           num_units=4,
                           nonlinearity=nonlinearities.sigmoid)

# collect layers to save them later
all_layers = [
dropout = lambda p=0.1, rescale=True: lambda incoming: \
    layers.DropoutLayer(incoming, p=p, rescale=rescale) if p is not None else incoming

batch_norm = lambda axes='auto': lambda incoming: layers.BatchNormLayer(
    incoming, axes=axes)


class Select(object):
    def __getitem__(self, item):
        return lambda incomings: incomings[item]


select = Select()
take = select

nonlinearity = lambda f=None: lambda incoming: layers.NonlinearityLayer(
    incoming, (nonlinearities.LeakyRectify(0.05) if f is None else f))

elementwise = lambda f=T.add: lambda incomings: layers.ElemwiseMergeLayer(
    incomings, f)

elementwise_sum = lambda: lambda incomings: layers.ElemwiseMergeLayer(
    incomings, T.add)

elementwise_mean = lambda: lambda incomings: \
    nonlinearity(f=lambda x: x / len(incomings))(
        layers.ElemwiseMergeLayer(incomings, T.add))

flatten = lambda outdim=2: lambda incoming: layers.FlattenLayer(
    incoming, outdim=outdim)

feature_pool = lambda pool_size=4, axis=1, f=T.max: lambda incoming: \
    layers.FeaturePoolLayer(incoming, pool_size=pool_size, axis=axis, pool_function=f)
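# A brief composition sketch (assumed usage, not taken from the original
# module): each helper returns a function that wraps an incoming layer (or
# list of layers), so a pipeline can be built by chaining calls. The imports
# mirror what the helpers themselves require at module level.
import theano.tensor as T
from lasagne import layers, nonlinearities

net = layers.InputLayer((None, 64))
net = layers.DenseLayer(net, num_units=32, nonlinearity=None)
net = feature_pool(pool_size=4)(net)   # maxout over groups of 4 features -> 8 outputs
net = nonlinearity()(net)              # LeakyRectify(0.05) by default
net = dropout(p=0.25)(net)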
def build_model(self, model_spec, leak_alpha, pad, filter_shape):
    print("Building model from JSON...")

    def get_nonlinearity(layer):
        default_nonlinear = "ReLU"  # for all Conv2DLayer, Conv2DCCLayer, and DenseLayer
        req = layer.get("nonlinearity") or default_nonlinear
        return {
            "LReLU": LeakyRectify(1. / leak_alpha),
            "None": None,
            "sigmoid": nonlinearities.sigmoid,
            "ReLU": nonlinearities.rectify,
            "softmax": nonlinearities.softmax,
            "tanh": nonlinearities.tanh
        }[req]

    def get_init(layer):
        default_init = "GlorotUniform"  # for both Conv2DLayer and DenseLayer (Conv2DCCLayer is None)
        req = layer.get("init") or default_init
        return {
            "Normal": lasagne.init.Normal(),
            "Orthogonal": lasagne.init.Orthogonal(gain='relu'),
            "GlorotUniform": lasagne.init.GlorotUniform()
        }[req]

    def get_custom(layer):
        return {
            "Fold4xChannelsLayer": Fold4xChannelsLayer,
            "Fold4xBatchesLayer": Fold4xBatchesLayer,
            "Unfold4xBatchesLayer": Unfold4xBatchesLayer
        }[layer]

    all_layers = [
        layers.InputLayer(shape=(self.batch_size,
                                 model_spec[0]["channels"],
                                 model_spec[0]["size"],
                                 model_spec[0]["size"]))
    ]
    if filter_shape == 'c01b':
        all_layers.append(
            layers.cuda_convnet.ShuffleBC01ToC01BLayer(all_layers[-1]))
    dimshuffle = False if filter_shape == 'c01b' else True

    for i in xrange(1, len(model_spec)):
        cs = model_spec[i]  # current spec
        if cs["type"] == "CONV":
            border_mode = 'full' if pad else 'valid'
            if cs.get("dropout"):
                all_layers.append(
                    lasagne.layers.DropoutLayer(all_layers[-1], p=cs["dropout"]))
            all_layers.append(
                layers.cuda_convnet.Conv2DCCLayer(
                    all_layers[-1],
                    num_filters=cs["num_filters"],
                    filter_size=(cs["filter_size"], cs["filter_size"]),
                    border_mode=border_mode,
                    W=get_init(cs),
                    nonlinearity=get_nonlinearity(cs),
                    dimshuffle=dimshuffle))
            if cs.get("pool_size"):
                all_layers.append(
                    layers.cuda_convnet.MaxPool2DCCLayer(
                        all_layers[-1],
                        pool_size=(cs["pool_size"], cs["pool_size"]),
                        stride=(cs["pool_stride"], cs["pool_stride"]),
                        dimshuffle=dimshuffle))
        elif cs["type"] == "FC":
            if (model_spec[i - 1]["type"] == "CONV") and (filter_shape == 'c01b'):
                all_layers.append(
                    layers.cuda_convnet.ShuffleC01BToBC01Layer(all_layers[-1]))
            if cs.get("dropout"):
                all_layers.append(
                    lasagne.layers.DropoutLayer(all_layers[-1], p=cs["dropout"]))
            all_layers.append(
                layers.DenseLayer(all_layers[-1],
                                  num_units=cs["num_units"],
                                  W=get_init(cs),
                                  nonlinearity=get_nonlinearity(cs)))
            if cs.get("pool_size"):
                all_layers.append(
                    layers.FeaturePoolLayer(all_layers[-1], cs["pool_size"]))
        elif cs["type"] == "OUTPUT":
            if cs.get("dropout"):
                all_layers.append(
                    lasagne.layers.DropoutLayer(all_layers[-1], p=cs["dropout"]))
            all_layers.append(
                layers.DenseLayer(all_layers[-1],
                                  num_units=self.num_output_classes,
                                  W=get_init(cs),
                                  nonlinearity=get_nonlinearity(cs)))
        elif cs["type"] == "CUSTOM":
            custom_layer = get_custom(cs["name"])
            all_layers.append(custom_layer(all_layers[-1]))
        else:
            raise NotImplementedError()
    return all_layers
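# A hypothetical model_spec illustrating the JSON layout this builder expects.
# The field names are the ones read above; the concrete sizes are made up.
model_spec = [
    {"channels": 3, "size": 64},                           # input description
    {"type": "CONV", "num_filters": 32, "filter_size": 5,
     "pool_size": 2, "pool_stride": 2},                    # conv + max pooling
    {"type": "FC", "num_units": 256, "dropout": 0.5,
     "pool_size": 2},                                      # dense + maxout via FeaturePoolLayer
    {"type": "OUTPUT", "dropout": 0.5, "nonlinearity": "softmax"},
]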
def build_model():
    net = OrderedDict()
    net['input'] = layers.InputLayer(shape=(train_params['batch_size'], 1,
                                            train_params['image_size'],
                                            train_params['image_size']))
    net['conv11'] = dnn.Conv2DDNNLayer(net['input'],
                                       num_filters=64,
                                       filter_size=(3, 3),
                                       W=lasagne.init.Orthogonal(gain='relu'),
                                       pad='same',
                                       nonlinearity=very_leaky_rectify)
    net['conv12'] = dnn.Conv2DDNNLayer(net['conv11'],
                                       num_filters=64,
                                       filter_size=(3, 3),
                                       pad='same',
                                       W=lasagne.init.Orthogonal(gain='relu'),
                                       nonlinearity=very_leaky_rectify)
    net['pool1'] = dnn.MaxPool2DDNNLayer(net['conv12'], (3, 3), stride=(2, 2))
    net['conv21'] = dnn.Conv2DDNNLayer(net['pool1'],
                                       num_filters=96,
                                       filter_size=(3, 3),
                                       pad='same',
                                       W=lasagne.init.Orthogonal(gain='relu'),
                                       nonlinearity=very_leaky_rectify)
    net['conv22'] = dnn.Conv2DDNNLayer(net['conv21'],
                                       num_filters=96,
                                       filter_size=(3, 3),
                                       pad='same',
                                       W=lasagne.init.Orthogonal(gain='relu'),
                                       nonlinearity=very_leaky_rectify)
    net['pool2'] = dnn.MaxPool2DDNNLayer(net['conv22'], (3, 3), stride=(2, 2))
    net['conv31'] = dnn.Conv2DDNNLayer(net['pool2'],
                                       num_filters=128,
                                       filter_size=(3, 3),
                                       pad='same',
                                       W=lasagne.init.Orthogonal(gain='relu'),
                                       nonlinearity=very_leaky_rectify)
    net['conv32'] = dnn.Conv2DDNNLayer(net['conv31'],
                                       num_filters=128,
                                       filter_size=(3, 3),
                                       pad='same',
                                       W=lasagne.init.Orthogonal(gain='relu'),
                                       nonlinearity=very_leaky_rectify)
    net['conv33'] = dnn.Conv2DDNNLayer(net['conv32'],
                                       num_filters=128,
                                       filter_size=(3, 3),
                                       pad='same',
                                       W=lasagne.init.Orthogonal(gain='relu'),
                                       nonlinearity=very_leaky_rectify)
    net['pool3'] = dnn.MaxPool2DDNNLayer(net['conv33'], (3, 3), stride=(2, 2))
    net['conv41'] = dnn.Conv2DDNNLayer(net['pool3'],
                                       num_filters=196,
                                       filter_size=(3, 3),
                                       pad='same',
                                       W=lasagne.init.Orthogonal(gain='relu'),
                                       nonlinearity=very_leaky_rectify)
    net['conv42'] = dnn.Conv2DDNNLayer(net['conv41'],
                                       num_filters=196,
                                       filter_size=(3, 3),
                                       pad='same',
                                       W=lasagne.init.Orthogonal(gain='relu'),
                                       nonlinearity=very_leaky_rectify)
    net['conv43'] = dnn.Conv2DDNNLayer(net['conv42'],
                                       num_filters=256,
                                       filter_size=(3, 3),
                                       pad='same',
                                       W=lasagne.init.Orthogonal(gain='relu'),
                                       nonlinearity=very_leaky_rectify)
    net['pool4'] = dnn.MaxPool2DDNNLayer(net['conv43'], (3, 3), stride=(2, 2))
    # net['drop0'] = lasagne.layers.DropoutLayer(net['pool4'], p=0.5)
    net['fc1'] = layers.DenseLayer(net['pool4'],
                                   num_units=256,
                                   W=lasagne.init.Normal(),
                                   nonlinearity=None)
    net['maxout1'] = layers.FeaturePoolLayer(net['fc1'], 2)
    net['drop1'] = lasagne.layers.DropoutLayer(net['maxout1'], p=0.5)
    net['fc2'] = layers.DenseLayer(net['drop1'],
                                   num_units=256,
                                   W=lasagne.init.Normal(),
                                   nonlinearity=None)
    net['maxout2'] = layers.FeaturePoolLayer(net['fc2'], 2)
    net['drop2'] = lasagne.layers.DropoutLayer(net['maxout2'], p=0.5)
    net['output'] = layers.DenseLayer(net['drop2'],
                                      num_units=2,
                                      nonlinearity=None)
    return net
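# Illustrative use of the returned OrderedDict (assumptions: train_params and
# very_leaky_rectify are defined at module level as the function expects, and
# a cuDNN-capable GPU backs the dnn layers). The names below are hypothetical;
# the input batch must match train_params['batch_size'], since the InputLayer
# shape is fixed.
import theano
import theano.tensor as T
import lasagne.layers as layers

net = build_model()
X = T.tensor4('X')
# raw scores: the output layer has nonlinearity=None
scores = layers.get_output(net['output'], X, deterministic=True)
predict_fn = theano.function([X], scores)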