def build_cnn(num_filters, filter_sizes, strides, hidden_sizes):
    print("Building CNN")
    input_var = T.tensor4(name='input', dtype=theano.config.floatX)  # float32
    l_in = L.InputLayer(shape=(None,) + S.IMG, input_var=input_var)
    S.print_shape(L.get_output_shape(l_in))
    l_hid = l_in
    for n_filt, filt_size, stride in zip(num_filters, filter_sizes, strides):
        l_hid = L.Conv2DLayer(l_hid, num_filters=n_filt,
                              filter_size=filt_size, stride=stride)
        S.print_shape(L.get_output_shape(l_hid))
    for h_size in hidden_sizes:
        l_hid = L.DenseLayer(l_hid, num_units=h_size)
        S.print_shape(L.get_output_shape(l_hid))
    l_out = L.DenseLayer(l_hid, num_units=S.OUTPUT,
                         nonlinearity=lasagne.nonlinearities.softmax)
    S.print_shape(L.get_output_shape(l_out))
    variables = L.get_all_params(l_out)
    for v in variables:
        print("variable: ", v, " dtype: ", v.dtype)
    return l_out, input_var
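# Hedged usage sketch (not part of the original code): how a network built in the
# style above is typically turned into a prediction function. The project's `S`
# settings module is not available here, so this standalone version re-creates a
# tiny CNN with explicit shapes; all sizes are illustrative assumptions.
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers as L

input_var = T.tensor4('input')
l_in = L.InputLayer(shape=(None, 3, 32, 32), input_var=input_var)
l_conv = L.Conv2DLayer(l_in, num_filters=16, filter_size=3, stride=2)
l_out = L.DenseLayer(l_conv, num_units=10,
                     nonlinearity=lasagne.nonlinearities.softmax)
print("output shape:", L.get_output_shape(l_out))             # (None, 10)

# Compile a deterministic forward pass and run it on random data.
predict = theano.function([input_var],
                          L.get_output(l_out, deterministic=True))
probs = predict(np.random.rand(4, 3, 32, 32).astype(theano.config.floatX))
print(probs.shape)                                            # (4, 10)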
def test_slice_layer():
    from lasagne.layers import SliceLayer, InputLayer, get_output_shape,\
        get_output
    from numpy.testing import assert_array_almost_equal as aeq

    in_shp = (3, 5, 2)
    l_inp = InputLayer(in_shp)
    l_slice_ax0 = SliceLayer(l_inp, axis=0, indices=0)
    l_slice_ax1 = SliceLayer(l_inp, axis=1, indices=slice(3, 5))
    l_slice_ax2 = SliceLayer(l_inp, axis=-1, indices=-1)

    x = np.arange(np.prod(in_shp)).reshape(in_shp).astype('float32')
    x1 = x[0]
    x2 = x[:, 3:5]
    x3 = x[:, :, -1]

    assert get_output_shape(l_slice_ax0) == x1.shape
    assert get_output_shape(l_slice_ax1) == x2.shape
    assert get_output_shape(l_slice_ax2) == x3.shape

    aeq(get_output(l_slice_ax0, x).eval(), x1)
    aeq(get_output(l_slice_ax1, x).eval(), x2)
    aeq(get_output(l_slice_ax2, x).eval(), x3)

    # test slicing None dimension
    in_shp = (2, None, 2)
    l_inp = InputLayer(in_shp)
    l_slice_ax1 = SliceLayer(l_inp, axis=1, indices=slice(3, 5))
    assert get_output_shape(l_slice_ax1) == (2, None, 2)
    aeq(get_output(l_slice_ax1, x).eval(), x2)
def dense_residual(net, last_layer, name, nonlinearity=nonlinearities.rectify,
                   dropout=None):

    # original residual unit
    shape = layers.get_output_shape(net[last_layer])
    num_units = shape[1]

    net[name+'_1resid'] = layers.DenseLayer(net[last_layer], num_units=num_units,
                                            W=init.GlorotUniform(), b=None,
                                            nonlinearity=None)
    net[name+'_1resid_norm'] = layers.BatchNormLayer(net[name+'_1resid'])
    net[name+'_1resid_active'] = layers.NonlinearityLayer(
        net[name+'_1resid_norm'], nonlinearity=nonlinearity)

    if dropout:
        net[name+'_dropout'] = layers.DropoutLayer(net[name+'_1resid_active'],
                                                   p=dropout)
        last_layer = name+'_dropout'
    else:
        last_layer = name+'_1resid_active'

    # bottleneck residual layer
    net[name+'_2resid'] = layers.DenseLayer(net[last_layer], num_units=num_units,
                                            W=init.GlorotUniform(), b=None,
                                            nonlinearity=None)
    net[name+'_2resid_norm'] = layers.BatchNormLayer(net[name+'_2resid'])

    # combine input with residuals
    net[name+'_residual'] = layers.ElemwiseSumLayer([net[last_layer],
                                                     net[name+'_2resid_norm']])
    net[name+'_resid'] = layers.NonlinearityLayer(net[name+'_residual'],
                                                  nonlinearity=nonlinearity)
    return net
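# Hedged usage sketch (assumption, not from the original repo): attaching
# dense_residual to a small network held in an OrderedDict. The layer names
# 'input'/'dense1' and the sizes are illustrative only.
from collections import OrderedDict
from lasagne import layers, init, nonlinearities

net = OrderedDict()
net['input'] = layers.InputLayer((None, 100))
net['dense1'] = layers.DenseLayer(net['input'], num_units=64,
                                  W=init.GlorotUniform(),
                                  nonlinearity=nonlinearities.rectify)
net = dense_residual(net, 'dense1', 'block1', dropout=0.1)
# The block keeps the feature dimension, so the sum with the shortcut is valid.
print(layers.get_output_shape(net['block1_resid']))           # (None, 64)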
def _invert_layer(self, layer, feeder): layer_type = type(layer) if L.get_output_shape(feeder) != L.get_output_shape(layer): feeder = L.ReshapeLayer(feeder, (-1, ) + L.get_output_shape(layer)[1:]) if layer_type is L.InputLayer: return self._invert_InputLayer(layer, feeder) elif layer_type is L.FlattenLayer: return self._invert_FlattenLayer(layer, feeder) elif layer_type is L.DenseLayer: return self._invert_DenseLayer(layer, feeder) elif layer_type is L.Conv2DLayer: return self._invert_Conv2DLayer(layer, feeder) elif layer_type is L.DropoutLayer: return self._invert_DropoutLayer(layer, feeder) elif layer_type in [L.MaxPool2DLayer, L.MaxPool1DLayer]: return self._invert_MaxPoolingLayer(layer, feeder) elif layer_type is L.PadLayer: return self._invert_PadLayer(layer, feeder) elif layer_type is L.SliceLayer: return self._invert_SliceLayer(layer, feeder) elif layer_type is L.LocalResponseNormalization2DLayer: return self._invert_LocalResponseNormalisation2DLayer( layer, feeder) elif layer_type is L.GlobalPoolLayer: return self._invert_GlobalPoolLayer(layer, feeder) else: return self._invert_UnknownLayer(layer, feeder)
def test_clone(): clone_op = clone(conv(13)) net1 = net((None, 3, 16, 16))(clone_op) net2 = net((None, 3, 17, 17))(clone_op) assert get_output_shape(net1.outputs[0]) == (None, 13, 14, 14), get_output_shape( net1.outputs[0]) assert get_output_shape(net2.outputs[0]) == (None, 13, 15, 15), get_output_shape( net2.outputs[0]) X = T.ftensor4() predict1 = theano.function([X], net1(X)[0]) predict2 = theano.function([X], net2(X)[0]) for _ in range(16): X = np.random.uniform(size=(15, 3, 17, 17)).astype('float32') X_ = X[:, :, :-1, :-1] assert predict1(X_).shape == (15, 13, 14, 14), predict1(X_).shape assert predict2(X).shape == (15, 13, 15, 15), predict2(X).shape assert np.allclose( predict1(X_), predict2(X)[:, :, :-1, :-1], atol=1.0e-6), np.max( np.abs(predict1(X_) - predict2(X)[:, :, :-1, :-1]))
def __init__(self, values, ref_img, kern_std, norm_type="sym", name=None, trainable_kernels=False, _bilateral=False): assert (norm_type in ["sym", "pre", "post", None]) super(GaussianFilterLayer, self).__init__(incomings=[values, ref_img], name=name) self.val_dim = ll.get_output_shape(values)[1] self.ref_dim = ll.get_output_shape(ref_img)[1] if None in (self.val_dim, self.ref_dim): raise ValueError("Gaussian filtering requires known channel \ dimensions for all inputs.") self.norm_type = norm_type if _bilateral: self.ref_dim += 2 if len(kern_std) != self.ref_dim: raise ValueError("Number of kernel weights must match reference \ dimensionality. Got %d weights for %d reference dims." % (len(kern_std), self.ref_dim)) self.kern_std = self.add_param(kern_std, (self.ref_dim, ), name="kern_std", trainable=trainable_kernels, regularizable=False)
def build_residual_block(incoming_layer, ratio_n_filter=1.0, ratio_size=1.0, has_left_branch=False, upscale_factor=4, ix=''): simple_block_name_pattern = [ 'res%s_branch%i%s', 'bn%s_branch%i%s', 'res%s_branch%i%s_relu' ] net = OrderedDict() # right branch net_tmp, last_layer_name = build_simple_block( incoming_layer, map(lambda s: s % (ix, 2, 'a'), simple_block_name_pattern), int(layers.get_output_shape(incoming_layer)[1] * ratio_n_filter), 1, int(1.0 / ratio_size), 0) net.update(net_tmp) net_tmp, last_layer_name = build_simple_block( net[last_layer_name], map(lambda s: s % (ix, 2, 'b'), simple_block_name_pattern), layers.get_output_shape(net[last_layer_name])[1], 3, 1, 1) net.update(net_tmp) net_tmp, last_layer_name = build_simple_block( net[last_layer_name], map(lambda s: s % (ix, 2, 'c'), simple_block_name_pattern), layers.get_output_shape(net[last_layer_name])[1] * upscale_factor, 1, 1, 0, nonlin=None) net.update(net_tmp) right_tail = net[last_layer_name] left_tail = incoming_layer # left branch if has_left_branch: net_tmp, last_layer_name = build_simple_block( incoming_layer, map(lambda s: s % (ix, 1, ''), simple_block_name_pattern), int( layers.get_output_shape(incoming_layer)[1] * 4 * ratio_n_filter), 1, int(1.0 / ratio_size), 0, nonlin=None) net.update(net_tmp) left_tail = net[last_layer_name] net['res%s' % ix] = ElemwiseSumLayer([left_tail, right_tail], coeffs=1) net['res%s_relu' % ix] = NonlinearityLayer(net['res%s' % ix], nonlinearity=rectify) return net, 'res%s_relu' % ix
def pretty_print_network(self): info_list = [] working_layer = self.final_layer while True: if isinstance(working_layer, (InputLayer, DenseLayer, Conv2DLayer)): if isinstance(working_layer, InputLayer): info = ('Layer name: {}\n' '\tinput shape: {}\n' '\toutput shape {}' ''.format(working_layer.name, working_layer.shape, get_output_shape(working_layer))) elif isinstance(working_layer, Conv2DLayer): name = working_layer.name input_shape = working_layer.input_shape output_shape = get_output_shape(working_layer) num_filters = working_layer.num_filters filter_size = working_layer.filter_size pad = working_layer.pad stride = working_layer.stride info = ('Layer name: {}\n' '\tinput shape: {}\n' '\toutput shape: {}\n' '\tnum filters: {}\n' '\tkernel shape: {}\n' '\tpadding: {}\n' '\tstride: {}' ''.format(name, input_shape, output_shape, num_filters, filter_size, pad, stride)) elif isinstance(working_layer, DenseLayer): name = working_layer.name input_shape = working_layer.input_shape output_shape = get_output_shape(working_layer) num_units = working_layer.num_units info = ('Layer name: {}\n' '\tinput shape: {}\n' '\toutput shape: {}\n' '\tnum units: {}' ''.format( name, input_shape, output_shape, num_units, )) info_list.append(info) if not hasattr(working_layer, 'input_layer'): break working_layer = working_layer.input_layer for item in reversed(info_list): print(item) print() depth = len(info_list) - 1 print('Total network depth {} layers'.format(depth)) print('Depth is total of convolutional and fully connected layers.')
def build_pi_model():

    log.i('BUILDING RASPBERRY PI MODEL...')

    # Random Seed
    lasagne_random.set_rng(cfg.getRandomState())

    # Input layer for images
    net = l.InputLayer((None, cfg.IM_DIM, cfg.IM_SIZE[1], cfg.IM_SIZE[0]))

    # Convolutional layer groups
    for i in range(len(cfg.FILTERS)):

        # 3x3 Convolution + Stride
        net = batch_norm(
            l.Conv2DLayer(net,
                          num_filters=cfg.FILTERS[i],
                          filter_size=cfg.KERNEL_SIZES[i],
                          num_groups=cfg.NUM_OF_GROUPS[i],
                          pad='same',
                          stride=2,
                          W=initialization(cfg.NONLINEARITY),
                          nonlinearity=nonlinearity(cfg.NONLINEARITY)))

        log.i(('\tGROUP', i + 1, 'OUT SHAPE:', l.get_output_shape(net)))

    # Fully connected layers + dropout layers
    net = l.DenseLayer(net, cfg.DENSE_UNITS,
                       nonlinearity=nonlinearity(cfg.NONLINEARITY),
                       W=initialization(cfg.NONLINEARITY))
    net = l.DropoutLayer(net, p=cfg.DROPOUT)

    net = l.DenseLayer(net, cfg.DENSE_UNITS,
                       nonlinearity=nonlinearity(cfg.NONLINEARITY),
                       W=initialization(cfg.NONLINEARITY))
    net = l.DropoutLayer(net, p=cfg.DROPOUT)

    # Classification Layer (Softmax)
    net = l.DenseLayer(net, len(cfg.CLASSES),
                       nonlinearity=nonlinearity('softmax'),
                       W=initialization('softmax'))

    log.i(("\tFINAL NET OUT SHAPE:", l.get_output_shape(net)))
    log.i("...DONE!")

    # Model stats
    log.i(("MODEL HAS",
           (sum(hasattr(layer, 'W') for layer in l.get_all_layers(net))),
           "WEIGHTED LAYERS"))
    log.i(("MODEL HAS", l.count_params(net), "PARAMS"))

    return net
def __init__(self, unary, ref, sxy_bf=70, sc_bf=10, compat_bf=6, sxy_spatial=2, compat_spatial=2, num_iter=5, normalize_final_iter=True, trainable_kernels=False, name=None): super(CRFasRNNLayer, self).__init__(incomings=[unary, ref], name=name) self.sxy_bf = sxy_bf self.sc_bf = sc_bf self.compat_bf = compat_bf self.sxy_spatial = sxy_spatial self.compat_spatial = compat_spatial self.num_iter = num_iter self.normalize_final_iter = normalize_final_iter if ll.get_output_shape(ref)[1] not in [1, 3]: raise ValueError( "Reference image must be either color or greyscale \ (1 or 3 channels).") self.val_dim = ll.get_output_shape(unary)[1] # +2 for bilateral grid self.ref_dim = ll.get_output_shape(ref)[1] + 2 if self.ref_dim == 5: kstd_bf = np.array([sxy_bf, sxy_bf, sc_bf, sc_bf, sc_bf], np.float32) else: kstd_bf = np.array([sxy_bf, sxy_bf, sc_bf], np.float32) self.kstd_bf = self.add_param(kstd_bf, (self.ref_dim, ), name="kern_std", trainable=trainable_kernels, regularizable=False) gk = gkern(sxy_spatial, self.val_dim) self.W_spatial = self.add_param(gk, gk.shape, name="spatial_kernel", trainable=trainable_kernels, regularizable=False) if None in (self.val_dim, self.ref_dim): raise ValueError("CRF RNN requires known channel dimensions for \ all inputs.")
def test_stochastic_layer_network(): learning_rate = 0.1 momentum = 0.9 num_epoch = 1000 input = T.fmatrix('input') output = T.fmatrix('output') print 'FF-Layer: (Batch_size, n_features)' print 'Building stochastic layer model' l_in = L.InputLayer(shape=(1, 10), input_var=input) l_2 = L.DenseLayer(l_in, num_units=10, nonlinearity=lasagne.nonlinearities.softmax, W=lasagne.init.Constant(0.)) print 'Input Layer shape: ', L.get_output_shape(l_in) print 'Dense Layer shape: ', L.get_output_shape(l_2) l_stochastic_layer = StochasticLayer(l_2, estimator='ST') print 'Stochastic Layer shape: ', L.get_output_shape(l_stochastic_layer) l_out = L.DenseLayer(l_stochastic_layer, num_units=10, b=lasagne.init.Constant(0.)) print 'Final Dense Layer shape: ', L.get_output_shape(l_out) network_output = L.get_output(l_out) print 'Building loss function...' loss = lasagne.objectives.squared_error(network_output, output) loss = loss.mean() params = L.get_all_params(l_out, trainable=True) updates = nesterov_momentum(loss, params, learning_rate, momentum) train = theano.function([input, output], loss, updates=updates, allow_input_downcast=True) output_fn = theano.function([input], network_output, allow_input_downcast=True) test_X = np.ones((1, 10)) test_Y = np.ones((1, 10)) losses = [] mean_losses = [] for epoch in range(num_epoch): print 'Epoch number: ', epoch losses.append(train(test_X, test_Y)) print 'epoch {} mean loss {}'.format(epoch, np.mean(losses)) print 'Current Output: ', output_fn(test_X) mean_losses.append(np.mean(losses)) plt.title("Mean loss") plt.xlabel("Training examples") plt.ylabel("Loss") plt.plot(mean_losses, label="train") plt.grid() plt.legend() plt.draw()
def _invert_DenseLayer(self, layer, feeder): # Warning they are swapped here feeder = self._put_rectifiers(feeder, layer) feeder = self._get_normalised_relevance_layer(layer, feeder) output_units = np.prod(L.get_output_shape(layer.input_layer)[1:]) output_layer = L.DenseLayer(feeder, num_units=output_units) W = output_layer.W tmp_shape = np.asarray((-1, ) + L.get_output_shape(output_layer)[1:]) x_layer = L.ReshapeLayer(layer.input_layer, tmp_shape.tolist()) output_layer = L.ElemwiseMergeLayer(incomings=[x_layer, output_layer], merge_function=T.mul) output_layer.W = W return output_layer
def _invert_Conv2DLayer(self, layer, feeder):
    # Warning they are swapped here
    feeder = self._put_rectifiers(feeder, layer)
    feeder = self._get_normalised_relevance_layer(layer, feeder)

    f_s = layer.filter_size
    if layer.pad == 'same':
        pad = 'same'
    elif layer.pad == 'valid' or layer.pad == (0, 0):
        pad = 'full'
    else:
        raise RuntimeError("Define your padding as valid or same.")

    # By definition, flip_filters must be on for this to be a proper
    # deconvolution.
    num_filters = L.get_output_shape(layer.input_layer)[1]
    if layer.stride == (4, 4):
        # Todo: similar code in the gradient-based explainers. Merge.
        feeder = L.Upscale2DLayer(feeder, layer.stride, mode='dilate')
        output_layer = L.Conv2DLayer(feeder, num_filters=num_filters,
                                     filter_size=f_s, stride=1, pad=pad,
                                     nonlinearity=None, b=None,
                                     flip_filters=True)
        conv_layer = output_layer
        tmp = L.SliceLayer(output_layer, slice(0, -3), axis=3)
        output_layer = L.SliceLayer(tmp, slice(0, -3), axis=2)
        output_layer.W = conv_layer.W
    else:
        output_layer = L.Conv2DLayer(feeder, num_filters=num_filters,
                                     filter_size=f_s, stride=1, pad=pad,
                                     nonlinearity=None, b=None,
                                     flip_filters=True)
    W = output_layer.W

    # Do the multiplication.
    x_layer = L.ReshapeLayer(layer.input_layer,
                             (-1, ) + L.get_output_shape(output_layer)[1:])
    output_layer = L.ElemwiseMergeLayer(incomings=[x_layer, output_layer],
                                        merge_function=T.mul)
    output_layer.W = W
    return output_layer
def build_convpool_lstm(input_vars): convnets = [] W_init = None for i in range(numTimeWin): if i == 0: convnet, W_init = build_cnn(input_vars[i]) else: convnet, _ = build_cnn(input_vars[i], W_init) convnets.append(FlattenLayer(convnet)) # at this point convnets shape is [numTimeWin][n_samples, features] # we want the shape to be [n_samples, features, numTimeWin] convpool = ConcatLayer(convnets) # convpool = ReshapeLayer(convpool, ([0], -1, numTimeWin)) convpool = ReshapeLayer(convpool, ([0], numTimeWin, get_output_shape(convnets[0])[1])) # Input to LSTM should have the shape as (batch size, SEQ_LENGTH, num_features) convpool = LSTMLayer(convpool, num_units=128, grad_clipping=GRAD_CLIP, nonlinearity=lasagne.nonlinearities.tanh) # After LSTM layer you either need to reshape or slice it (depending on whether you # want to keep all predictions or just the last prediction. # http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html # https://github.com/Lasagne/Recipes/blob/master/examples/lstm_text_generation.py convpool = SliceLayer(convpool, -1, 1) # Selecting the last prediction # A fully-connected layer of 256 units with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=512, nonlinearity=lasagne.nonlinearities.rectify) # We only need the final prediction, we isolate that quantity and feed it # to the next layer. # And, finally, the 10-unit output layer with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=4, nonlinearity=lasagne.nonlinearities.softmax) return convpool
def get_conv_xy(layer, deterministic=True): w_np = layer.W.get_value() input_layer = layer.input_layer if layer.pad == 'same': input_layer = L.PadLayer(layer.input_layer, width=np.array(w_np.shape[2:])/2, batch_ndim=2) input_shape = L.get_output_shape(input_layer) max_x = input_shape[2] - w_np.shape[2] max_y = input_shape[3] - w_np.shape[3] srng = RandomStreams() patch_x = srng.random_integers(low=0, high=max_x) patch_y = srng.random_integers(low=0, high=max_y) #print("input_shape shape: ", input_shape) #print("pad: \"%s\""% (layer.pad,)) #print(" stride: " ,layer.stride) #print("max_x %d max_y %d"%(max_x,max_y)) x = L.get_output(input_layer, deterministic=deterministic) x = x[:, :, patch_x:patch_x + w_np.shape[2], patch_y:patch_y + w_np.shape[3]] x = T.flatten(x, 2) # N,D w = layer.W if layer.flip_filters: w = w[:, :, ::-1, ::-1] w = T.flatten(w, outdim=2).T # D,O y = T.dot(x, w) # N,O if layer.b is not None: y += T.shape_padaxis(layer.b, axis=0) return x, y
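# Hedged usage sketch (assumption): pairing get_conv_xy with a small valid-padded
# Conv2DLayer to sample one random receptive-field patch x and its linear
# response y per example. All shapes are illustrative.
import numpy as np
import theano
import lasagne.layers as L

l_in = L.InputLayer((None, 3, 16, 16))
l_conv = L.Conv2DLayer(l_in, num_filters=8, filter_size=3)    # pad defaults to 0
x_sym, y_sym = get_conv_xy(l_conv)
f = theano.function([l_in.input_var], [x_sym, y_sym])
x_val, y_val = f(np.random.rand(5, 3, 16, 16).astype('float32'))
print(x_val.shape, y_val.shape)   # (5, 27) = (N, C*3*3) and (5, 8) = (N, O)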
def build_convpool_conv1d(input_vars): """ Builds the complete network with 1D-conv layer to integrate time from sequences of EEG images. :param input_vars: list of EEG images (one image per time window) :return: a pointer to the output of last layer """ convnets = [] W_init = None # Build 7 parallel CNNs with shared weights for i in range(numTimeWin): if i == 0: convnet, W_init = build_cnn(input_vars[i]) else: convnet, _ = build_cnn(input_vars[i], W_init) convnets.append(FlattenLayer(convnet)) # at this point convnets shape is [numTimeWin][n_samples, features] # we want the shape to be [n_samples, features, numTimeWin] convpool = ConcatLayer(convnets) convpool = ReshapeLayer(convpool, ([0], numTimeWin, get_output_shape(convnets[0])[1])) convpool = DimshuffleLayer(convpool, (0, 2, 1)) # convpool = ReshapeLayer(convpool, (-1, numTimeWin)) # input to 1D convlayer should be in (batch_size, num_input_channels, input_length) convpool = Conv1DLayer(convpool, 64, 3) # A fully-connected layer of 512 units with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=512, nonlinearity=lasagne.nonlinearities.rectify) # And, finally, the output layer with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=nb_classes, nonlinearity=lasagne.nonlinearities.softmax) return convpool
def build_lasagne_model(architecture, dim, input_tensor, shape):
    last_layer = InputLayer(shape, input_var=input_tensor)

    print('Model shape:')
    for i, e in enumerate(architecture):
        dropout, layers = e
        print(' Layer {}:'.format(i))
        ll = []
        for l in layers:
            filter_size, num_filters, nonlinearity, pad = l
            cl = Conv1DLayer(last_layer,
                             num_filters=num_filters,
                             filter_size=(filter_size),
                             nonlinearity=nonlinearity_mapping[nonlinearity],
                             pad=pad)
            ll.append(cl)
            print(' - size: {}\tnum: {}\tout: {}'.format(
                filter_size, num_filters, get_output_shape(cl)))
        c = ConcatLayer(ll, axis=1)
        last_layer = DropoutLayer(c, p=dropout)
        print(' - dropout: {}'.format(dropout))

    return Conv1DLayer(last_layer, num_filters=1, filter_size=3,
                       nonlinearity=sigmoid, pad='same')
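# Hedged usage sketch (assumption): the architecture spec consumed above appears
# to be a list of (dropout, [(filter_size, num_filters, nonlinearity_key, pad), ...])
# tuples, and nonlinearity_mapping is assumed to resolve the string keys
# (e.g. 'relu'). Input is (batch, channels, length) for the Conv1D layers.
import theano.tensor as T

architecture = [
    (0.25, [(3, 16, 'relu', 'same'), (5, 16, 'relu', 'same')]),
    (0.25, [(3, 32, 'relu', 'same')]),
]
input_tensor = T.tensor3('x')
model = build_lasagne_model(architecture, dim=None,
                            input_tensor=input_tensor,
                            shape=(None, 4, 100))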
def __init__( self, network, num_classes, boxes=[(5.,.5), (.25,.5), (.5,.25), (.3,.3)], use_custom_cost=False ): assert('detection' in network and 'input' in network) super(Yolo2ObjectDetector, self).__init__() self.network = network self.num_classes = num_classes self.boxes = boxes self.use_custom_cost = use_custom_cost self.input = network['input'].input_var self.output_shape = layers.get_output_shape(network['detection'])[-2:] self.input_shape = network['input'].shape[-2:] # self._hyperparameters = {'ratios': ratios, 'smin': smin, 'smax': smax} # set objectness predictor nonlinearity to sigmoid and # class predictor to softmax self.output = self._format_output(layers.get_output(network['detection'], deterministic=False)) self.output_test = self._format_output(layers.get_output(network['detection'], deterministic=True)) return
def build_convpool_conv1d(input_vars, nb_classes, imsize=32, n_colors=3, n_timewin=3): """ Builds the complete network with 1D-conv layer to integrate time from sequences of EEG images. :param input_vars: list of EEG images (one image per time window) :param nb_classes: number of classes :param imsize: size of the input image (assumes a square input) :param n_colors: number of color channels in the image :param n_timewin: number of time windows in the snippet :return: a pointer to the output of last layer """ convnets = [] w_init = None # Build 7 parallel CNNs with shared weights for i in range(n_timewin): if i == 0: convnet, w_init = build_cnn(input_vars[i], imsize=imsize, n_colors=n_colors) else: convnet, _ = build_cnn(input_vars[i], w_init=w_init, imsize=imsize, n_colors=n_colors) convnets.append(FlattenLayer(convnet)) # at this point convnets shape is [numTimeWin][n_samples, features] # we want the shape to be [n_samples, features, numTimeWin] convpool = ConcatLayer(convnets) convpool = ReshapeLayer(convpool, ([0], n_timewin, get_output_shape(convnets[0])[1])) convpool = DimshuffleLayer(convpool, (0, 2, 1)) # input to 1D convlayer should be in (batch_size, num_input_channels, input_length) convpool = Conv1DLayer(convpool, 64, 3) # A fully-connected layer of 512 units with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=512, nonlinearity=lasagne.nonlinearities.rectify) # And, finally, the output layer with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=nb_classes, nonlinearity=lasagne.nonlinearities.softmax) return convpool
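# Hedged usage sketch (assumption): the per-window images are usually passed in
# as one 5D tensor of shape (n_timewin, n_samples, n_colors, h, w), so that
# input_vars[i] is the 4D batch for window i; build_cnn comes from the
# surrounding project and is assumed to be importable.
import theano
import theano.tensor as T
from lasagne.layers import get_output

input_vars = T.TensorType(theano.config.floatX, ((False,) * 5))('inputs')
network = build_convpool_conv1d(input_vars, nb_classes=4,
                                imsize=32, n_colors=3, n_timewin=3)
prediction = get_output(network, deterministic=True)
predict_fn = theano.function([input_vars], prediction,
                             allow_input_downcast=True)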
def createResizeLayer(input_layer_4d, resize_ratio, name=None): input_layer_shape = get_output_shape(input_layer_4d) batch_size = input_layer_shape[0] new_height = int(np.round(resize_ratio * input_layer_shape[2])) new_width = int(np.round(resize_ratio * input_layer_shape[3])) # ds = 1.0 / resize_ratio # rf = resize_ratio * ds rf = 1.0 # since we basically down sample with new_height, new_width rescaleA = np.tile( np.asarray([[rf, 0], [0, rf], [0, 0]], dtype=floatX).T.reshape([1, 2, 3]), [batch_size, 1, 1]).reshape([-1, 6]) param_layer = InputLayer( (batch_size, 6), input_var=theano.shared(rescaleA)) resize_layer = TransformerLayer(input_layer_4d, param_layer, new_height, new_width, name=name) return resize_layer
def conv2D_residual(net, last_layer, name, filter_size, nonlinearity=nonlinearities.rectify, dropout=None): if not isinstance(filter_size, (list, tuple)): filter_size = (filter_size, filter_size) # original residual unit shape = layers.get_output_shape(net[last_layer]) num_filters = shape[1] net[name+'_1resid'] = layers.Conv2DLayer(net[last_layer], num_filters=num_filters, filter_size=filter_size, stride=(1, 1), # 1000 W=init.GlorotUniform(), b=None, nonlinearity=None, pad='same') net[name+'_1resid_norm'] = layers.BatchNormLayer(net[name+'_1resid']) net[name+'_1resid_active'] = layers.NonlinearityLayer(net[name+'_1resid_norm'], nonlinearity=nonlinearity) if dropout: net[name+'_dropout'] = layers.DropoutLayer(net[name+'_1resid_active'], p=dropout) last_layer = name+'_dropout' else: last_layer = name+'_1resid_active' # bottleneck residual layer net[name+'_2resid'] = layers.Conv2DLayer(net[last_layer], num_filters=num_filters, filter_size=filter_size, stride=(1, 1), # 1000 W=init.GlorotUniform(), b=None, nonlinearity=None, pad='same') net[name+'_2resid_norm'] = layers.BatchNormLayer(net[name+'_2resid']) # combine input with residuals net[name+'_residual'] = layers.ElemwiseSumLayer([net[last_layer], net[name+'_2resid_norm']]) net[name+'_resid'] = layers.NonlinearityLayer(net[name+'_residual'], nonlinearity=nonlinearity) return net
def createConvtScaleSpaceLayer(input_layer_4d, resize_ratio_list, name=None): input_layer_shape = get_output_shape(input_layer_4d) batch_size = input_layer_shape[0] # patch_size = input_layer_shape[2] # orig_c = (float(input_layer_shape[2]) - 1.0) * 0.5 scale_space_layer_list = [] for resize_ratio in resize_ratio_list: rf = resize_ratio c = 0 # the implementation already works on 0-center coordinate system rescaleA = np.tile( np.asarray([[rf, 0], [0, rf], [c, c]], dtype=floatX).T.reshape([1, 2, 3]), [batch_size, 1, 1]).reshape([-1, 6]) param_layer = InputLayer((batch_size, 6), input_var=theano.shared(rescaleA)) resize_layer = TransformerLayer(input_layer_4d, param_layer, input_layer_shape[2], input_layer_shape[3]) scale_space_layer_list += [ # ReshapeLayer(resize_layer, # tuple([v for v in input_layer_shape] + [1])) DimshuffleLayer(resize_layer, (0, 1, 2, 3, 'x')) ] scale_space_layer = ConcatLayer(scale_space_layer_list, axis=4, name=name) return scale_space_layer
def build_convpool_lstm(input_vars, input_shape=None): """ Builds the complete network with LSTM layer to integrate time from sequences of EEG images. :param input_vars: list of EEG images (one image per time window) :return: a pointer to the output of last layer """ convnets = [] W_init = None # Build 7 parallel CNNs with shared weights for i in range(input_shape[0]): if i == 0: convnet, W_init = build_cnn(input_vars[i], input_shape) else: convnet, _ = build_cnn(input_vars[i], input_shape, W_init) convnets.append(FlattenLayer(convnet)) # at this point convnets shape is [numTimeWin][n_samples, features] # we want the shape to be [n_samples, features, numTimeWin] convpool = ConcatLayer(convnets) # convpool = ReshapeLayer(convpool, ([0], -1, numTimeWin)) convpool = ReshapeLayer( convpool, ([0], input_shape[0], get_output_shape(convnets[0])[1])) # Input to LSTM should have the shape as (batch size, SEQ_LENGTH, num_features) convpool = LSTMLayer(convpool, num_units=32, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.sigmoid) #convpool = lasagne.layers.dropout(convpool, p=.3) convpool = LSTMLayer(convpool, num_units=32, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.sigmoid) # After LSTM layer you either need to reshape or slice it (depending on whether you # want to keep all predictions or just the last prediction. # http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html # https://github.com/Lasagne/Recipes/blob/master/examples/lstm_text_generation.py convpool = SliceLayer(convpool, -1, 1) # Selecting the last prediction # A fully-connected layer of 256 units with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=256, nonlinearity=lasagne.nonlinearities.rectify) # We only need the final prediction, we isolate that quantity and feed it # to the next layer. # And, finally, the output layer with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=num_classes, nonlinearity=lasagne.nonlinearities.softmax) return convpool
def _add_context_encoder(self): self._net['batched_enc'] = reshape( self._net['enc'], (self._batch_size, self._input_context_size, get_output_shape(self._net['enc'])[-1])) self._net['context_enc'] = GRULayer(incoming=self._net['batched_enc'], num_units=self._hidden_layer_dim, grad_clipping=self._grad_clip, only_return_final=True, name='context_encoder') self._net['switch_enc_to_tv'] = T.iscalar(name='switch_enc_to_tv') self._net['thought_vector'] = InputLayer( shape=(None, self._hidden_layer_dim), input_var=T.fmatrix(name='thought_vector'), name='thought_vector') self._net['enc_result'] = SwitchLayer( incomings=[self._net['thought_vector'], self._net['context_enc']], condition=self._net['switch_enc_to_tv']) # We need the following to pass as 'givens' argument when compiling theano functions: self._default_thoughts_vector = T.zeros( (self._batch_size, self._hidden_layer_dim)) self._default_input_x = T.zeros( shape=(self._net['thought_vector'].input_var.shape[0], 1, 1), dtype=np.int32)
def buildSectorNet(self): sectorNet = InputLayer(self.inputShape, self.inputVar) for i, layer in enumerate(self.layerCategory): self.logger.debug('Build {}th conv layer'.format(i)) self.logger.debug('The output shape of {}th layer equal {}'.format( i - 1, get_output_shape(sectorNet))) kernelXDim = int(layer[-1]) kernelDim = (kernelXDim, ) * 3 conv3D = batch_norm( Conv3DLayer(incoming=sectorNet, num_filters=self.numOfFMs[i], filter_size=kernelDim, W=HeUniform(gain='relu'), nonlinearity=rectify, name='Conv3D')) self.logger.debug( 'The shape of {}th conv3D layer equals {}'.format( i, get_output_shape(conv3D))) sectorNet = ConcatLayer( [conv3D, sectorNet], 1, cropping=['center', 'None', 'center', 'center', 'center']) self.logger.debug( 'The shape of {}th concat layer equals {}'.format( i, get_output_shape(sectorNet))) assert get_output_shape(sectorNet) == (None, sum(self.numOfFMs) + 1, 1, 1, 1) sectorNet = batch_norm( Conv3DLayer(incoming=sectorNet, num_filters=2, filter_size=(1, 1, 1), W=HeUniform(gain='relu'))) self.logger.debug('The shape of last con3D layer equals {}'.format( get_output_shape(sectorNet))) sectorNet = ReshapeLayer(sectorNet, ([0], -1)) self.logger.debug('The shape of ReshapeLayer equals {}'.format( get_output_shape(sectorNet))) sectorNet = NonlinearityLayer(sectorNet, softmax) self.logger.debug( 'The shape of output layer, i.e. NonlinearityLayer, equals {}'. format(get_output_shape(sectorNet))) assert get_output_shape(sectorNet) == (None, self.numOfOutputClass) return sectorNet
def print_layer_shapes(self):
    print '\n', '-'*100
    print 'Net shapes:\n'

    layers = get_all_layers(self.net['l_dist'])
    for l in layers:
        print '%-20s \t%s' % (l.name, get_output_shape(l))
    print '\n', '-'*100
def print_layer_shapes(self): _logger.info('-' * 100) _logger.info('Net shapes:') layers = get_all_layers(self.net['l_dist']) for l in layers: _logger.info('%-20s \t%s' % (l.name, get_output_shape(l))) _logger.info('-' * 100)
def test_stochastic_layer_forward_pass():
    print 'FF-Layer: (Batch_size, n_features)'
    print 'Building stochastic layer model'
    l_in = L.InputLayer(shape=(10, 10))
    l_2 = L.DenseLayer(l_in, num_units=3,
                       nonlinearity=lasagne.nonlinearities.softmax)
    print 'Input Layer shape: ', L.get_output_shape(l_in)
    print 'Dense Layer shape: ', L.get_output_shape(l_2)
    l_stochastic_layer = StochasticLayer(l_2)
    print 'Stochastic Layer shape: ', L.get_output_shape(l_stochastic_layer)
    l_out = L.DenseLayer(l_stochastic_layer, num_units=1)
    network_output = L.get_output(l_out)
    print 'Building function...'
    function = theano.function([l_in.input_var], network_output)
    test_example = np.ones((10, 10))
    print 'Sample output: ', function(test_example)
def build_convpool_mix(input_vars, nb_classes, grad_clip=110, imsize=32, n_colors=3, n_timewin=7): """ Builds the complete network with LSTM and 1D-conv layers combined :param input_vars: list of EEG images (one image per time window) :param nb_classes: number of classes :param grad_clip: the gradient messages are clipped to the given value during the backward pass. :param imsize: size of the input image (assumes a square input) :param n_colors: number of color channels in the image :param n_timewin: number of time windows in the snippet :return: a pointer to the output of last layer """ convnets = [] w_init = None # Build 7 parallel CNNs with shared weights for i in range(n_timewin): if i == 0: convnet, w_init = build_cnn(input_vars[i], imsize=imsize, n_colors=n_colors) else: convnet, _ = build_cnn(input_vars[i], w_init=w_init, imsize=imsize, n_colors=n_colors) convnets.append(FlattenLayer(convnet)) # at this point convnets shape is [numTimeWin][n_samples, features] # we want the shape to be [n_samples, features, numTimeWin] convpool = ConcatLayer(convnets) convpool = ReshapeLayer(convpool, ([0], n_timewin, get_output_shape(convnets[0])[1])) reformConvpool = DimshuffleLayer(convpool, (0, 2, 1)) # input to 1D convlayer should be in (batch_size, num_input_channels, input_length) conv_out = Conv1DLayer(reformConvpool, 64, 3) conv_out = FlattenLayer(conv_out) # Input to LSTM should have the shape as (batch size, SEQ_LENGTH, num_features) lstm = LSTMLayer(convpool, num_units=128, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh) lstm_out = SliceLayer(lstm, -1, 1) # Merge 1D-Conv and LSTM outputs dense_input = ConcatLayer([conv_out, lstm_out]) # A fully-connected layer of 256 units with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(dense_input, p=.5), num_units=512, nonlinearity=lasagne.nonlinearities.rectify) # And, finally, the 10-unit output layer with 50% dropout on its inputs: convpool = DenseLayer(convpool, num_units=nb_classes, nonlinearity=lasagne.nonlinearities.softmax) return convpool
def buildModel(mtype=1): print "BUILDING MODEL TYPE", mtype, "..." #default settings (Model 1) filters = 64 first_stride = 2 last_filter_multiplier = 16 #specific model type settings (see working notes for details) if mtype == 2: first_stride = 1 elif mtype == 3: filters = 32 last_filter_multiplier = 8 #input layer net = l.InputLayer((None, IM_DIM, IM_SIZE[1], IM_SIZE[0])) #conv layers net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=7, pad='same', stride=first_stride, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY)) net = l.MaxPool2DLayer(net, pool_size=2) if mtype == 2: net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY)) net = l.MaxPool2DLayer(net, pool_size=2) net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 2, filter_size=5, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY)) net = l.MaxPool2DLayer(net, pool_size=2) net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 4, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY)) net = l.MaxPool2DLayer(net, pool_size=2) net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * 8, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY)) net = l.MaxPool2DLayer(net, pool_size=2) net = l.batch_norm(l.Conv2DLayer(net, num_filters=filters * last_filter_multiplier, filter_size=3, pad='same', stride=1, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY)) net = l.MaxPool2DLayer(net, pool_size=2) print "\tFINAL POOL OUT SHAPE:", l.get_output_shape(net) #dense layers net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY)) net = l.batch_norm(l.DenseLayer(net, 512, W=init.HeNormal(gain=INIT_GAIN), nonlinearity=NONLINEARITY)) #Classification Layer if MULTI_LABEL: net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.sigmoid, W=init.HeNormal(gain=1)) else: net = l.DenseLayer(net, NUM_CLASSES, nonlinearity=nonlinearities.softmax, W=init.HeNormal(gain=1)) print "...DONE!" #model stats print "MODEL HAS", (sum(hasattr(layer, 'W') for layer in l.get_all_layers(net))), "WEIGHTED LAYERS" print "MODEL HAS", l.count_params(net), "PARAMS" return net
def _invert_DenseLayer(self, layer, feeder):
    # Warning they are swapped here
    feeder = self._put_rectifiers(feeder, layer)
    output_units = np.prod(L.get_output_shape(layer.input_layer)[1:])
    output_layer = L.DenseLayer(feeder, num_units=output_units,
                                nonlinearity=None, b=None)
    return output_layer
def _invert_GlobalPoolLayer(self, layer, feeder):
    assert isinstance(layer, L.GlobalPoolLayer)
    assert layer.pool_function == T.mean
    assert len(L.get_output_shape(layer.input_layer)) == 4

    target_shape = L.get_output_shape(feeder) + (1, 1)
    if target_shape[0] is None:
        target_shape = (-1, ) + target_shape[1:]

    feeder = L.ReshapeLayer(feeder, target_shape)

    upscaling = L.get_output_shape(layer.input_layer)[2:]
    feeder = L.Upscale2DLayer(feeder, upscaling)

    def expression(x):
        return x / np.prod(upscaling).astype(theano.config.floatX)
    feeder = L.ExpressionLayer(feeder, expression)
    return feeder
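# Hedged standalone sketch (assumption): the shape bookkeeping behind the method
# above, i.e. undoing a mean GlobalPoolLayer by reshaping to (N, C, 1, 1),
# upscaling back to the pooled spatial size, and dividing by the pool area.
import numpy as np
import lasagne.layers as L

l_in = L.InputLayer((None, 4, 6, 6))
l_pool = L.GlobalPoolLayer(l_in)                      # mean pool -> (None, 4)
l_up = L.ReshapeLayer(l_pool, (-1, 4, 1, 1))
l_up = L.Upscale2DLayer(l_up, (6, 6))
l_up = L.ExpressionLayer(l_up, lambda x: x / np.float32(6 * 6))
print(L.get_output_shape(l_pool))                     # (None, 4)
print(L.get_output_shape(l_up))                       # (None, 4, 6, 6)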
def build_convpool_lstm(input_vars, nb_classes, grad_clip=110, imsize=32, n_colors=3, n_timewin=7): """ Builds the complete network with LSTM layer to integrate time from sequences of EEG images. :param input_vars: list of EEG images (one image per time window) :param nb_classes: number of classes :param grad_clip: the gradient messages are clipped to the given value during the backward pass. :param imsize: size of the input image (assumes a square input) :param n_colors: number of color channels in the image :param n_timewin: number of time windows in the snippet :return: a pointer to the output of last layer """ convnets = [] w_init = None # Build 7 parallel CNNs with shared weights for i in range(n_timewin): if i == 0: convnet, w_init = build_cnn(input_vars[i], imsize=imsize, n_colors=n_colors) else: convnet, _ = build_cnn(input_vars[i], w_init=w_init, imsize=imsize, n_colors=n_colors) convnets.append(FlattenLayer(convnet)) # at this point convnets shape is [numTimeWin][n_samples, features] # we want the shape to be [n_samples, features, numTimeWin] convpool = ConcatLayer(convnets) convpool = ReshapeLayer(convpool, ([0], n_timewin, get_output_shape(convnets[0])[1])) # Input to LSTM should have the shape as (batch size, SEQ_LENGTH, num_features) convpool = LSTMLayer(convpool, num_units=128, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh) # We only need the final prediction, we isolate that quantity and feed it # to the next layer. convpool = SliceLayer(convpool, -1, 1) # Selecting the last prediction # A fully-connected layer of 256 units with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=256, nonlinearity=lasagne.nonlinearities.rectify) # And, finally, the output layer with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=nb_classes, nonlinearity=lasagne.nonlinearities.softmax) return convpool
def _invert_PadLayer(self, layer, feeder):
    assert isinstance(layer, L.PadLayer)
    assert layer.batch_ndim == 2
    assert len(L.get_output_shape(layer)) == 4

    tmp = L.SliceLayer(feeder,
                       slice(layer.width[0][0], -layer.width[0][1]),
                       axis=2)
    return L.SliceLayer(tmp,
                        slice(layer.width[1][0], -layer.width[1][1]),
                        axis=3)
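# Hedged standalone sketch (assumption): the inverse of a symmetric PadLayer is a
# pair of SliceLayers, one per spatial axis, which is what the method above
# constructs from layer.width.
import lasagne.layers as L

l_in = L.InputLayer((None, 3, 28, 28))
l_pad = L.PadLayer(l_in, width=2)                     # -> (None, 3, 32, 32)
l_crop = L.SliceLayer(l_pad, slice(2, -2), axis=2)
l_crop = L.SliceLayer(l_crop, slice(2, -2), axis=3)
print(L.get_output_shape(l_pad))                      # (None, 3, 32, 32)
print(L.get_output_shape(l_crop))                     # (None, 3, 28, 28)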
def build_convpool_mix(input_vars, numTimeWin, nb_classes, GRAD_CLIP=100): """ Builds the complete network with LSTM and 1D-conv layers combined to integrate time from sequences of EEG images. :param input_vars: list of EEG images (one image per time window) :param numTimeWin: number of time windows :param nb_classes: number of classes :param GRAD_CLIP: the gradient messages are clipped to the given value during the backward pass. :return: a pointer to the output of last layer """ convnets = [] W_init = None # Build 7 parallel CNNs with shared weights for i in range(numTimeWin): if i == 0: convnet, W_init = build_cnn(input_vars[i]) else: convnet, _ = build_cnn(input_vars[i], W_init) convnets.append(FlattenLayer(convnet)) # at this point convnets shape is [numTimeWin][n_samples, features] # we want the shape to be [n_samples, features, numTimeWin] convpool = ConcatLayer(convnets) # convpool = ReshapeLayer(convpool, ([0], -1, numTimeWin)) convpool = ReshapeLayer(convpool, ([0], numTimeWin, get_output_shape(convnets[0])[1])) reformConvpool = DimshuffleLayer(convpool, (0, 2, 1)) # input to 1D convlayer should be in (batch_size, num_input_channels, input_length) conv_out = Conv1DLayer(reformConvpool, 64, 3) conv_out = FlattenLayer(conv_out) # Input to LSTM should have the shape as (batch size, SEQ_LENGTH, num_features) lstm = LSTMLayer(convpool, num_units=128, grad_clipping=GRAD_CLIP, nonlinearity=lasagne.nonlinearities.tanh) # After LSTM layer you either need to reshape or slice it (depending on whether you # want to keep all predictions or just the last prediction. # http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html # https://github.com/Lasagne/Recipes/blob/master/examples/lstm_text_generation.py # lstm_out = SliceLayer(convpool, -1, 1) # bypassing LSTM lstm_out = SliceLayer(lstm, -1, 1) # Merge 1D-Conv and LSTM outputs dense_input = ConcatLayer([conv_out, lstm_out]) # A fully-connected layer of 256 units with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(dense_input, p=.5), num_units=512, nonlinearity=lasagne.nonlinearities.rectify) # We only need the final prediction, we isolate that quantity and feed it # to the next layer. # And, finally, the 10-unit output layer with 50% dropout on its inputs: convpool = DenseLayer(convpool, num_units=nb_classes, nonlinearity=lasagne.nonlinearities.softmax) return convpool
def __init__(self, incoming, num_filters, filter_size, stride=1, pad=0, untie_biases=False, W=init.GlorotUniform(), b=init.Constant(0.), nonlinearity=None, flip_filters=True, in_shape=None, **kwargs): if in_shape is None: tensor_shape = get_output(incoming).shape in_shape = get_output_shape( incoming)[:2] + (tensor_shape[2],) + (tensor_shape[3],) self.in_shape = in_shape super(TransposeConv2DLayer, self).__init__( incoming, num_filters, filter_size, stride, pad, untie_biases, W, b, nonlinearity, flip_filters, **kwargs)
def createXYZTCropLayer(input_layer_4d, xyz_layer, theta_layer, max_scale, out_width, name=None): input_layer_shape = get_output_shape(input_layer_4d) batch_size = input_layer_shape[0] new_width = out_width new_height = out_width # ratio to reduce to patch size from original reduc_ratio = (np.cast[floatX](out_width) / np.cast[floatX](input_layer_shape[3])) # merge xyz and t layers together to form xyzt xyzt_layer = ConcatLayer([xyz_layer, theta_layer]) # create a param layer from xyz layer def xyzt_2_param(xyzt): # get individual xyz dx = xyzt[:, 0] # x and y are already between -1 and 1 dy = xyzt[:, 1] # x and y are already between -1 and 1 z = xyzt[:, 2] t = xyzt[:, 3] # compute the resize from the largest scale image dr = (np.cast[floatX](reduc_ratio) * np.cast[floatX] (2.0)**z / np.cast[floatX](max_scale)) # dimshuffle before concatenate params = [dr * T.cos(t), -dr * T.sin(t), dx, dr * T.sin(t), dr * T.cos(t), dy] params = [_p.flatten().dimshuffle(0, 'x') for _p in params] # concatenate to have (1 0 0 0 1 0) when identity transform return T.concatenate(params, axis=1) param_layer = ExpressionLayer(xyzt_layer, xyzt_2_param, output_shape=(batch_size, 6)) resize_layer = TransformerLayer(input_layer_4d, param_layer, new_height, new_width, name=name) return resize_layer
def build_convpool_lstm(input_vars, nb_classes, GRAD_CLIP=100, imSize=32, n_colors=3, n_timewin=3): """ Builds the complete network with LSTM layer to integrate time from sequences of EEG images. :param input_vars: list of EEG images (one image per time window) :param nb_classes: number of classes :param GRAD_CLIP: the gradient messages are clipped to the given value during the backward pass. :return: a pointer to the output of last layer """ convnets = [] W_init = None # Build 7 parallel CNNs with shared weights for i in range(n_timewin): if i == 0: convnet, W_init = build_cnn(input_vars[i], imSize=imSize, n_colors=n_colors) else: convnet, _ = build_cnn(input_vars[i], W_init=W_init, imSize=imSize, n_colors=n_colors) convnets.append(FlattenLayer(convnet)) # at this point convnets shape is [numTimeWin][n_samples, features] # we want the shape to be [n_samples, features, numTimeWin] convpool = ConcatLayer(convnets) # convpool = ReshapeLayer(convpool, ([0], -1, numTimeWin)) convpool = ReshapeLayer(convpool, ([0], n_timewin, get_output_shape(convnets[0])[1])) # Input to LSTM should have the shape as (batch size, SEQ_LENGTH, num_features) convpool = LSTMLayer(convpool, num_units=128, grad_clipping=GRAD_CLIP, nonlinearity=lasagne.nonlinearities.tanh) # After LSTM layer you either need to reshape or slice it (depending on whether you # want to keep all predictions or just the last prediction. # http://lasagne.readthedocs.org/en/latest/modules/layers/recurrent.html # https://github.com/Lasagne/Recipes/blob/master/examples/lstm_text_generation.py convpool = SliceLayer(convpool, -1, 1) # Selecting the last prediction # A fully-connected layer of 256 units with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=256, nonlinearity=lasagne.nonlinearities.rectify) # We only need the final prediction, we isolate that quantity and feed it # to the next layer. # And, finally, the output layer with 50% dropout on its inputs: convpool = DenseLayer(lasagne.layers.dropout(convpool, p=.5), num_units=nb_classes, nonlinearity=lasagne.nonlinearities.softmax) return convpool
def residual_block(net, last_layer, name, filter_size, nonlinearity=nonlinearities.rectify): # original residual unit shape = layers.get_output_shape(net[last_layer]) num_filters = shape[1] net[name+'_1resid'] = layers.Conv2DLayer(net[last_layer], num_filters=num_filters, filter_size=filter_size, stride=(1, 1), # 1000 W=init.HeUniform(), b=None, nonlinearity=None, pad='same') net[name+'_1resid_norm'] = layers.BatchNormLayer(net[name+'_1resid']) net[name+'_1resid_active'] = layers.NonlinearityLayer(net[name+'_1resid_norm'], nonlinearity=nonlinearity) net[name+'_1resid_dropout'] = layers.DropoutLayer(net[name+'_1resid_active'], p=0.1) # bottleneck residual layer net[name+'_2resid'] = layers.Conv2DLayer(net[name+'_1resid_dropout'], num_filters=num_filters, filter_size=filter_size, stride=(1, 1), # 1000 W=init.HeUniform(), b=None, nonlinearity=None, pad='same') net[name+'_2resid_norm'] = layers.BatchNormLayer(net[name+'_2resid']) # combine input with residuals net[name+'_residual'] = layers.ElemwiseSumLayer([net[last_layer], net[name+'_2resid_norm']]) net[name+'_resid'] = layers.NonlinearityLayer(net[name+'_residual'], nonlinearity=nonlinearity) return net
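# Hedged usage sketch (assumption): wiring residual_block into a small conv net
# held in an OrderedDict; the names and shapes are illustrative only.
from collections import OrderedDict
from lasagne import layers, init, nonlinearities

net = OrderedDict()
net['input'] = layers.InputLayer((None, 1, 32, 32))
net['conv1'] = layers.Conv2DLayer(net['input'], num_filters=16,
                                  filter_size=(3, 3), pad='same',
                                  nonlinearity=nonlinearities.rectify)
net = residual_block(net, 'conv1', 'res1', filter_size=(3, 3))
# 'same' padding and a matching filter count keep the shortcut sum well-defined.
print(layers.get_output_shape(net['res1_resid']))     # (None, 16, 32, 32)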
def get_activations(self, layer, X, batch_size=500):
    """get the feature maps of a given convolutional layer"""

    # setup theano function to get feature map of a given layer
    num_data = len(X)
    feature_maps = theano.function([self.placeholders['inputs']],
                                   layers.get_output(self.network[layer],
                                                     deterministic=True),
                                   allow_input_downcast=True)
    map_shape = layers.get_output_shape(self.network[layer])

    # get feature maps in batches for speed
    # (large batches may require too much GPU memory)
    num_batches = num_data // batch_size
    shape = list(map_shape)
    shape[0] = num_data
    fmaps = np.empty(tuple(shape))
    for i in range(num_batches):
        index = range(i*batch_size, (i+1)*batch_size)
        fmaps[index] = feature_maps(X[index])

    # get the rest of the feature maps
    index = range(num_batches*batch_size, num_data)
    if index:
        fmaps[index] = feature_maps(X[index])

    return fmaps
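# Hedged standalone sketch (assumption): the same fixed-size batching pattern with
# a plain NumPy callable standing in for the compiled Theano feature-map function;
# slicing with start:start+batch_size handles the final partial batch implicitly.
import numpy as np

def batched_apply(fn, X, out_shape, batch_size=500):
    out = np.empty((len(X),) + tuple(out_shape))
    for start in range(0, len(X), batch_size):
        out[start:start + batch_size] = fn(X[start:start + batch_size])
    return out

fmaps = batched_apply(lambda x: x.mean(axis=1, keepdims=True),
                      np.random.rand(1234, 3, 8, 8), out_shape=(1, 8, 8))
print(fmaps.shape)                                    # (1234, 1, 8, 8)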
    #print bfeatures.eval().shape
    return bfeatures


def train_SVM(X, y):
    clf = svm.SVC()
    clf.fit(X, y)


if __name__ == '__main__':
    import sys
    #fname = sys.argv[1]
    fname1 = "/Users/ingeborg/model1/model_0.pkl"
    fname2 = "/Users/ingeborg/model2/model_0.pkl"
    fsamples = "/Users/ingeborg/Desktop/output.npz"

    dataset = np.load(fsamples)
    X, y, labels = dataset['X'], dataset['y'], dataset['labels']

    model = load_model(fname1)
    for layer in get_all_layers(model):
        print layer.name, get_output_shape(layer)
    b_features1 = get_bottleneck_features(model, X)
    #TODO: fix bug: ValueError: setting an array element with a sequence.
    #train_SVM(b_features1, y)

    model = load_model(fname2)
    for layer in get_all_layers(model):
        print layer.name, get_output_shape(layer)
    b_features2 = get_bottleneck_features(model, X)
    #train_SVM(b_features1, y)
def build_model(vmap, # input vocab mapping num_classes, # number classes to predict K=300, # dimensionality of embeddings num_hidden=256, # number of hidden_units batchsize=None, # size of each batch (None for variable size) input_var=None, # theano variable for input mask_var=None, # theano variable for input mask bidirectional=True, # whether to use bi-directional LSTM mean_pooling=True, grad_clip=100., # gradients above this will be clipped max_seq_len=MAX_SEQ, # maximum lenght of a sequence ini_word2vec=False, # whether to initialize with word2vec word2vec_file='/iesl/canvas/tbansal/glove.twitter.27B.200d.txt', # location of trained word vectors ): V = len(vmap) # basic input layer (batchsize, SEQ_LENGTH), # None lets us use variable bs # use a mask to outline the actual input sequence if ini_word2vec: print('loading embeddings from file %s' % word2vec_file) word2vec_model = word2vec.Word2Vec.load_word2vec_format(word2vec_file, binary=False) print 'done.' K = word2vec_model[word2vec_model.vocab.keys()[0]].size # override dim print('embedding dim: %d' % K) W = np.zeros((V, K), dtype=np.float32) no_vectors = 0 for w in vmap: if w in word2vec_model.vocab: W[vmap[w]] = np.asarray(word2vec_model[w], dtype=np.float32) else: W[vmap[w]] = np.random.normal(scale=0.01, size=K) no_vectors += 1 W = theano.shared(W) print " Initialized with word2vec. Couldn't find", no_vectors, "words!" else: W = lasagne.init.Normal() # Input Layer l_in = lasagne.layers.InputLayer((batchsize, max_seq_len), input_var=input_var) l_mask = lasagne.layers.InputLayer((batchsize, max_seq_len), input_var=mask_var) HYPOTHETICALLY = {l_in: (200, 140), l_mask: (200, 140)} print('Input Layer Shape:') LIN = get_output_shape(l_in, HYPOTHETICALLY) print 'input:', HYPOTHETICALLY print 'output:', LIN print # Embedding layer l_emb = lasagne.layers.EmbeddingLayer(l_in, input_size=V, output_size=K, W=W) print('Embedding Layer Shape:') print 'input:', LIN print 'output:', get_output_shape(l_emb, HYPOTHETICALLY) print # add droput # l_emb = lasagne.layers.DropoutLayer(l_emb, p=0.2) # Use orthogonal Initialization for LSTM gates gate_params = lasagne.layers.recurrent.Gate( W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.) 
) cell_params = lasagne.layers.recurrent.Gate( W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(), W_cell=None, b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.tanh ) l_fwd = lasagne.layers.LSTMLayer( l_emb, num_units=num_hidden, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh, mask_input=l_mask, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True ) print('Forward LSTM Shape:') print 'input:', get_output_shape(l_emb, HYPOTHETICALLY) print 'output:', get_output_shape(l_fwd, HYPOTHETICALLY) print # add droput # l_fwd = lasagne.layers.DropoutLayer(l_fwd, p=0.5) if bidirectional: # add a backwards LSTM layer for bi-directional l_bwd = lasagne.layers.LSTMLayer( l_emb, num_units=num_hidden, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh, mask_input=l_mask, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True, backwards=True ) print('Backward LSTM Shape:') print 'input:', get_output_shape(l_emb, HYPOTHETICALLY) print 'output:', get_output_shape(l_bwd, HYPOTHETICALLY) print # print "backward layer:", lasagne.layers.get_output_shape( # l_bwd, {l_in: (200, 140), l_mask: (200, 140)}) # concatenate forward and backward LSTM l_concat = lasagne.layers.ConcatLayer([l_fwd, l_bwd]) print('Concat Layer Shape:') print 'input:', get_output_shape(l_fwd, HYPOTHETICALLY), get_output_shape(l_bwd, HYPOTHETICALLY) print 'output:', get_output_shape(l_concat, HYPOTHETICALLY) print else: l_concat = l_fwd print('Concat Layer Shape:') print 'input:', get_output_shape(l_fwd, HYPOTHETICALLY) print 'output:', get_output_shape(l_concat, HYPOTHETICALLY) print # add droput l_concat = lasagne.layers.DropoutLayer(l_concat, p=0.5) l_lstm2 = lasagne.layers.LSTMLayer( l_concat, num_units=num_hidden, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh, mask_input=l_mask, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True, only_return_final=True ) print('LSTM Layer #2 Shape:') print 'input:', get_output_shape(l_concat, HYPOTHETICALLY) print 'output:', get_output_shape(l_lstm2, HYPOTHETICALLY) print # add dropout l_lstm2 = lasagne.layers.DropoutLayer(l_lstm2, p=0.6) # Mean Pooling Layer pool_size = 16 l_pool = lasagne.layers.FeaturePoolLayer(l_lstm2, pool_size) print('Mean Pool Layer Shape:') print 'input:', get_output_shape(l_lstm2, HYPOTHETICALLY) print 'output:', get_output_shape(l_pool, HYPOTHETICALLY) print # Dense Layer network = lasagne.layers.DenseLayer( l_pool, num_units=num_classes, nonlinearity=lasagne.nonlinearities.softmax ) print('Dense Layer Shape:') print 'input:', get_output_shape(l_pool, HYPOTHETICALLY) print 'output:', get_output_shape(network, HYPOTHETICALLY) return network
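# Hedged usage sketch (assumption): compiling the classifier returned by
# build_model. The vocabulary mapping is a toy stand-in, and the module-level
# MAX_SEQ constant from the original file is assumed to be defined.
import theano
import theano.tensor as T
import lasagne

vmap = {'the': 0, 'cat': 1, 'sat': 2}
X_sym = T.imatrix('X')          # (batch, max_seq_len) word indices
M_sym = T.matrix('mask')        # (batch, max_seq_len) 0/1 mask
network = build_model(vmap, num_classes=2, input_var=X_sym, mask_var=M_sym)
probs = lasagne.layers.get_output(network, deterministic=True)
predict = theano.function([X_sym, M_sym], probs, allow_input_downcast=True)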
def build_network_MyModel(args, input1_var, input1_mask_var, input2_var,
                          input2_mask_var, wordEmbeddings, maxlen=36,
                          reg=0.5 * 1e-4):
    # need to use theano.scan
    print ("Building model LSTM + Feature Model + 2D Convolution + MLP")

    vocab_size = wordEmbeddings.shape[1]
    wordDim = wordEmbeddings.shape[0]
    GRAD_CLIP = wordDim

    input_1 = InputLayer((None, maxlen), input_var=input1_var)
    batchsize, seqlen = input_1.input_var.shape
    input_1_mask = InputLayer((None, maxlen), input_var=input1_mask_var)
    emb_1 = EmbeddingLayer(input_1, input_size=vocab_size,
                           output_size=wordDim, W=wordEmbeddings.T)
    emb_1.params[emb_1.W].remove("trainable")
    lstm_1 = LSTMLayer(emb_1, num_units=args.lstmDim, mask_input=input_1_mask,
                       grad_clipping=GRAD_CLIP, nonlinearity=tanh)

    input_2 = InputLayer((None, maxlen), input_var=input2_var)
    input_2_mask = InputLayer((None, maxlen), input_var=input2_mask_var)
    emb_2 = EmbeddingLayer(input_2, input_size=vocab_size,
                           output_size=wordDim, W=wordEmbeddings.T)
    emb_2.params[emb_2.W].remove("trainable")
    lstm_2 = LSTMLayer(emb_2, num_units=args.lstmDim, mask_input=input_2_mask,
                       grad_clipping=GRAD_CLIP, nonlinearity=tanh)

    # print "LSTM shape", get_output_shape(lstm_2)  # LSTM shape (None, 36, 150)
    cos_feats = CosineSimLayer([lstm_1, lstm_2])
    print "SSSS", get_output_shape(cos_feats)

    # lstm_1 = SliceLayer(lstm_1, indices=slice(-6, None), axis=1)
    # lstm_2 = SliceLayer(lstm_2, indices=slice(-6, None), axis=1)
    # concat = ConcatLayer([lstm_1, lstm_2], axis=2)  # (None, 36, 300)
    """
    num_filters = 32
    stride = 1
    """
    filter_size = (10, 10)
    pool_size = (4, 4)
    """
    filter_size=(3, 10)
    pool_size=(2,2)
    reshape = ReshapeLayer(concat, (batchsize, 1, 6, 2*args.lstmDim))
    conv2d = Conv2DLayer(reshape, num_filters=num_filters,
                         filter_size=filter_size, nonlinearity=rectify,
                         W=GlorotUniform())
    maxpool = MaxPool2DLayer(conv2d, pool_size=pool_size)  # (None, 32, 6, 72)
    """
    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    # conv2d = Conv2DLayer(maxpool, num_filters=32, filter_size=(5, 5), nonlinearity=rectify)
    # maxpool = MaxPool2DLayer(conv2d, pool_size=(2, 2))

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    # hid = DenseLayer(DropoutLayer(maxpool, p=.2), num_units=128, nonlinearity=rectify)
    hid = DenseLayer(cos_feats, num_units=10, nonlinearity=sigmoid)

    if args.task == "sts":
        network = DenseLayer(hid, num_units=5, nonlinearity=logsoftmax)
    elif args.task == "ent":
        network = DenseLayer(hid, num_units=3, nonlinearity=logsoftmax)

    layers = {lstm_1: reg, hid: reg, network: reg}
    penalty = regularize_layer_params_weighted(layers, l2)

    input_dict = {
        input_1: input1_var,
        input_2: input2_var,
        input_1_mask: input1_mask_var,
        input_2_mask: input2_mask_var,
    }

    return network, penalty, input_dict
def createXYZMapLayer(scoremap_cut_layer, fScaleList, req_boundary, name=None, fCoMStrength=10.0, eps=1e-8): """Creates the mapping layer for transforming the cut scores to xyz. Parameters ---------- scoremap_cut_layer: lasange layer The input layer to the mapping. Typically myNet.layers[idxSiam]['kp-scoremap-cut'] fScaleList: ndarray, flaot Array of scales that the layer operates with. Given by the configuration. req_boundary: int The number of pixels at each boundary that was cut from the original input patch size to get the valid scormap. name: str Name of the mapping layer. fCoMStrength: float Strength of the soft CoM style argmax. Negative value means hard argmax. eps: float The epsilon that is added to the denominator of CoM to prevent numerical problemsu """ fCoMStrength = np.cast[floatX](fCoMStrength) eps = np.cast[floatX](eps) scoremap_shape = get_output_shape(scoremap_cut_layer) num_scale = len(fScaleList) # scale_space_min changes according to scoremap_shape num_scale_after = scoremap_shape[4] new_min_idx = (num_scale - num_scale_after) // 2 scale_space_min = fScaleList[new_min_idx] if num_scale >= 2: scale_space_step = (fScaleList[num_scale - 1] / fScaleList[num_scale // 2])**( 1.0 / float(num_scale // 2)) else: scale_space_step = np.cast[floatX](1) # mapping from score map to x,y,z def map2xyz(out, fCoMStrength, scoremap_shape, eps, scale_space_min, scale_space_step, req_boundary): # In case of soft argmax if fCoMStrength > 0: # x = softargmax(T.sum(out, axis=[1, 2, 4]), axis=1, # softargmax_strength=fCoMStrength) # y = softargmax(T.sum(out, axis=[1, 3, 4]), axis=1, # softargmax_strength=fCoMStrength) # z = softargmax(T.sum(out, axis=[1, 2, 3]), axis=1, # softargmax_strength=fCoMStrength) od = len(scoremap_shape) # CoM to get the coordinates pos_array_x = T.arange(scoremap_shape[3], dtype=floatX) pos_array_y = T.arange(scoremap_shape[2], dtype=floatX) pos_array_z = T.arange(scoremap_shape[4], dtype=floatX) # max_out = T.max(T.maximum(out, 0), # axis=list(range(1, od)), keepdims=True) max_out = T.max(out, axis=list(range(1, od)), keepdims=True) o = T.exp(fCoMStrength * (out - max_out)) # o = T.exp(fCoMStrength * T.maximum(out, 0) + np.cast[floatX]( # 1.0)) - np.cast[floatX](1.0) x = T.sum( o * pos_array_x.dimshuffle(['x', 'x', 'x', 0, 'x']), axis=list(range(1, od)) ) / (T.sum(o, axis=list(range(1, od)))) y = T.sum( o * pos_array_y.dimshuffle(['x', 'x', 0, 'x', 'x']), axis=list(range(1, od)) ) / (T.sum(o, axis=list(range(1, od)))) z = T.sum( o * pos_array_z.dimshuffle(['x', 'x', 'x', 'x', 0]), axis=list(range(1, od)) ) / (T.sum(o, axis=list(range(1, od)))) # -------------- # Turn x, and y into range -1 to 1, where the patch size is # mapped to -1 and 1 orig_patch_width = ( scoremap_shape[3] + np.cast[floatX](req_boundary * 2.0)) orig_patch_height = ( scoremap_shape[2] + np.cast[floatX](req_boundary * 2.0)) x = ((x + np.cast[floatX](req_boundary)) / np.cast[floatX]( (orig_patch_width - 1.0) * 0.5) - np.cast[floatX](1.0)).dimshuffle([0, 'x']) y = ((y + np.cast[floatX](req_boundary)) / np.cast[floatX]( (orig_patch_height - 1.0) * 0.5) - np.cast[floatX](1.0)).dimshuffle([0, 'x']) # -------------- # Turn z into log2 scale, where z==0 is the center # scale. e.g. 
z == -1 would mean that it is actuall scale # of 0.5 x center scale # z = np.cast[floatX](scale_space_min) * ( # np.cast[floatX](scale_space_step)**z) z = np.cast[floatX](np.log2(scale_space_min)) + \ np.cast[floatX](np.log2(scale_space_step)) * z z = z.dimshuffle([0, 'x']) # In case of hard argmax else: raise RuntimeError('The hard argmax does not have derivatives!') # x = T.cast( # T.argmax(T.sum(out, axis=[1, 2, 4]), axis=1), dtype=floatX) # y = T.cast( # T.argmax(T.sum(out, axis=[1, 3, 4]), axis=1), dtype=floatX) # z = T.cast( # T.argmax(T.sum(out, axis=[1, 2, 3]), axis=1), dtype=floatX) x = softargmax(T.sum(out, axis=[1, 2, 4]), axis=1, softargmax_strength=-fCoMStrength) y = softargmax(T.sum(out, axis=[1, 3, 4]), axis=1, softargmax_strength=-fCoMStrength) z = softargmax(T.sum(out, axis=[1, 2, 3]), axis=1, softargmax_strength=-fCoMStrength) # -------------- # Turn x, and y into range -1 to 1, where the patch size is # mapped to -1 and 1 orig_patch_width = ( scoremap_shape[3] + np.cast[floatX](req_boundary * 2.0)) orig_patch_height = ( scoremap_shape[2] + np.cast[floatX](req_boundary * 2.0)) x = ((x + np.cast[floatX](req_boundary)) / np.cast[floatX]( (orig_patch_width - 1.0) * 0.5) - np.cast[floatX](1.0)).dimshuffle([0, 'x']) y = ((y + np.cast[floatX](req_boundary)) / np.cast[floatX]( (orig_patch_height - 1.0) * 0.5) - np.cast[floatX](1.0)).dimshuffle([0, 'x']) # -------------- # Turn z into log2 scale, where z==0 is the center # scale. e.g. z == -1 would mean that it is actuall scale # of 0.5 x center scale # z = np.cast[floatX](scale_space_min) * ( # np.cast[floatX](scale_space_step)**z) z = np.cast[floatX](np.log2(scale_space_min)) + \ np.cast[floatX](np.log2(scale_space_step)) * z z = z.dimshuffle([0, 'x']) return T.concatenate([x, y, z], axis=1) def mapfunc(out): return map2xyz(out, fCoMStrength, scoremap_shape, eps, scale_space_min, scale_space_step, req_boundary) return ExpressionLayer( scoremap_cut_layer, mapfunc, output_shape='auto', name=name)
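# A toy NumPy illustration (illustrative only) of the center-of-mass style
# soft-argmax that map2xyz computes with Theano above: positions are averaged
# with softmax weights, giving a differentiable surrogate for a hard argmax.
import numpy as np


def soft_argmax_1d(scores, strength=10.0):
    scores = np.asarray(scores, dtype='float32')
    w = np.exp(strength * (scores - scores.max()))  # stabilised softmax weights
    pos = np.arange(len(scores), dtype='float32')
    return float(np.sum(w * pos) / np.sum(w))


print(soft_argmax_1d([0.1, 0.2, 2.0, 0.3]))   # ~2.0, close to the hard argmax
print(soft_argmax_1d([1.0, 1.0, 1.0, 1.0]))   # 1.5, the mean position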
def occlusion_heatmap(net, x, target, square_length=7): """An occlusion test that checks an image for its critical parts. In this function, a square part of the image is occluded (i.e. set to 0) and then the net is tested for its propensity to predict the correct label. One should expect that this propensity shrinks if critical parts of the image are occluded. If not, this indicates overfitting. Depending on the depth of the net and the size of the image, this function may take a while to finish, since one prediction for each pixel of the image is made. Currently, all color channels are occluded at the same time. Also, this does not really work if images are randomly distorted by the batch iterator. See paper: Zeiler, Fergus 2013 Parameters ---------- net : NeuralNet instance The neural net to test. x : np.array The input data, should be of shape (1, c, x, y). Only makes sense with image data. target : int The true value of the image. If the net makes several predictions, say 10 classes, this indicates which one to look at. square_length : int (default=7) The length of the side of the square that occludes the image. Must be an odd number. Returns ------- heat_array : np.array (with same size as image) A 2D np.array that at each point (i, j) contains the predicted probability of the correct class if the image is occluded by a square with center (i, j). """ if (x.ndim != 4) or x.shape[0] != 1: raise ValueError("This function requires the input data to be of " "shape (1, c, x, y), instead got {}".format(x.shape)) if square_length % 2 == 0: raise ValueError("Square length has to be an odd number, instead " "got {}.".format(square_length)) num_classes = get_output_shape(net.layers_[-1])[1] img = x[0].copy() bs, col, s0, s1 = x.shape heat_array = np.zeros((s0, s1)) pad = square_length // 2 + 1 x_occluded = np.zeros((s1, col, s0, s1), dtype=img.dtype) probs = np.zeros((s0, s1, num_classes)) # generate occluded images for i in range(s0): # batch s1 occluded images for faster prediction for j in range(s1): x_pad = np.pad(img, ((0, 0), (pad, pad), (pad, pad)), 'constant') x_pad[:, i:i + square_length, j:j + square_length] = 0. x_occluded[j] = x_pad[:, pad:-pad, pad:-pad] y_proba = net.predict_proba(x_occluded) probs[i] = y_proba.reshape(s1, num_classes) # from predicted probabilities, pick only those of target class for i in range(s0): for j in range(s1): heat_array[i, j] = probs[i, j, target] return heat_array
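# Hedged usage sketch for occlusion_heatmap: `net` is assumed to be a trained
# nolearn-style NeuralNet exposing predict_proba(), and X_val / y_val are
# hypothetical validation arrays with images shaped (N, c, h, w).
import matplotlib.pyplot as plt

heat = occlusion_heatmap(net, X_val[:1], target=int(y_val[0]), square_length=7)
plt.imshow(heat, interpolation='nearest', cmap='hot')
plt.colorbar()
plt.title('P(correct class) under occlusion')
plt.show()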
def __init__(self, l_in, n_layers, pheight, pwidth, dim_proj, stack_sublayers, # outsampling out_upsampling_type, out_nfilters, out_filters_size, out_filters_stride, out_W_init=[lasagne.init.GlorotUniform()], out_b_init=lasagne.init.Constant(0.), out_nonlinearity=[lasagne.nonlinearities.identity], out_pad=[1], # common recurrent layer params RecurrentNet=lasagne.layers.GRULayer, nonlinearity=lasagne.nonlinearities.rectify, hid_init=lasagne.init.Constant(0.), grad_clipping=0, precompute_input=True, mask_input=None, # 1x1 Conv layer for dimensional reduction conv_dim_red=False, conv_dim_red_nonlinearity=lasagne.nonlinearities.identity, # GRU specific params gru_resetgate=lasagne.layers.Gate(W_cell=None), gru_updategate=lasagne.layers.Gate(W_cell=None), gru_hidden_update=lasagne.layers.Gate( W_cell=None, nonlinearity=lasagne.nonlinearities.tanh), gru_hid_init=lasagne.init.Constant(0.), # LSTM specific params lstm_ingate=lasagne.layers.Gate(), lstm_forgetgate=lasagne.layers.Gate(), lstm_cell=lasagne.layers.Gate( W_cell=None, nonlinearity=lasagne.nonlinearities.tanh), lstm_outgate=lasagne.layers.Gate(), # RNN specific params rnn_W_in_to_hid=lasagne.init.Uniform(), rnn_W_hid_to_hid=lasagne.init.Uniform(), rnn_b=lasagne.init.Constant(0.), # Special layers batch_norm=False, name=''): """A ReSeg layer The ReSeg layer is composed by multiple ReNet layers and an upsampling layer Parameters ---------- l_in : lasagne.layers.Layer The input layer, in bc01 format n_layers : int The number of layers pheight : tuple The height of the patches, for each layer pwidth : tuple The width of the patches, for each layer dim_proj : tuple The number of hidden units of each RNN, for each layer nclasses : int The number of classes of the data stack_sublayers : bool If True the bidirectional RNNs in the ReNet layers will be stacked one over the other. See ReNet for more details. out_upsampling_type : string The kind of upsampling to be used out_nfilters : int The number of hidden units of the upsampling layer out_filters_size : tuple The size of the upsampling filters, if any out_filters_stride : tuple The stride of the upsampling filters, if any out_W_init : Theano shared variable, numpy array or callable Initializer for W out_b_init : Theano shared variable, numpy array or callable Initializer for b out_nonlinearity : Theano shared variable, numpy array or callable The nonlinearity to be applied after the upsampling hypotetical_fm_size : float The hypotetical size of the feature map that would be input of the layer if the input image of the whole network was of size (100, 100) RecurrentNet : lasagne.layers.Layer A recurrent layer class nonlinearity : callable or None The nonlinearity that is applied to the output. If None is provided, no nonlinearity will be applied. hid_init : callable, np.ndarray, theano.shared or lasagne.layers.Layer Initializer for initial hidden state grad_clipping : float If nonzero, the gradient messages are clipped to the given value during the backward pass. precompute_input : bool If True, precompute input_to_hid before iterating through the sequence. This can result in a speedup at the expense of an increase in memory usage. mask_input : lasagne.layers.Layer Layer which allows for a sequence mask to be input, for when sequences are of variable length. Default None, which means no mask will be supplied (i.e. all sequences are of the same length). 
gru_resetgate : lasagne.layers.Gate Parameters for the reset gate, if RecurrentNet is GRU gru_updategate : lasagne.layers.Gate Parameters for the update gate, if RecurrentNet is GRU gru_hidden_update : lasagne.layers.Gate Parameters for the hidden update, if RecurrentNet is GRU gru_hid_init : callable, np.ndarray, theano.shared or lasagne.layers.Layer Initializer for initial hidden state, if RecurrentNet is GRU lstm_ingate : lasagne.layers.Gate Parameters for the input gate, if RecurrentNet is LSTM lstm_forgetgate : lasagne.layers.Gate Parameters for the forget gate, if RecurrentNet is LSTM lstm_cell : lasagne.layers.Gate Parameters for the cell computation, if RecurrentNet is LSTM lstm_outgate : lasagne.layers.Gate Parameters for the output gate, if RecurrentNet is LSTM rnn_W_in_to_hid : Theano shared variable, numpy array or callable Initializer for input-to-hidden weight matrix, if RecurrentNet is RecurrentLayer rnn_W_hid_to_hid : Theano shared variable, numpy array or callable Initializer for hidden-to-hidden weight matrix, if RecurrentNet is RecurrentLayer rnn_b : Theano shared variable, numpy array, callable or None Initializer for bias vector, if RecurrentNet is RecurrentLaye. If None is provided there will be no bias batch_norm: this add a batch normalization layer at the end of the network right after each Gradient Upsampling layers name : string The name of the layer, optional """ super(ReSegLayer, self).__init__(l_in, name) self.l_in = l_in self.n_layers = n_layers self.pheight = pheight self.pwidth = pwidth self.dim_proj = dim_proj self.stack_sublayers = stack_sublayers # upsampling self.out_upsampling_type = out_upsampling_type self.out_nfilters = out_nfilters self.out_filters_size = out_filters_size self.out_filters_stride = out_filters_stride self.out_W_init = out_W_init self.out_b_init = out_b_init self.out_nonlinearity = out_nonlinearity # common recurrent layer params self.RecurrentNet = RecurrentNet self.nonlinearity = nonlinearity self.hid_init = hid_init self.grad_clipping = grad_clipping self.precompute_input = precompute_input self.mask_input = mask_input # GRU specific params self.gru_resetgate = gru_resetgate self.gru_updategate = gru_updategate self.gru_hidden_update = gru_hidden_update self.gru_hid_init = gru_hid_init # LSTM specific params self.lstm_ingate = lstm_ingate self.lstm_forgetgate = lstm_forgetgate self.lstm_cell = lstm_cell self.lstm_outgate = lstm_outgate # RNN specific params self.rnn_W_in_to_hid = rnn_W_in_to_hid self.rnn_W_hid_to_hid = rnn_W_hid_to_hid self.name = name self.sublayers = [] # ReNet layers l_renet = l_in for lidx in xrange(n_layers): l_renet = ReNetLayer(l_renet, patch_size=(pwidth[lidx], pheight[lidx]), n_hidden=dim_proj[lidx], stack_sublayers=stack_sublayers[lidx], RecurrentNet=RecurrentNet, nonlinearity=nonlinearity, hid_init=hid_init, grad_clipping=grad_clipping, precompute_input=precompute_input, mask_input=mask_input, # GRU specific params gru_resetgate=gru_resetgate, gru_updategate=gru_updategate, gru_hidden_update=gru_hidden_update, gru_hid_init=gru_hid_init, # LSTM specific params lstm_ingate=lstm_ingate, lstm_forgetgate=lstm_forgetgate, lstm_cell=lstm_cell, lstm_outgate=lstm_outgate, # RNN specific params rnn_W_in_to_hid=rnn_W_in_to_hid, rnn_W_hid_to_hid=rnn_W_hid_to_hid, rnn_b=rnn_b, batch_norm=batch_norm, name=self.name + '_renet' + str(lidx)) self.sublayers.append(l_renet) # Print shape out_shape = get_output_shape(l_renet) if stack_sublayers: msg = 'ReNet: After 2 rnns {}x{}@{} and 2 rnns 1x1@{}: {}' 
print(msg.format(pheight[lidx], pwidth[lidx], dim_proj[lidx], dim_proj[lidx], out_shape)) else: print('ReNet: After 4 rnns {}x{}@{}: {}'.format( pheight[lidx], pwidth[lidx], dim_proj[lidx], out_shape)) # 1x1 conv layer : dimensionality reduction layer if conv_dim_red: l_renet = lasagne.layers.Conv2DLayer( l_renet, num_filters=dim_proj[lidx], filter_size=(1, 1), W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), pad='valid', nonlinearity=conv_dim_red_nonlinearity, name=self.name + '_1x1_conv_layer' + str(lidx) ) # Print shape out_shape = get_output_shape(l_renet) print('Dim reduction: After 1x1 convnet: {}'.format(out_shape)) # Upsampling if out_upsampling_type == 'autograd': pass elif out_upsampling_type == 'grad': l_upsampling = l_renet for i, (nf, f_size, stride, nonli, out_w, out_p) in enumerate(zip( out_nfilters, out_filters_size, out_filters_stride, out_nonlinearity, out_W_init, out_pad)): l_upsampling = TransposedConv2DLayer( l_upsampling, num_filters=nf, filter_size=f_size, stride=stride, crop=out_p, W=out_w, b=out_b_init, nonlinearity=nonli,) self.sublayers.append(l_upsampling) if batch_norm: l_upsampling = lasagne.layers.batch_norm( l_upsampling, axes='auto') self.sublayers.append(l_upsampling) print "Batch normalization after Grad layer " # Print shape out_shape = get_output_shape(l_upsampling) print('Transposed conv: {}x{} (str {}x{}) @ {}:{}'.format( f_size[0], f_size[1], stride[0], stride[1], nf, out_shape)) elif out_upsampling_type == 'linear': # Go to b01c l_upsampling = lasagne.layers.DimshuffleLayer( l_renet, (0, 2, 3, 1), name=self.name + '_grad_undimshuffle') self.sublayers.append(l_upsampling) expand_height = np.prod(pheight) expand_width = np.prod(pwidth) l_upsampling = LinearUpsamplingLayer(l_upsampling, expand_height, expand_width, 1, batch_norm=batch_norm, name="linear_upsample_layer") self.sublayers.append(l_upsampling) print('Linear upsampling') if batch_norm: l_upsampling = lasagne.layers.batch_norm( l_upsampling, axes=(0, 1, 2)) self.sublayers.append(l_upsampling) print "Batch normalization after Linear upsampling layer " # Go back to bc01 l_upsampling = lasagne.layers.DimshuffleLayer( l_upsampling, (0, 3, 1, 2), name=self.name + '_grad_undimshuffle') self.sublayers.append(l_upsampling) self.l_out = l_upsampling # HACK LASAGNE # This will set `self.input_layer`, which is needed by Lasagne to find # the layers with the get_all_layers() helper function in the # case of a layer with sublayers if isinstance(self.l_out, tuple): self.input_layer = None else: self.input_layer = self.l_out
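# Small standalone check (with made-up sizes) of how the 'grad' upsampling
# branch above grows the feature map: for TransposedConv2DLayer the output
# side length is (in - 1) * stride - 2 * crop + filter_size.
from lasagne.layers import InputLayer, TransposedConv2DLayer, get_output_shape

l = InputLayer((None, 64, 25, 25))
l_up = TransposedConv2DLayer(l, num_filters=32, filter_size=(4, 4),
                             stride=(2, 2), crop=1)
print(get_output_shape(l_up))   # (None, 32, 50, 50)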
def build_model(hyparams, vmap, log, nclasses=2, batchsize=None, invar=None, maskvar=None, maxlen=MAXLEN): embedding_dim = hyparams.embedding_dim nhidden = hyparams.nhidden bidirectional = hyparams.bidirectional pool = hyparams.pool grad_clip = hyparams.grad_clip init = hyparams.init net = OrderedDict() V = len(vmap) W = lasagne.init.Normal() gate_params = layer.recurrent.Gate( W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.) ) cell_params = layer.recurrent.Gate( W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(), W_cell=None, b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.tanh ) net['input'] = layer.InputLayer((batchsize, maxlen), input_var=invar) net['mask'] = layer.InputLayer((batchsize, maxlen), input_var=maskvar) ASSUME = {net['input']: (200, 140), net['mask']: (200, 140)} net['emb'] = layer.EmbeddingLayer(net['input'], input_size=V, output_size=embedding_dim, W=W) net['fwd1'] = layer.LSTMLayer( net['emb'], num_units=nhidden, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh, mask_input=net['mask'], ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True ) if bidirectional: net['bwd1'] = layer.LSTMLayer( net['emb'], num_units=nhidden, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh, mask_input=net['mask'], ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True, backwards=True ) if pool == 'mean': def tmean(a, b): agg = theano.tensor.add(a, b) agg /= 2. return agg net['pool'] = layer.ElemwiseMergeLayer([net['fwd1'], net['bwd1']], tmean) elif pool == 'sum': net['pool'] = layer.ElemwiseSumLayer([net['fwd1'], net['bwd1']]) else: net['pool'] = layer.ConcatLayer([net['fwd1'], net['bwd1']]) else: net['pool'] = layer.ConcatLayer([net['fwd1']]) net['dropout1'] = layer.DropoutLayer(net['pool'], p=0.5) if init == 'identity': gate_params2 = layer.recurrent.Gate( W_in=np.eye(nhidden, dtype=np.float32), W_hid=np.eye(nhidden, dtype=np.float32), b=lasagne.init.Constant(0.) ) cell_params2 = layer.recurrent.Gate( W_in=np.eye(nhidden, dtype=np.float32), W_hid=np.eye(nhidden, dtype=np.float32), W_cell=None, b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify ) net['fwd2'] = layer.LSTMLayer( net['dropout1'], num_units=nhidden, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh, mask_input=net['mask'], ingate=gate_params2, forgetgate=gate_params2, cell=cell_params2, outgate=gate_params2, learn_init=True, only_return_final=True ) else: net['fwd2'] = layer.LSTMLayer( net['dropout1'], num_units=nhidden, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh, mask_input=net['mask'], ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True, only_return_final=True ) net['dropout2'] = layer.DropoutLayer(net['fwd2'], p=0.6) net['softmax'] = layer.DenseLayer( net['dropout2'], num_units=nclasses, nonlinearity=lasagne.nonlinearities.softmax ) logstr = '========== MODEL ========== \n' logstr += 'vocab size: %d\n' % V logstr += 'embedding dim: %d\n' % embedding_dim logstr += 'nhidden: %d\n' % nhidden logstr += 'pooling: %s\n' % pool for lname, lyr in net.items(): logstr += '%s %s\n' % (lname, str(get_output_shape(lyr, ASSUME))) logstr += '=========================== \n' print logstr log.write(logstr) log.flush() return net
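# A tiny runnable check (toy shapes) of the `tmean` merge used for
# pool == 'mean' above: ElemwiseMergeLayer applies an arbitrary elementwise
# merge function to its incoming layers.
import numpy as np
import theano
from lasagne.layers import InputLayer, ElemwiseMergeLayer, get_output

l_a = InputLayer((None, 4))
l_b = InputLayer((None, 4))
l_mean = ElemwiseMergeLayer([l_a, l_b], lambda a, b: (a + b) / 2.)

f = theano.function([l_a.input_var, l_b.input_var], get_output(l_mean))
print(f(np.ones((2, 4), dtype='float32'),
        3 * np.ones((2, 4), dtype='float32')))   # all 2.0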
def get_pydot_graph(layers, output_shape=True, verbose=False): """ Creates a PyDot graph of the network defined by the given layers. :parameters: - layers : list List of the layers, as obtained from lasagne.layers.get_all_layers - output_shape: (default `True`) If `True`, the output shape of each layer will be displayed. - verbose: (default `False`) If `True`, layer attributes like filter shape, stride, etc. will be displayed. :returns: - pydot_graph : PyDot object containing the graph """ pydot_graph = pydot.Dot('Network', graph_type='digraph') pydot_nodes = {} pydot_edges = [] for i, layer in enumerate(layers): layer_type = '{0}'.format(layer.__class__.__name__) key = repr(layer) label = "" color = get_hex_color(layer_type) if hasattr(layer, "name") and layer.name is not None: label += " {0}, ".format(layer.name) label += layer_type if verbose: for attr in ['num_filters', 'num_units', 'ds', 'filter_shape', 'stride', 'strides', 'p']: if hasattr(layer, attr): label += '\n' + '{0}: {1}'.format(attr, getattr(layer, attr)) if hasattr(layer, 'nonlinearity'): try: nonlinearity = layer.nonlinearity.__name__ except AttributeError: nonlinearity = layer.nonlinearity.__class__.__name__ label += '\n' + 'nonlinearity: {0}'.format(nonlinearity) if output_shape: label += '\n' + 'Output shape: {0}'.format(get_output_shape(layer)) pydot_nodes[key] = pydot.Node(key, label=label, shape='record', fillcolor=color, style='filled') if hasattr(layer, 'input_layers'): for input_layer in layer.input_layers: pydot_edges.append([repr(input_layer), key]) if hasattr(layer, 'input_layer'): pydot_edges.append([repr(layer.input_layer), key]) for node in pydot_nodes.values(): pydot_graph.add_node(node) for edge in pydot_edges: pydot_graph.add_edge( pydot.Edge(pydot_nodes[edge[0]], pydot_nodes[edge[1]])) return pydot_graph
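# Hedged usage sketch: render the graph returned by get_pydot_graph to a PNG.
# `network` is an assumed output layer of some Lasagne model, and
# get_hex_color (used inside get_pydot_graph) is assumed to be defined
# alongside it.
from lasagne.layers import get_all_layers

graph = get_pydot_graph(get_all_layers(network), output_shape=True,
                        verbose=True)
graph.write_png('network_graph.png')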
def __init__(self, l_in, n_layers, pheight, pwidth, dim_proj, nclasses, stack_sublayers, # outsampling out_upsampling_type, out_nfilters, out_filters_size, out_filters_stride, out_W_init=lasagne.init.GlorotUniform(), out_b_init=lasagne.init.Constant(0.), out_nonlinearity=lasagne.nonlinearities.identity, hypotetical_fm_size=np.array((100.0, 100.0)), # input ConvLayers in_nfilters=None, in_filters_size=((3, 3), (3, 3)), in_filters_stride=((1, 1), (1, 1)), in_W_init=lasagne.init.GlorotUniform(), in_b_init=lasagne.init.Constant(0.), in_nonlinearity=lasagne.nonlinearities.rectify, in_vgg_layer='conv3_3', # common recurrent layer params RecurrentNet=lasagne.layers.GRULayer, nonlinearity=lasagne.nonlinearities.rectify, hid_init=lasagne.init.Constant(0.), grad_clipping=0, precompute_input=True, mask_input=None, # 1x1 Conv layer for dimensional reduction conv_dim_red=False, conv_dim_red_nonlinearity=lasagne.nonlinearities.identity, # GRU specific params gru_resetgate=lasagne.layers.Gate(W_cell=None), gru_updategate=lasagne.layers.Gate(W_cell=None), gru_hidden_update=lasagne.layers.Gate( W_cell=None, nonlinearity=lasagne.nonlinearities.tanh), gru_hid_init=lasagne.init.Constant(0.), # LSTM specific params lstm_ingate=lasagne.layers.Gate(), lstm_forgetgate=lasagne.layers.Gate(), lstm_cell=lasagne.layers.Gate( W_cell=None, nonlinearity=lasagne.nonlinearities.tanh), lstm_outgate=lasagne.layers.Gate(), # RNN specific params rnn_W_in_to_hid=lasagne.init.Uniform(), rnn_W_hid_to_hid=lasagne.init.Uniform(), rnn_b=lasagne.init.Constant(0.), # Special layers batch_norm=False, name=''): """A ReSeg layer The ReSeg layer is composed by multiple ReNet layers and an upsampling layer Parameters ---------- l_in : lasagne.layers.Layer The input layer, in bc01 format n_layers : int The number of layers pheight : tuple The height of the patches, for each layer pwidth : tuple The width of the patches, for each layer dim_proj : tuple The number of hidden units of each RNN, for each layer nclasses : int The number of classes of the data stack_sublayers : bool If True the bidirectional RNNs in the ReNet layers will be stacked one over the other. See ReNet for more details. out_upsampling_type : string The kind of upsampling to be used out_nfilters : int The number of hidden units of the upsampling layer out_filters_size : tuple The size of the upsampling filters, if any out_filters_stride : tuple The stride of the upsampling filters, if any out_W_init : Theano shared variable, numpy array or callable Initializer for W out_b_init : Theano shared variable, numpy array or callable Initializer for b out_nonlinearity : Theano shared variable, numpy array or callable The nonlinearity to be applied after the upsampling hypotetical_fm_size : float The hypotetical size of the feature map that would be input of the layer if the input image of the whole network was of size (100, 100) RecurrentNet : lasagne.layers.Layer A recurrent layer class nonlinearity : callable or None The nonlinearity that is applied to the output. If None is provided, no nonlinearity will be applied. hid_init : callable, np.ndarray, theano.shared or lasagne.layers.Layer Initializer for initial hidden state grad_clipping : float If nonzero, the gradient messages are clipped to the given value during the backward pass. precompute_input : bool If True, precompute input_to_hid before iterating through the sequence. This can result in a speedup at the expense of an increase in memory usage. 
mask_input : lasagne.layers.Layer Layer which allows for a sequence mask to be input, for when sequences are of variable length. Default None, which means no mask will be supplied (i.e. all sequences are of the same length). gru_resetgate : lasagne.layers.Gate Parameters for the reset gate, if RecurrentNet is GRU gru_updategate : lasagne.layers.Gate Parameters for the update gate, if RecurrentNet is GRU gru_hidden_update : lasagne.layers.Gate Parameters for the hidden update, if RecurrentNet is GRU gru_hid_init : callable, np.ndarray, theano.shared or lasagne.layers.Layer Initializer for initial hidden state, if RecurrentNet is GRU lstm_ingate : lasagne.layers.Gate Parameters for the input gate, if RecurrentNet is LSTM lstm_forgetgate : lasagne.layers.Gate Parameters for the forget gate, if RecurrentNet is LSTM lstm_cell : lasagne.layers.Gate Parameters for the cell computation, if RecurrentNet is LSTM lstm_outgate : lasagne.layers.Gate Parameters for the output gate, if RecurrentNet is LSTM rnn_W_in_to_hid : Theano shared variable, numpy array or callable Initializer for input-to-hidden weight matrix, if RecurrentNet is RecurrentLayer rnn_W_hid_to_hid : Theano shared variable, numpy array or callable Initializer for hidden-to-hidden weight matrix, if RecurrentNet is RecurrentLayer rnn_b : Theano shared variable, numpy array, callable or None Initializer for bias vector, if RecurrentNet is RecurrentLaye. If None is provided there will be no bias batch_norm: this add a batch normalization layer at the end of the network right after each Gradient Upsampling layers name : string The name of the layer, optional """ super(ReSegLayer, self).__init__(l_in, name) self.l_in = l_in self.n_layers = n_layers self.pheight = pheight self.pwidth = pwidth self.dim_proj = dim_proj self.nclasses = nclasses self.stack_sublayers = stack_sublayers # upsampling self.out_upsampling_type = out_upsampling_type self.out_nfilters = out_nfilters self.out_filters_size = out_filters_size self.out_filters_stride = out_filters_stride self.out_W_init = out_W_init self.out_b_init = out_b_init self.out_nonlinearity = out_nonlinearity self.hypotetical_fm_size = hypotetical_fm_size # input ConvLayers self.in_nfilters = in_nfilters self.in_filters_size = in_filters_size self.in_filters_stride = in_filters_stride self.in_W_init = in_W_init self.in_b_init = in_b_init self.in_nonlinearity = in_nonlinearity self.in_vgg_layer = in_vgg_layer # common recurrent layer params self.RecurrentNet = RecurrentNet self.nonlinearity = nonlinearity self.hid_init = hid_init self.grad_clipping = grad_clipping self.precompute_input = precompute_input self.mask_input = mask_input # GRU specific params self.gru_resetgate = gru_resetgate self.gru_updategate = gru_updategate self.gru_hidden_update = gru_hidden_update self.gru_hid_init = gru_hid_init # LSTM specific params self.lstm_ingate = lstm_ingate self.lstm_forgetgate = lstm_forgetgate self.lstm_cell = lstm_cell self.lstm_outgate = lstm_outgate # RNN specific params self.rnn_W_in_to_hid = rnn_W_in_to_hid self.rnn_W_hid_to_hid = rnn_W_hid_to_hid self.name = name self.sublayers = [] expand_height = expand_width = 1 # Input ConvLayers l_conv = l_in if isinstance(in_nfilters, Iterable) and not isinstance(in_nfilters, str): for i, (nf, f_size, stride) in enumerate( zip(in_nfilters, in_filters_size, in_filters_stride)): l_conv = ConvLayer( l_conv, num_filters=nf, filter_size=f_size, stride=stride, W=in_W_init, b=in_b_init, pad='valid', name=self.name + '_input_conv_layer' + str(i) ) 
self.sublayers.append(l_conv) self.hypotetical_fm_size = ( (self.hypotetical_fm_size - 1) * stride + f_size) # TODO This is right only if stride == filter... expand_height *= f_size[0] expand_width *= f_size[1] # Print shape out_shape = get_output_shape(l_conv) print('ConvNet: After in-convnet: {}'.format(out_shape)) # Pretrained vgg16 elif type(in_nfilters) == str: from vgg16 import Vgg16Layer l_conv = Vgg16Layer(l_in, self.in_nfilters, False, False) hypotetical_fm_size /= 8 expand_height = expand_width = 8 self.sublayers.append(l_conv) # Print shape out_shape = get_output_shape(l_conv) print('Vgg: After vgg: {}'.format(out_shape)) # ReNet layers l_renet = l_conv for lidx in xrange(n_layers): l_renet = ReNetLayer(l_renet, patch_size=(pwidth[lidx], pheight[lidx]), n_hidden=dim_proj[lidx], stack_sublayers=stack_sublayers[lidx], RecurrentNet=RecurrentNet, nonlinearity=nonlinearity, hid_init=hid_init, grad_clipping=grad_clipping, precompute_input=precompute_input, mask_input=mask_input, # GRU specific params gru_resetgate=gru_resetgate, gru_updategate=gru_updategate, gru_hidden_update=gru_hidden_update, gru_hid_init=gru_hid_init, # LSTM specific params lstm_ingate=lstm_ingate, lstm_forgetgate=lstm_forgetgate, lstm_cell=lstm_cell, lstm_outgate=lstm_outgate, # RNN specific params rnn_W_in_to_hid=rnn_W_in_to_hid, rnn_W_hid_to_hid=rnn_W_hid_to_hid, rnn_b=rnn_b, batch_norm=batch_norm, name=self.name + '_renet' + str(lidx)) self.sublayers.append(l_renet) self.hypotetical_fm_size /= (pwidth[lidx], pheight[lidx]) # Print shape out_shape = get_output_shape(l_renet) if stack_sublayers: msg = 'ReNet: After 2 rnns {}x{}@{} and 2 rnns 1x1@{}: {}' print(msg.format(pheight[lidx], pwidth[lidx], dim_proj[lidx], dim_proj[lidx], out_shape)) else: print('ReNet: After 4 rnns {}x{}@{}: {}'.format( pheight[lidx], pwidth[lidx], dim_proj[lidx], out_shape)) # 1x1 conv layer : dimensionality reduction layer if conv_dim_red: l_renet = lasagne.layers.Conv2DLayer( l_renet, num_filters=dim_proj[lidx], filter_size=(1, 1), W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.), pad='valid', nonlinearity=conv_dim_red_nonlinearity, name=self.name + '_1x1_conv_layer' + str(lidx) ) # Print shape out_shape = get_output_shape(l_renet) print('Dim reduction: After 1x1 convnet: {}'.format(out_shape)) # Upsampling if out_upsampling_type == 'autograd': raise NotImplementedError( 'This will not work as the dynamic cropping will crop ' 'part of the image.') nlayers = len(out_nfilters) assert nlayers > 1 # Compute the upsampling ratio and the corresponding params h2 = np.array((100., 100.)) up_ratio = (h2 / self.hypotetical_fm_size) ** (1. / nlayers) h1 = h2 / up_ratio h0 = h1 / up_ratio stride = to_int(ceildiv(h2 - h1, h1 - h0)) filter_size = to_int(ceildiv((h1 * (h1 - 1) + h2 - h2 * h0), (h1 - h0))) target_shape = get_output(l_renet).shape[2:] l_upsampling = l_renet for l in range(nlayers): target_shape = target_shape * up_ratio l_upsampling = TransposedConv2DLayer( l_upsampling, num_filters=out_nfilters[l], filter_size=filter_size, stride=stride, W=out_W_init, b=out_b_init, nonlinearity=out_nonlinearity) self.sublayers.append(l_upsampling) up_shape = get_output(l_upsampling).shape[2:] # Print shape out_shape = get_output_shape(l_upsampling) print('Transposed autograd: {}x{} (str {}x{}) @ {}:{}'.format( filter_size[0], filter_size[1], stride[0], stride[1], out_nfilters[l], out_shape)) # CROP # pad in TransposeConv2DLayer cannot be a tensor --> we cannot # crop unless we know in advance by how much! 
crop = T.max(T.stack([up_shape - target_shape, T.zeros(2)]), axis=0) crop = crop.astype('uint8') # round down l_upsampling = CropLayer( l_upsampling, crop, data_format='bc01') self.sublayers.append(l_upsampling) # Print shape print('Dynamic cropping') elif out_upsampling_type == 'grad': l_upsampling = l_renet for i, (nf, f_size, stride) in enumerate(zip( out_nfilters, out_filters_size, out_filters_stride)): l_upsampling = TransposedConv2DLayer( l_upsampling, num_filters=nf, filter_size=f_size, stride=stride, crop=0, W=out_W_init, b=out_b_init, nonlinearity=out_nonlinearity) self.sublayers.append(l_upsampling) if batch_norm: l_upsampling = lasagne.layers.batch_norm( l_upsampling, axes='auto') self.sublayers.append(l_upsampling) print "Batch normalization after Grad layer " # Print shape out_shape = get_output_shape(l_upsampling) print('Transposed conv: {}x{} (str {}x{}) @ {}:{}'.format( f_size[0], f_size[1], stride[0], stride[1], nf, out_shape)) elif out_upsampling_type == 'linear': # Go to b01c l_upsampling = lasagne.layers.DimshuffleLayer( l_renet, (0, 2, 3, 1), name=self.name + '_grad_undimshuffle') self.sublayers.append(l_upsampling) expand_height *= np.prod(pheight) expand_width *= np.prod(pwidth) l_upsampling = LinearUpsamplingLayer(l_upsampling, expand_height, expand_width, nclasses, batch_norm=batch_norm, name="linear_upsample_layer") self.sublayers.append(l_upsampling) print('Linear upsampling') if batch_norm: l_upsampling = lasagne.layers.batch_norm( l_upsampling, axes=(0, 1, 2)) self.sublayers.append(l_upsampling) print "Batch normalization after Linear upsampling layer " # Go back to bc01 l_upsampling = lasagne.layers.DimshuffleLayer( l_upsampling, (0, 3, 1, 2), name=self.name + '_grad_undimshuffle') self.sublayers.append(l_upsampling) self.l_out = l_upsampling # HACK LASAGNE # This will set `self.input_layer`, which is needed by Lasagne to find # the layers with the get_all_layers() helper function in the # case of a layer with sublayers if isinstance(self.l_out, tuple): self.input_layer = None else: self.input_layer = self.l_out
def __init__(self, l_in, patch_size=(2, 2), n_hidden=50, stack_sublayers=False, RecurrentNet=lasagne.layers.GRULayer, nonlinearity=lasagne.nonlinearities.rectify, hid_init=lasagne.init.Constant(0.), grad_clipping=0, precompute_input=True, mask_input=None, # GRU specific params gru_resetgate=lasagne.layers.Gate(W_cell=None), gru_updategate=lasagne.layers.Gate(W_cell=None), gru_hidden_update=lasagne.layers.Gate( W_cell=None, nonlinearity=lasagne.nonlinearities.tanh), gru_hid_init=lasagne.init.Constant(0.), # LSTM specific params lstm_ingate=lasagne.layers.Gate(), lstm_forgetgate=lasagne.layers.Gate(), lstm_cell=lasagne.layers.Gate( W_cell=None, nonlinearity=lasagne.nonlinearities.tanh), lstm_outgate=lasagne.layers.Gate(), # RNN specific params rnn_W_in_to_hid=lasagne.init.Uniform(), rnn_W_hid_to_hid=lasagne.init.Uniform(), rnn_b=lasagne.init.Constant(0.), batch_norm=False, name='', **kwargs): """A ReNet layer Each ReNet layer is composed by 4 RNNs (or 2 bidirectional RNNs): * First SubLayer: 2 RNNs scan the image vertically (up and down) * Second Sublayer: 2 RNNs scan the image horizontally (left and right) The sublayers can be stacked one over the other or can scan the image in parallel Parameters ---------- l_in : lasagne.layers.Layer The input layer, in format batches, channels, rows, cols patch_size : tuple The size of the patch expressed as (pheight, pwidth). Optional n_hidden : int The number of hidden units of each RNN. Optional stack_sublayers : bool If True, the sublayers (i.e. the bidirectional RNNs) will be stacked one over the other, meaning that the second bidirectional RNN will read the feature map coming from the first bidirectional RNN. If False, all the RNNs will read the input. Optional RecurrentNet : lasagne.layers.Layer A recurrent layer class nonlinearity : callable or None The nonlinearity that is applied to the output. If None is provided, no nonlinearity will be applied. hid_init : callable, np.ndarray, theano.shared or lasagne.layers.Layer Initializer for initial hidden state grad_clipping : float If nonzero, the gradient messages are clipped to the given value during the backward pass. precompute_input : bool If True, precompute input_to_hid before iterating through the sequence. This can result in a speedup at the expense of an increase in memory usage. mask_input : lasagne.layers.Layer Layer which allows for a sequence mask to be input, for when sequences are of variable length. Default None, which means no mask will be supplied (i.e. all sequences are of the same length). 
gru_resetgate : lasagne.layers.Gate Parameters for the reset gate, if RecurrentNet is GRU gru_updategate : lasagne.layers.Gate Parameters for the update gate, if RecurrentNet is GRU gru_hidden_update : lasagne.layers.Gate Parameters for the hidden update, if RecurrentNet is GRU gru_hid_init : callable, np.ndarray, theano.shared or lasagne.layers.Layer Initializer for initial hidden state, if RecurrentNet is GRU lstm_ingate : lasagne.layers.Gate Parameters for the input gate, if RecurrentNet is LSTM lstm_forgetgate : lasagne.layers.Gate Parameters for the forget gate, if RecurrentNet is LSTM lstm_cell : lasagne.layers.Gate Parameters for the cell computation, if RecurrentNet is LSTM lstm_outgate : lasagne.layers.Gate Parameters for the output gate, if RecurrentNet is LSTM rnn_W_in_to_hid : Theano shared variable, numpy array or callable Initializer for input-to-hidden weight matrix, if RecurrentNet is RecurrentLayer rnn_W_hid_to_hid : Theano shared variable, numpy array or callable Initializer for hidden-to-hidden weight matrix, if RecurrentNet is RecurrentLayer rnn_b : Theano shared variable, numpy array, callable or None Initializer for bias vector, if RecurrentNet is RecurrentLaye. If None is provided there will be no bias name : string The name of the layer, optional """ super(ReNetLayer, self).__init__(l_in, name) self.l_in = l_in self.patch_size = patch_size self.n_hidden = n_hidden self.stack_sublayers = stack_sublayers self.name = name self.stride = self.patch_size # for now, it's not parametrized # Dynamically add padding if the input is not a multiple of the # patch size (expected input format: bs, ch, rows, cols) l_in = DynamicPaddingLayer(l_in, patch_size, self.stride, name=self.name + '_padding') # get_output(l_in).shape will result in an error in the # recurrent layers batch_size = -1 cchannels, cheight, cwidth = get_output_shape(l_in)[1:] pheight, pwidth = patch_size psize = pheight * pwidth * cchannels # Number of patches in each direction npatchesH = cheight / pheight npatchesW = cwidth / pwidth # Split in patches: bs, cc, #H, ph, #W, pw l_in = lasagne.layers.ReshapeLayer( l_in, (batch_size, cchannels, npatchesH, pheight, npatchesW, pwidth), name=self.name + "_pre_reshape0") # bs, #H, #W, ph, pw, cc l_in = lasagne.layers.DimshuffleLayer( l_in, (0, 2, 4, 3, 5, 1), name=self.name + "_pre_dimshuffle0") # FIRST SUBLAYER # The RNN Layer needs a 3D tensor input: bs*#H, #W, psize # bs*#H, #W, ph * pw * cc l_sub0 = lasagne.layers.ReshapeLayer( l_in, (-1, npatchesW, psize), name=self.name + "_sub0_reshape0") # Left/right scan: bs*#H, #W, 2*hid l_sub0 = BidirectionalRNNLayer( l_sub0, n_hidden, RecurrentNet=RecurrentNet, nonlinearity=nonlinearity, hid_init=hid_init, grad_clipping=grad_clipping, precompute_input=precompute_input, mask_input=mask_input, # GRU specific params gru_resetgate=gru_resetgate, gru_updategate=gru_updategate, gru_hidden_update=gru_hidden_update, gru_hid_init=gru_hid_init, batch_norm=batch_norm, # LSTM specific params lstm_ingate=lstm_ingate, lstm_forgetgate=lstm_forgetgate, lstm_cell=lstm_cell, lstm_outgate=lstm_outgate, # RNN specific params rnn_W_in_to_hid=rnn_W_in_to_hid, rnn_W_hid_to_hid=rnn_W_hid_to_hid, rnn_b=rnn_b, name=self.name + "_sub0_renetsub") # Revert reshape: bs, #H, #W, 2*hid l_sub0 = lasagne.layers.ReshapeLayer( l_sub0, (batch_size, npatchesH, npatchesW, 2 * n_hidden), name=self.name + "_sub0_unreshape") # # Invert rows and columns: #H, bs, #W, 2*hid # l_sub0 = lasagne.layers.DimshuffleLayer( # l_sub0, # (2, 1, 0, 3), # name=self.name + 
"_sub0_undimshuffle") # If stack_sublayers is True, the second sublayer takes as an input the # first sublayer's output, otherwise the input of the ReNetLayer (e.g # the image) if stack_sublayers: # bs, #H, #W, 2*hid input_sublayer1 = l_sub0 psize = 2 * n_hidden else: # # #H, bs, #W, ph, pw, cc # input_sublayer1 = lasagne.layers.DimshuffleLayer( # l_in, # (2, 1, 0, 3, 4, 5), # name=self.name + "_presub1_in_dimshuffle") # bs, #H, #W, ph*pw*cc input_sublayer1 = lasagne.layers.ReshapeLayer( l_in, (batch_size, npatchesH, npatchesW, psize), name=self.name + "_presub1_in_dimshuffle") # SECOND SUBLAYER # Invert rows and columns: bs, #W, #H, psize l_sub1 = lasagne.layers.DimshuffleLayer( input_sublayer1, (0, 2, 1, 3), name=self.name + "_presub1_dimshuffle") # The RNN Layer needs a 3D tensor input: bs*#W, #H, psize l_sub1 = lasagne.layers.ReshapeLayer( l_sub1, (-1, npatchesH, psize), name=self.name + "_sub1_reshape") # Down/up scan: bs*#W, #H, 2*hid l_sub1 = BidirectionalRNNLayer( l_sub1, n_hidden, RecurrentNet=RecurrentNet, nonlinearity=nonlinearity, hid_init=hid_init, grad_clipping=grad_clipping, precompute_input=precompute_input, mask_input=mask_input, # GRU specific params gru_resetgate=gru_resetgate, gru_updategate=gru_updategate, gru_hidden_update=gru_hidden_update, gru_hid_init=gru_hid_init, # LSTM specific params lstm_ingate=lstm_ingate, lstm_forgetgate=lstm_forgetgate, lstm_cell=lstm_cell, lstm_outgate=lstm_outgate, # RNN specific params rnn_W_in_to_hid=rnn_W_in_to_hid, rnn_W_hid_to_hid=rnn_W_hid_to_hid, rnn_b=rnn_b, name=self.name + "_sub1_renetsub") psize = 2 * n_hidden # Revert the reshape: bs, #W, #H, 2*hid l_sub1 = lasagne.layers.ReshapeLayer( l_sub1, (batch_size, npatchesW, npatchesH, psize), name=self.name + "_sub1_unreshape") # Invert rows and columns: bs, #H, #W, psize l_sub1 = lasagne.layers.DimshuffleLayer( l_sub1, (0, 2, 1, 3), name=self.name + "_sub1_undimshuffle") # Concat all 4 layers if needed: bs, #H, #W, {2,4}*hid if not stack_sublayers: l_sub1 = lasagne.layers.ConcatLayer([l_sub0, l_sub1], axis=3) # Get back to bc01: bs, psize, #H, #W self.out_layer = lasagne.layers.DimshuffleLayer( l_sub1, (0, 3, 1, 2), name=self.name + "_out_undimshuffle") # HACK LASAGNE # This will set `self.input_layer`, which is needed by Lasagne to find # the layers with the get_all_layers() helper function in the # case of a layer with sublayers if isinstance(self.out_layer, tuple): self.input_layer = None else: self.input_layer = self.out_layer
def build_model(vmap, nclasses=2, embedding_dim=50, nhidden=256, batchsize=None, invar=None, maskvar=None, bidirectional=True, pool=True, grad_clip=100, maxlen=MAXLEN): V = len(vmap) W = lasagne.init.Normal() # Input Layer # TODO: should be (batchsize, maxlen, vocab_size) l_in = layer.InputLayer((batchsize, maxlen, V), input_var=invar) l_mask = layer.InputLayer((batchsize, maxlen), input_var=maskvar) ASSUME = {l_in: (200, 140, 94), l_mask: (200, 140)} print 'Input Layer' print 'output:', get_output_shape(l_in, ASSUME) print 'output(mask):', get_output_shape(l_mask, ASSUME) print # Embedding Layer l_emb = layer.EmbeddingLayer(l_in, input_size=V, output_size=embedding_dim, W=W) print 'Embedding Layer' print 'output:', get_output_shape(l_emb, ASSUME) gate_params = layer.recurrent.Gate( W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(), b=lasagne.init.Constant(0.) ) cell_params = layer.recurrent.Gate( W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(), W_cell=None, b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.tanh ) l_fwd = layer.LSTMLayer( l_emb, num_units=nhidden, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh, mask_input=l_mask, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True ) print 'Forward LSTM' print 'output:', get_output_shape(l_fwd, ASSUME) l_concat = None if bidirectional: l_bwd = layer.LSTMLayer( l_emb, num_units=nhidden, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh, mask_input=l_mask, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True, backwards=True ) print 'Backward LSTM' print 'output:', get_output_shape(l_bwd, ASSUME) def tmean(a, b): agg = theano.tensor.add(a, b) agg /= 2. return agg if pool: l_concat = layer.ElemwiseMergeLayer([l_fwd, l_bwd], tmean) else: l_concat = layer.ConcatLayer([l_fwd, l_bwd]) else: l_concat = layer.ConcatLayer([l_fwd]) print 'Concat' print 'output:', get_output_shape(l_concat, ASSUME) l_concat = layer.DropoutLayer(l_concat, p=0.5) l_lstm2 = layer.LSTMLayer( l_concat, num_units=nhidden, grad_clipping=grad_clip, nonlinearity=lasagne.nonlinearities.tanh, mask_input=l_mask, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True, only_return_final=True ) print 'LSTM #2' print 'output:', get_output_shape(l_lstm2, ASSUME) l_lstm2 = layer.DropoutLayer(l_lstm2, p=0.6) network = layer.DenseLayer( l_lstm2, num_units=nclasses, nonlinearity=lasagne.nonlinearities.softmax ) print 'Dense Layer' print 'output:', get_output_shape(network, ASSUME) return network
def get_output_size(network, name): layer = [l for l in get_all_layers(network) if l.name == name] if len(layer): return get_output_shape(layer)[0][1] return 0
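# Usage sketch for get_output_size with a throwaway two-layer model; the
# layer names here are arbitrary.
from lasagne.layers import InputLayer, DenseLayer

l_in = InputLayer((None, 100), name='input')
l_hid = DenseLayer(l_in, num_units=64, name='hidden')

print(get_output_size(l_hid, 'hidden'))   # 64
print(get_output_size(l_hid, 'missing'))  # 0 (no layer with that name)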
def build(myNet, idxSiam, verbose=True): # # custom activations # relu = lambda x: x * (x > 0) # from theano example # # stable softmax # softmax = lambda x: T.exp(x) \ # / (T.exp(x).sum(1, keepdims=True, dtype=floatX)) # # log-soft-max # log_softmax = lambda x: (x - x.max(1, keepdims=True)) \ # - T.log(T.sum( # T.exp(x - x.max(1, keepdims=True)), # axis=1, keepdims=True, dtype=floatX # )) # batch_size = myNet.config.batch_size # num_channel = myNet.config.num_channel # output_dim = myNet.config.out_dim INITIALIZATION_GAIN = 1.0 # INITIALIZATION_GAIN = 0.0 BIAS_RND = myNet.config.bias_rnd # --------------------------------------------------------------------- # Dropout on the input (no rescaling!) fInputDroprate = getattr(myNet.config, 'fInputDroprate', 0.0) myNet.layers[idxSiam]['kp-inputdrop'] = DropoutLayer( myNet.layers[idxSiam]['kp-input'], p=np.cast[floatX](fInputDroprate), rescale=False, name='kp-inputdrop') # --------------------------------------------------------------------- # convolution sharing weights # shape of fileter if 'nFilterScaleSize' in myNet.config.__dict__.keys(): fs = [myNet.config.nFilterSize, myNet.config.nFilterSize, myNet.config.nFilterScaleSize] else: fs = [myNet.config.nFilterSize, myNet.config.nFilterSize, 1] ns = 4 # num in sum nm = 4 # num in max nu = 1 # num units after Feuture pooling if idxSiam == 0: W_init = HeNormal(gain=INITIALIZATION_GAIN) # W_init = Constant(0.0) b_init = Constant(0.0) else: W_init = myNet.layers[0]['kp-c0'].W b_init = myNet.layers[0]['kp-c0'].b # For testing 3D2D convolution if 'bTestConv3D2D' in myNet.config.__dict__.keys(): raise RuntimeError('Deprecated!') myNet.layers[idxSiam]['kp-c0'] = Conv3DLayer( myNet.layers[idxSiam]['kp-inputdrop'], num_filters=nu * ns * nm, filter_size=fs, nonlinearity=None, W=W_init, b=b_init, name='kp-c0', ) # noise layer myNet.layers[idxSiam]['kp-c0n'] = GaussianNoiseLayer( myNet.layers[idxSiam]['kp-c0'], sigma=BIAS_RND, name='kp-c0n', ) # GHH pooling activation myNet.layers[idxSiam]['kp-c0a'] = GHHFeaturePoolLayer( myNet.layers[idxSiam]['kp-c0n'], num_in_sum=ns, num_in_max=nm, axis=1, max_strength=myNet.config.max_strength, name='kp-c0a', ) # # ------------------------------------------------------------------- # # Fully connected with sharing weights # if idxSiam == 0: # W_init = HeNormal(gain=INITIALIZATION_GAIN) # # W_init = Constant(0.0) # b_init = Constant(0.0) # else: # W_init = myNet.layers[0]['output'].W # b_init = myNet.layers[0]['output'].b # myNet.layers[idxSiam]['output'] = DenseLayer( # myNet.layers[idxSiam]['f3a'], # num_units=10, # nonlinearity=log_softmax, # W=W_init, b=b_init, name='output' # ) final_nonlinearity = getattr(myNet.config, 'sKpNonlinearity', 'None') if verbose and idxSiam == 0: print(' -- kp_info: nonlinearity == ' + final_nonlinearity) if final_nonlinearity == 'None': final_nonlinearity = None elif final_nonlinearity == 'tanh': final_nonlinearity = T.tanh else: raise ValueError('Unsupported nonlinearity!') myNet.layers[idxSiam]['kp-scoremap'] = NonlinearityLayer( myNet.layers[idxSiam]['kp-c0a'], nonlinearity=final_nonlinearity, name='kp-scoremap', ) # --------------------------------------------------------------------- # Layer for cropping to keep desc part within boundary rf = np.cast[floatX](float(myNet.config.nPatchSizeKp) / float(myNet.config.nPatchSize)) input_shape = get_output_shape(myNet.layers[0]['kp-input']) uncut_shape = get_output_shape(myNet.layers[0]['kp-scoremap']) req_boundary = np.ceil(rf * np.sqrt(2) * myNet.config.nDescInputSize / 2.0).astype(int) 
cur_boundary = (input_shape[2] - uncut_shape[2]) // 2 crop_size = req_boundary - cur_boundary if verbose and idxSiam == 0: resized_shape = get_output_shape(myNet.layers[0]['kp-input']) print(' -- kp_info: output score map shape {}'.format(uncut_shape)) print(' -- kp_info: input size after resizing {}'.format(resized_shape[2])) print(' -- kp_info: output score map size {}'.format(uncut_shape[2])) print(' -- kp_info: required boundary {}'.format(req_boundary)) print(' -- kp_info: current boundary {}'.format(cur_boundary)) print(' -- kp_info: additional crop size {}'.format(crop_size)) print(' -- kp_info: final cropped score map size {}'.format( uncut_shape[2] - 2 * crop_size)) print(' -- kp_info: movement ratio will be {}'.format(( float(uncut_shape[2] - 2.0 * crop_size) / float(myNet.config.nPatchSizeKp - 1)))) def crop(out, crop_size): return out[:, :, crop_size:-crop_size, crop_size:-crop_size, :] def cropfunc(out): return crop(out, crop_size) myNet.layers[idxSiam]['kp-scoremap-cut'] = ExpressionLayer( myNet.layers[idxSiam]['kp-scoremap'], cropfunc, output_shape='auto', name='kp-scoremap-cut', ) # --------------------------------------------------------------------- # Mapping layer to x,y,z myNet.layers[idxSiam]['kp-output'] = createXYZMapLayer( myNet.layers[idxSiam]['kp-scoremap-cut'], fScaleList=myNet.config.fScaleList, req_boundary=req_boundary, name='kp-output', fCoMStrength=10.0, eps=myNet.config.epsilon)
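# Standalone sketch (made-up 5D shape) of the ExpressionLayer crop used for
# 'kp-scoremap-cut' above: slice the two spatial axes and let
# output_shape='auto' infer the cropped shape.
from lasagne.layers import InputLayer, ExpressionLayer, get_output_shape

l_scores = InputLayer((None, 16, 32, 32, 5))   # bs, ch, rows, cols, scales
crop_size = 4
l_cut = ExpressionLayer(
    l_scores,
    lambda out: out[:, :, crop_size:-crop_size, crop_size:-crop_size, :],
    output_shape='auto')
print(get_output_shape(l_cut))   # (None, 16, 24, 24, 5)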