def build_baseline5_fan(input_var):
    """Build baseline 1 extended with the novel FAN layer.

    VGG ``conv4_1`` is used for feature extraction.  Those features are
    upscaled by a factor of 8 and converted into a 0/1 mask that is 1 wherever
    an activation equals the maximum along axis 1.  The mask is fed, together
    with a 3-filter 1x1 convolution of the normalised input, into a
    ``FeatureAwareNormLayer``.

    :param input_var: variable bound to the network's ``InputLayer``.
    :return: tuple ``(last, net)`` where ``last`` is the ``"fan"`` output
        layer and ``net`` is an ``OrderedDict`` of every named layer.
    """
    # TODO remove these imports + move relevant parts to layers.py once
    # everything is up and running
    import theano.tensor as T
    import numpy as np

    net = OrderedDict()

    # Input, standardization
    last = net['input'] = InputLayer(
        (None, 3, tools.INP_PSIZE, tools.INP_PSIZE), input_var=input_var)
    last = net['norm'] = ExpressionLayer(last, lambda x: normalize(x))

    # Feature branch: conv4_1 features, upscaled by 8.
    net['features_s8'] = get_features(last)["conv4_1"]
    net['features'] = Upscale2DLayer(net["features_s8"], 8)

    # 1 where the activation equals the maximum over axis 1, 0 elsewhere.
    net['mask'] = ExpressionLayer(
        net["features"],
        lambda x: 1. * T.eq(x, x.max(axis=1, keepdims=True)))

    last = net["middle"] = ConvLayer(last, 3, 1, nonlinearity=linear)
    last = net["fan"] = FeatureAwareNormLayer(
        (last, net['mask']),
        beta=nn.init.Constant(np.float32(128.)),
        gamma=nn.init.Constant(np.float32(25.)))

    return last, net
def build(myNet, idxSiam, verbose=True):
    """Register the keypoint bypass layers for siamese branch ``idxSiam``.

    Five entries are written into ``myNet.layers[idxSiam]``: a score input
    fed by ``myNet.y[idxSiam]``, two identical score-map layers computing
    ``x.reshape([batch_size, 1]) * 2.0 - 1.0``, an xyz input fed by
    ``myNet.pos[idxSiam]``, and an identity output layer over the xyz input.

    :param myNet: network object providing ``layers``, ``config``, ``y`` and
        ``pos``.
    :param idxSiam: index of the siamese branch to populate.
    :param verbose: accepted but not used by this function.
    """
    branch = myNet.layers[idxSiam]
    batch_size = myNet.config.batch_size

    def _signed_column(x):
        # Batch size is read when the expression runs, as the original
        # lambdas did.
        return x.reshape([myNet.config.batch_size, 1]) * 2.0 - 1.0

    def _identity(x):
        return x

    # -------------------------------------------------------------------------
    # Bypass for score map
    score_in = InputLayer(
        (batch_size, ),
        input_var=myNet.y[idxSiam],
        name='kp-bypass-input-score')
    branch['kp-bypass-input-score'] = score_in

    for key in ('kp-scoremap-cut', 'kp-scoremap'):
        branch[key] = ExpressionLayer(
            branch['kp-bypass-input-score'],
            _signed_column,
            output_shape=[batch_size, 1],
            name=key)

    # -------------------------------------------------------------------------
    # Bypass for xyz coordinates
    xyz_in = InputLayer(
        (batch_size, 3),
        input_var=myNet.pos[idxSiam],
        name='kp-bypass-input-xyz')
    branch['kp-bypass-input-xyz'] = xyz_in

    branch['kp-output'] = ExpressionLayer(
        branch['kp-bypass-input-xyz'],
        _identity,
        output_shape=[batch_size, 3],
        name='kp-output')
def get_model(input_images, input_position, input_mult, target_var):
    """Build the 3D residual network and its training/test expressions.

    Architecture, in order: an input of shape ``(None, 22, 30, 64, 64)``,
    one batch-normalised 3x3x3 convolution, six residual blocks (two
    batch-normalised convolutions summed with the block input), one more
    batch-normalised convolution, and a final 22-filter sigmoid convolution
    without batch normalisation.  All convolutions use stride 1 and
    ``pad='same'``; the hidden ones have 16 filters and ReLU.

    :param input_images: variable bound to the input layer.
    :param input_position: accepted for interface compatibility; unused.
    :param input_mult: accepted for interface compatibility; unused.
    :param target_var: target passed to ``binary_crossentropy``.
    :return: ``(test_prediction, prediction, loss, params)`` where
        ``test_prediction`` comes from a deterministic forward pass,
        ``prediction`` from the default pass, ``loss`` is the mean binary
        cross-entropy of ``prediction`` and ``params`` are the trainable
        parameters.
    """
    def _conv_bn(incoming):
        """Return a batch-normalised 16-filter 3x3x3 ReLU convolution."""
        return batch_norm(Conv3DDNNLayer(
            incoming=incoming, num_filters=16, filter_size=(3, 3, 3),
            stride=(1, 1, 1), pad='same', nonlinearity=rectify))

    # input layer with unspecified batch size
    layer = InputLayer(shape=(None, 22, 30, 64, 64), input_var=input_images)

    layer = _conv_bn(layer)

    # Six identical residual blocks; layers are created in the same order as
    # the former unrolled code, so initialisation draws happen in that order.
    for _ in range(6):
        shortcut = layer
        layer = _conv_bn(layer)
        layer = _conv_bn(layer)
        layer = ElemwiseSumLayer([layer, shortcut])

    layer = _conv_bn(layer)
    layer_prediction = Conv3DDNNLayer(
        incoming=layer, num_filters=22, filter_size=(3, 3, 3),
        stride=(1, 1, 1), pad='same', nonlinearity=sigmoid)

    # image prediction
    prediction = get_output(layer_prediction)
    loss = binary_crossentropy(prediction, target_var).mean()

    params = get_all_params(layer_prediction, trainable=True)

    # Expression for validation/testing: a deterministic forward pass
    # through the same network.
    test_prediction = get_output(layer_prediction, deterministic=True)

    return test_prediction, prediction, loss, params
def geometric_mean(incoming):
    """Return a layer holding the normalised geometric mean of ``incoming``.

    Each member output is mapped to ``log(x + NCEnsemble.eps)``, the logs are
    averaged with equal weights ``1 / len(incoming)``, exponentiated, and the
    result is divided by its own sum over axis 1.

    The normaliser is computed inside the final layer's expression from the
    tensor the layer actually receives.  It used to be built once from
    ``get_output(exp_out)`` at construction time, so a later
    ``get_output(..., deterministic=True)`` or ``inputs=...`` call divided by
    a sum taken from a different forward pass.

    :param incoming: sequence of layers (must support ``len``); presumably
        each outputs per-class probabilities along axis 1 — confirm with
        callers.
    :return: ``ExpressionLayer`` producing the normalised geometric mean.
    """
    log_members = [
        ExpressionLayer(member, lambda x: T.log(x + NCEnsemble.eps))
        for member in incoming
    ]
    mean_log = ElemwiseSumLayer(log_members, coeffs=1. / len(incoming))
    exp_out = ExpressionLayer(mean_log, T.exp)

    def _normalise(x):
        # Same reduction and broadcasting as before, applied to the live
        # input instead of a graph captured at construction time.
        return x / T.sum(x, axis=1)[..., np.newaxis]

    return ExpressionLayer(exp_out, _normalise)
def shortcut(self, incoming, residual, type=None):
    """Create a shortcut from ``incoming`` to ``residual``.

    :param incoming: layer (anything with ``output_shape``) or a bare shape.
    :param residual: layer or bare shape the shortcut must match.
    :param type: shortcut type ``'A'``, ``'B'`` or ``'C'``; falls back to
        ``self.type`` when falsy.
    :return: ``incoming`` itself when an identity shortcut suffices,
        otherwise a new layer (projection for ``'B'``/``'C'``, strided
        slicing plus channel zero-padding for ``'A'``).
    :raises ValueError: if the filter counts differ and ``type`` is not one
        of ``'A'``, ``'B'``, ``'C'`` (previously ``None`` was returned
        silently).
    """
    type = type or self.type
    in_shape = getattr(incoming, 'output_shape', incoming)
    out_shape = getattr(residual, 'output_shape', residual)
    in_filters = in_shape[1]
    out_filters = out_shape[1]
    stride = (in_shape[-2] // out_shape[-2],
              in_shape[-1] // out_shape[-1])

    if type == 'C':
        # all shortcuts are projections
        return self.projection(incoming, out_filters, stride=stride)
    if in_filters == out_filters:
        # A and B use identity shortcuts (if the dimensions stay)
        return incoming
    if type == 'B':
        # if dimensions increase, B uses projections
        return self.projection(incoming, out_filters, stride=stride)
    if type == 'A':
        if tuple(in_shape[2:]) != tuple(out_shape[2:]):
            # Subsample spatially with strided slicing (no parameters).
            shortcut = ExpressionLayer(
                incoming,
                lambda x: x[:, :, ::stride[0], ::stride[1]],
                tuple(in_shape[:2]) + tuple(out_shape[2:]))
        else:
            shortcut = incoming
        # Zero-pad the channel axis up to out_filters.  An odd difference
        # puts the extra channel at the end, so the total padding is always
        # exactly the difference; for even differences this equals the
        # former symmetric padding.
        extra = out_filters - in_filters
        side = extra // 2
        return PadLayer(shortcut, [(side, extra - side), 0, 0],
                        batch_ndim=1)
    raise ValueError(
        "unknown shortcut type %r (expected 'A', 'B' or 'C')" % (type,))
def residual_block(l, increase_dim=False, projection=False):
    """Build one two-convolution residual block on top of layer ``l``.

    :param l: incoming layer; its ``output_shape[1]`` is the input filter
        count.
    :param increase_dim: when true the first convolution uses stride (2, 2)
        and the filter count is doubled.
    :param projection: only consulted when ``increase_dim`` is true; selects
        a batch-normalised 1x1 stride-2 convolution shortcut (option B)
        instead of strided slicing plus channel zero-padding (option A).
    :return: ``NonlinearityLayer`` applying ``rectify`` to the sum of the
        residual stack and the shortcut.
    """
    n_in = l.output_shape[1]
    first_stride, n_out = ((2, 2), n_in * 2) if increase_dim else ((1, 1), n_in)

    stack_1 = batch_norm(ConvLayer(
        l, num_filters=n_out, filter_size=(3, 3), stride=first_stride,
        nonlinearity=rectify, pad='same',
        W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))
    stack_2 = batch_norm(ConvLayer(
        stack_1, num_filters=n_out, filter_size=(3, 3), stride=(1, 1),
        nonlinearity=None, pad='same',
        W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))

    # Pick the shortcut branch, then add it in one place.
    if not increase_dim:
        skip = l
    elif projection:
        # projection shortcut, as option B in paper
        skip = batch_norm(ConvLayer(
            l, num_filters=n_out, filter_size=(1, 1), stride=(2, 2),
            nonlinearity=None, pad='same', b=None, flip_filters=False))
    else:
        # identity shortcut, as option A in paper
        subsampled = ExpressionLayer(
            l,
            lambda X: X[:, :, ::2, ::2],
            lambda s: (s[0], s[1], s[2] // 2, s[3] // 2))
        skip = PadLayer(subsampled, [n_out // 4, 0, 0], batch_ndim=1)

    merged = ElemwiseSumLayer([stack_2, skip])
    return NonlinearityLayer(merged, nonlinearity=rectify)
def build_model(feadim, Nclass, kernel_size=3, border_mode='same',
                input_length=None, noise=(0.1, 0.2, 0.1)):
    """Build the pooling-only network ending in a per-step softmax layer.

    Input shape: X.shape=(B, 1, rows, cols), GT.shape=(B, L)

    The input passes through four max-pooling layers with pool sizes (2, 2),
    (2, 2), (2, 1), (2, 1), is rearranged by ``filter_merge``, average-pooled
    along axis 1 and fed to a dense softmax layer with ``Nclass + 1`` units.

    :param feadim: size of the third input axis.
    :param Nclass: number of classes; the output has ``Nclass + 1`` units.
    :param kernel_size: not used by this function.
    :param border_mode: not used by this function.
    :param input_length: size of the fourth input axis (may be ``None``).
    :param noise: not used by this function.
    :return: the final ``DenseLayer`` (``'dense0'``).
    """
    net = InputLayer(shape=(None, 1, feadim, input_length), name='input0')

    pool_sizes = ((2, 2), (2, 2), (2, 1), (2, 1))
    for idx, size in enumerate(pool_sizes):
        net = MaxPool2DLayer(net, pool_size=size, name='pool%d' % idx)

    net = ExpressionLayer(net, filter_merge,
                          output_shape=filter_merge_output_shape,
                          name='permute0')
    net = Pool1DLayer(net, pool_size=2, mode='average_exc_pad', axis=1,
                      name='pool4')
    return DenseLayer(net, num_units=Nclass + 1, nonlinearity=softmax,
                      num_leading_axes=2, name='dense0')
def __init__(self, vocab, input_var=None):
    """Assemble the utterance encoder: input -> embedding -> masked LSTM.

    :param vocab: vocabulary object; ``PAD_ix`` and ``n_tokens`` are read.
    :param input_var: optional variable bound to the token input layer.

    The final LSTM state is exposed as ``self.output``.
    """
    def _is_real_token(tokens):
        # True wherever the token differs from the padding index.
        return T.neq(tokens, vocab.PAD_ix)

    tokens_in = InputLayer((None, None), input_var=input_var,
                           name='utt input')
    pad_mask = ExpressionLayer(tokens_in, _is_real_token, name='utt mask')
    embedded = EmbeddingLayer(tokens_in, vocab.n_tokens, Config.EMB_SIZE,
                              name="utt embedding")
    encoder = LSTMLayer(
        embedded,
        Config.N_LSTM_UNITS,
        name='encoder_lstm',
        grad_clipping=Config.LSTM_LAYER_GRAD_CLIP,
        mask_input=pad_mask,
        only_return_final=True,
        peepholes=False)

    self.l_in = tokens_in
    self.l_mask = pad_mask
    self.l_emb = embedded
    self.l_lstm = encoder
    self.output = self.l_lstm
def create_attention(self, gru_con, in_con_mask, condition, batch_size,
                     n_hidden_con, **kwargs):
    """Build an additive attention read-out of ``gru_con`` given ``condition``.

    Both inputs are projected to ``self.n_attention`` units, summed, passed
    through ``tanh`` and reduced to one score per position.  The scores go
    through ``SequenceSoftmax`` and weight ``gru_con``, which is then summed
    over axis 1.

    NOTE(review): the ``in_con_mask`` argument is not used; the mask is taken
    from ``self.in_con_mask`` instead.  ``batch_size``, ``n_hidden_con`` and
    ``kwargs`` are unused as well.  Confirm this is intentional.

    :return: ``ExpressionLayer`` whose output shape drops axis 1 of the
        weighted sequence.
    """
    mask = self.in_con_mask

    # (batch_size, n_attention)
    context_proj = non_flattening_dense_layer(
        gru_con, mask, self.n_attention, nonlinearity=None)
    query_proj = DenseLayer(condition, self.n_attention, nonlinearity=None)
    query_proj = dimshuffle(query_proj, (0, 'x', 1))

    energy = NonlinearityLayer(
        ElemwiseSumLayer([context_proj, query_proj]), T.tanh)
    score_column = non_flattening_dense_layer(
        energy, mask, 1, nonlinearity=None)
    scores = SliceLayer(score_column, indices=0, axis=2)
    weights = SequenceSoftmax(scores, mask)

    weights_3d = ForgetSizeLayer(dimshuffle(weights, (0, 1, 'x')))
    weighted = ElemwiseMergeLayer([weights_3d, gru_con], T.mul)

    def _sum_over_axis1(x):
        return T.sum(x, axis=1)

    def _drop_axis1(s):
        return (s[0], ) + s[2:]

    return ExpressionLayer(weighted, _sum_over_axis1, _drop_axis1)
def build_baseline2_feats(input_var, nb_filter=96):
    """Slightly more complex model that maps x into a feature space first.

    Layout: normalised input, two 1x1 conv / batch-norm / ReLU stages, a 1x1
    linear "middle" convolution, two transposed convolutions that reuse the
    encoder weights in reverse order, and a final batch-norm layer with
    beta initialised to 128 and gamma to 25.

    :param input_var: variable bound to the input layer.
    :param nb_filter: number of filters of the encoder and middle convolutions.
    :return: ``(last, net)`` — output layer and ``OrderedDict`` of all layers.
    """
    net = OrderedDict()

    # Input, standardization
    net['input'] = InputLayer(
        (None, 3, tools.INP_PSIZE, tools.INP_PSIZE), input_var=input_var)
    net['norm'] = ExpressionLayer(net['input'], lambda x: normalize(x))
    last = net['norm']

    # Pretrained Encoder as before
    for stage in ("1_1", "1_2"):
        last = net["conv" + stage] = ConvLayer(
            last, nb_filter, 1, pad=0, flip_filters=False,
            nonlinearity=linear)
        last = net["bn" + stage] = BatchNormLayer(last)
        last = net["relu" + stage] = NonlinearityLayer(
            last, nonlinearity=rectify)

    # Modified Middle Part
    last = net["middle"] = ConvLayer(last, nb_filter, 1, nonlinearity=linear)

    # Decoder as before: mirror each encoder convolution, sharing its W.
    for stage in ("1_2", "1_1"):
        encoder = net["conv" + stage]
        last = net["deconv" + stage] = TransposedConv2DLayer(
            last,
            encoder.input_shape[1],
            encoder.filter_size,
            stride=encoder.stride,
            crop=encoder.pad,
            W=encoder.W,
            flip_filters=not encoder.flip_filters,
            nonlinearity=None)

    last = net["bn"] = BatchNormLayer(
        last, beta=nn.init.Constant(128.), gamma=nn.init.Constant(25.))

    return last, net
def build_model(self):
    """Build the windowed Q-network agent on top of ``self.observation_layer``.

    The observation is dimshuffled to ``(0, 3, 1, 2)``, pooled 2x2, pushed
    into a 5-step window memory, reduced with a maximum over axis 1 and passed
    through two batch-normalised convolutions and one batch-normalised dense
    layer before the linear Q-value layer.

    :return: ``(resolver, agent)``.
    """
    # reshape to [batch, color, x, y] to allow for convolution layers to work
    # correctly
    frames = DimshuffleLayer(self.observation_layer, (0, 3, 1, 2))
    frames = Pool2DLayer(frames, pool_size=(2, 2))

    # memory
    window_size = 5

    # prev state input
    prev_window = InputLayer(
        (None, window_size) + tuple(frames.output_shape[1:]),
        name="previous window state")

    # our window
    window = WindowAugmentation(frames, prev_window, name="new window state")
    memory_dict = {window: prev_window}

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(
        window,
        lambda a: a.max(axis=1),
        output_shape=(None, ) + window.output_shape[2:])

    # neural network body
    body = batch_norm(lasagne.layers.Conv2DLayer(
        window_max, num_filters=16, filter_size=(8, 8), stride=(4, 4)))
    body = batch_norm(lasagne.layers.Conv2DLayer(
        body, num_filters=32, filter_size=(4, 4), stride=(2, 2)))
    body = batch_norm(lasagne.layers.DenseLayer(body, num_units=256))

    # q_eval
    policy_layer = DenseLayer(
        body,
        num_units=self.n_actions,
        nonlinearity=lasagne.nonlinearities.linear,
        name="QEvaluator")

    # resolver
    resolver = EpsilonGreedyResolver(policy_layer, name="resolver")

    # all together
    agent = Agent(self.observation_layer, memory_dict, policy_layer, resolver)

    return resolver, agent
def createXYZTCropLayer(input_layer_4d, xyz_layer, theta_layer, max_scale,
                        out_width, name=None):
    """Create a ``TransformerLayer`` that crops/rotates/rescales the input.

    ``xyz_layer`` and ``theta_layer`` are concatenated into per-sample
    ``(x, y, z, t)`` rows, converted into six affine parameters
    ``[dr*cos(t), -dr*sin(t), x, dr*sin(t), dr*cos(t), y]`` with
    ``dr = reduc_ratio * 2**z / max_scale``, and handed to the transformer,
    which produces ``out_width`` x ``out_width`` outputs.

    :param input_layer_4d: layer to sample from; its output shape supplies the
        batch size (axis 0) and the width used for the reduction ratio (axis 3).
    :param xyz_layer: layer providing columns 0..2 of the concatenation.
    :param theta_layer: layer providing column 3 of the concatenation.
    :param max_scale: divisor applied to ``2**z``.
    :param out_width: output width and height of the transformer.
    :param name: name given to the resulting ``TransformerLayer``.
    :return: the ``TransformerLayer``.
    """
    source_shape = get_output_shape(input_layer_4d)
    n_batch = source_shape[0]

    # ratio to reduce to patch size from original
    reduc_ratio = (np.cast[floatX](out_width) /
                   np.cast[floatX](source_shape[3]))

    def _as_column(values):
        return values.flatten().dimshuffle(0, 'x')

    def xyzt_2_param(xyzt):
        """Map (x, y, z, t) rows to six affine transform parameters."""
        # x and y are already between -1 and 1
        dx, dy = xyzt[:, 0], xyzt[:, 1]
        z, t = xyzt[:, 2], xyzt[:, 3]
        # compute the resize from the largest scale image
        dr = (np.cast[floatX](reduc_ratio) * np.cast[floatX](2.0)**z /
              np.cast[floatX](max_scale))
        entries = (
            dr * T.cos(t), -dr * T.sin(t), dx,
            dr * T.sin(t), dr * T.cos(t), dy,
        )
        # concatenate to have (1 0 0 0 1 0) when identity transform
        return T.concatenate([_as_column(e) for e in entries], axis=1)

    # merge xyz and t layers together to form xyzt
    merged = ConcatLayer([xyz_layer, theta_layer])
    affine = ExpressionLayer(merged, xyzt_2_param, output_shape=(n_batch, 6))

    # Height and width of the output are both out_width.
    return TransformerLayer(input_layer_4d, affine, out_width, out_width,
                            name=name)
def build_baseline1_small(input_var):
    """Most simplistic model possible.

    Effectively only uses the last batch norm layer: the normalised input goes
    through a 3-filter 1x1 linear convolution and a batch-norm layer whose beta
    is initialised to 128 and gamma to 25.

    :param input_var: variable bound to the input layer.
    :return: ``(last, net)`` — the ``"bn"`` layer and the ``OrderedDict`` of
        all layers.
    """
    net = OrderedDict()

    # Input, standardization
    net['input'] = InputLayer(
        (None, 3, tools.INP_PSIZE, tools.INP_PSIZE), input_var=input_var)
    net['norm'] = ExpressionLayer(net['input'], lambda img: normalize(img))

    net["middle"] = ConvLayer(net['norm'], 3, 1, nonlinearity=linear)
    net["bn"] = BatchNormLayer(
        net["middle"],
        beta=nn.init.Constant(128.),
        gamma=nn.init.Constant(25.))

    return net["bn"], net
def nn_upsample(upsample_in, num_styles=None, num_filters=None,
                filter_size=3, stride=1):
    """Upsample by 2 via element repetition, then apply a style conv block.

    Every element is repeated twice along axis 2 and twice along axis 3
    before ``style_conv_block`` is applied.

    :param upsample_in: incoming layer.
    :param num_styles: forwarded to ``style_conv_block``.
    :param num_filters: filters of the conv block; defaults to the incoming
        layer's ``output_shape[1]`` when ``None``.
    :param filter_size: forwarded to ``style_conv_block``.
    :param stride: forwarded to ``style_conv_block``.
    :return: the layer returned by ``style_conv_block``.
    """
    # Identity comparison is the correct test for the None sentinel.
    if num_filters is None:
        num_filters = upsample_in.output_shape[1]

    nn_network = ExpressionLayer(
        upsample_in,
        lambda X: X.repeat(2, 2).repeat(2, 3),
        output_shape='auto')
    nn_network = style_conv_block(
        nn_network, num_styles, num_filters, filter_size, stride)

    return nn_network
def init_nn_structure(self, seq_length, pred_len):
    """
    Inits network structure

    One two-layer LSTM branch is created for every non-empty entry of
    ``self.feature_dict``; each branch sees only the positions listed in that
    entry.  The final states of all branches are concatenated and mapped to
    ``pred_len`` outputs by a linear dense layer.  Sets ``self.loss``,
    ``self.lr``, ``self.updates``, ``self.l_out``, ``self.trainT``,
    ``self.compute_cost`` and ``self.forecast``.

    :param seq_length: number of features
    :type seq_length: int
    :param pred_len: number of predicted values (target dimensionality)
    :type pred_len: int
    :return: None
    """
    self.iteration = 0
    theano_input = T.tensor3()
    theano_output = T.matrix()

    from lasagne.layers import InputLayer, LSTMLayer, DenseLayer, ExpressionLayer, ConcatLayer
    from lasagne.nonlinearities import tanh

    model = {}
    model['input_layer'] = InputLayer((None, seq_length, 1),
                                      input_var=theano_input)

    lst_concat = []
    for i, (key, indices) in enumerate(self.feature_dict.items()):
        if indices is None or len(indices) == 0:
            continue

        # Bind the indices as a default argument.  The slicing function only
        # runs later, inside get_output(), and a plain closure over the loop
        # variable would make every branch slice with the LAST key's indices.
        model['input_slice_' + str(i)] = ExpressionLayer(
            model['input_layer'],
            lambda X, indices=indices: X[:, indices, :])

        num_units = (self.num_lstm_units_large if len(indices) > 10
                     else self.num_lstm_units_small)
        model['hidden_layer_' + str(i) + '_1'] = LSTMLayer(
            model['input_slice_' + str(i)], num_units,
            grad_clipping=self.grad_clip, nonlinearity=tanh)
        model['hidden_layer_' + str(i) + '_2'] = LSTMLayer(
            model['hidden_layer_' + str(i) + '_1'], num_units,
            grad_clipping=self.grad_clip, nonlinearity=tanh,
            only_return_final=True)
        lst_concat.append(model['hidden_layer_' + str(i) + '_2'])

    model['concatenate_hidden'] = ConcatLayer(lst_concat, axis=1)
    model['output_layer'] = DenseLayer(model['concatenate_hidden'], pred_len,
                                       nonlinearity=None)

    model_output = lasagne.layers.get_output(model['output_layer'])
    params = lasagne.layers.get_all_params(model['output_layer'],
                                           trainable=True)

    self.loss = lasagne.objectives.squared_error(model_output,
                                                 theano_output).mean()
    # Shared variable holding the learning rate passed to adam.
    self.lr = theano.shared(np.array(self.learning_rate, dtype='float32'))
    self.updates = lasagne.updates.adam(self.loss, params,
                                        learning_rate=self.lr)
    self.l_out = model['output_layer']

    self.trainT = theano.function([theano_input, theano_output], self.loss,
                                  updates=self.updates)
    self.compute_cost = theano.function([theano_input, theano_output],
                                        self.loss)
    self.forecast = theano.function([theano_input], model_output)
def setup_transform_net(self, input_var=None):
    """Build the style-transfer transform network.

    Layout: input of shape ``self.shape``; three style conv blocks
    (32 filters, size 9, stride 1; 64, 3, 2; 128, 3, 2); five residual blocks;
    two ``nn_upsample`` stages; and a 3-filter size-9 output block whose form
    depends on ``self.net_type`` (0: ``tanh`` followed by multiplication with
    150; 1: ``sigmoid``; any other value: no output block).

    The result is stored in ``self.network['transform_net']``.

    :param input_var: optional variable bound to the input layer.
    """
    net = InputLayer(shape=self.shape, input_var=input_var)

    encoder_spec = ((32, 9, 1), (64, 3, 2), (128, 3, 2))
    for n_filters, size, step in encoder_spec:
        net = style_conv_block(net, self.num_styles, n_filters, size, step)

    for _ in range(5):
        net = residual_block(net, self.num_styles)

    for _ in range(2):
        net = nn_upsample(net, self.num_styles)

    net_type = self.net_type
    if net_type == 0:
        net = style_conv_block(net, self.num_styles, 3, 9, 1, tanh)
        net = ExpressionLayer(net, lambda X: 150. * X, output_shape=None)
    elif net_type == 1:
        net = style_conv_block(net, self.num_styles, 3, 9, 1, sigmoid)

    self.network['transform_net'] = net
def build_sb_resnet_phase(prev_layer, n_out, count, stride):
    """Build one phase of ``count`` bottleneck stick-breaking residual layers.

    The first layer receives ``stride`` and an initial stick of ones with shape
    ``(batch, 1)``; each later layer uses stride (1, 1) and the remaining stick
    returned by its predecessor.

    :param prev_layer: incoming layer.
    :param n_out: forwarded to ``build_bottleneck_sb_residual_layer``.
    :param count: number of residual layers; the first one is always built.
    :param stride: stride of the first residual layer.
    :return: ``(layer, (posterior_a, posterior_b, stick_lengths))`` where the
        three inner values are axis-1 concatenations of ``kumar_a``,
        ``kumar_b`` and the remaining-stick layers themselves.
    """
    # Initial stick length is 1.
    initial_stick = ExpressionLayer(
        prev_layer,
        function=lambda X: T.ones((X.shape[0], 1)),
        output_shape=(None, 1))

    layer, current_stick = build_bottleneck_sb_residual_layer(
        prev_layer, n_out, stride, initial_stick)
    sticks = [current_stick]

    for _ in range(count - 1):
        layer, current_stick = build_bottleneck_sb_residual_layer(
            layer, n_out, stride=(1, 1), remaining_stick=current_stick)
        sticks.append(current_stick)

    # Compute posteriors
    posterior_a = ConcatLayer([s.kumar_a for s in sticks], axis=1)
    posterior_b = ConcatLayer([s.kumar_b for s in sticks], axis=1)
    stick_lengths = ConcatLayer(sticks, axis=1)

    return layer, (posterior_a, posterior_b, stick_lengths)
def build(myNet, idxSiam, verbose=True):
    """Build the orientation-estimation branch for siamese index ``idxSiam``.

    Expects ``myNet.layers[idxSiam]['ori-input']`` to exist already.  Adds
    three conv / ReLU / max-pool stages, two dense layers with GHH feature
    pooling (dropout after the first) and an arctan2 output layer.  Branch 0
    creates fresh parameters (HeNormal weights, zero biases); every other
    branch reuses the ``W`` and ``b`` of the same-named layer in branch 0.

    :param myNet: network object providing ``layers`` and ``config``.
    :param idxSiam: index of the siamese branch to populate.
    :param verbose: accepted but not used by this function.
    """
    INITIALIZATION_GAIN = 1.0
    branch = myNet.layers[idxSiam]

    def _init_for(layer_name):
        """Return (W, b): fresh initialisers for branch 0, else shared params."""
        if idxSiam == 0:
            return HeNormal(gain=INITIALIZATION_GAIN), Constant(0.0)
        reference = myNet.layers[0][layer_name]
        return reference.W, reference.b

    # -------------------------------------------------------------------------
    # 3x Convolution and Max Pooling layers
    # (conv name, input name, number of filters, filter size)
    conv_stages = (
        ('ori-c0', 'ori-input', 10, 5),
        ('ori-c1', 'ori-c0p', 20, 5),
        ('ori-c2', 'ori-c1p', 50, 3),
    )
    for conv_name, input_name, n_filters, f_size in conv_stages:
        W_init, b_init = _init_for(conv_name)
        # Convolution
        branch[conv_name] = Conv2DLayer(
            branch[input_name],
            num_filters=n_filters,
            filter_size=f_size,
            W=W_init,
            b=b_init,
            nonlinearity=None,
            flip_filters=False,
            name=conv_name,
        )
        # Activation
        branch[conv_name + 'a'] = NonlinearityLayer(
            branch[conv_name],
            nonlinearity=relu,
            name=conv_name + 'a',
        )
        # Pool
        branch[conv_name + 'p'] = MaxPool2DLayer(
            branch[conv_name + 'a'],
            pool_size=2,
            name=conv_name + 'p',
        )

    # -------------------------------------------------------------------------
    # Fully Connected Layers
    # --------------
    # FC 3
    nu, ns, nm = 100, 4, 4
    W_init, b_init = _init_for('ori-f3')
    # NOTE(review): this reads the activation 'ori-c2a', not the pooled
    # 'ori-c2p', so the last pooling layer is never consumed here — confirm
    # this is intended.
    branch['ori-f3'] = DenseLayer(
        branch['ori-c2a'],
        num_units=nu * ns * nm,
        W=W_init,
        b=b_init,
        nonlinearity=None,
        name='ori-f3',
    )
    # Activation 3
    branch['ori-f3a'] = GHHFeaturePoolLayer(
        branch['ori-f3'],
        num_in_sum=ns,
        num_in_max=nm,
        max_strength=myNet.config.max_strength,
        name='ori-f3a',
    )
    # Dropout 3
    branch['ori-f3d'] = DropoutLayer(
        branch['ori-f3a'],
        p=0.3,
        name='ori-f3d',
    )

    # --------------
    # FC 4
    nu, ns, nm = 2, 4, 4
    W_init, b_init = _init_for('ori-f4')
    branch['ori-f4'] = DenseLayer(
        branch['ori-f3d'],
        num_units=nu * ns * nm,
        W=W_init,
        b=b_init,
        nonlinearity=None,
        name='ori-f4',
    )
    # Activation 4
    branch['ori-f4a'] = GHHFeaturePoolLayer(
        branch['ori-f4'],
        num_in_sum=ns,
        num_in_max=nm,
        max_strength=myNet.config.max_strength,
        name='ori-f4a',
    )

    # -------------------------------------------------------------------------
    # Arctan2 Layer
    def _angle_column(x):
        angle = CT.custom_arctan2(x[:, 0], x[:, 1])
        return angle.flatten().dimshuffle(0, 'x')

    branch['ori-output'] = ExpressionLayer(
        branch['ori-f4a'],
        _angle_column,
        output_shape=(myNet.config.batch_size, 1),
        name='ori-output',
    )
def build_generator_lstm(input_var, noise_size, cond_var=None, n_conds=0,
                         arch='lstm', with_BatchNorm=True, batch_size=None,
                         n_steps=None):
    """Build the recurrent generator network.

    :param input_var: noise variable bound to the first input layer.
    :param noise_size: size of the last axis of the noise input.
    :param cond_var: optional conditioning variable; when given, the noise is
        passed through a dense layer and concatenated with a second input
        layer of shape ``(batch_size, n_steps, n_conds)``.
    :param n_conds: size of the last axis of the conditioning input.
    :param arch: architecture selector.  ``'lstm'`` builds a bidirectional
        LSTM stack followed by a per-timestep softmax over 128 units that is
        rescaled with ``X*2 - 1``.  ``1`` is legacy code (see note below).
        ``2`` and ``3`` are not implemented.
    :param with_BatchNorm: forwarded to the ``BatchNorm`` helper.
    :param batch_size: first axis of the input shapes (may be ``None``).
    :param n_steps: second axis of the input shapes (may be ``None``).
    :return: the final layer.
    :raises NotImplementedError: for ``arch`` 2 or 3.
    """
    from lasagne.layers import (
        InputLayer, DenseLayer, LSTMLayer, ReshapeLayer, DimshuffleLayer,
        concat, ExpressionLayer, NonlinearityLayer, DropoutLayer)
    from lasagne.init import Constant, HeNormal
    from lasagne.nonlinearities import rectify, softmax

    non_lin = rectify
    layer = InputLayer(
        shape=(batch_size, n_steps, noise_size), input_var=input_var)
    if cond_var is not None:
        layer = BatchNorm(DenseLayer(
            layer, noise_size, nonlinearity=non_lin), with_BatchNorm)
        layer = concat(
            [layer, InputLayer(shape=(batch_size, n_steps, n_conds),
                               input_var=cond_var)])

    if arch == 'lstm':
        layer = batch_norm(DenseLayer(layer, 1024, num_leading_axes=2))
        # recurrent layers for bidirectional network
        l_forward_noise = BatchNorm(LSTMLayer(
            layer, 512, learn_init=True, grad_clipping=100,
            only_return_final=False), with_BatchNorm)
        l_backward_noise = BatchNorm(LSTMLayer(
            layer, 512, learn_init=True, grad_clipping=100,
            only_return_final=False, backwards=True), with_BatchNorm)
        layer = concat([l_forward_noise, l_backward_noise], axis=2)
        # dense layers
        layer = BatchNorm(DenseLayer(
            layer, 1024, num_leading_axes=2), with_BatchNorm)
        layer = BatchNorm(DenseLayer(
            layer, 128, num_leading_axes=2), with_BatchNorm)
        # reshape to apply softmax per timestep
        layer = ReshapeLayer(layer, (-1, [2]))
        layer = NonlinearityLayer(layer, softmax)
        layer = ReshapeLayer(layer, (input_var.shape[0], -1, [1]))
        layer = DimshuffleLayer(layer, (0, 'x', 2, 1))
        layer = ExpressionLayer(layer, lambda X: X*2 - 1)
    elif arch == 1:
        # NOTE(review): this branch reads ``params``, ``gate_params`` and
        # ``cell_params``, none of which are defined in this function, and it
        # never assigns ``layer``, so the value returned below is not
        # ``l_out``.  Left unchanged; confirm whether it is still needed.
        # input layers
        l_in = InputLayer(
            shape=params['input_shape'], input_var=params['input_var'],
            name='g_in')
        l_noise = InputLayer(
            shape=params['noise_shape'], input_var=params['noise_var'],
            name='g_noise')
        l_cond = InputLayer(
            shape=params['cond_shape'], input_var=params['cond_var'],
            name='g_cond')
        l_mask = InputLayer(
            shape=params['mask_shape'], input_var=params['mask_var'],
            name='g_mask')

        # recurrent layers for bidirectional network
        l_forward_data = LSTMLayer(
            l_in, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False,
            nonlinearity=params['non_linearities'][0])
        l_forward_noise = LSTMLayer(
            l_noise, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False,
            nonlinearity=params['non_linearities'][1])

        l_backward_data = LSTMLayer(
            l_in, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False, backwards=True,
            nonlinearity=params['non_linearities'][0])
        l_backward_noise = LSTMLayer(
            l_noise, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False, backwards=True,
            nonlinearity=params['non_linearities'][1])

        # concatenate output of forward and backward layers
        l_lstm_concat = concat(
            [l_forward_data, l_forward_noise, l_backward_data,
             l_backward_noise], axis=2)

        # dense layer on output of data and noise lstms, w/dropout
        l_lstm_dense = DenseLayer(
            DropoutLayer(l_lstm_concat, p=0.5),
            num_units=params['n_units'][1], num_leading_axes=2,
            W=HeNormal(gain='relu'), b=Constant(0.1),
            nonlinearity=params['non_linearities'][2])

        # concatenate dense layer of lstsm with condition
        l_lstm_cond_concat = concat(
            [l_lstm_dense, l_cond], axis=2)

        # dense layer with dense layer lstm and condition, w/dropout
        l_out = DenseLayer(
            DropoutLayer(l_lstm_cond_concat, p=0.5),
            num_units=params['n_units'][2],
            num_leading_axes=2,
            W=HeNormal(gain=1.0), b=Constant(0.1),
            nonlinearity=params['non_linearities'][3])
    elif arch == 2:
        raise NotImplementedError("arch 2 not implemented")
    elif arch == 3:
        # The message used to say "arch 2" here.
        raise NotImplementedError("arch 3 not implemented")

    print("Generator output:", layer.output_shape)
    return layer
def build_fan_reworked(input_var, nb_filter=16,
                       input_size=(None, 3, tools.INP_PSIZE, tools.INP_PSIZE)):
    """Build the reworked FAN network with three feature-aware stages.

    Layout: normalised input; a feature encoder queried for ``conv4_4``,
    ``conv4_1`` and ``conv3_3``; two 1x1 conv / non-updating batch-norm / ReLU
    encoder stages; three (batch-norm, ``fan_module_improved``) stages at
    scales 8, 8 and 4; a regular batch-norm layer; and two transposed
    convolutions mirroring the encoder.

    :param input_var: variable bound to the input layer.
    :param nb_filter: filter count of the encoder convolutions and FAN modules.
    :param input_size: shape passed to the input layer.
    :return: ``(last, net)`` — output layer and ``OrderedDict`` of all layers.
    """
    net = OrderedDict()

    # Input, standardization
    net['input'] = InputLayer(input_size, input_var=input_var)
    net['norm'] = ExpressionLayer(net['input'], lambda x: normalize(x))
    last = net['norm']

    # load feature encoder
    feats = get_features(last)
    net['features_s8_1'] = feats["conv4_4"]
    net['features_s8_2'] = feats["conv4_1"]
    net['features_s4'] = feats["conv3_3"]

    # Pretrained Encoder as before
    for stage in ("1_1", "1_2"):
        last = net["conv" + stage] = ConvLayer(
            last, nb_filter, 1, pad=0, flip_filters=False,
            nonlinearity=linear)
        last = net["bn" + stage] = layers.NonUpdateBatchNormLayer(last)
        last = net["relu" + stage] = NonlinearityLayer(
            last, nonlinearity=rectify)

    # feature aggregation at multiple scales
    # (batch-norm key, module tag, feature key, scale)
    fan_stages = (
        ("bn1", "s8_1", 'features_s8_1', 8),
        ("bn2", "s8_2", 'features_s8_2', 8),
        ("bn3", "s4", 'features_s4', 4),
    )
    for bn_key, tag, feature_key, scale in fan_stages:
        last = net[bn_key] = layers.NonUpdateBatchNormLayer(
            last, beta=None, gamma=None)
        last = fan_module_improved(
            last, net, tag, net[feature_key],
            nb_filter=nb_filter, scale=scale,
            upsampling_strategy="repeat")

    # unclear if Fixed, NonUpdate or Regular Layer will work best...
    last = net["bn4"] = BatchNormLayer(last)

    # Decoder as before
    for stage in ("1_2", "1_1"):
        last = net["deconv" + stage] = transpose(
            last, net["conv" + stage], nonlinearity=None)

    return last, net
def test_space_invaders(
        game_title='SpaceInvaders-v0',
        n_parallel_games=3,
        replay_seq_len=2,
):
    """Smoke test: build an agent with a window memory, play, and train.

    Builds a small lasagne network on top of a 3-frame window memory, plays
    ``n_parallel_games`` games in a pool, then runs 10 train/evaluate
    iterations on a loss that sums the q-learning, sarsa, n-step q-learning
    and n-step a2c objectives. Nothing is asserted; the test passes if
    everything compiles and runs.

    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """
    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None, ) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    # the environment is only needed to read the parameters above
    del atari

    # ##### Agent observations
    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # reshape to [batch, color, x, y] to allow for convolutional layers to work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states
    window_size = 3

    # prev state input
    prev_window = InputLayer(
        (None, window_size) + tuple(observation_reshape.output_shape[1:]),
        name="previous window state")

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")

    # maps each new memory state to the input layer holding its previous value
    memory_dict = {window: prev_window}

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None, ) + window.output_shape[2:])

    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(window_max, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # fakes for a2c
    policy_eval = DenseLayer(nn,
                             num_units=n_actions,
                             nonlinearity=lasagne.nonlinearities.softmax,
                             name="a2c action probas")
    state_value_eval = DenseLayer(nn,
                                  num_units=1,
                                  nonlinearity=None,
                                  name="a2c state values")

    # resolver: picks actions from the policy output
    resolver = ProbabilisticResolver(policy_eval, name="resolver")

    # agent
    agent = Agent(observation_layer, memory_dict,
                  (q_eval, policy_eval, state_value_eval), resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc
    # NOTE(review): only parameters of layers feeding `resolver` are collected
    # here; q_eval / state_value_eval heads are presumably not included -
    # confirm this is intended for the test.
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """Return ``(action, new_memories)`` for one tick.

        When ``prev_memories`` is the string ``'zeros'``, a zero-filled
        float32 array is created for every agent state. In the default setup
        the memory list is ``[prev window]``.
        """
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [
                np.zeros((batch_size, ) + tuple(mem.output_shape[1:]),
                         dtype='float32') for mem in agent.agent_states
            ]
        res = applier_fun(np.array(observation), *prev_memories)
        # first output is the chosen action, the rest are the new memories
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with
    pool = GamePool(game_title, n_parallel_games)

    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    # show the names of the first few actions taken
    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """Play ``n_steps`` new ticks and load them into ``env``.

        The old sessions are thrown away entirely for simplicity.
        """
        # memory states must be captured before interacting
        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(
            step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor,
                          is_alive_tensor, preceding_memory_states)

    # load first sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.

    # ### Training via experience replay
    # get agent's Q-values, policy, etc obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, estimators = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )
    # same order as the estimator tuple handed to Agent above
    (q_values_sequence, policy_sequence, value_sequence) = estimators

    # Evaluating loss function
    scaled_reward_seq = env.rewards
    # For SpaceInvaders, however, not scaling rewards is at least working

    elwise_mse_loss = 0.

    # 1-step algos
    for algo in qlearning, sarsa:
        elwise_mse_loss += algo.get_elementwise_objective(
            q_values_sequence,
            env.actions[0],
            scaled_reward_seq,
            env.is_alive,
            gamma_or_gammas=0.99,
        )

    # qlearning_n_step, with n below, equal to and above the session length
    for n in (1, 3, replay_seq_len - 1, replay_seq_len, replay_seq_len + 1,
              None):
        elwise_mse_loss += qlearning_n_step.get_elementwise_objective(
            q_values_sequence,
            env.actions[0],
            scaled_reward_seq,
            env.is_alive,
            gamma_or_gammas=0.99,
            n_steps=n)

    # a2c n_step
    elwise_mse_loss += a2c_n_step.get_elementwise_objective(
        policy_sequence,
        value_sequence[:, :, 0],
        env.actions[0],
        scaled_reward_seq,
        env.is_alive,
        gamma_or_gammas=0.99,
        n_steps=3)

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10**-4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions
    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward],
                                updates=updates)
    evaluation_fun = theano.function(
        [], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop
    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        # NOTE: this rebinds `loss` from the symbolic expression to a number;
        # harmless because both functions are already compiled
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " %
              (epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
def build_big_fan(input_var,
                  nb_filter=96,
                  input_size=(None, 3, tools.INP_PSIZE, tools.INP_PSIZE)):
    """Assemble the "big" FAN network with four FAN modules.

    Pipeline: input -> normalization -> two 1x1 conv / batch-norm / ReLU
    stages -> 1x1 "middle" conv -> four (batch norm + ``fan_module_simple``)
    stages (two on the ``conv4_1`` features, two on the ``conv3_3``
    features) -> batch norm -> two ``transpose`` layers built from
    ``conv1_2`` and ``conv1_1``.

    :param input_var: symbolic variable bound to the input layer
    :param nb_filter: filter count of the convolutions and FAN modules
    :param input_size: shape passed to the input layer
    :return: ``(output_layer, net)``; ``net`` is an OrderedDict of the named
        layers
    """
    net = OrderedDict()

    # Input, standardization
    last = net['input'] = InputLayer(input_size, input_var=input_var)
    last = net['norm'] = ExpressionLayer(last, lambda x: normalize(x))

    # load feature encoder
    f = get_features(last)
    net['features_s8'] = f["conv4_1"]
    net['features_s4'] = f["conv3_3"]

    # Pretrained Encoder as before
    last = net["conv1_1"] = ConvLayer(last,
                                      nb_filter,
                                      1,
                                      pad=0,
                                      flip_filters=False,
                                      nonlinearity=linear)
    last = net["bn1_1"] = layers.NonUpdateBatchNormLayer(last)
    last = net["relu1_1"] = NonlinearityLayer(last, nonlinearity=rectify)

    last = net["conv1_2"] = ConvLayer(last,
                                      nb_filter,
                                      1,
                                      pad=0,
                                      flip_filters=False,
                                      nonlinearity=linear)
    last = net["bn1_2"] = layers.NonUpdateBatchNormLayer(last)
    last = net["relu1_2"] = NonlinearityLayer(last, nonlinearity=rectify)

    # Modified Middle Part
    last = net["middle"] = ConvLayer(last, nb_filter, 1, nonlinearity=linear)

    # feature aggregation at multiple scales
    # NOTE(review): the module names "s8" and "s4" are each passed twice. If
    # fan_module_simple stores its layers in `net` under keys derived from
    # that name, the second call replaces the first call's entries - confirm
    # before renaming, since saved weights may depend on the current layout.
    last = net["bn1"] = layers.NonUpdateBatchNormLayer(last)
    last = fan_module_simple(last,
                             net,
                             "s8",
                             net['features_s8'],
                             nb_filter=nb_filter,
                             scale=8)
    # This key used to be "bn1" again, which dropped the first batch-norm
    # layer from `net`; the bn1, bn3, bn4, bn5 sequence shows "bn2" was meant.
    last = net["bn2"] = layers.NonUpdateBatchNormLayer(last)
    last = fan_module_simple(last,
                             net,
                             "s8",
                             net['features_s8'],
                             nb_filter=nb_filter,
                             scale=8)
    last = net["bn3"] = layers.NonUpdateBatchNormLayer(last)
    last = fan_module_simple(last,
                             net,
                             "s4",
                             net['features_s4'],
                             nb_filter=nb_filter,
                             scale=4)
    last = net["bn4"] = layers.NonUpdateBatchNormLayer(last)
    last = fan_module_simple(last,
                             net,
                             "s4",
                             net['features_s4'],
                             nb_filter=nb_filter,
                             scale=4)
    last = net["bn5"] = layers.NonUpdateBatchNormLayer(last)

    # Decoder as before
    last = net["deconv1_2"] = transpose(last, net["conv1_2"], nonlinearity=None)
    last = net["deconv1_1"] = transpose(last, net["conv1_1"], nonlinearity=None)

    return last, net
def architecture(input_var, input_shape, cfg):
    """Build the network described by ``cfg`` and return its output layer.

    Stages, in order: input layer, optional mel filterbank
    (``cfg['filterbank']``), optional magnitude transformation
    (``cfg['magscale']``), optional temporal difference
    (``cfg['arch.timediff']``), input standardization
    (``cfg['input_norm']``, default ``'batch'``), then the classifier selected
    by ``cfg['arch']`` (``'dense:16'``, ``'ismir2015'`` or ``'ismir2016'``),
    ending in a single sigmoid unit.

    :param input_var: symbolic variable bound to the input layer
    :param input_shape: shape of the input layer; index 3 limits the number
        of filterbank rows used
    :param cfg: mapping of configuration values
    :raises ValueError: on an unknown filterbank, magscale, input_norm,
        arch.convdrop or arch setting
    """
    net = InputLayer(input_shape, input_var)

    # --- filterbank, if any ---
    bank_mode = cfg['filterbank']
    if bank_mode == 'mel':
        import audio
        mel = audio.create_mel_filterbank(cfg['sample_rate'],
                                          cfg['frame_len'],
                                          cfg['mel_bands'],
                                          cfg['mel_min'],
                                          cfg['mel_max'])
        mel = mel[:input_shape[3]].astype(theano.config.floatX)
        # fixed (non-learnable) filterbank applied over the last axis
        net = DenseLayer(net,
                         num_units=cfg['mel_bands'],
                         num_leading_axes=-1,
                         W=T.constant(mel),
                         b=None,
                         nonlinearity=None)
    elif bank_mode == 'mel_learn':
        net = MelBankLayer(net, cfg['sample_rate'], cfg['frame_len'],
                           cfg['mel_bands'], cfg['mel_min'], cfg['mel_max'])
    elif bank_mode != 'none':
        raise ValueError("Unknown filterbank=%s" % cfg['filterbank'])

    # --- magnitude transformation, if any ---
    magscale = cfg['magscale']
    if magscale == 'log':
        net = ExpressionLayer(net, lambda x: T.log(T.maximum(1e-7, x)))
    elif magscale == 'log1p':
        net = ExpressionLayer(net, T.log1p)
    elif magscale.startswith('log1p_learn'):
        # learnable log(1 + 10^a * x), with given initial a (or default 0)
        initial = float(magscale[len('log1p_learn'):] or 0)
        scale = T.exp(theano.shared(lasagne.utils.floatX(initial)))
        net = lasagne.layers.ScaleLayer(net,
                                        scales=scale,
                                        shared_axes=(0, 1, 2, 3))
        net = ExpressionLayer(net, T.log1p)
    elif magscale.startswith('pow_learn'):
        # learnable x^sigmoid(a), with given initial a (or default 0)
        initial = float(magscale[len('pow_learn'):] or 0)
        exponent = T.nnet.sigmoid(theano.shared(lasagne.utils.floatX(initial)))
        net = PowLayer(net, exponent=exponent)
    elif magscale == 'pcen':
        net = PCENLayer(net)
        if cfg.get('pcen_fix_alpha'):
            # freeze alpha by dropping its "trainable" tag
            net.params[net.log_alpha].remove("trainable")
    elif magscale == 'loudness_only':
        # cut away half a block length on the left and right
        half_block = cfg['blocklen'] // 2
        net = lasagne.layers.SliceLayer(net,
                                        slice(half_block,
                                              -(cfg['blocklen'] // 2)),
                                        axis=2)
        # average over the frequencies and channels
        net = lasagne.layers.ExpressionLayer(
            net, lambda X: X.mean(axis=(1, 3), keepdims=True),
            lambda shp: (shp[0], 1, shp[2], 1))
    elif magscale != 'none':
        raise ValueError("Unknown magscale=%s" % cfg['magscale'])

    # --- temporal difference, if any ---
    if cfg['arch.timediff']:
        net = TimeDiffLayer(net, delta=cfg['arch.timediff'])

    # --- standardization per frequency band ---
    norm_mode = cfg.get('input_norm', 'batch')
    if norm_mode == 'batch':
        net = batch_norm_vanilla(net, axes=(0, 2), beta=None, gamma=None)
    elif norm_mode == 'instance':
        net = lasagne.layers.StandardizationLayer(net, axes=2)
    elif norm_mode != 'none':
        raise ValueError("Unknown input_norm=%s" % cfg['input_norm'])

    # --- convolutional neural network ---
    kwargs = dict(nonlinearity=lasagne.nonlinearities.leaky_rectify,
                  W=lasagne.init.Orthogonal())
    use_batch_norm = cfg['arch.batch_norm']

    def maybe_batch_norm(x):
        return batch_norm(x) if use_batch_norm else x

    # extra keyword arguments for conv dropout; None disables dropout
    drop_mode = cfg['arch.convdrop']
    if drop_mode == 'independent':
        drop_kwargs = {}
    elif drop_mode == 'channels':
        drop_kwargs = {'shared_axes': (2, 3)}
    elif drop_mode == 'bands':
        drop_kwargs = {'shared_axes': (1, 2)}
    elif drop_mode == 'none':
        drop_kwargs = None
    else:
        raise ValueError("Unknown arch.convdrop=%s" % cfg['arch.convdrop'])

    def maybe_dropout(x):
        if drop_kwargs is None:
            return x
        return dropout(x, 0.1, **drop_kwargs)

    def sigmoid_output(x):
        return DenseLayer(x,
                          1,
                          nonlinearity=lasagne.nonlinearities.sigmoid,
                          W=lasagne.init.Orthogonal())

    if cfg['arch'] == 'dense:16':
        # tiny fully-connected variant, no convolutions
        return sigmoid_output(DenseLayer(net, 16, **kwargs))

    convmore = cfg['arch.convmore']

    net = Conv2DLayer(net, int(64 * convmore), 3, **kwargs)
    if cfg.get('arch.firstconv_zeromean', False) == 'params':
        # make every first-layer filter zero-mean
        net.W = net.W - T.mean(net.W, axis=(2, 3), keepdims=True)
    net = maybe_dropout(maybe_batch_norm(net))

    net = maybe_batch_norm(Conv2DLayer(net, int(32 * convmore), 3, **kwargs))
    net = maybe_dropout(MaxPool2DLayer(net, 3))

    net = Conv2DLayer(net, int(128 * convmore), 3, **kwargs)
    net = maybe_dropout(maybe_batch_norm(net))

    net = maybe_batch_norm(Conv2DLayer(net, int(64 * convmore), 3, **kwargs))

    if cfg['arch'] == 'ismir2015':
        net = MaxPool2DLayer(net, 3)
    elif cfg['arch'] == 'ismir2016':
        net = maybe_dropout(net)
        # filter height leaves 4 positions along the last axis for the pool
        net = Conv2DLayer(net, int(128 * convmore),
                          (3, net.output_shape[3] - 3), **kwargs)
        net = maybe_batch_norm(net)
        net = MaxPool2DLayer(net, (1, 4))
    else:
        raise ValueError('Unknown arch=%s' % cfg['arch'])

    # dense classifier head
    for width in (256, 64):
        net = DenseLayer(dropout(net, 0.5), width, **kwargs)
        net = maybe_batch_norm(net)
    return sigmoid_output(dropout(net, 0.5))
def build_baseline9_fan_fan_bilinear(input_var, nb_filter=96):
    """Assemble the baseline-9 network: FAN layers plus bilinear FAN modules.

    A mask is derived from the bilinearly upsampled ``conv4_1`` features
    (1.0 where a channel equals the per-position maximum over axis 1). It
    feeds a ``FeatureAwareNormLayer`` before each of the two
    ``fan_module_simple`` stages and once more after the decoder.

    :param input_var: symbolic variable bound to the input layer
    :param nb_filter: filter count of the convolutions and FAN modules
    :return: ``(output_layer, net)``; ``net`` is an OrderedDict of the named
        layers
    """
    import theano.tensor as T
    import numpy as np

    net = OrderedDict()

    # Input, standardization
    net['input'] = InputLayer((None, 3, tools.INP_PSIZE, tools.INP_PSIZE),
                              input_var=input_var)
    current = net['norm'] = ExpressionLayer(net['input'],
                                            lambda x: normalize(x))

    # Feature encoder (built once per tap, exactly as before)
    net['features_s8'] = get_features(current)["conv4_1"]
    net['features_s4'] = get_features(current)["conv3_3"]

    upsampled_s8 = layers.upsample(net["features_s8"], 8, mode="bilinear")
    net['mask'] = ExpressionLayer(
        upsampled_s8,
        lambda x: 1. * T.eq(x, x.max(axis=1, keepdims=True)))

    # Two identical 1x1 conv -> BN -> ReLU stages
    for suffix in ("1_1", "1_2"):
        current = net["conv" + suffix] = ConvLayer(current,
                                                   nb_filter,
                                                   1,
                                                   pad=0,
                                                   flip_filters=False,
                                                   nonlinearity=linear)
        current = net["bn" + suffix] = BatchNormLayer(current)
        current = net["relu" + suffix] = NonlinearityLayer(
            current, nonlinearity=rectify)

    # Modified middle part
    current = net["middle"] = ConvLayer(current,
                                        nb_filter,
                                        1,
                                        nonlinearity=linear)

    # Feature aggregation at multiple scales
    for fan_key, tag, factor in (("fan1", "s8", 8), ("fan2", "s4", 4)):
        current = net[fan_key] = FeatureAwareNormLayer((current, net['mask']))
        current = fan_module_simple(current,
                                    net,
                                    tag,
                                    net['features_' + tag],
                                    nb_filter=nb_filter,
                                    scale=factor,
                                    upsampling_strategy="bilinear")

    # Decoder: undo the two encoder convolutions in reverse order
    for source in ("conv1_2", "conv1_1"):
        current = net["de" + source] = transpose(current,
                                                 net[source],
                                                 nonlinearity=None)

    # Final feature-aware normalization with fixed initial offset and scale
    current = net["fan"] = FeatureAwareNormLayer(
        (current, net['mask']),
        beta=nn.init.Constant(np.float32(128.)),
        gamma=nn.init.Constant(np.float32(25.)))

    return current, net
def build_finetuned2_fan(input_var,
                         nb_filter=96,
                         input_size=(None, 3, tools.INP_PSIZE,
                                     tools.INP_PSIZE)):
    """Assemble the fine-tuned FAN network and load its stored weights.

    Pipeline: input -> normalization -> two 1x1 conv / batch-norm / ReLU
    stages -> 1x1 "middle" conv -> two (batch norm + ``fan_module_simple``)
    stages -> two ``transpose`` layers -> ``FixedBatchNormLayer``. Weights
    are then loaded into the finished network from a fixed ``.npz`` path.

    :param input_var: symbolic variable bound to the input layer
    :param nb_filter: filter count of the convolutions and FAN modules
    :param input_size: shape passed to the input layer
    :return: ``(output_layer, net)``; ``net`` is an OrderedDict of the named
        layers
    """
    net = OrderedDict()

    # Input, standardization
    last = net['input'] = InputLayer(input_size, input_var=input_var)
    last = net['norm'] = ExpressionLayer(last, lambda x: normalize(x))

    # load feature encoder
    # TODO this is clearly a bug. only for compatibility reasons. remove once
    # all weights are converted
    # (get_features is called twice, so two encoders are built; the stored
    # weight file matches this layout, which is why it is kept for now.)
    net['features_s8'] = get_features(last)["conv4_1"]
    net['features_s4'] = get_features(last)["conv3_3"]

    # Pretrained Encoder as before
    last = net["conv1_1"] = ConvLayer(last,
                                      nb_filter,
                                      1,
                                      pad=0,
                                      flip_filters=False,
                                      nonlinearity=linear)
    last = net["bn1_1"] = layers.NonUpdateBatchNormLayer(last)
    last = net["relu1_1"] = NonlinearityLayer(last, nonlinearity=rectify)

    last = net["conv1_2"] = ConvLayer(last,
                                      nb_filter,
                                      1,
                                      pad=0,
                                      flip_filters=False,
                                      nonlinearity=linear)
    last = net["bn1_2"] = layers.NonUpdateBatchNormLayer(last)
    last = net["relu1_2"] = NonlinearityLayer(last, nonlinearity=rectify)

    # Modified Middle Part
    last = net["middle"] = ConvLayer(last, nb_filter, 1, nonlinearity=linear)

    # feature aggregation at multiple scales
    last = net["bn1"] = layers.NonUpdateBatchNormLayer(last)
    last = fan_module_simple(last,
                             net,
                             "s8",
                             net['features_s8'],
                             nb_filter=nb_filter,
                             scale=8)
    last = net["bn2"] = layers.NonUpdateBatchNormLayer(last)
    last = fan_module_simple(last,
                             net,
                             "s4",
                             net['features_s4'],
                             nb_filter=nb_filter,
                             scale=4)

    # Decoder as before
    last = net["deconv1_2"] = transpose(last, net["conv1_2"], nonlinearity=None)
    last = net["deconv1_1"] = transpose(last, net["conv1_1"], nonlinearity=None)

    last = net["bn"] = layers.FixedBatchNormLayer(last)

    # Load the stored parameters into the finished network. The call is made
    # for its effect on the layers; its return value is not needed.
    weights = "170123_runs/run_H.E.T._1485012575.4045253/3.npz"
    tools.load_weights(last, weights)

    return last, net
def build_baseline8_fan_bilinear(input_var, nb_filter=96):
    """Assemble the baseline-8 network: FAN modules with bilinear upsampling.

    Pipeline: input -> normalization -> two 1x1 conv / batch-norm / ReLU
    stages -> 1x1 "middle" conv -> two (batch norm + ``fan_module_simple``)
    stages -> two ``transpose`` layers -> batch norm whose beta and gamma
    start at 128 and 25.

    :param input_var: symbolic variable bound to the input layer
    :param nb_filter: filter count of the convolutions and FAN modules
    :return: ``(output_layer, net)``; ``net`` is an OrderedDict of the named
        layers
    """
    net = OrderedDict()

    # Input, standardization
    net['input'] = InputLayer((None, 3, tools.INP_PSIZE, tools.INP_PSIZE),
                              input_var=input_var)
    current = net['norm'] = ExpressionLayer(net['input'],
                                            lambda x: normalize(x))

    # Feature encoder (built once per tap, exactly as before)
    net['features_s8'] = get_features(current)["conv4_1"]
    net['features_s4'] = get_features(current)["conv3_3"]

    # Two identical 1x1 conv -> BN -> ReLU stages
    for suffix in ("1_1", "1_2"):
        current = net["conv" + suffix] = ConvLayer(current,
                                                   nb_filter,
                                                   1,
                                                   pad=0,
                                                   flip_filters=False,
                                                   nonlinearity=linear)
        current = net["bn" + suffix] = BatchNormLayer(current)
        current = net["relu" + suffix] = NonlinearityLayer(
            current, nonlinearity=rectify)

    # Modified middle part
    current = net["middle"] = ConvLayer(current,
                                        nb_filter,
                                        1,
                                        nonlinearity=linear)

    # Feature aggregation at multiple scales
    for bn_key, tag, factor in (("bn1", "s8", 8), ("bn2", "s4", 4)):
        current = net[bn_key] = BatchNormLayer(current)
        current = fan_module_simple(current,
                                    net,
                                    tag,
                                    net['features_' + tag],
                                    nb_filter=nb_filter,
                                    scale=factor,
                                    upsampling_strategy="bilinear")

    # Decoder: undo the two encoder convolutions in reverse order
    for source in ("conv1_2", "conv1_1"):
        current = net["de" + source] = transpose(current,
                                                 net[source],
                                                 nonlinearity=None)

    # Output normalization with fixed initial offset and scale
    current = net["bn"] = BatchNormLayer(current,
                                         beta=nn.init.Constant(128.),
                                         gamma=nn.init.Constant(25.))

    return current, net
def test_memory(
        game_title='SpaceInvaders-v0',
        n_parallel_games=3,
        replay_seq_len=2,
):
    """Smoke test: an agent with several memory types at once.

    Builds an agent whose memory consists of a frame window, a stack, an RNN
    cell, a GRU cell, a GRU memory layer and two LSTM cells (with and without
    peepholes), concatenates them, and trains a Q-learning objective for 10
    iterations. Nothing is asserted; the test passes if everything compiles
    and runs.

    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """
    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None, ) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    # the environment is only needed to read the parameters above
    del atari

    # ##### Agent observations
    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # reshape to [batch, color, x, y] to allow for convolutional layers to work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states
    # maps each new memory state to the input layer holding its previous
    # value; ordered so the memory states keep a fixed order
    memory_dict = OrderedDict([])

    ### Window
    window_size = 3

    # prev state input
    prev_window = InputLayer(
        (None, window_size) + tuple(observation_reshape.output_shape[1:]),
        name="previous window state")

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None, ) + window.output_shape[2:])

    memory_dict[window] = prev_window

    ### Stack
    # prev stack
    stack_w, stack_h = 4, 5
    # NOTE(review): both dense layers below are named "prev_stack"; the names
    # are runtime strings and are left untouched here.
    stack_inputs = DenseLayer(observation_reshape, stack_w, name="prev_stack")
    stack_controls = DenseLayer(observation_reshape,
                                3,
                                nonlinearity=lasagne.nonlinearities.softmax,
                                name="prev_stack")
    prev_stack = InputLayer((None, stack_h, stack_w),
                            name="previous stack state")
    stack = StackAugmentation(stack_inputs, prev_stack, stack_controls)
    memory_dict[stack] = prev_stack

    # index 0 along axis 1 of the stack
    stack_top = lasagne.layers.SliceLayer(stack, 0, 1)

    ### RNN preset
    prev_rnn = InputLayer((None, 16), name="previous RNN state")
    new_rnn = RNNCell(prev_rnn, observation_reshape)
    memory_dict[new_rnn] = prev_rnn

    ### GRU preset
    prev_gru = InputLayer((None, 16), name="previous GRUcell state")
    new_gru = GRUCell(prev_gru, observation_reshape)
    memory_dict[new_gru] = prev_gru

    ### GRUmemorylayer
    prev_gru1 = InputLayer((None, 15), name="previous GRUcell state")
    new_gru1 = GRUMemoryLayer(15, observation_reshape, prev_gru1)
    memory_dict[new_gru1] = prev_gru1

    # LSTM with peepholes
    prev_lstm0_cell = InputLayer(
        (None, 13), name="previous LSTMCell hidden state [with peepholes]")
    prev_lstm0_out = InputLayer(
        (None, 13), name="previous LSTMCell output state [with peepholes]")
    new_lstm0_cell, new_lstm0_out = LSTMCell(
        prev_lstm0_cell,
        prev_lstm0_out,
        input_or_inputs=observation_reshape,
        peepholes=True,
        name="newLSTM1 [with peepholes]")
    memory_dict[new_lstm0_cell] = prev_lstm0_cell
    memory_dict[new_lstm0_out] = prev_lstm0_out

    # LSTM without peepholes
    prev_lstm1_cell = InputLayer(
        (None, 14), name="previous LSTMCell hidden state [no peepholes]")
    prev_lstm1_out = InputLayer(
        (None, 14), name="previous LSTMCell output state [no peepholes]")
    new_lstm1_cell, new_lstm1_out = LSTMCell(
        prev_lstm1_cell,
        prev_lstm1_out,
        input_or_inputs=observation_reshape,
        peepholes=False,
        name="newLSTM1 [no peepholes]")
    memory_dict[new_lstm1_cell] = prev_lstm1_cell
    memory_dict[new_lstm1_out] = prev_lstm1_out

    ## concat everything
    # debug output: shapes of the non-LSTM parts that get concatenated
    for i in [flatten(window_max), stack_top, new_rnn, new_gru, new_gru1]:
        print(i.output_shape)
    all_memory = concat([
        flatten(window_max),
        stack_top,
        new_rnn,
        new_gru,
        new_gru1,
        new_lstm0_out,
        new_lstm1_out,
    ])

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc

    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(all_memory, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # resolver
    resolver = EpsilonGreedyResolver(q_eval, epsilon=0.1, name="resolver")

    # agent
    agent = Agent(observation_layer, memory_dict, q_eval, resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """Return ``(action, new_memories)`` for one tick.

        When ``prev_memories`` is the string ``'zeros'``, a zero-filled
        float32 array is created for every agent state.
        """
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [
                np.zeros((batch_size, ) + tuple(mem.output_shape[1:]),
                         dtype='float32') for mem in agent.agent_states
            ]
        res = applier_fun(np.array(observation), *prev_memories)
        # first output is the chosen action, the rest are the new memories
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with
    pool = GamePool(game_title, n_parallel_games)

    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    # show the names of the first few actions taken
    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """Play ``n_steps`` new ticks and load them into ``env``.

        The old sessions are thrown away entirely for simplicity.
        """
        # memory states must be captured before interacting
        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(
            step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor,
                          is_alive_tensor, preceding_memory_states)

    # load first sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.

    # ### Training via experience replay
    # get agent's Q-values obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, q_values_sequence = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )

    # Evaluating loss function
    scaled_reward_seq = env.rewards
    # For SpaceInvaders, however, not scaling rewards is at least working

    elwise_mse_loss = qlearning.get_elementwise_objective(
        q_values_sequence,
        env.actions[0],
        scaled_reward_seq,
        env.is_alive,
        gamma_or_gammas=0.99,
    )

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10**-4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions
    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward],
                                updates=updates)
    evaluation_fun = theano.function(
        [], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop
    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        # NOTE: this rebinds `loss` from the symbolic expression to a number;
        # harmless because both functions are already compiled
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " %
              (epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
def build_baseline3_vgg(input_var, nb_filter=64):
    """Assemble the baseline-3 network with pickled VGG16 weights.

    The two 3x3 encoder convolutions are initialised from the first four
    entries of ``'param values'`` in ``vgg16.pkl``. They are followed by
    average pooling, a 1x1 "middle" conv, two ``fan_module_simple`` stages,
    an upscale, two ``transpose`` layers and a final batch norm.

    :param input_var: symbolic variable bound to the input layer
    :param nb_filter: filter count of the convolutions and FAN modules. The
        loaded weights are passed as ``W``/``b`` of the two encoder
        convolutions, so presumably this must match their shape - confirm
        before using a value other than the default.
    :return: ``(output_layer, net)``; ``net`` is an OrderedDict of the named
        layers
    """
    net = OrderedDict()

    def get_weights(path):
        """Return the first four parameter arrays stored in ``path``."""
        # SECURITY: unpickling executes code embedded in the file; only load
        # a weights file from a trusted source.
        with open(path, "rb") as f:
            vgg16 = pickle.load(f, encoding="latin-1")

        weights = vgg16['param values']
        return weights[0], weights[1], weights[2], weights[3]

    # Input, standardization
    last = net['input'] = InputLayer(
        (None, 3, tools.INP_PSIZE, tools.INP_PSIZE), input_var=input_var)
    last = net['norm'] = ExpressionLayer(last, lambda x: normalize(x))

    # load feature encoder
    net['features_s8'] = get_features(last)["conv4_1"]
    net['features_s4'] = get_features(last)["conv3_3"]

    # Pretrained Encoder as before
    W1, b1, W2, b2 = get_weights("vgg16.pkl")
    last = net["conv1_1"] = ConvLayer(last,
                                      nb_filter,
                                      3,
                                      pad=1,
                                      flip_filters=False,
                                      nonlinearity=linear,
                                      W=W1,
                                      b=b1)
    last = net["relu1_1"] = NonlinearityLayer(last, nonlinearity=rectify)
    last = net["conv1_2"] = ConvLayer(last,
                                      nb_filter,
                                      3,
                                      pad=1,
                                      flip_filters=False,
                                      nonlinearity=linear,
                                      W=W2,
                                      b=b2)
    last = net["relu1_2"] = NonlinearityLayer(last, nonlinearity=rectify)
    last = net["pool"] = PoolLayer(last, 2, mode="average_exc_pad")

    # Modified Middle Part
    last = net["middle"] = ConvLayer(last, nb_filter, 1, nonlinearity=linear)

    # feature aggregation at multiple scales
    # The scales are half of those used by the sibling builders because of
    # the 2x pooling above. nb_filter is passed through rather than
    # hard-coded to 64, matching the other builders; with the default
    # argument the result is unchanged.
    last = fan_module_simple(last,
                             net,
                             "s8",
                             net['features_s8'],
                             nb_filter=nb_filter,
                             scale=4)
    last = fan_module_simple(last,
                             net,
                             "s4",
                             net['features_s4'],
                             nb_filter=nb_filter,
                             scale=2)

    # Decoder as before
    last = net["unpool"] = Upscale2DLayer(last, 2)
    last = net["deconv1_2"] = transpose(last, net["conv1_2"], nonlinearity=None)
    last = net["deconv1_1"] = transpose(last, net["conv1_1"], nonlinearity=None)

    last = net["bn"] = BatchNormLayer(last,
                                      beta=nn.init.Constant(128.),
                                      gamma=nn.init.Constant(25.))

    return last, net
def __init__(self, config):
    """Build the network graph and compile its training/prediction functions.

    Keys read from ``config``: ``'steps'`` (number of levels; the input holds
    ``2**steps`` points), ``'input_features'`` (``'no'`` selects a 1-channel
    input, anything else 3 channels), ``'n_f'`` (feature counts, indexed
    ``0..steps``), ``'num_classes'``, ``'l2'`` (weight of the L2 penalty) and
    ``'learning_rate'``.

    Attributes set: ``clouds``, ``norms``, ``target`` (symbolic inputs),
    ``train_fun``, ``prob_fun`` (compiled functions) and ``KDNet`` (dict of
    all named layers).
    """
    steps = config['steps']
    n_f = config['n_f']

    # Children of a node: left ('l') and right ('r'). Each child has six
    # branches, one per (axis, sign) pair. The order below is the order in
    # which layers are created and summed, so it must not be changed.
    sides = ('l', 'r')
    branches = (('X-', '-', 0), ('Y-', '-', 1), ('Z-', '-', 2),
                ('X+', '+', 0), ('Y+', '+', 1), ('Z+', '+', 2))

    self.clouds = T.tensor3(dtype='float32')
    # `range` instead of the Python-2-only `xrange`; valid on both versions
    self.norms = [T.tensor3(dtype='float32') for _ in range(steps)]
    self.target = T.vector(dtype='int64')

    KDNet = {}

    def masked_branches(level):
        """Return the twelve masked branch layers of ``level``."""
        return [
            KDNet['cloud{}_{}_{}_masked'.format(level, side, tag)]
            for side in sides for tag, _, _ in branches
        ]

    in_channels = 1 if config['input_features'] == 'no' else 3
    KDNet['input'] = InputLayer((None, in_channels, 2**steps),
                                input_var=self.clouds)

    for i in range(steps):
        level = i + 1
        n_children = 2**(steps - 1 - i)

        # Split normals: the right child uses them as given, the left child
        # negated.
        KDNet['norm{}_r'.format(level)] = InputLayer((None, 3, n_children),
                                                     input_var=self.norms[i])
        KDNet['norm{}_l'.format(level)] = ExpressionLayer(
            KDNet['norm{}_r'.format(level)], lambda X: -X)
        for side in sides:
            for tag, sign, axis in branches:
                KDNet['norm{}_{}_{}'.format(level, side, tag)] = \
                    SPTNormReshapeLayer(
                        KDNet['norm{}_{}'.format(level, side)],
                        sign, axis, n_f[level])

        # Features entering this level: the raw input on the first level,
        # otherwise the sum of the previous level's masked branches.
        if i == 0:
            KDNet['cloud{}'.format(level)] = SharedDotLayer(KDNet['input'],
                                                            n_f[i])
        else:
            KDNet['cloud{}'.format(level)] = ElemwiseSumLayer(
                masked_branches(i))
        KDNet['cloud{}_bn'.format(level)] = BatchNormDNNLayer(
            KDNet['cloud{}'.format(level)])
        KDNet['cloud{}_relu'.format(level)] = NonlinearityLayer(
            KDNet['cloud{}_bn'.format(level)], rectify)

        # Odd positions go to the right child, even positions to the left.
        child_shape = (None, n_f[i], n_children)
        KDNet['cloud{}_r'.format(level)] = ExpressionLayer(
            KDNet['cloud{}_relu'.format(level)], lambda X: X[:, :, 1::2],
            child_shape)
        KDNet['cloud{}_l'.format(level)] = ExpressionLayer(
            KDNet['cloud{}_relu'.format(level)], lambda X: X[:, :, ::2],
            child_shape)

        # Left branches own the parameters ...
        for tag, _, _ in branches:
            KDNet['cloud{}_l_{}'.format(level, tag)] = SharedDotLayer(
                KDNet['cloud{}_l'.format(level)], n_f[level])
        # ... and each right branch reuses W and b of its left counterpart.
        # (The X- right branch used to be built twice, with the first
        # instance discarded; it is now built once.)
        for tag, _, _ in branches:
            left = KDNet['cloud{}_l_{}'.format(level, tag)]
            KDNet['cloud{}_r_{}'.format(level, tag)] = SharedDotLayer(
                KDNet['cloud{}_r'.format(level)],
                n_f[level],
                W=left.W,
                b=left.b)

        # Mask every branch by multiplying it with its reshaped normals.
        for side in sides:
            for tag, _, _ in branches:
                KDNet['cloud{}_{}_{}_masked'.format(level, side, tag)] = \
                    ElemwiseMergeLayer([
                        KDNet['cloud{}_{}_{}'.format(level, side, tag)],
                        KDNet['norm{}_{}_{}'.format(level, side, tag)]
                    ], T.mul)

    # Classification head on the last level's summed branches
    KDNet['cloud_fin'] = ElemwiseSumLayer(masked_branches(steps))
    KDNet['cloud_fin_bn'] = BatchNormDNNLayer(KDNet['cloud_fin'])
    KDNet['cloud_fin_relu'] = NonlinearityLayer(KDNet['cloud_fin_bn'], rectify)
    KDNet['cloud_fin_reshape'] = ReshapeLayer(KDNet['cloud_fin_relu'],
                                              (-1, n_f[-1]))
    KDNet['output'] = DenseLayer(KDNet['cloud_fin_reshape'],
                                 config['num_classes'],
                                 nonlinearity=softmax)

    prob = get_output(KDNet['output'])
    prob_det = get_output(KDNet['output'], deterministic=True)

    weights = get_all_params(KDNet['output'], trainable=True)
    l2_pen = regularize_network_params(KDNet['output'], l2)
    loss = categorical_crossentropy(
        prob, self.target).mean() + config['l2'] * l2_pen
    accuracy = categorical_accuracy(prob, self.target).mean()

    lr = theano.shared(np.float32(config['learning_rate']))
    updates = adam(loss, weights, learning_rate=lr)

    inputs = [self.clouds] + self.norms + [self.target]
    self.train_fun = theano.function(inputs, [loss, accuracy],
                                     updates=updates)
    # NOTE(review): the loss returned here is computed from the
    # non-deterministic output `prob`, while the probabilities come from the
    # deterministic pass - confirm this is intended.
    self.prob_fun = theano.function(inputs, [loss, prob_det])

    self.KDNet = KDNet
def residual_block(l, increase_dim=False, projection=False):
    """Build one residual block on top of ``l`` and return its output layer.

    The residual branch is a bias/weight ``DenseLayer`` applied along the last
    axis of ``l`` (``num_leading_axes=-1``, no nonlinearity), followed by two
    batch-normalised 3x3 convolutions. The result is summed with a shortcut
    branch and passed through ``rectify``.

    Shortcut selection:

    * ``increase_dim`` false: ``l`` itself is the shortcut.
    * ``increase_dim`` true and ``projection`` true: a batch-normalised 1x1
      convolution with stride 2 over ``l`` (option B in the paper).
    * ``increase_dim`` true and ``projection`` false: ``l`` subsampled with
      ``::2`` on axes 2 and 3, then zero-padded along axis 1 up to the new
      filter count (option A in the paper).

    Args:
        l: Incoming layer. ``l.output_shape[1]`` is used as the number of
            input filters and axes 2 and 3 are the ones that get strided, so
            a 4-D output shape is required.
        increase_dim: When true, the first convolution uses stride (2, 2) and
            the number of filters is doubled.
        projection: Only consulted when ``increase_dim`` is true; selects the
            projection shortcut instead of the padded identity shortcut.

    Returns:
        The ``NonlinearityLayer`` that applies ``rectify`` to the sum of the
        residual branch and the shortcut.

    Side effects:
        Prints the intermediate layer shapes to stdout.
    """
    input_num_filters = l.output_shape[1]
    if increase_dim:
        first_stride = (2, 2)
        out_num_filters = input_num_filters * 2
    else:
        first_stride = (1, 1)
        out_num_filters = input_num_filters

    def conv3x3(incoming, stride, nonlinearity):
        # Both residual convolutions share everything except input, stride
        # and nonlinearity.
        return batch_norm(
            ConvLayer(incoming,
                      num_filters=out_num_filters,
                      filter_size=(3, 3),
                      stride=stride,
                      nonlinearity=nonlinearity,
                      pad='same',
                      W=lasagne.init.HeNormal(gain='relu'),
                      flip_filters=False))

    # Dense transform over the last axis only; all other axes are kept.
    l_l = DenseLayer(l,
                     num_units=l.output_shape[3],
                     num_leading_axes=-1,
                     nonlinearity=None)

    stack_left_1 = conv3x3(l_l, first_stride, rectify)
    stack_left_2 = conv3x3(stack_left_1, (1, 1), None)
    print("first stack: ", stack_left_2.output_shape)

    # Add the shortcut connection.
    if not increase_dim:
        print("l output shape: ", l.output_shape)
        shortcut = l
    elif projection:
        # Projection shortcut, as option B in the paper. Bound to its own
        # name so the boolean ``projection`` argument is not overwritten.
        shortcut = batch_norm(
            ConvLayer(l,
                      num_filters=out_num_filters,
                      filter_size=(1, 1),
                      stride=(2, 2),
                      nonlinearity=None,
                      pad='same',
                      b=None,
                      flip_filters=False))
        print("projection shape: ", shortcut.output_shape)
    else:
        # Identity shortcut, as option A in the paper.
        # A ``::2`` slice of length n has (n + 1) // 2 elements for both even
        # and odd n, so one shape function covers every parity combination.
        identity = ExpressionLayer(
            l,
            lambda X: X[:, :, ::2, ::2],
            lambda s: (s[0], s[1], (s[2] + 1) // 2, (s[3] + 1) // 2))

        # Zero-pad the channel axis up to out_num_filters. For an even input
        # filter count both sides get out_num_filters // 4, as before; for an
        # odd count the extra channel goes at the end so the shapes still
        # match in the element-wise sum.
        pad_front = out_num_filters // 4
        pad_back = out_num_filters - input_num_filters - pad_front
        padding = PadLayer(identity,
                           [(pad_front, pad_back), (0, 0), (0, 0)],
                           batch_ndim=1)
        print('------------------')
        print(stack_left_2.output_shape)
        print(identity.output_shape)
        print(padding.output_shape)
        shortcut = padding

    block = NonlinearityLayer(ElemwiseSumLayer([stack_left_2, shortcut]),
                              nonlinearity=rectify)
    return block