def create_deep_rnn(layer, layer_class, depth, layer_mask=None,
                    residual=False, skip_connections=False, bidir=False,
                    dropout=None, init_state_layers=None, name=None,
                    **kwargs):
    """ (Deep) RNN with possible skip/residual connections, bidirectional, dropout """
    if init_state_layers:
        assert len(init_state_layers) == depth
    layers = [layer]
    for i in range(depth):
        if skip_connections and i > 0:
            layer = concat([layers[0], layer], axis=2)

        if init_state_layers:
            hid_init = init_state_layers[i]
        else:
            hid_init = init.Constant(0.)

        new_layer = layer_class(layer, hid_init=hid_init,
                                mask_input=layer_mask, name=name, **kwargs)
        if bidir:
            layer_bw = layer_class(layer, mask_input=layer_mask,
                                   backwards=True, name=name, **kwargs)
            new_layer = concat([new_layer, layer_bw], axis=2)

        if residual:
            layer = ElemwiseSumLayer([layer, new_layer])
        else:
            layer = new_layer

        if skip_connections and i == depth - 1:
            layer = concat([layer] + layers[1:], axis=2)

        if dropout:
            layer = DropoutLayer(layer, p=dropout)

        # We need to apply the mask, otherwise there are problems with multiple
        # layers
        if layer_mask and i < depth - 1:
            layer = apply_mask(layer, layer_mask)

        layers.append(layer)

    return layers[1:]
def build_network(self, vocab_size, input_var, mask_var, W_init):
    l_in = L.InputLayer(shape=(None, None, 1), input_var=input_var)
    l_mask = L.InputLayer(shape=(None, None), input_var=mask_var)
    l_embed = L.EmbeddingLayer(l_in, input_size=vocab_size,
                               output_size=EMBED_DIM, W=W_init)

    l_fwd_1 = L.LSTMLayer(l_embed, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                          mask_input=l_mask, gradient_steps=GRAD_STEPS,
                          precompute_input=True)
    l_bkd_1 = L.LSTMLayer(l_embed, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                          mask_input=l_mask, gradient_steps=GRAD_STEPS,
                          precompute_input=True, backwards=True)
    l_all_1 = L.concat([l_fwd_1, l_bkd_1], axis=2)

    l_fwd_2 = L.LSTMLayer(l_all_1, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                          mask_input=l_mask, gradient_steps=GRAD_STEPS,
                          precompute_input=True)
    l_bkd_2 = L.LSTMLayer(l_all_1, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                          mask_input=l_mask, gradient_steps=GRAD_STEPS,
                          precompute_input=True, backwards=True)

    l_fwd_1_slice = L.SliceLayer(l_fwd_1, -1, 1)
    l_bkd_1_slice = L.SliceLayer(l_bkd_1, 0, 1)
    y_1 = L.ElemwiseSumLayer([l_fwd_1_slice, l_bkd_1_slice])

    l_fwd_2_slice = L.SliceLayer(l_fwd_2, -1, 1)
    l_bkd_2_slice = L.SliceLayer(l_bkd_2, 0, 1)
    y_2 = L.ElemwiseSumLayer([l_fwd_2_slice, l_bkd_2_slice])

    y = L.concat([y_1, y_2], axis=1)
    g = L.DenseLayer(y, num_units=EMBED_DIM,
                     nonlinearity=lasagne.nonlinearities.tanh)
    l_out = L.DenseLayer(g, num_units=vocab_size, W=l_embed.W.T,
                         nonlinearity=lasagne.nonlinearities.softmax)
    return l_out
def inception(network, no_1x1=64, no_3x3r=96, no_3x3=128, no_5x5r=16,
              no_5x5=32, no_pool=32):
    out1 = layers.Conv2DLayer(network, num_filters=no_1x1, filter_size=(1, 1),
                              nonlinearity=nonLinear.leaky_rectify,
                              W=init.GlorotUniform(gain='relu'), pad='same')

    out3 = layers.Conv2DLayer(network, num_filters=no_3x3r, filter_size=(1, 1),
                              nonlinearity=nonLinear.leaky_rectify,
                              W=init.GlorotUniform(gain='relu'), pad='same')
    out3 = layers.Conv2DLayer(out3, num_filters=no_3x3, filter_size=(3, 3),
                              nonlinearity=nonLinear.leaky_rectify,
                              W=init.GlorotUniform(gain='relu'), pad='same')

    out5 = layers.Conv2DLayer(network, num_filters=no_5x5r, filter_size=(1, 1),
                              nonlinearity=nonLinear.leaky_rectify,
                              W=init.GlorotUniform(gain='relu'), pad='same')
    out5 = layers.Conv2DLayer(out5, num_filters=no_5x5, filter_size=(5, 5),
                              nonlinearity=nonLinear.leaky_rectify,
                              W=init.GlorotUniform(gain='relu'), pad='same')

    outpool = layers.MaxPool2DLayer(network, 3, stride=1, pad=1)
    outpool = layers.Conv2DLayer(outpool, num_filters=no_pool, filter_size=(1, 1),
                                 nonlinearity=nonLinear.leaky_rectify,
                                 W=init.GlorotUniform(gain='relu'), pad='same')

    return layers.concat([out1, out3, out5, outpool])
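# Usage sketch (an assumption, not part of the original source): stacking two of
# the inception blocks above on a small image batch. It presumes the module
# aliases used above, i.e. layers = lasagne.layers, nonLinear =
# lasagne.nonlinearities, init = lasagne.init; the input shape is illustrative.
import lasagne.layers as layers
import lasagne.init as init
import lasagne.nonlinearities as nonLinear

network = layers.InputLayer((None, 3, 64, 64))
network = inception(network)                          # 64+128+32+32 = 256 channels
network = inception(network, no_1x1=128, no_3x3=192)  # 128+192+32+32 = 384 channels
print(layers.get_output_shape(network))               # (None, 384, 64, 64)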
def get_context(self, conv_in, avg=False):
    suf = '_avg' if avg else ''
    conv_out = []
    # for n in [2,3,4,5,6,7,8,9]:
    # for n in [2,3,4,5]:
    for n in self.args.context_ngrams:
        conv = conv_in
        for i in range(self.args.conv_layers):
            conv = L.Conv1DLayer(
                conv, 128, n, name='conv_window_%d(%d)%s' % (n, i, suf),
                # W=HeNormal('relu') if not avg else Constant())  # (100, 128, 15-n+1)
                W=GlorotNormal('relu') if not avg else Constant())  # (100, 128, 15-n+1)
        conv = L.MaxPool1DLayer(
            conv,
            self.args.window_size - (n - 1) * self.args.conv_layers)  # (100, 128, 1)
        conv = L.flatten(conv, 2)  # (100, 128)
        conv_out.append(conv)
    x = L.concat(conv_out, axis=1)  # (100, 1024)
    return x
def __create_toplogy__(self, input_var_first=None, input_var_second=None):
    # define network topology
    if (self.conf.rep % 2 != 0):
        raise ValueError("Representation size should be divisible by two as it's"
                         " formed by combining two crossmodal translations",
                         self.conf.rep)

    # input layers
    l_in_first = InputLayer(shape=(self.conf.batch_size, self.conf.mod1size),
                            input_var=input_var_first)
    l_in_second = InputLayer(shape=(self.conf.batch_size, self.conf.mod2size),
                             input_var=input_var_second)

    # first -> second
    l_hidden1_first = DenseLayer(l_in_first, num_units=self.conf.hdn,
                                 nonlinearity=self.conf.act, W=GlorotUniform())    # enc1
    l_hidden2_first = DenseLayer(l_hidden1_first, num_units=self.conf.rep//2,
                                 nonlinearity=self.conf.act, W=GlorotUniform())    # enc2
    l_hidden2_first_d = DropoutLayer(l_hidden2_first, p=self.conf.dropout)
    l_hidden3_first = DenseLayer(l_hidden2_first_d, num_units=self.conf.hdn,
                                 nonlinearity=self.conf.act, W=GlorotUniform())    # dec1
    l_out_first = DenseLayer(l_hidden3_first, num_units=self.conf.mod2size,
                             nonlinearity=self.conf.act, W=GlorotUniform())        # dec2

    if self.conf.untied:
        # FREE
        l_hidden1_second = DenseLayer(l_in_second, num_units=self.conf.hdn,
                                      nonlinearity=self.conf.act, W=GlorotUniform())  # enc1
        l_hidden2_second = DenseLayer(l_hidden1_second, num_units=self.conf.rep//2,
                                      nonlinearity=self.conf.act, W=GlorotUniform())  # enc2
        l_hidden2_second_d = DropoutLayer(l_hidden2_second, p=self.conf.dropout)
        l_hidden3_second = DenseLayer(l_hidden2_second_d, num_units=self.conf.hdn,
                                      nonlinearity=self.conf.act, W=GlorotUniform())  # dec1
        l_out_second = DenseLayer(l_hidden3_second, num_units=self.conf.mod1size,
                                  nonlinearity=self.conf.act, W=GlorotUniform())      # dec2
    else:
        # TIED middle
        l_hidden1_second = DenseLayer(l_in_second, num_units=self.conf.hdn,
                                      nonlinearity=self.conf.act, W=GlorotUniform())  # enc1
        l_hidden2_second = DenseLayer(l_hidden1_second, num_units=self.conf.rep//2,
                                      nonlinearity=self.conf.act,
                                      W=l_hidden3_first.W.T)                          # enc2
        l_hidden2_second_d = DropoutLayer(l_hidden2_second, p=self.conf.dropout)
        l_hidden3_second = DenseLayer(l_hidden2_second_d, num_units=self.conf.hdn,
                                      nonlinearity=self.conf.act,
                                      W=l_hidden2_first.W.T)                          # dec1
        l_out_second = DenseLayer(l_hidden3_second, num_units=self.conf.mod1size,
                                  nonlinearity=self.conf.act, W=GlorotUniform())      # dec2

    l_out = concat([l_out_first, l_out_second])

    return l_out, l_hidden2_first, l_hidden2_second
def get_conv_input(self, sidx, tidx, avg=False):
    suf = '_avg' if avg else ''

    feat_embs = [
        self.manager.feats[name].get_emb_layer(sidx, tidx, avg=avg)
        for name in self.args.source_feats
    ]

    # TODO: change the meaning
    if self.args.lex == 'mix':
        concat_emb = L.ElemwiseSumLayer(feat_embs)  # (100, 15, 256)
    else:
        concat_emb = L.concat(feat_embs, axis=2)  # (100, 15, 256+100)

    pos = np.array([0] * (self.args.window_size / 2) + [1] +
                   [0] * (self.args.window_size / 2)).astype(theano.config.floatX)
    post = theano.shared(pos[np.newaxis, :, np.newaxis], borrow=True)  # (1, 15, 1)
    posl = L.InputLayer(
        (None, self.args.window_size, 1),
        input_var=T.extra_ops.repeat(post, sidx.shape[0], axis=0))  # (100, 15, 1)
    conv_in = L.concat([concat_emb, posl], axis=2)  # (100, 15, 256+1)

    if self.args.pos_emb:
        posint = L.flatten(
            L.ExpressionLayer(posl, lambda x: T.cast(x, 'int64')))  # (100, 15)
        pos_emb = L.EmbeddingLayer(
            posint, self.args.window_size, 8, name='epos' + suf,
            W=Normal(0.01) if not avg else Constant())  # (100, 15, 8)
        pos_emb.params[pos_emb.W].remove('regularizable')
        conv_in = L.concat([concat_emb, posl, pos_emb], axis=2)  # (100, 15, 256+1+8)

    # # squeeze
    # if self.args.squeeze:
    #     conv_in = L.DenseLayer(conv_in, num_units=self.args.squeeze,
    #                            name='squeeze'+suf, num_leading_axes=2,
    #                            W=HeNormal('relu'))  # (100, 15, 256)

    conv_in = L.dimshuffle(conv_in, (0, 2, 1))  # (100, 256+1, 15)

    return conv_in
def conv4_net_dense_color(data, ndim, pad='same'):
    res = conv_nonl(data, 6, '1', pad=pad)
    res = conv_nonl(res, 12, '2', pad=pad)
    res = conv_nonl(res, 24, '3', pad=pad)
    res = L.concat([data, res], axis=1, name='concat')
    res = L.DimshuffleLayer(res, (0, 2, 3, 1), name='transpose')
    res = L2NormLayer(res, 1e-8, name='l2norm')
    res = NormedDense(res, ndim, name='normed_dense')
    return res
def create_deep_rnn(layer, layer_class, depth, layer_mask=None,
                    residual=False, skip_connections=False, bidir=False,
                    dropout=None, init_state_layers=None, **kwargs):
    """ (Deep) RNN with possible skip/residual connections, bidirectional, dropout """
    layers = [layer]
    for i in range(depth):
        if skip_connections and i > 0:
            layer = concat([layers[0], layer], axis=2)

        if init_state_layers:
            hid_init = init_state_layers[i]
        else:
            hid_init = init.Constant(0.)

        new_layer = layer_class(layer, hid_init=hid_init,
                                mask_input=layer_mask, **kwargs)
        if bidir:
            layer_bw = layer_class(layer, mask_input=layer_mask,
                                   backwards=True, **kwargs)
            new_layer = concat([new_layer, layer_bw], axis=2)

        if residual:
            layer = ElemwiseSumLayer([layer, new_layer])
        else:
            layer = new_layer

        if skip_connections and i == depth - 1:
            layer = concat([layer] + layers[1:], axis=2)

        if dropout:
            layer = DropoutLayer(layer, p=dropout)

        layers.append(layer)

    return layers[1:]
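# Usage sketch (an assumption, not part of the original code): a 3-layer
# bidirectional GRU stack built with the helper above. It presumes the helper's
# module already imports concat/ElemwiseSumLayer/DropoutLayer and `init` from
# Lasagne, as its body suggests; shapes are illustrative.
import lasagne.layers as LL

l_in = LL.InputLayer((None, None, 40))   # (batch, time, features)
l_mask = LL.InputLayer((None, None))     # 1 for valid steps, 0 for padding
rnn_layers = create_deep_rnn(l_in, LL.GRULayer, depth=3, layer_mask=l_mask,
                             bidir=True, dropout=0.2, num_units=128)
l_top = rnn_layers[-1]                   # (batch, time, 2*128) after the concat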
def buildNetwork(input_var=None):
    net = {}

    # The input shape is (freq, time) -> (129, 300)
    net['input'] = InputLayer((None, 129, 300), input_var=input_var)
    print "input: {}".format(net['input'].output_shape[1:])

    # conv1
    net['conv1'] = Conv1DLayer(net['input'], num_filters=256, filter_size=4,
                               W=GlorotUniform('relu'), b=Constant(0.0))
    print "conv1: {}".format(net['conv1'].output_shape[1:])

    # pool1
    net['pool1'] = Pool1DLayer(net['conv1'], pool_size=4)
    print "pool1: {}".format(net['pool1'].output_shape[1:])

    # conv2
    net['conv2'] = Conv1DLayer(net['pool1'], num_filters=256, filter_size=4,
                               W=GlorotUniform('relu'), b=Constant(0.0))
    print "conv2: {}".format(net['conv2'].output_shape[1:])

    # pool2
    net['pool2'] = Pool1DLayer(net['conv2'], pool_size=2)
    print "pool2: {}".format(net['pool2'].output_shape[1:])

    # conv3
    net['conv3'] = Conv1DLayer(net['pool2'], num_filters=512, filter_size=4,
                               W=GlorotUniform('relu'), b=Constant(0.0))
    print "conv3: {}".format(net['conv3'].output_shape[1:])

    # global pool
    net['pool3_1'] = GlobalPoolLayer(net['conv3'], pool_function=T.mean)
    print "pool3_1: {}".format(net['pool3_1'].output_shape[1:])
    net['pool3_2'] = GlobalPoolLayer(net['conv3'], pool_function=T.max)
    print "pool3_2: {}".format(net['pool3_2'].output_shape[1:])
    net['pool3'] = concat((net['pool3_1'], net['pool3_2']), axis=1)
    print "pool3: {}".format(net['pool3'].output_shape[1:])

    # fc6
    net['fc6'] = DenseLayer(net['pool3'], num_units=2048,
                            nonlinearity=lasagne.nonlinearities.rectify,
                            W=GlorotUniform('relu'), b=Constant(0.0))
    print "fc6: {}".format(net['fc6'].output_shape[1:])

    # fc7
    net['fc7'] = DenseLayer(net['fc6'], num_units=2048,
                            nonlinearity=lasagne.nonlinearities.rectify,
                            W=GlorotUniform('relu'), b=Constant(0.0))
    print "fc7: {}".format(net['fc7'].output_shape[1:])

    # output
    net['output'] = DenseLayer(net['fc7'], num_units=100,
                               nonlinearity=lasagne.nonlinearities.sigmoid,
                               W=GlorotUniform('relu'), b=Constant(0.0))
    print "output: {}".format(net['output'].output_shape[1:])

    return net
def build_lstm_reader(vocab_size, input_var=T.itensor3(), mask_var=T.tensor3(),
                      skip_connect=True):
    # the input layer
    l_in = L.InputLayer(shape=(None, None, 1), input_var=input_var)
    # the mask layer
    l_mask = L.InputLayer(shape=(None, None), input_var=mask_var)
    # the lookup table of word embeddings
    l_embed = L.EmbeddingLayer(l_in, vocab_size, EMBED_DIM)
    # the 1st lstm layer
    l_fwd_1 = L.LSTMLayer(l_embed, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                          mask_input=l_mask, gradient_steps=GRAD_STEPS,
                          precompute_input=True)
    # the 2nd lstm layer
    if skip_connect:
        # construct skip connection from the lookup table to the 2nd layer
        batch_size, seq_len, _ = input_var.shape
        # concatenate the last dimension of l_fwd_1 and embed
        l_fwd_1_shp = L.ReshapeLayer(l_fwd_1, (-1, NUM_HIDDEN))
        l_embed_shp = L.ReshapeLayer(l_embed, (-1, EMBED_DIM))
        to_next_layer = L.ReshapeLayer(
            L.concat([l_fwd_1_shp, l_embed_shp], axis=1),
            (batch_size, seq_len, NUM_HIDDEN + EMBED_DIM))
    else:
        to_next_layer = l_fwd_1

    l_fwd_2 = L.LSTMLayer(to_next_layer, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                          mask_input=l_mask, gradient_steps=GRAD_STEPS,
                          precompute_input=True)

    # slice final states of both lstm layers
    l_fwd_1_slice = L.SliceLayer(l_fwd_1, -1, 1)
    l_fwd_2_slice = L.SliceLayer(l_fwd_2, -1, 1)

    # g will be used to score the words based on their embeddings
    g = L.DenseLayer(L.concat([l_fwd_1_slice, l_fwd_2_slice], axis=1),
                     num_units=EMBED_DIM)
    # W is shared with the embedding layer
    l_out = L.DenseLayer(g, num_units=vocab_size, W=l_embed.W.T,
                         nonlinearity=lasagne.nonlinearities.softmax)
    return l_out
def buildNetwork(input_var=None):
    net = {}

    net['input'] = InputLayer((None, 12, 300), input_var=input_var)
    print "input: {}".format(net['input'].output_shape[1:])

    # conv1
    net['conv1'] = Conv1DLayer(net['input'], num_filters=256, filter_size=4,
                               nonlinearity=rectify)
    print "conv1: {}".format(net['conv1'].output_shape[1:])

    # pool1
    net['pool1'] = Pool1DLayer(net['conv1'], pool_size=4)
    print "pool1: {}".format(net['pool1'].output_shape[1:])

    # conv2
    net['conv2'] = Conv1DLayer(net['conv1'], num_filters=256, filter_size=4,
                               nonlinearity=rectify)
    print "conv2: {}".format(net['conv2'].output_shape[1:])

    # pool2
    net['pool2'] = Pool1DLayer(net['conv2'], pool_size=1)
    print "pool2: {}".format(net['pool2'].output_shape[1:])

    # conv3
    net['conv3'] = Conv1DLayer(net['conv2'], num_filters=512, filter_size=4)
    print "conv3: {}".format(net['conv3'].output_shape[1:])

    # global pool
    net['pool3_1'] = GlobalPoolLayer(net['conv3'], pool_function=T.mean)
    print "pool3_1: {}".format(net['pool3_1'].output_shape[1:])
    net['pool3_2'] = GlobalPoolLayer(net['conv3'], pool_function=T.max)
    print "pool3_2: {}".format(net['pool3_2'].output_shape[1:])
    net['pool3'] = concat((net['pool3_1'], net['pool3_2']), axis=1)
    print "pool3: {}".format(net['pool3'].output_shape[1:])

    # fc6
    net['fc6'] = DenseLayer(net['pool3'], num_units=2048,
                            nonlinearity=lasagne.nonlinearities.rectify)
    print "fc6: {}".format(net['fc6'].output_shape[1:])

    # fc7
    net['fc7'] = DenseLayer(net['fc6'], num_units=2048,
                            nonlinearity=lasagne.nonlinearities.rectify)
    print "fc7: {}".format(net['fc7'].output_shape[1:])

    # output
    net['output'] = DenseLayer(net['fc7'], num_units=256,
                               nonlinearity=lasagne.nonlinearities.sigmoid)
    print "output: {}".format(net['output'].output_shape[1:])

    return net
def build_discriminator_lstm(params, gate_params, cell_params):
    from lasagne.layers import InputLayer, DenseLayer, concat
    from lasagne.layers.recurrent import LSTMLayer
    from lasagne.regularization import l2, regularize_layer_params
    # from layers import MinibatchLayer

    # input layers
    l_in = InputLayer(shape=params['input_shape'], name='d_in')
    l_mask = InputLayer(shape=params['mask_shape'], name='d_mask')

    # recurrent layers for bidirectional network
    l_forward = LSTMLayer(
        l_in, params['n_units'], grad_clipping=params['grad_clip'],
        ingate=gate_params, forgetgate=gate_params,
        cell=cell_params, outgate=gate_params,
        nonlinearity=params['non_linearities'][0],
        only_return_final=True, mask_input=l_mask)
    l_backward = LSTMLayer(
        l_in, params['n_units'], grad_clipping=params['grad_clip'],
        ingate=gate_params, forgetgate=gate_params,
        cell=cell_params, outgate=gate_params,
        nonlinearity=params['non_linearities'][1],
        only_return_final=True, mask_input=l_mask, backwards=True)

    # concatenate output of forward and backward layers
    l_concat = concat([l_forward, l_backward], axis=1)

    # minibatch layer on forward and backward layers
    # l_minibatch = MinibatchLayer(l_concat, num_kernels=100)

    # output layer
    l_out = DenseLayer(
        l_concat, num_units=params['n_output_units'],
        nonlinearity=params['non_linearities'][2])

    regularization = regularize_layer_params(
        l_out, l2) * params['regularization']

    class Discriminator:
        def __init__(self, l_in, l_mask, l_out):
            self.l_in = l_in
            self.l_mask = l_mask
            self.l_out = l_out
            self.regularization = regularization

    return Discriminator(l_in, l_mask, l_out)
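# Hypothetical parameter dictionaries for build_discriminator_lstm (an
# illustration only; the keys simply mirror what the function reads). Gate and
# cell settings use lasagne.layers.Gate, as is usual for LSTMLayer.
from lasagne.layers import Gate
from lasagne.nonlinearities import tanh, sigmoid
import lasagne.init as init

params = {
    'input_shape': (None, 100, 64),            # (batch, time, features)
    'mask_shape': (None, 100),
    'n_units': 128,
    'grad_clip': 5.0,
    'non_linearities': (tanh, tanh, sigmoid),  # forward, backward, output
    'n_output_units': 1,
    'regularization': 1e-4,
}
gate_params = Gate(W_in=init.Orthogonal(), W_hid=init.Orthogonal())
cell_params = Gate(W_cell=None, nonlinearity=tanh)

discriminator = build_discriminator_lstm(params, gate_params, cell_params)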
def build_model():
    net = {}
    net['input'] = InputLayer((None, 512 * 20, 3, 3))
    au_fc_layers = []
    for i in range(20):
        net['roi_AU_N_' + str(i)] = SliceLayer(net['input'],
                                               indices=slice(i * 512, (i + 1) * 512),
                                               axis=1)

        # try to adding upsampling here for more conv
        net['Roi_upsample_' + str(i)] = Upscale2DLayer(net['roi_AU_N_' + str(i)],
                                                       scale_factor=2)

        net['conv_roi_' + str(i)] = ConvLayer(net['Roi_upsample_' + str(i)], 512, 3)

        net['au_fc_' + str(i)] = DenseLayer(net['conv_roi_' + str(i)], num_units=150)

        au_fc_layers += [net['au_fc_' + str(i)]]

    #
    net['local_fc'] = concat(au_fc_layers)
    net['local_fc2'] = DenseLayer(net['local_fc'], num_units=2048)

    net['local_fc_dp'] = DropoutLayer(net['local_fc2'], p=0.5)

    # net['fc_comb']=concat([net['au_fc_layer'],net['local_fc_dp']])
    # net['fc_dense']=DenseLayer(net['fc_comb'],num_units=1024)
    # net['fc_dense_dp']=DropoutLayer(net['fc_dense'],p=0.3)

    net['real_out'] = DenseLayer(net['local_fc_dp'], num_units=12, nonlinearity=sigmoid)

    # net['final']=concat([net['pred_pos_layer'],net['output_layer']])

    return net
def score_fused_convnets(self, fusion_type, input_var1=None, input_var2=None,
                         weights_dir_depth=None, weights_dir_rgb=None,
                         bottleneck_W=None, weights_dir=None):

    net = OrderedDict()
    rgb_net = self.simple_convnet(4, input_var=input_var1,
                                  bottleneck_W=bottleneck_W)
    depth_net = self.simple_convnet(1, input_var=input_var2,
                                    bottleneck_W=bottleneck_W)
    if weights_dir_depth is not None and weights_dir_rgb is not None:
        lw_depth = LoadWeights(weights_dir_depth, depth_net)
        lw_depth.load_weights_numpy()
        lw_rgb = LoadWeights(weights_dir_rgb, rgb_net)
        lw_rgb.load_weights_numpy()
    if fusion_type == self.LOCAL:
        net['reshape_depth'] = reshape(depth_net['output'], ([0], 1, 1, [1]))
        net['reshape_rgb'] = reshape(rgb_net['output'], ([0], 1, 1, [1]))
        net['concat'] = concat([net['reshape_depth'], net['reshape_rgb']])
        net['lcl'] = LocallyConnected2DLayer(net['concat'], 1, (1, 1),
                                             untie_biases=True,
                                             nonlinearity=None)
        net['output'] = reshape(net['lcl'], ([0], [3]))
    elif fusion_type == self.SUM:
        net['output'] = ElemwiseSumLayer(
            [depth_net['output'], rgb_net['output']], coeffs=0.5)

    if weights_dir is not None:
        lw = LoadWeights(weights_dir, net)
        lw.load_weights_numpy()

    return net
class decoder_step:
    # inputs
    encoder = L.InputLayer((None, None, CODE_SIZE), name='encoded sequence')
    encoder_mask = L.InputLayer((None, None), name='encoded sequence')

    inp = L.InputLayer((None,), name='current character')
    l_target_emb = L.EmbeddingLayer(inp, dst_voc.len, 128)

    # recurrent part
    l_rnn1 = AutoLSTMCell(l_target_emb, 128, name="lstm1")

    query = L.DenseLayer(l_rnn1.out, 128, nonlinearity=None)
    attn = AttentionLayer(encoder, query, 128, mask_input=encoder_mask)['attn']

    l_rnn = L.concat([attn, l_rnn1.out, l_target_emb])
    l_rnn2 = AutoLSTMCell(l_rnn, 128, name="lstm1")

    next_token_probas = L.DenseLayer(l_rnn2.out, dst_voc.len,
                                     nonlinearity=T.nnet.softmax)

    # pick next token from predicted probas
    next_token = ProbabilisticResolver(next_token_probas)

    tau = T.scalar("sample temperature", "float32")
    next_token_temperatured = TemperatureResolver(next_token_probas, tau)
    next_token_greedy = GreedyResolver(next_token_probas)

    auto_updates = {
        **l_rnn1.get_automatic_updates(),
        **l_rnn2.get_automatic_updates()
    }
def get_actor(self, sidx, tidx, valid, avg=False):
    suf = '_avg' if avg else ''

    feat_embs = [
        self.manager.feats[name].get_emb_layer(sidx, tidx, avg=avg)
        for name in self.args.source_feats
    ]

    x = L.concat(feat_embs, axis=2)  # (100, 26, 256+32+32+...)
    if self.args.squeeze:
        x = L.DenseLayer(x, num_units=self.args.squeeze, name='h0' + suf,
                         num_leading_axes=2, W=HeNormal('relu'))  # (100, 26, 256)

    x = L.flatten(x)  # (100, 26*256)

    h1 = L.DenseLayer(x, num_units=self.args.nh1, name='h1' + suf,
                      W=HeNormal('relu'))  # (100, 512)
    h1 = L.dropout(h1, self.args.dropout)

    taggers = {}
    if self.args.aux_tagger:
        hids = [h1]
        for name in self.args.target_feats:
            hid = L.DenseLayer(h1, 256, name='hid-%s%s' % (name, suf),
                               W=HeNormal('relu'))  # (100, 512)
            hids.append(hid)
            hid = L.dropout(hid, self.args.dropout)
            # h1 = L.dropout(h1, self.args.dropout)
            taggers[name] = L.DenseLayer(hid, len(self.manager.feats[name].map),
                                         name='tagger-%s' % name, W=HeNormal(),
                                         nonlinearity=softmax)  # (100, 25)
        h1 = L.concat(hids, axis=1)

    h2 = L.DenseLayer(h1, num_units=self.args.nh2, name='h2' + suf,
                      W=HeNormal('relu'))  # (100, 256)
    h2 = L.dropout(h2, self.args.dropout)

    h3y = L.DenseLayer(h2, num_units=self.args.nh3, name='h3y' + suf,
                       W=HeNormal(), nonlinearity=softmax)  # (100, 4) num of actions
    h3s = L.concat([h2, h3y], axis=1)  # (100, 256+4+4), this way shouldn't output <UNK> if its not SHIFT
    h3z = L.DenseLayer(h2, num_units=self.args.size['label'], name='h3z' + suf,
                       W=HeNormal(), nonlinearity=softmax)  # (100, 25) number of labels

    if avg:
        set_all_zero([h3y, h3z] + taggers.values())

    return h3y, h3z, taggers
def input_fused_convnets(self, fusion_type, input_var1=None, input_var2=None,
                         bottleneck_W=None):
    net = OrderedDict()

    net['input_rgb'] = InputLayer((None, 4, 128, 128), input_var=input_var1)
    layer = 0
    net['input_depth'] = InputLayer((None, 1, 128, 128), input_var=input_var2)
    layer += 1
    if fusion_type == self.CONCAT:
        net['merge'] = concat([net['input_rgb'], net['input_depth']])
        layer += 1
    elif fusion_type == self.CONCATCONV:
        net['concat'] = concat([net['input_rgb'], net['input_depth']])
        layer += 1
        net['merge'] = Conv2DLayer(net['concat'], num_filters=1,
                                   filter_size=(1, 1), nonlinearity=None)
        layer += 1

    for i in range(self._net_specs_dict['num_conv_layers']):
        # Add convolution layers
        net['conv{0:d}'.format(i + 1)] = Conv2DLayer(
            net.values()[layer],
            num_filters=self._net_specs_dict['num_conv_filters'][i],
            filter_size=(self._net_specs_dict['conv_filter_size'][i],) * 2,
            pad='same')
        layer += 1
        if self._net_specs_dict['num_conv_layers'] <= 2:
            # Add pooling layers
            net['pool{0:d}'.format(i + 1)] = MaxPool2DLayer(
                net.values()[layer], pool_size=(3, 3))
            layer += 1
        else:
            if i < 4:
                if (i + 1) % 2 == 0:
                    # Add pooling layers
                    net['pool{0:d}'.format(i + 1)] = MaxPool2DLayer(
                        net.values()[layer], pool_size=(3, 3))
                    layer += 1
            else:
                if (i + 1) == 7:
                    # Add pooling layers
                    net['pool{0:d}'.format(i + 1)] = MaxPool2DLayer(
                        net.values()[layer], pool_size=(3, 3))
                    layer += 1

    # Add fc-layers
    net['fc1'] = DenseLayer(net.values()[layer],
                            self._net_specs_dict['num_fc_units'][0])
    # Add dropout layer
    net['dropout1'] = dropout(net['fc1'], p=self._model_hp_dict['p'])
    net['fc2'] = DenseLayer(net['dropout1'],
                            self._net_specs_dict['num_fc_units'][1])
    # Add dropout layer
    net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p'])
    if bottleneck_W is not None:
        # Add bottleneck layer
        net['bottleneck'] = DenseLayer(net['dropout2'], 30)
        # Add output layer(linear activation because it's regression)
        net['output'] = DenseLayer(net['bottleneck'], 3 * self._num_joints,
                                   W=bottleneck_W[0:30],
                                   nonlinearity=lasagne.nonlinearities.tanh)
    else:
        # Add output layer(linear activation because it's regression)
        net['output'] = DenseLayer(net['dropout2'], 3 * self._num_joints,
                                   nonlinearity=lasagne.nonlinearities.tanh)
    return net
def build_network(self, vocab_size, doc_var, query_var, docmask_var,
                  qmask_var, W_init):

    l_docin = L.InputLayer(shape=(None, None, 1), input_var=doc_var)
    l_qin = L.InputLayer(shape=(None, None, 1), input_var=query_var)
    l_docmask = L.InputLayer(shape=(None, None), input_var=docmask_var)
    l_qmask = L.InputLayer(shape=(None, None), input_var=qmask_var)
    l_docembed = L.EmbeddingLayer(l_docin, input_size=vocab_size,
                                  output_size=EMBED_DIM, W=W_init)
    l_qembed = L.EmbeddingLayer(l_qin, input_size=vocab_size,
                                output_size=EMBED_DIM, W=l_docembed.W)

    l_fwd_doc = L.GRULayer(l_docembed, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                           mask_input=l_docmask, gradient_steps=GRAD_STEPS,
                           precompute_input=True)
    l_bkd_doc = L.GRULayer(l_docembed, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                           mask_input=l_docmask, gradient_steps=GRAD_STEPS,
                           precompute_input=True, backwards=True)
    l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)

    l_fwd_q = L.GRULayer(l_qembed, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                         mask_input=l_qmask, gradient_steps=GRAD_STEPS,
                         precompute_input=True)
    l_bkd_q = L.GRULayer(l_qembed, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                         mask_input=l_qmask, gradient_steps=GRAD_STEPS,
                         precompute_input=True, backwards=True)

    l_fwd_q_slice = L.SliceLayer(l_fwd_q, -1, 1)
    l_bkd_q_slice = L.SliceLayer(l_bkd_q, 0, 1)
    l_q = L.ConcatLayer([l_fwd_q_slice, l_bkd_q_slice])

    d = L.get_output(l_doc)  # B x N x D
    q = L.get_output(l_q)    # B x D
    p = T.batched_dot(d, q)  # B x N
    pm = T.nnet.softmax(
        T.set_subtensor(
            T.alloc(-20., p.shape[0], p.shape[1])[docmask_var.nonzero()],
            p[docmask_var.nonzero()]))

    index = T.reshape(T.repeat(T.arange(p.shape[0]), p.shape[1]), p.shape)
    final = T.inc_subtensor(
        T.alloc(0., p.shape[0], vocab_size)[index, T.flatten(doc_var, outdim=2)],
        pm)
    #qv = T.flatten(query_var,outdim=2)
    #index2 = T.reshape(T.repeat(T.arange(qv.shape[0]),qv.shape[1]),qv.shape)
    #xx = index2[qmask_var.nonzero()]
    #yy = qv[qmask_var.nonzero()]
    #pV = T.set_subtensor(final[xx,yy], T.zeros_like(qv[xx,yy]))

    return final, l_doc, l_q
def build_network(self, K, vocab_size, W_init):

    l_docin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[0])
    l_doctokin = L.InputLayer(shape=(None, None), input_var=self.inps[1])
    l_qin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[2])
    l_qtokin = L.InputLayer(shape=(None, None), input_var=self.inps[3])
    l_docmask = L.InputLayer(shape=(None, None), input_var=self.inps[6])
    l_qmask = L.InputLayer(shape=(None, None), input_var=self.inps[7])
    l_tokin = L.InputLayer(shape=(None, MAX_WORD_LEN), input_var=self.inps[8])
    l_tokmask = L.InputLayer(shape=(None, MAX_WORD_LEN), input_var=self.inps[9])
    l_featin = L.InputLayer(shape=(None, None), input_var=self.inps[11])

    l_match_feat = L.InputLayer(shape=(None, None, None), input_var=self.inps[13])
    l_match_feat = L.EmbeddingLayer(l_match_feat, 2, 1)
    l_match_feat = L.ReshapeLayer(l_match_feat, (-1, [1], [2]))

    l_use_char = L.InputLayer(shape=(None, None, self.feat_cnt),
                              input_var=self.inps[14])
    l_use_char_q = L.InputLayer(shape=(None, None, self.feat_cnt),
                                input_var=self.inps[15])

    doc_shp = self.inps[1].shape
    qry_shp = self.inps[3].shape

    l_docembed = L.EmbeddingLayer(l_docin, input_size=vocab_size,
                                  output_size=self.embed_dim, W=W_init)  # B x N x 1 x DE
    l_doce = L.ReshapeLayer(
        l_docembed, (doc_shp[0], doc_shp[1], self.embed_dim))  # B x N x DE
    l_qembed = L.EmbeddingLayer(l_qin, input_size=vocab_size,
                                output_size=self.embed_dim, W=l_docembed.W)

    if self.train_emb == 0:
        l_docembed.params[l_docembed.W].remove('trainable')
        l_qembed.params[l_qembed.W].remove('trainable')

    l_qembed = L.ReshapeLayer(
        l_qembed, (qry_shp[0], qry_shp[1], self.embed_dim))  # B x N x DE
    l_fembed = L.EmbeddingLayer(l_featin, input_size=2, output_size=2)  # B x N x 2

    # char embeddings
    if self.use_chars:
        # ====== concatenation ======
        # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 2*self.char_dim) # T x L x D
        # l_fgru = L.GRULayer(l_lookup, self.char_dim, grad_clipping=GRAD_CLIP,
        #                     mask_input=l_tokmask, gradient_steps=GRAD_STEPS,
        #                     precompute_input=True, only_return_final=True)
        # l_bgru = L.GRULayer(l_lookup, 2*self.char_dim, grad_clipping=GRAD_CLIP,
        #                     mask_input=l_tokmask, gradient_steps=GRAD_STEPS,
        #                     precompute_input=True, backwards=True,
        #                     only_return_final=True) # T x 2D
        # l_fwdembed = L.DenseLayer(l_fgru, self.embed_dim/2, nonlinearity=None) # T x DE/2
        # l_bckembed = L.DenseLayer(l_bgru, self.embed_dim/2, nonlinearity=None) # T x DE/2
        # l_embed = L.ElemwiseSumLayer([l_fwdembed, l_bckembed], coeffs=1)
        # l_docchar_embed = IndexLayer([l_doctokin, l_embed]) # B x N x DE/2
        # l_qchar_embed = IndexLayer([l_qtokin, l_embed]) # B x Q x DE/2
        # l_doce = L.ConcatLayer([l_doce, l_docchar_embed], axis=2)
        # l_qembed = L.ConcatLayer([l_qembed, l_qchar_embed], axis=2)

        # ====== bidir feat concat ======
        # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
        # l_fgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
        # l_bgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True, backwards = True)
        # l_char_gru = L.ElemwiseSumLayer([l_fgru, l_bgru])
        # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
        # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])
        # l_doce = L.ConcatLayer([l_use_char, l_docchar_embed, l_doce], axis = 2)
        # l_qembed = L.ConcatLayer([l_use_char_q, l_qchar_embed, l_qembed], axis = 2)

        # ====== char concat ======
        # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
        # l_char_gru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
        # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
        # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])
        # l_doce = L.ConcatLayer([l_docchar_embed, l_doce], axis = 2)
        # l_qembed = L.ConcatLayer([l_qchar_embed, l_qembed], axis = 2)

        # ====== feat concat ======
        # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
        # l_char_gru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
        # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
        # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])
        # l_doce = L.ConcatLayer([l_use_char, l_docchar_embed, l_doce], axis = 2)
        # l_qembed = L.ConcatLayer([l_use_char_q, l_qchar_embed, l_qembed], axis = 2)

        # ====== gating ======
        # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
        # l_char_gru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
        # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
        # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])
        # l_doce = GateDymLayer([l_use_char, l_docchar_embed, l_doce])
        # l_qembed = GateDymLayer([l_use_char_q, l_qchar_embed, l_qembed])

        # ====== tie gating ======
        # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
        # l_char_gru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
        # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
        # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])
        # l_doce = GateDymLayer([l_use_char, l_docchar_embed, l_doce])
        # l_qembed = GateDymLayer([l_use_char_q, l_qchar_embed, l_qembed], W = l_doce.W, b = l_doce.b)

        # ====== scalar gating ======
        # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
        # l_char_gru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
        # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
        # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])
        # l_doce = ScalarDymLayer([l_use_char, l_docchar_embed, l_doce])
        # l_qembed = ScalarDymLayer([l_use_char_q, l_qchar_embed, l_qembed])

        # ====== dibirectional gating ======
        # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
        # l_fgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
        # l_bgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True, backwards = True)
        # l_char_gru = L.ElemwiseSumLayer([l_fgru, l_bgru])
        # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
        # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])
        # l_doce = GateDymLayer([l_use_char, l_docchar_embed, l_doce])
        # l_qembed = GateDymLayer([l_use_char_q, l_qchar_embed, l_qembed])

        # ====== gate + concat ======
        l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
        l_char_gru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping=GRAD_CLIP,
                                mask_input=l_tokmask, gradient_steps=GRAD_STEPS,
                                precompute_input=True, only_return_final=True)
        l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
        l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])

        l_doce = GateDymLayer([l_use_char, l_docchar_embed, l_doce])
        l_qembed = GateDymLayer([l_use_char_q, l_qchar_embed, l_qembed])

        l_doce = L.ConcatLayer([l_use_char, l_doce], axis=2)
        l_qembed = L.ConcatLayer([l_use_char_q, l_qembed], axis=2)

        # ====== bidirectional gate + concat ======
        # l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 32)
        # l_fgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True)
        # l_bgru = L.GRULayer(l_lookup, self.embed_dim, grad_clipping = GRAD_CLIP, mask_input = l_tokmask, gradient_steps = GRAD_STEPS, precompute_input = True, only_return_final = True, backwards = True)
        # l_char_gru = L.ElemwiseSumLayer([l_fgru, l_bgru])
        # l_docchar_embed = IndexLayer([l_doctokin, l_char_gru])
        # l_qchar_embed = IndexLayer([l_qtokin, l_char_gru])
        # l_doce = GateDymLayer([l_use_char, l_docchar_embed, l_doce])
        # l_qembed = GateDymLayer([l_use_char_q, l_qchar_embed, l_qembed])
        # l_doce = L.ConcatLayer([l_use_char, l_doce], axis = 2)
        # l_qembed = L.ConcatLayer([l_use_char_q, l_qembed], axis = 2)

    attentions = []
    if self.save_attn:
        l_m = PairwiseInteractionLayer([l_doce, l_qembed])
        attentions.append(L.get_output(l_m, deterministic=True))

    for i in range(K - 1):
        l_fwd_doc_1 = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP,
                                 mask_input=l_docmask, gradient_steps=GRAD_STEPS,
                                 precompute_input=True)
        l_bkd_doc_1 = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP,
                                 mask_input=l_docmask, gradient_steps=GRAD_STEPS,
                                 precompute_input=True, backwards=True)
        l_doc_1 = L.concat([l_fwd_doc_1, l_bkd_doc_1], axis=2)  # B x N x DE

        l_fwd_q_1 = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP,
                               mask_input=l_qmask, gradient_steps=GRAD_STEPS,
                               precompute_input=True)
        l_bkd_q_1 = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP,
                               mask_input=l_qmask, gradient_steps=GRAD_STEPS,
                               precompute_input=True, backwards=True)
        l_q_c_1 = L.ConcatLayer([l_fwd_q_1, l_bkd_q_1], axis=2)  # B x Q x DE

        l_doce = MatrixAttentionLayer([l_doc_1, l_q_c_1, l_qmask, l_match_feat])
        # l_doce = MatrixAttentionLayer([l_doc_1, l_q_c_1, l_qmask])

        # === begin GA ===
        # l_m = PairwiseInteractionLayer([l_doc_1, l_q_c_1])
        # l_doc_2_in = GatedAttentionLayer([l_doc_1, l_q_c_1, l_m], mask_input=self.inps[7])
        # l_doce = L.dropout(l_doc_2_in, p=self.dropout) # B x N x DE
        # === end GA ===
        # if self.save_attn:
        #     attentions.append(L.get_output(l_m, deterministic=True))

    if self.use_feat:
        l_doce = L.ConcatLayer([l_doce, l_fembed], axis=2)  # B x N x DE+2

    l_fwd_doc = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP,
                           mask_input=l_docmask, gradient_steps=GRAD_STEPS,
                           precompute_input=True)
    l_bkd_doc = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP,
                           mask_input=l_docmask, gradient_steps=GRAD_STEPS,
                           precompute_input=True, backwards=True)
    l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)

    l_fwd_q = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP,
                         mask_input=l_qmask, gradient_steps=GRAD_STEPS,
                         precompute_input=True, only_return_final=False)
    l_bkd_q = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP,
                         mask_input=l_qmask, gradient_steps=GRAD_STEPS,
                         precompute_input=True, backwards=True,
                         only_return_final=False)
    l_q = L.ConcatLayer([l_fwd_q, l_bkd_q], axis=2)  # B x Q x 2D

    if self.save_attn:
        l_m = PairwiseInteractionLayer([l_doc, l_q])
        attentions.append(L.get_output(l_m, deterministic=True))

    l_prob = AttentionSumLayer([l_doc, l_q], self.inps[4], self.inps[12],
                               mask_input=self.inps[10])
    final = L.get_output(l_prob)
    final_v = L.get_output(l_prob, deterministic=True)

    return final, final_v, l_prob, l_docembed.W, attentions
def construct_unet(channels=1, no_f_base=8, f_size=3, dropout=False, bs=None,
                   class_nums=2, pad="same",
                   nonlinearity=lasagne.nonlinearities.rectify,
                   input_dim=[512, 512]):

    net = {}
    net["input"] = InputLayer(shape=(bs, channels, input_dim[0], input_dim[1]))

    # Moving downwards the U-shape. Simplified:
    net["conv_down11"] = Conv2DLayer(net["input"], no_f_base, f_size, pad=pad,
                                     nonlinearity=nonlinearity,
                                     W=lasagne.init.HeNormal(gain='relu'))
    net["conv_down12"] = Conv2DLayer(net["conv_down11"], no_f_base, f_size, pad=pad,
                                     nonlinearity=nonlinearity,
                                     W=lasagne.init.HeNormal(gain='relu'))
    net["pool1"] = Pool2DLayer(net["conv_down12"], pool_size=2)

    net["conv_down21"] = Conv2DLayer(net["pool1"], no_f_base*2, f_size, pad=pad,
                                     nonlinearity=nonlinearity,
                                     W=lasagne.init.HeNormal(gain='relu'))
    net["conv_down22"] = Conv2DLayer(net["conv_down21"], no_f_base*2, f_size, pad=pad,
                                     nonlinearity=nonlinearity,
                                     W=lasagne.init.HeNormal(gain='relu'))
    net["pool2"] = Pool2DLayer(net["conv_down22"], pool_size=2)

    net["conv_down31"] = Conv2DLayer(net["pool2"], no_f_base*4, f_size, pad=pad,
                                     nonlinearity=nonlinearity,
                                     W=lasagne.init.HeNormal(gain='relu'))
    net["conv_down32"] = Conv2DLayer(net["conv_down31"], no_f_base*4, f_size, pad=pad,
                                     nonlinearity=nonlinearity,
                                     W=lasagne.init.HeNormal(gain='relu'))
    net["pool3"] = Pool2DLayer(net["conv_down32"], pool_size=2)

    net["conv_down41"] = Conv2DLayer(net["pool3"], no_f_base*8, f_size, pad=pad,
                                     nonlinearity=nonlinearity,
                                     W=lasagne.init.HeNormal(gain='relu'))
    net["conv_down42"] = Conv2DLayer(net["conv_down41"], no_f_base*8, f_size, pad=pad,
                                     nonlinearity=nonlinearity,
                                     W=lasagne.init.HeNormal(gain='relu'))
    to_drop1 = net["pool4"] = Pool2DLayer(net["conv_down42"], pool_size=2)

    if dropout:
        to_drop1 = DropoutLayer(to_drop1, p=0.5)

    # vvvv bottom vvvv
    net["conv_bottom1"] = Conv2DLayer(to_drop1, no_f_base*16, f_size, pad=pad,
                                      nonlinearity=nonlinearity,
                                      W=lasagne.init.HeNormal(gain='relu'))
    net["conv_bottom2"] = Conv2DLayer(net["conv_bottom1"], no_f_base*16, f_size, pad=pad,
                                      nonlinearity=nonlinearity,
                                      W=lasagne.init.HeNormal(gain='relu'))
    net["deconv_bottom1"] = Deconv2DLayer(net["conv_bottom2"], no_f_base*8, 2, 2)
    # ^^^^ bottom ^^^^

    # Moving upwards the U-shape. Simplified:
    net["concat1"] = concat([net["deconv_bottom1"], net["conv_down42"]],
                            cropping=(None, None, "center", "center"))
    net["conv_up11"] = Conv2DLayer(net["concat1"], no_f_base*8, f_size, pad=pad,
                                   nonlinearity=nonlinearity,
                                   W=lasagne.init.HeNormal(gain='relu'))
    net["conv_up12"] = Conv2DLayer(net["conv_up11"], no_f_base*8, f_size, pad=pad,
                                   nonlinearity=nonlinearity,
                                   W=lasagne.init.HeNormal(gain='relu'))
    net["deconv_up1"] = Deconv2DLayer(net["conv_up12"], no_f_base*4, 2, 2)

    net["concat2"] = concat([net["deconv_up1"], net["conv_down32"]],
                            cropping=(None, None, "center", "center"))
    net["conv_up21"] = Conv2DLayer(net["concat2"], no_f_base*4, f_size, pad=pad,
                                   nonlinearity=nonlinearity,
                                   W=lasagne.init.HeNormal(gain='relu'))
    net["conv_up22"] = Conv2DLayer(net["conv_up21"], no_f_base*4, f_size, pad=pad,
                                   nonlinearity=nonlinearity,
                                   W=lasagne.init.HeNormal(gain='relu'))
    net["deconv_up2"] = Deconv2DLayer(net["conv_up22"], no_f_base*2, 2, 2)

    net["concat3"] = concat([net["deconv_up2"], net["conv_down22"]],
                            cropping=(None, None, "center", "center"))
    net["conv_up31"] = Conv2DLayer(net["concat3"], no_f_base*2, f_size, pad=pad,
                                   nonlinearity=nonlinearity,
                                   W=lasagne.init.HeNormal(gain='relu'))
    net["conv_up32"] = Conv2DLayer(net["conv_up31"], no_f_base*2, f_size, pad=pad,
                                   nonlinearity=nonlinearity,
                                   W=lasagne.init.HeNormal(gain='relu'))
    net["deconv_up3"] = Deconv2DLayer(net["conv_up32"], no_f_base, 2, 2)

    net["concat4"] = concat([net["deconv_up3"], net["conv_down12"]],
                            cropping=(None, None, "center", "center"))
    net["conv_up41"] = Conv2DLayer(net["concat4"], no_f_base, f_size, pad=pad,
                                   nonlinearity=nonlinearity,
                                   W=lasagne.init.HeNormal(gain='relu'))
    net["conv_up42"] = Conv2DLayer(net["conv_up41"], no_f_base, f_size, pad=pad,
                                   nonlinearity=nonlinearity,
                                   W=lasagne.init.HeNormal(gain='relu'))

    # Class layer: Work around standard softmax bc. it doesn't work with tensor4/3.
    # Hence, we reshape and feed it to an external Nonlinearity layer.
    # net["class_ns"] is the output in image-related shape.
    net["out"] = Conv2DLayer(net["conv_up42"], class_nums, 1, nonlinearity=None,
                             W=lasagne.init.HeNormal(gain='relu'))
    net["layer_shuffle_dim"] = DimshuffleLayer(net["out"], (1, 0, 2, 3))
    net["reshape_layer"] = ReshapeLayer(net["layer_shuffle_dim"], (class_nums, -1))
    net["layer_shuffle_dim2"] = DimshuffleLayer(net["reshape_layer"], (1, 0))
    # Flattened output to be able to feed it to lasagne.objectives.categorical_crossentropy.
    net["out_optim"] = NonlinearityLayer(net["layer_shuffle_dim2"],
                                         nonlinearity=lasagne.nonlinearities.softmax)

    return net
def define_net(input_var):
    net = {}
    net['data'] = InputLayer(shape=(None, 3, IMAGE_SHAPE[0], IMAGE_SHAPE[1]),
                             input_var=input_var)
    net['patch'] = sample_layer.Sample2DLayer(net['data'], 5, (227, 227),
                                              pad=False)
    # conv1
    net['conv1'] = Conv2DLayer(net['patch'], num_filters=96,
                               filter_size=(11, 11), stride=4,
                               nonlinearity=lasagne.nonlinearities.rectify)
    # pool1
    net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=(3, 3), stride=2)
    # norm1
    net['norm1'] = LocalResponseNormalization2DLayer(net['pool1'], n=5,
                                                     alpha=0.0001 / 5.0,
                                                     beta=0.75, k=1)
    # before conv2 split the data
    net['conv2_data1'] = SliceLayer(net['norm1'], indices=slice(0, 48), axis=1)
    net['conv2_data2'] = SliceLayer(net['norm1'], indices=slice(48, 96), axis=1)
    # now do the convolutions
    net['conv2_part1'] = Conv2DLayer(net['conv2_data1'], num_filters=128,
                                     filter_size=(5, 5), pad=2)
    net['conv2_part2'] = Conv2DLayer(net['conv2_data2'], num_filters=128,
                                     filter_size=(5, 5), pad=2)
    # now combine
    net['conv2'] = concat((net['conv2_part1'], net['conv2_part2']), axis=1)
    # pool2
    net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=(3, 3), stride=2)
    # norm2
    net['norm2'] = LocalResponseNormalization2DLayer(net['pool2'], n=5,
                                                     alpha=0.0001 / 5.0,
                                                     beta=0.75, k=1)
    # conv3
    # no group
    net['conv3'] = Conv2DLayer(net['norm2'], num_filters=384,
                               filter_size=(3, 3), pad=1)
    # conv4
    # group = 2
    net['conv4_data1'] = SliceLayer(net['conv3'], indices=slice(0, 192), axis=1)
    net['conv4_data2'] = SliceLayer(net['conv3'], indices=slice(192, 384), axis=1)
    net['conv4_part1'] = Conv2DLayer(net['conv4_data1'], num_filters=192,
                                     filter_size=(3, 3), pad=1)
    net['conv4_part2'] = Conv2DLayer(net['conv4_data2'], num_filters=192,
                                     filter_size=(3, 3), pad=1)
    net['conv4'] = concat((net['conv4_part1'], net['conv4_part2']), axis=1)
    # conv5
    # group 2
    net['conv5_data1'] = SliceLayer(net['conv4'], indices=slice(0, 192), axis=1)
    net['conv5_data2'] = SliceLayer(net['conv4'], indices=slice(192, 384), axis=1)
    net['conv5_part1'] = Conv2DLayer(net['conv5_data1'], num_filters=128,
                                     filter_size=(3, 3), pad=1)
    net['conv5_part2'] = Conv2DLayer(net['conv5_data2'], num_filters=128,
                                     filter_size=(3, 3), pad=1)
    net['conv5'] = concat((net['conv5_part1'], net['conv5_part2']), axis=1)
    # pool 5
    net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=(3, 3), stride=2)
    # fc6
    net['fc6'] = DenseLayer(net['pool5'], num_units=4096,
                            nonlinearity=lasagne.nonlinearities.rectify)
    # fc7
    net['fc7'] = DenseLayer(net['fc6'], num_units=4096,
                            nonlinearity=lasagne.nonlinearities.rectify)
    # fc8
    net['out'] = DenseLayer(net['fc7'], num_units=1,
                            nonlinearity=lasagne.nonlinearities.linear)

    # print ('Objective layer shapes:')
    # print (lasagne.layers.get_output_shape(net['pool5']))
    # # fc6
    # net['fc6'] = Conv2DLayer(
    #     net['pool5'], num_filters=4096, filter_size=(6, 6),
    #     nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False)
    # print (lasagne.layers.get_output_shape(net['fc6']))
    # # fc7
    # net['fc7'] = Conv2DLayer(
    #     net['fc6'],
    #     num_filters=4096, filter_size=(1, 1),
    #     nonlinearity=lasagne.nonlinearities.rectify)
    # print (lasagne.layers.get_output_shape(net['fc7']))
    # # fc8
    # net['out'] = Conv2DLayer(
    #     net['fc7'],
    #     num_filters=1, filter_size=(1, 1),
    #     nonlinearity=lasagne.nonlinearities.linear)
    # print (lasagne.layers.get_output_shape(net['out']))

    return net
def get_params_internal(self, **tags):  # this gives ALL the vars (not the params values)
    return L.get_all_params(  # this lasagne function also returns all var below the passed layers
        L.concat(self._output_layers),
        **tags)
def build_generator(input_var=None, batch_size=None, n_timesteps=128,
                    alphabet_size=128):
    from lasagne.layers import InputLayer, DenseLayer, LSTMLayer
    from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer
    from lasagne.layers import ExpressionLayer, NonlinearityLayer
    from lasagne.layers import ReshapeLayer, DimshuffleLayer, Upscale2DLayer, concat
    try:
        from lasagne.layers.dnn import batch_norm_dnn as batch_norm
    except ImportError:
        from lasagne.layers import batch_norm
    from lasagne.nonlinearities import sigmoid, tanh, softmax

    """
    layer = InputLayer(shape=(batch_size, 100), input_var=input_var)

    print("MNIST generator")
    layer = batch_norm(DenseLayer(layer, 1024))
    layer = batch_norm(DenseLayer(layer, 1024*8*8))
    layer = ReshapeLayer(layer, ([0], 1024, 8, 8))
    layer = batch_norm(Deconv2DLayer(
        layer, 128, 5, stride=2, crop='same', output_size=16))
    layer = batch_norm(Deconv2DLayer(
        layer, 128, 5, stride=2, crop='same', output_size=32))
    layer = batch_norm(Deconv2DLayer(
        layer, 128, 5, stride=2, crop='same', output_size=64))
    layer = batch_norm(Deconv2DLayer(
        layer, 1, 5, stride=2, crop='same', output_size=128, nonlinearity=tanh))

    # Crepe
    print("Crepe generator")
    layer = batch_norm(DenseLayer(layer, 1024))
    layer = batch_norm(DenseLayer(layer, 1024*13))
    layer = ReshapeLayer(layer, ([0], 1024, 1, 13))
    layer = batch_norm(Deconv2DLayer(
        layer, 512, (1, 4), stride=2, crop=0))
    layer = batch_norm(Deconv2DLayer(
        layer, 1024, (1, 5), stride=2, crop=0))
    layer = batch_norm(Deconv2DLayer(
        layer, 2048, (1, 5), stride=2, crop=0))
    layer = Deconv2DLayer(
        layer, 1, (128, 8), stride=1, crop=0, nonlinearity=tanh)
    """

    # LSTM
    # input layers
    layer = InputLayer(shape=(batch_size, n_timesteps, 100), input_var=input_var)

    # recurrent layers for bidirectional network
    l_forward_noise = LSTMLayer(
        layer, 64, learn_init=True, grad_clipping=None, only_return_final=False)
    l_backward_noise = LSTMLayer(
        layer, 64, learn_init=True, grad_clipping=None, only_return_final=False,
        backwards=True)
    layer = concat([l_forward_noise, l_backward_noise], axis=2)

    layer = DenseLayer(layer, 1024, num_leading_axes=2)
    layer = DenseLayer(layer, alphabet_size, num_leading_axes=2)
    layer = ReshapeLayer(layer, (batch_size*n_timesteps, -1))
    layer = NonlinearityLayer(layer, softmax)
    layer = ReshapeLayer(layer, (batch_size, n_timesteps, -1))
    layer = DimshuffleLayer(layer, (0, 'x', 2, 1))
    layer = ExpressionLayer(layer, lambda X: X*2 - 1)

    print("Generator output:", layer.output_shape)
    return layer
# encoder
l_encoder1 = layers.DenseLayer(l_in, num_units=num_hidden_units)
l_encoder2 = layers.DenseLayer(l_encoder1, num_units=num_hidden_units)
l_encoder3 = layers.DenseLayer(l_encoder2, num_units=num_hidden_units)
l_encoder4 = layers.DenseLayer(l_encoder3, num_units=num_hidden_units)

# learned representation
l_observed = layers.DenseLayer(l_encoder4, num_units=output_dim,
                               nonlinearity=T.nnet.softmax)
l_latent = layers.DenseLayer(l_encoder4, num_units=latent_size,
                             nonlinearity=None)  # linear
l_representation = layers.concat([l_observed, l_latent])

# decoder
l_decoder1 = layers.DenseLayer(l_representation, num_units=num_hidden_units)
l_decoder2 = layers.DenseLayer(l_decoder1, num_units=num_hidden_units)
l_decoder3 = layers.DenseLayer(l_decoder2, num_units=num_hidden_units)
l_decoder4 = layers.DenseLayer(l_decoder3, num_units=num_hidden_units)
l_decoder_out = layers.DenseLayer(l_decoder4, num_units=input_dim,
                                  nonlinearity=nonlinearities.sigmoid)

x_to_z = LightweightModel([l_in], [l_latent])
x_to_y = LightweightModel([l_in], [l_observed])
z_to_x = LightweightModel([l_observed, l_latent], [l_decoder_out])

model = Model()
model.x_to_z = x_to_z
model.x_to_y = x_to_y
def build_model(vocab_size, doc_var, qry_var, doc_mask_var, qry_mask_var,
                W_init=lasagne.init.Normal()):

    l_doc_in = L.InputLayer(shape=(None, None, 1), input_var=doc_var)
    l_qry_in = L.InputLayer(shape=(None, None, 1), input_var=qry_var)

    l_doc_embed = L.EmbeddingLayer(l_doc_in, vocab_size, EMBED_DIM, W=W_init)
    l_qry_embed = L.EmbeddingLayer(l_qry_in, vocab_size, EMBED_DIM,
                                   W=l_doc_embed.W)

    l_doc_mask = L.InputLayer(shape=(None, None), input_var=doc_mask_var)
    l_qry_mask = L.InputLayer(shape=(None, None), input_var=qry_mask_var)

    l_doc_fwd = L.LSTMLayer(l_doc_embed, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                            mask_input=l_doc_mask, gradient_steps=GRAD_STEPS,
                            precompute_input=True)
    l_doc_bkd = L.LSTMLayer(l_doc_embed, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                            mask_input=l_doc_mask, gradient_steps=GRAD_STEPS,
                            precompute_input=True, backwards=True)
    l_qry_fwd = L.LSTMLayer(l_qry_embed, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                            mask_input=l_qry_mask, gradient_steps=GRAD_STEPS,
                            precompute_input=True)
    l_qry_bkd = L.LSTMLayer(l_qry_embed, NUM_HIDDEN, grad_clipping=GRAD_CLIP,
                            mask_input=l_qry_mask, gradient_steps=GRAD_STEPS,
                            precompute_input=True, backwards=True)

    l_doc_fwd_slice = L.SliceLayer(l_doc_fwd, -1, 1)
    l_doc_bkd_slice = L.SliceLayer(l_doc_bkd, 0, 1)
    l_qry_fwd_slice = L.SliceLayer(l_qry_fwd, -1, 1)
    l_qry_bkd_slice = L.SliceLayer(l_qry_bkd, 0, 1)

    r = L.DenseLayer(L.ElemwiseSumLayer([l_doc_fwd_slice, l_doc_bkd_slice]),
                     num_units=NUM_HIDDEN,
                     nonlinearity=lasagne.nonlinearities.tanh)
    u = L.DenseLayer(L.ElemwiseSumLayer([l_qry_fwd_slice, l_qry_bkd_slice]),
                     num_units=NUM_HIDDEN,
                     nonlinearity=lasagne.nonlinearities.tanh)

    g = L.DenseLayer(L.concat([r, u], axis=1), num_units=EMBED_DIM,
                     W=lasagne.init.GlorotNormal(),
                     nonlinearity=lasagne.nonlinearities.tanh)

    l_out = L.DenseLayer(g, num_units=vocab_size, W=l_doc_embed.W.T,
                         nonlinearity=lasagne.nonlinearities.softmax, b=None)

    return l_out
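# Sketch of wiring build_model into a prediction function (an assumption for
# illustration; EMBED_DIM, NUM_HIDDEN, GRAD_CLIP and GRAD_STEPS are the globals
# the snippet above relies on, and vocab_size here is arbitrary).
import theano
import theano.tensor as T
import lasagne.layers as L

doc_var, qry_var = T.itensor3('doc'), T.itensor3('qry')
doc_mask_var, qry_mask_var = T.matrix('doc_mask'), T.matrix('qry_mask')

l_out = build_model(vocab_size=10000, doc_var=doc_var, qry_var=qry_var,
                    doc_mask_var=doc_mask_var, qry_mask_var=qry_mask_var)
probs = L.get_output(l_out, deterministic=True)  # (batch, vocab_size)
predict_fn = theano.function([doc_var, qry_var, doc_mask_var, qry_mask_var],
                             probs)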
def get_actor(self, avg=False):
    suf = '_avg' if avg else ''

    iw = L.InputLayer(shape=(None, self.args.sw))  # (100, 24)
    ew = L.EmbeddingLayer(iw, self.args.vw, self.args.nw, name='ew' + suf,
                          W=HeNormal() if not avg else Constant())  # (100, 24, 256)
    ew.params[ew.W].remove('regularizable')

    if 'w' in self.args.freeze:
        ew.params[ew.W].remove('trainable')

    # for access from outside
    if not avg:
        self.Ew = ew.W

    # char embedding with CNN/LSTM
    ic = L.InputLayer(shape=(None, self.args.sw, self.args.max_len))  # (100, 24, 32)
    ec = self.get_char2word(ic, avg)  # (100, 24, 256)

    it = L.InputLayer(shape=(None, self.args.st))
    et = L.EmbeddingLayer(it, self.args.vt, self.args.nt, name='et' + suf,
                          W=HeNormal() if not avg else Constant())
    et.params[et.W].remove('regularizable')

    il = L.InputLayer(shape=(None, self.args.sl))
    el = L.EmbeddingLayer(il, self.args.vl, self.args.nl, name='el' + suf,
                          W=HeNormal() if not avg else Constant())
    el.params[el.W].remove('regularizable')

    to_concat = []
    if self.args.type == 'word':
        to_concat.append(ew)
    elif self.args.type == 'char':
        to_concat.append(ec)
    elif self.args.type == 'both':
        to_concat += [ew, ec]
    elif self.args.type == 'mix':
        to_concat.append(L.ElemwiseSumLayer([ew, ec]))

    if not self.args.untagged:
        to_concat.append(et)
    if not self.args.unlabeled:
        to_concat.append(el)

    x = L.concat(to_concat, axis=2)  # (100, 24, 64+16+16)

    # additional:
    # get the more compact representation of each token by its word, tag and label,
    # before putting into the hidden layer
    if self.args.squeeze:
        x = L.DenseLayer(x, num_units=self.args.squeeze, name='h0' + suf,
                         num_leading_axes=2,
                         W=HeNormal('relu') if not avg else Constant())  # (100, 24, 64)

    h1 = L.DenseLayer(x, num_units=self.args.nh1, name='h1' + suf,
                      W=HeNormal('relu') if not avg else Constant())  # (100, 512)
    h1 = L.dropout(h1, self.args.p1)
    h2 = L.DenseLayer(h1, num_units=self.args.nh2, name='h2' + suf,
                      W=HeNormal('relu') if not avg else Constant())  # (100, 256)
    h2 = L.dropout(h2, self.args.p2)
    h3 = L.DenseLayer(h2, num_units=self.args.nh3, name='h3' + suf,
                      W=HeNormal() if not avg else Constant(),
                      nonlinearity=softmax)  # (100, 125) num of actions

    return iw, ic, it, il, h3
def test_memory( game_title='SpaceInvaders-v0', n_parallel_games=3, replay_seq_len=2, ): """ :param game_title: name of atari game in Gym :param n_parallel_games: how many games we run in parallel :param replay_seq_len: how long is one replay session from a batch """ atari = gym.make(game_title) atari.reset() # Game Parameters n_actions = atari.action_space.n observation_shape = (None, ) + atari.observation_space.shape action_names = atari.get_action_meanings() del atari # ##### Agent observations # image observation at current tick goes here observation_layer = InputLayer(observation_shape, name="images input") # reshape to [batch, color, x, y] to allow for convolutional layers to work correctly observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2)) # Agent memory states memory_dict = OrderedDict([]) ###Window window_size = 3 # prev state input prev_window = InputLayer( (None, window_size) + tuple(observation_reshape.output_shape[1:]), name="previous window state") # our window window = WindowAugmentation(observation_reshape, prev_window, name="new window state") # pixel-wise maximum over the temporal window (to avoid flickering) window_max = ExpressionLayer(window, lambda a: a.max(axis=1), output_shape=(None, ) + window.output_shape[2:]) memory_dict[window] = prev_window ###Stack #prev stack stack_w, stack_h = 4, 5 stack_inputs = DenseLayer(observation_reshape, stack_w, name="prev_stack") stack_controls = DenseLayer(observation_reshape, 3, nonlinearity=lasagne.nonlinearities.softmax, name="prev_stack") prev_stack = InputLayer((None, stack_h, stack_w), name="previous stack state") stack = StackAugmentation(stack_inputs, prev_stack, stack_controls) memory_dict[stack] = prev_stack stack_top = lasagne.layers.SliceLayer(stack, 0, 1) ###RNN preset prev_rnn = InputLayer((None, 16), name="previous RNN state") new_rnn = RNNCell(prev_rnn, observation_reshape) memory_dict[new_rnn] = prev_rnn ###GRU preset prev_gru = InputLayer((None, 16), name="previous GRUcell state") new_gru = GRUCell(prev_gru, observation_reshape) memory_dict[new_gru] = prev_gru ###GRUmemorylayer prev_gru1 = InputLayer((None, 15), name="previous GRUcell state") new_gru1 = GRUMemoryLayer(15, observation_reshape, prev_gru1) memory_dict[new_gru1] = prev_gru1 #LSTM with peepholes prev_lstm0_cell = InputLayer( (None, 13), name="previous LSTMCell hidden state [with peepholes]") prev_lstm0_out = InputLayer( (None, 13), name="previous LSTMCell output state [with peepholes]") new_lstm0_cell, new_lstm0_out = LSTMCell( prev_lstm0_cell, prev_lstm0_out, input_or_inputs=observation_reshape, peepholes=True, name="newLSTM1 [with peepholes]") memory_dict[new_lstm0_cell] = prev_lstm0_cell memory_dict[new_lstm0_out] = prev_lstm0_out #LSTM without peepholes prev_lstm1_cell = InputLayer( (None, 14), name="previous LSTMCell hidden state [no peepholes]") prev_lstm1_out = InputLayer( (None, 14), name="previous LSTMCell output state [no peepholes]") new_lstm1_cell, new_lstm1_out = LSTMCell( prev_lstm1_cell, prev_lstm1_out, input_or_inputs=observation_reshape, peepholes=False, name="newLSTM1 [no peepholes]") memory_dict[new_lstm1_cell] = prev_lstm1_cell memory_dict[new_lstm1_out] = prev_lstm1_out ##concat everything for i in [flatten(window_max), stack_top, new_rnn, new_gru, new_gru1]: print(i.output_shape) all_memory = concat([ flatten(window_max), stack_top, new_rnn, new_gru, new_gru1, new_lstm0_out, new_lstm1_out, ]) # ##### Neural network body # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc # a 
simple lasagne network (try replacing with any other lasagne network and see what works best) nn = DenseLayer(all_memory, num_units=50, name='dense0') # Agent policy and action picking q_eval = DenseLayer(nn, num_units=n_actions, nonlinearity=lasagne.nonlinearities.linear, name="QEvaluator") # resolver resolver = EpsilonGreedyResolver(q_eval, epsilon=0.1, name="resolver") # agent agent = Agent(observation_layer, memory_dict, q_eval, resolver) # Since it's a single lasagne network, one can get it's weights, output, etc weights = lasagne.layers.get_all_params(resolver, trainable=True) # Agent step function print('compiling react') applier_fun = agent.get_react_function() # a nice pythonic interface def step(observation, prev_memories='zeros', batch_size=n_parallel_games): """ returns actions and new states given observation and prev state Prev state in default setup should be [prev window,]""" # default to zeros if prev_memories == 'zeros': prev_memories = [ np.zeros((batch_size, ) + tuple(mem.output_shape[1:]), dtype='float32') for mem in agent.agent_states ] res = applier_fun(np.array(observation), *prev_memories) action = res[0] memories = res[1:] return action, memories # # Create and manage a pool of atari sessions to play with pool = GamePool(game_title, n_parallel_games) observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50) print(np.array(action_names)[np.array(action_log)[:3, :5]]) # # experience replay pool # Create an environment with all default parameters env = SessionPoolEnvironment(observations=observation_layer, actions=resolver, agent_memories=agent.agent_states) def update_pool(env, pool, n_steps=100): """ a function that creates new sessions and ads them into the pool throwing the old ones away entirely for simplicity""" preceding_memory_states = list(pool.prev_memory_states) # get interaction sessions observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact( step, n_steps=n_steps) # load them into experience replay environment env.load_sessions(observation_tensor, action_tensor, reward_tensor, is_alive_tensor, preceding_memory_states) # load first sessions update_pool(env, pool, replay_seq_len) # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them. 
# ### Training via experience replay # get agent's Q-values obtained via experience replay _env_states, _observations, _memories, _imagined_actions, q_values_sequence = agent.get_sessions( env, session_length=replay_seq_len, batch_size=env.batch_size, optimize_experience_replay=True, ) # Evaluating loss function scaled_reward_seq = env.rewards # For SpaceInvaders, however, not scaling rewards is at least working elwise_mse_loss = qlearning.get_elementwise_objective( q_values_sequence, env.actions[0], scaled_reward_seq, env.is_alive, gamma_or_gammas=0.99, ) # compute mean over "alive" fragments mse_loss = elwise_mse_loss.sum() / env.is_alive.sum() # regularize network weights reg_l2 = regularize_network_params(resolver, l2) * 10**-4 loss = mse_loss + reg_l2 # Compute weight updates updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01) # mean session reward mean_session_reward = env.rewards.sum(axis=1).mean() # # Compile train and evaluation functions print('compiling') train_fun = theano.function([], [loss, mean_session_reward], updates=updates) evaluation_fun = theano.function( [], [loss, mse_loss, reg_l2, mean_session_reward]) print("I've compiled!") # # Training loop for epoch_counter in range(10): update_pool(env, pool, replay_seq_len) loss, avg_reward = train_fun() full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun() print("epoch %i,loss %.5f, rewards: %.5f " % (epoch_counter, full_loss, avg_reward_current)) print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
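# A minimal sketch of the masked-loss idea used in the training code above:
# average an elementwise error only over the "alive" steps of each session.
# Everything here is toy/illustrative; only theano and numpy are assumed.
import numpy as np
import theano
import theano.tensor as T

elwise_error = T.matrix('elementwise_error')  # [batch, time]
is_alive = T.matrix('is_alive')               # [batch, time], 1 while the session is active

# zero out finished steps, then normalize by the number of alive steps
masked_mean = (elwise_error * is_alive).sum() / is_alive.sum()
masked_mean_fn = theano.function([elwise_error, is_alive], masked_mean)

err = np.arange(6, dtype=theano.config.floatX).reshape(2, 3)
alive = np.array([[1, 1, 0], [1, 0, 0]], dtype=theano.config.floatX)
print(masked_mean_fn(err, alive))  # mean over the three alive entries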
def get_params_internal(self, **tags):
    # Returns ALL parameter variables (not their current values).
    # get_all_params collects every parameter of every layer below the
    # concatenated output layers, so nothing is missed or duplicated.
    return L.get_all_params(
        L.concat(self._output_layers),
        **tags
    )
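# A short sketch of why get_params_internal concatenates the output layers first:
# the concat layer's ancestors span the whole network, so a single
# get_all_params call collects every parameter exactly once. Layer names and
# sizes below are illustrative only.
import lasagne.layers as L

l_in = L.InputLayer((None, 10))
head_a = L.DenseLayer(l_in, 5, name='head_a')
head_b = L.DenseLayer(l_in, 3, name='head_b')

all_params = L.get_all_params(L.concat([head_a, head_b]))
trainable = L.get_all_params(L.concat([head_a, head_b]), trainable=True)
print([p.name for p in all_params])  # W and b of both heads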
def dense_fused_convnets(self, fusion_level, fusion_type, input_var1=None, input_var2=None, bottleneck_W=None, weights_dir=None): net = OrderedDict() net['input_rgb'] = InputLayer((None, 4, 128, 128), input_var=input_var1) layer = 0 for i in range(self._net_specs_dict['num_conv_layers']): # Add convolution layers net['conv_rgb{0:d}'.format(i + 1)] = Conv2DLayer( net.values()[layer], num_filters=self._net_specs_dict['num_conv_filters'][i], filter_size=(self._net_specs_dict['conv_filter_size'][i], ) * 2, pad='same') layer += 1 if self._net_specs_dict['num_conv_layers'] <= 2: # Add pooling layers net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer( net.values()[layer], pool_size=(3, 3)) layer += 1 else: if i < 4: if (i + 1) % 2 == 0: # Add pooling layers net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer( net.values()[layer], pool_size=(3, 3)) layer += 1 else: if (i + 1) == 7: # Add pooling layers net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer( net.values()[layer], pool_size=(3, 3)) layer += 1 # Fc-layers net['fc1_rgb'] = DenseLayer(net.values()[layer], self._net_specs_dict['num_fc_units'][0]) layer += 1 if fusion_level == 2: # Add dropout layer net['dropout1_rgb'] = dropout(net['fc1_rgb'], p=self._model_hp_dict['p']) layer += 1 net['fc2_rgb'] = DenseLayer( net['dropout1_rgb'], self._net_specs_dict['num_fc_units'][1]) layer += 1 net['input_depth'] = InputLayer((None, 1, 128, 128), input_var=input_var2) layer += 1 for i in range(self._net_specs_dict['num_conv_layers']): # Add convolution layers net['conv_depth{0:d}'.format(i + 1)] = Conv2DLayer( net.values()[layer], num_filters=self._net_specs_dict['num_conv_filters'][i], filter_size=(self._net_specs_dict['conv_filter_size'][i], ) * 2, pad='same') layer += 1 if self._net_specs_dict['num_conv_layers'] <= 2: # Add pooling layers net['pool_depth{0:d}'.format(i + 1)] = MaxPool2DLayer( net.values()[layer], pool_size=(3, 3)) layer += 1 else: if i < 4: if (i + 1) % 2 == 0: # Add pooling layers net['pool_depth{0:d}'.format(i+1)] =\ MaxPool2DLayer(net.values()[layer], pool_size=(3, 3)) layer += 1 else: if (i + 1) == 7: # Add pooling layers net['pool_depth{0:d}'.format(i+1)] =\ MaxPool2DLayer(net.values()[layer], pool_size=(3, 3)) layer += 1 # Fc-layers net['fc1_depth'] = DenseLayer(net.values()[layer], self._net_specs_dict['num_fc_units'][0]) layer += 1 if fusion_level == 2: # Add dropout layer net['dropout1_depth'] = dropout(net['fc1_depth'], p=self._model_hp_dict['p']) layer += 1 net['fc2_depth'] = DenseLayer( net['dropout1_depth'], self._net_specs_dict['num_fc_units'][1]) layer += 1 # Fuse ConvNets by fusion_level and fusion_type if fusion_type == self.MAX: net['merge'] =\ ElemwiseMergeLayer([net['fc%i_rgb' % fusion_level], net['fc%i_depth' % fusion_level]], T.maximum) layer += 1 elif fusion_type == self.SUM: net['merge'] =\ ElemwiseMergeLayer([net['fc%i_rgb' % fusion_level], net['fc%i_depth' % fusion_level]], T.add) layer += 1 elif fusion_type == self.CONCAT: net['merge'] = concat([ net['fc%i_rgb' % fusion_level], net['fc%i_depth' % fusion_level] ]) layer += 1 elif fusion_type == self.CONCATCONV: net['fc%i_rgb_res' % fusion_level] =\ reshape(net['fc%i_rgb' % fusion_level], ([0], 1, [1])) layer += 1 net['fc%i_depth_res' % fusion_level] =\ reshape(net['fc%i_depth' % fusion_level], ([0], 1, [1])) layer += 1 net['concat'] = concat([ net['fc%i_rgb_res' % fusion_level], net['fc%i_depth_res' % fusion_level] ]) layer += 1 net['merge_con'] = Conv1DLayer(net['concat'], num_filters=1, filter_size=(1, ), nonlinearity=None) layer += 1 net['merge'] = 
reshape(net['merge_con'], ([0], [2])) layer += 1 if fusion_level == 1: # Add dropout layer net['dropout1'] = dropout(net['merge'], p=self._model_hp_dict['p']) layer += 1 net['fc2'] = DenseLayer(net['dropout1'], self._net_specs_dict['num_fc_units'][1]) layer += 1 # Add dropout layer net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p']) layer += 1 else: # Add dropout layer net['dropout2'] = dropout(net['merge'], p=self._model_hp_dict['p']) layer += 1 # Add output layer(linear activation because it's regression) if bottleneck_W is not None: # Add bottleneck layer net['bottleneck'] = DenseLayer(net['dropout2'], 30) # Add output layer(linear activation because it's regression) net['output'] = DenseLayer( net['bottleneck'], 3 * self._num_joints, W=bottleneck_W[0:30], nonlinearity=lasagne.nonlinearities.tanh) else: # Add output layer(linear activation because it's regression) net['output'] = DenseLayer( net['dropout2'], 3 * self._num_joints, nonlinearity=lasagne.nonlinearities.tanh) if weights_dir is not None: lw = LoadWeights(weights_dir, net) lw.load_weights_numpy() return net
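# A compact sketch of the three dense-fusion variants used above (MAX, SUM,
# CONCAT) on two toy branches: ElemwiseMergeLayer applies the given Theano op
# element-wise, while concat stacks the features. Illustrative sizes only.
import theano.tensor as T
from lasagne.layers import InputLayer, DenseLayer, ElemwiseMergeLayer, concat

fc_rgb = DenseLayer(InputLayer((None, 8)), 16)
fc_depth = DenseLayer(InputLayer((None, 8)), 16)

fused_max = ElemwiseMergeLayer([fc_rgb, fc_depth], T.maximum)  # (None, 16)
fused_sum = ElemwiseMergeLayer([fc_rgb, fc_depth], T.add)      # (None, 16)
fused_cat = concat([fc_rgb, fc_depth], axis=1)                 # (None, 32)
print(fused_max.output_shape, fused_sum.output_shape, fused_cat.output_shape)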
def build_network(self, vocab_size, input_var, mask_var, docidx_var, docidx_mask, skip_connect=True): l_in = L.InputLayer(shape=(None, None, 1), input_var=input_var) l_mask = L.InputLayer(shape=(None, None), input_var=mask_var) l_embed = L.EmbeddingLayer(l_in, input_size=vocab_size, output_size=EMBED_DIM, W=self.params['W_emb']) l_embed_noise = L.dropout(l_embed, p=DROPOUT_RATE) # NOTE: Moved initialization of forget gate biases to init_params #forget_gate_1 = L.Gate(b=lasagne.init.Constant(3)) #forget_gate_2 = L.Gate(b=lasagne.init.Constant(3)) # NOTE: LSTM layer provided by Lasagne is slightly different from that used in DeepMind's paper. # In the paper the cell-to-* weights are not diagonal. # the 1st lstm layer in_gate = L.Gate(W_in=self.params['W_lstm1_xi'], W_hid=self.params['W_lstm1_hi'], W_cell=self.params['W_lstm1_ci'], b=self.params['b_lstm1_i'], nonlinearity=lasagne.nonlinearities.sigmoid) forget_gate = L.Gate(W_in=self.params['W_lstm1_xf'], W_hid=self.params['W_lstm1_hf'], W_cell=self.params['W_lstm1_cf'], b=self.params['b_lstm1_f'], nonlinearity=lasagne.nonlinearities.sigmoid) out_gate = L.Gate(W_in=self.params['W_lstm1_xo'], W_hid=self.params['W_lstm1_ho'], W_cell=self.params['W_lstm1_co'], b=self.params['b_lstm1_o'], nonlinearity=lasagne.nonlinearities.sigmoid) cell_gate = L.Gate(W_in=self.params['W_lstm1_xc'], W_hid=self.params['W_lstm1_hc'], W_cell=None, b=self.params['b_lstm1_c'], nonlinearity=lasagne.nonlinearities.tanh) l_fwd_1 = L.LSTMLayer(l_embed_noise, NUM_HIDDEN, ingate=in_gate, forgetgate=forget_gate, cell=cell_gate, outgate=out_gate, peepholes=True, grad_clipping=GRAD_CLIP, mask_input=l_mask, gradient_steps=GRAD_STEPS, precompute_input=True) # the 2nd lstm layer if skip_connect: # construct skip connection from the lookup table to the 2nd layer batch_size, seq_len, _ = input_var.shape # concatenate the last dimension of l_fwd_1 and embed l_fwd_1_shp = L.ReshapeLayer(l_fwd_1, (-1, NUM_HIDDEN)) l_embed_shp = L.ReshapeLayer(l_embed, (-1, EMBED_DIM)) to_next_layer = L.ReshapeLayer( L.concat([l_fwd_1_shp, l_embed_shp], axis=1), (batch_size, seq_len, NUM_HIDDEN + EMBED_DIM)) else: to_next_layer = l_fwd_1 to_next_layer_noise = L.dropout(to_next_layer, p=DROPOUT_RATE) in_gate = L.Gate(W_in=self.params['W_lstm2_xi'], W_hid=self.params['W_lstm2_hi'], W_cell=self.params['W_lstm2_ci'], b=self.params['b_lstm2_i'], nonlinearity=lasagne.nonlinearities.sigmoid) forget_gate = L.Gate(W_in=self.params['W_lstm2_xf'], W_hid=self.params['W_lstm2_hf'], W_cell=self.params['W_lstm2_cf'], b=self.params['b_lstm2_f'], nonlinearity=lasagne.nonlinearities.sigmoid) out_gate = L.Gate(W_in=self.params['W_lstm2_xo'], W_hid=self.params['W_lstm2_ho'], W_cell=self.params['W_lstm2_co'], b=self.params['b_lstm2_o'], nonlinearity=lasagne.nonlinearities.sigmoid) cell_gate = L.Gate(W_in=self.params['W_lstm2_xc'], W_hid=self.params['W_lstm2_hc'], W_cell=None, b=self.params['b_lstm2_c'], nonlinearity=lasagne.nonlinearities.tanh) l_fwd_2 = L.LSTMLayer(to_next_layer_noise, NUM_HIDDEN, ingate=in_gate, forgetgate=forget_gate, cell=cell_gate, outgate=out_gate, peepholes=True, grad_clipping=GRAD_CLIP, mask_input=l_mask, gradient_steps=GRAD_STEPS, precompute_input=True) # slice final states of both lstm layers l_fwd_1_slice = L.SliceLayer(l_fwd_1, -1, 1) l_fwd_2_slice = L.SliceLayer(l_fwd_2, -1, 1) # g will be used to score the words based on their embeddings g = L.DenseLayer(L.concat([l_fwd_1_slice, l_fwd_2_slice], axis=1), num_units=EMBED_DIM, W=self.params['W_dense'], b=self.params['b_dense'], 
nonlinearity=lasagne.nonlinearities.tanh) ## get outputs #g_out = L.get_output(g) # B x D #g_out_val = L.get_output(g, deterministic=True) # B x D ## compute softmax probs #probs,_ = theano.scan(fn=lambda g,d,dm,W: T.nnet.softmax(T.dot(g,W[d,:].T)*dm), # outputs_info=None, # sequences=[g_out,docidx_var,docidx_mask], # non_sequences=self.params['W_emb']) #predicted_probs = probs.reshape(docidx_var.shape) # B x N #probs_val,_ = theano.scan(fn=lambda g,d,dm,W: T.nnet.softmax(T.dot(g,W[d,:].T)*dm), # outputs_info=None, # sequences=[g_out_val,docidx_var,docidx_mask], # non_sequences=self.params['W_emb']) #predicted_probs_val = probs_val.reshape(docidx_var.shape) # B x N #return predicted_probs, predicted_probs_val # W is shared with the lookup table l_out = L.DenseLayer(g, num_units=vocab_size, W=self.params['W_emb'].T, nonlinearity=lasagne.nonlinearities.softmax, b=None) return l_out
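# A small sketch of the weight tying used in the output layer above: the softmax
# weights are the transposed embedding matrix, so vocabulary scores are dot
# products with the word embeddings and both layers share one parameter.
# Sizes are illustrative; only lasagne is assumed.
import lasagne
import lasagne.layers as L

VOCAB, EMBED = 100, 32
l_in = L.InputLayer((None, 1))
l_embed = L.EmbeddingLayer(l_in, input_size=VOCAB, output_size=EMBED)  # W: (VOCAB, EMBED)
l_feat = L.DenseLayer(L.flatten(l_embed), EMBED,
                      nonlinearity=lasagne.nonlinearities.tanh)
l_out = L.DenseLayer(l_feat, num_units=VOCAB, W=l_embed.W.T, b=None,
                     nonlinearity=lasagne.nonlinearities.softmax)
# gradients through l_out flow into the same shared variable as l_embed.W
print(l_out.output_shape)  # (None, 100)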
def fused_convnets(self, fusion_level, fusion_type, input_var1=None, input_var2=None, bottleneck_W=None, weights_dir=None): net = OrderedDict() net['input_rgb'] = InputLayer((None, 4, 128, 128), input_var=input_var1) layer = 0 for i in range(fusion_level): # Add convolution layers net['conv_rgb{0:d}'.format(i + 1)] = Conv2DLayer( net.values()[layer], num_filters=self._net_specs_dict['num_conv_filters'][i], filter_size=(self._net_specs_dict['conv_filter_size'][i], ) * 2, pad='same') layer += 1 if self._net_specs_dict['num_conv_layers'] <= 2 and\ i != fusion_level - 1: # Add pooling layers net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer( net.values()[layer], pool_size=(3, 3)) layer += 1 else: if i < 4: if (i + 1) % 2 == 0 and i != fusion_level - 1: # Add pooling layers net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer( net.values()[layer], pool_size=(3, 3)) layer += 1 else: if (i + 1) == 7 and i != fusion_level - 1: # Add pooling layers net['pool_rgb{0:d}'.format(i + 1)] = MaxPool2DLayer( net.values()[layer], pool_size=(3, 3)) layer += 1 net['input_depth'] = InputLayer((None, 1, 128, 128), input_var=input_var2) layer += 1 for i in range(fusion_level): # Add convolution layers net['conv_depth{0:d}'.format(i + 1)] = Conv2DLayer( net.values()[layer], num_filters=self._net_specs_dict['num_conv_filters'][i], filter_size=(self._net_specs_dict['conv_filter_size'][i], ) * 2, pad='same') layer += 1 if self._net_specs_dict['num_conv_layers'] <= 2 and\ i != fusion_level - 1: # Add pooling layers net['pool_depth{0:d}'.format(i + 1)] = MaxPool2DLayer( net.values()[layer], pool_size=(3, 3)) layer += 1 else: if i < 4: if (i + 1) % 2 == 0 and i != fusion_level - 1: # Add pooling layers net['pool_depth{0:d}'.format(i+1)] =\ MaxPool2DLayer(net.values()[layer], pool_size=(3, 3)) layer += 1 else: if (i + 1) == 7 and i != fusion_level - 1: # Add pooling layers net['pool_depth{0:d}'.format(i+1)] =\ MaxPool2DLayer(net.values()[layer], pool_size=(3, 3)) layer += 1 # Fuse ConvNets by fusion_level and fusion_type if fusion_type == self.MAX: net['merge'] =\ ElemwiseMergeLayer([net['conv_rgb{0:d}'.format(fusion_level)], net['conv_depth{0:d}'.format(fusion_level)] ], T.maximum) layer += 1 elif fusion_type == self.SUM: net['merge'] =\ ElemwiseMergeLayer([net['conv_rgb{0:d}'.format(fusion_level)], net['conv_depth{0:d}'.format(fusion_level)] ], T.add) layer += 1 elif fusion_type == self.CONCAT: net['merge'] = concat([ net['conv_rgb{0:d}'.format(fusion_level)], net['conv_depth{0:d}'.format(fusion_level)] ]) layer += 1 elif fusion_type == self.CONCATCONV: net['concat'] = concat([ net['conv_rgb{0:d}'.format(fusion_level)], net['conv_depth{0:d}'.format(fusion_level)] ]) layer += 1 net['merge'] = Conv2DLayer( net['concat'], num_filters=self._net_specs_dict['num_conv_filters'][ fusion_level - 1], filter_size=(1, 1), nonlinearity=None) layer += 1 # Max-pooling to the merged if fusion_level in [2, 4, 7]: net['pool_merged'] = MaxPool2DLayer(net['merge'], pool_size=(3, 3)) layer += 1 # Continue the rest of the convolutional part of the network, # if the fusion took place before the last convolutional layer, # else just connect the convolutional part with the fully connected # part if self._net_specs_dict['num_conv_layers'] > fusion_level: for i in range(fusion_level, self._net_specs_dict['num_conv_layers']): # Add convolution layers net['conv_merged{0:d}'.format(i + 1)] = Conv2DLayer( net.values()[layer], num_filters=self._net_specs_dict['num_conv_filters'][i], filter_size=(self._net_specs_dict['conv_filter_size'][i], ) * 2, 
pad='same') layer += 1 if self._net_specs_dict['num_conv_layers'] <= 2: # Add pooling layers net['pool_merged{0:d}'.format(i + 1)] = MaxPool2DLayer( net.values()[layer], pool_size=(3, 3)) layer += 1 else: if i < 4: if (i + 1) % 2 == 0: # Add pooling layers net['pool_merged{0:d}'.format(i+1)] =\ MaxPool2DLayer(net.values()[layer], pool_size=(3, 3)) layer += 1 else: if (i + 1) == 7: # Add pooling layers net['pool_merged{0:d}'.format(i+1)] =\ MaxPool2DLayer(net.values()[layer], pool_size=(3, 3)) layer += 1 # Fc-layers net['fc1'] = DenseLayer(net.values()[layer], self._net_specs_dict['num_fc_units'][0]) # Add dropout layer net['dropout1'] = dropout(net['fc1'], p=self._model_hp_dict['p']) net['fc2'] = DenseLayer(net['dropout1'], self._net_specs_dict['num_fc_units'][1]) # Add dropout layer net['dropout2'] = dropout(net['fc2'], p=self._model_hp_dict['p']) if bottleneck_W is not None: # Add bottleneck layer net['bottleneck'] = DenseLayer(net['dropout2'], 30) # Add output layer(linear activation because it's regression) net['output'] = DenseLayer( net['bottleneck'], 3 * self._num_joints, W=bottleneck_W[0:30], nonlinearity=lasagne.nonlinearities.tanh) else: # Add output layer(linear activation because it's regression) net['output'] = DenseLayer( net['dropout2'], 3 * self._num_joints, nonlinearity=lasagne.nonlinearities.tanh) if weights_dir is not None: lw = LoadWeights(weights_dir, net) lw.load_weights_numpy() return net
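# Sketch of the CONCATCONV convolutional fusion used above: concatenate the two
# branches along the channel axis, then learn a 1x1 convolution that mixes them
# back down to a single branch's width. Toy shapes; lasagne only.
from lasagne.layers import InputLayer, Conv2DLayer, concat

conv_rgb = Conv2DLayer(InputLayer((None, 4, 32, 32)), num_filters=16,
                       filter_size=(3, 3), pad='same')
conv_depth = Conv2DLayer(InputLayer((None, 1, 32, 32)), num_filters=16,
                         filter_size=(3, 3), pad='same')

merged = concat([conv_rgb, conv_depth])               # (None, 32, 32, 32)
fused = Conv2DLayer(merged, num_filters=16, filter_size=(1, 1),
                    nonlinearity=None)                # (None, 16, 32, 32)
print(fused.output_shape)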
def build_generator_lstm(input_var, noise_size, cond_var=None, n_conds=0, arch='lstm', with_BatchNorm=True, batch_size=None, n_steps=None): from lasagne.layers import ( InputLayer, DenseLayer, LSTMLayer, ReshapeLayer, DimshuffleLayer, concat, ExpressionLayer, NonlinearityLayer, DropoutLayer) from lasagne.init import Constant, HeNormal from lasagne.nonlinearities import rectify, softmax non_lin = rectify layer = InputLayer( shape=(batch_size, n_steps, noise_size), input_var=input_var) if cond_var is not None: layer = BatchNorm(DenseLayer( layer, noise_size, nonlinearity=non_lin), with_BatchNorm) layer = concat( [layer, InputLayer(shape=(batch_size, n_steps, n_conds), input_var=cond_var)]) if arch == 'lstm': layer = batch_norm(DenseLayer(layer, 1024, num_leading_axes=2)) # recurrent layers for bidirectional network l_forward_noise = BatchNorm(LSTMLayer( layer, 512, learn_init=True, grad_clipping=100, only_return_final=False), with_BatchNorm) l_backward_noise = BatchNorm(LSTMLayer( layer, 512, learn_init=True, grad_clipping=100, only_return_final=False, backwards=True), with_BatchNorm) layer = concat([l_forward_noise, l_backward_noise], axis=2) # dense layers layer = BatchNorm(DenseLayer( layer, 1024, num_leading_axes=2), with_BatchNorm) layer = BatchNorm(DenseLayer( layer, 128, num_leading_axes=2), with_BatchNorm) # reshape to apply softmax per timestep layer = ReshapeLayer(layer, (-1, [2])) layer = NonlinearityLayer(layer, softmax) layer = ReshapeLayer(layer, (input_var.shape[0], -1, [1])) layer = DimshuffleLayer(layer, (0, 'x', 2, 1)) layer = ExpressionLayer(layer, lambda X: X*2 - 1) elif arch == 1: # input layers l_in = InputLayer( shape=params['input_shape'], input_var=params['input_var'], name='g_in') l_noise = InputLayer( shape=params['noise_shape'], input_var=params['noise_var'], name='g_noise') l_cond = InputLayer( shape=params['cond_shape'], input_var=params['cond_var'], name='g_cond') l_mask = InputLayer( shape=params['mask_shape'], input_var=params['mask_var'], name='g_mask') # recurrent layers for bidirectional network l_forward_data = LSTMLayer( l_in, params['n_units'][0], mask_input=l_mask, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True, grad_clipping=params['grad_clip'], only_return_final=False, nonlinearity=params['non_linearities'][0]) l_forward_noise = LSTMLayer( l_noise, params['n_units'][0], mask_input=l_mask, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True, grad_clipping=params['grad_clip'], only_return_final=False, nonlinearity=params['non_linearities'][1]) l_backward_data = LSTMLayer( l_in, params['n_units'][0], mask_input=l_mask, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True, grad_clipping=params['grad_clip'], only_return_final=False, backwards=True, nonlinearity=params['non_linearities'][0]) l_backward_noise = LSTMLayer( l_noise, params['n_units'][0], mask_input=l_mask, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, learn_init=True, grad_clipping=params['grad_clip'], only_return_final=False, backwards=True, nonlinearity=params['non_linearities'][1]) # concatenate output of forward and backward layers l_lstm_concat = concat( [l_forward_data, l_forward_noise, l_backward_data, l_backward_noise], axis=2) # dense layer on output of data and noise lstms, w/dropout l_lstm_dense = DenseLayer( DropoutLayer(l_lstm_concat, p=0.5), num_units=params['n_units'][1], num_leading_axes=2, 
W=HeNormal(gain='relu'), b=Constant(0.1), nonlinearity=params['non_linearities'][2]) # batch norm for lstm dense # l_lstm_dense = lasagne.layer.BatchNorm(l_lstm_dense) # concatenate the lstm dense layer with the condition l_lstm_cond_concat = concat( [l_lstm_dense, l_cond], axis=2) # dense layer over the lstm dense output and the condition, w/dropout l_out = DenseLayer( DropoutLayer(l_lstm_cond_concat, p=0.5), num_units=params['n_units'][2], num_leading_axes=2, W=HeNormal(gain=1.0), b=Constant(0.1), nonlinearity=params['non_linearities'][3]) elif arch == 2: raise Exception("arch 2 not implemented") elif arch == 3: raise Exception("arch 3 not implemented") print("Generator output:", layer.output_shape) return layer
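# Sketch of the bidirectional-LSTM-plus-per-step-dense pattern from the 'lstm'
# branch above, on toy sizes (lasagne only): the two LSTM directions are
# concatenated on the feature axis, and num_leading_axes=2 makes DenseLayer act
# independently at every time step.
from lasagne.layers import InputLayer, LSTMLayer, DenseLayer, concat

noise = InputLayer((None, None, 100))                     # (batch, steps, features)
l_fwd = LSTMLayer(noise, 64, learn_init=True, grad_clipping=100)
l_bwd = LSTMLayer(noise, 64, learn_init=True, grad_clipping=100, backwards=True)
l_bidir = concat([l_fwd, l_bwd], axis=2)                  # (batch, steps, 128)
l_step_dense = DenseLayer(l_bidir, 128, num_leading_axes=2)
print(l_step_dense.output_shape)                          # (None, None, 128)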
def build_network(self, K, vocab_size, W_init): l_docin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[0]) l_doctokin = L.InputLayer(shape=(None, None), input_var=self.inps[1]) l_qin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[2]) l_qtokin = L.InputLayer(shape=(None, None), input_var=self.inps[3]) l_docmask = L.InputLayer(shape=(None, None), input_var=self.inps[6]) l_qmask = L.InputLayer(shape=(None, None), input_var=self.inps[7]) l_tokin = L.InputLayer(shape=(None, MAX_WORD_LEN), input_var=self.inps[8]) l_tokmask = L.InputLayer(shape=(None, MAX_WORD_LEN), input_var=self.inps[9]) l_featin = L.InputLayer(shape=(None, None), input_var=self.inps[11]) doc_shp = self.inps[1].shape qry_shp = self.inps[3].shape l_docembed = L.EmbeddingLayer(l_docin, input_size=vocab_size, output_size=self.embed_dim, W=W_init) # B x N x 1 x DE l_doce = L.ReshapeLayer( l_docembed, (doc_shp[0], doc_shp[1], self.embed_dim)) # B x N x DE l_qembed = L.EmbeddingLayer(l_qin, input_size=vocab_size, output_size=self.embed_dim, W=l_docembed.W) l_qembed = L.ReshapeLayer( l_qembed, (qry_shp[0], qry_shp[1], self.embed_dim)) # B x N x DE l_fembed = L.EmbeddingLayer(l_featin, input_size=2, output_size=2) # B x N x 2 if self.train_emb == 0: l_docembed.params[l_docembed.W].remove('trainable') # char embeddings if self.use_chars: l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, 2 * self.char_dim) # T x L x D l_fgru = L.GRULayer(l_lookup, self.char_dim, grad_clipping=GRAD_CLIP, mask_input=l_tokmask, gradient_steps=GRAD_STEPS, precompute_input=True, only_return_final=True) l_bgru = L.GRULayer(l_lookup, 2 * self.char_dim, grad_clipping=GRAD_CLIP, mask_input=l_tokmask, gradient_steps=GRAD_STEPS, precompute_input=True, backwards=True, only_return_final=True) # T x 2D l_fwdembed = L.DenseLayer(l_fgru, self.embed_dim / 2, nonlinearity=None) # T x DE/2 l_bckembed = L.DenseLayer(l_bgru, self.embed_dim / 2, nonlinearity=None) # T x DE/2 l_embed = L.ElemwiseSumLayer([l_fwdembed, l_bckembed], coeffs=1) l_docchar_embed = IndexLayer([l_doctokin, l_embed]) # B x N x DE/2 l_qchar_embed = IndexLayer([l_qtokin, l_embed]) # B x Q x DE/2 l_doce = L.ConcatLayer([l_doce, l_docchar_embed], axis=2) l_qembed = L.ConcatLayer([l_qembed, l_qchar_embed], axis=2) l_fwd_q = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP, mask_input=l_qmask, gradient_steps=GRAD_STEPS, precompute_input=True, only_return_final=False) l_bkd_q = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP, mask_input=l_qmask, gradient_steps=GRAD_STEPS, precompute_input=True, backwards=True, only_return_final=False) l_q = L.ConcatLayer([l_fwd_q, l_bkd_q]) # B x Q x 2D q = L.get_output(l_q) # B x Q x 2D q = q[T.arange(q.shape[0]), self.inps[12], :] # B x 2D l_qs = [l_q] for i in range(K - 1): l_fwd_doc_1 = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP, mask_input=l_docmask, gradient_steps=GRAD_STEPS, precompute_input=True) l_bkd_doc_1 = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP, mask_input=l_docmask, gradient_steps=GRAD_STEPS, precompute_input=True, \ backwards=True) l_doc_1 = L.concat([l_fwd_doc_1, l_bkd_doc_1], axis=2) # B x N x DE l_fwd_q_1 = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP, mask_input=l_qmask, gradient_steps=GRAD_STEPS, precompute_input=True) l_bkd_q_1 = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP, mask_input=l_qmask, gradient_steps=GRAD_STEPS, precompute_input=True, backwards=True) l_q_c_1 = L.ConcatLayer([l_fwd_q_1, l_bkd_q_1], axis=2) # B x Q x DE l_qs.append(l_q_c_1) qd = 
L.get_output(l_q_c_1) # B x Q x DE dd = L.get_output(l_doc_1) # B x N x DE M = T.batched_dot(dd, qd.dimshuffle((0, 2, 1))) # B x N x Q alphas = T.nnet.softmax( T.reshape(M, (M.shape[0] * M.shape[1], M.shape[2]))) alphas_r = T.reshape(alphas, (M.shape[0],M.shape[1],M.shape[2]))* \ self.inps[7][:,np.newaxis,:] # B x N x Q alphas_r = alphas_r / alphas_r.sum(axis=2)[:, :, np.newaxis] # B x N x Q q_rep = T.batched_dot(alphas_r, qd) # B x N x DE l_q_rep_in = L.InputLayer(shape=(None, None, 2 * self.nhidden), input_var=q_rep) l_doc_2_in = L.ElemwiseMergeLayer([l_doc_1, l_q_rep_in], T.mul) l_doce = L.dropout(l_doc_2_in, p=self.dropout) # B x N x DE if self.use_feat: l_doce = L.ConcatLayer([l_doce, l_fembed], axis=2) # B x N x DE+2 l_fwd_doc = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP, mask_input=l_docmask, gradient_steps=GRAD_STEPS, precompute_input=True) l_bkd_doc = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP, mask_input=l_docmask, gradient_steps=GRAD_STEPS, precompute_input=True, \ backwards=True) l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2) d = L.get_output(l_doc) # B x N x 2D p = T.batched_dot(d, q) # B x N pm = T.nnet.softmax(p) * self.inps[10] pm = pm / pm.sum(axis=1)[:, np.newaxis] final = T.batched_dot(pm, self.inps[4]) dv = L.get_output(l_doc, deterministic=True) # B x N x 2D p = T.batched_dot(dv, q) # B x N pm = T.nnet.softmax(p) * self.inps[10] pm = pm / pm.sum(axis=1)[:, np.newaxis] final_v = T.batched_dot(pm, self.inps[4]) return final, final_v, l_doc, l_qs, l_docembed.W
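# A compact sketch of the attention step computed above: pairwise document/query
# similarities via batched_dot, a softmax over the query axis, masking of padded
# query positions and renormalisation. Shapes are illustrative; theano/numpy only.
import numpy as np
import theano
import theano.tensor as T

d = T.tensor3('doc')            # B x N x D
q = T.tensor3('query')          # B x Q x D
qmask = T.matrix('query_mask')  # B x Q

M = T.batched_dot(d, q.dimshuffle(0, 2, 1))                 # B x N x Q
alphas = T.nnet.softmax(M.reshape((-1, M.shape[2])))        # (B*N) x Q
alphas = alphas.reshape(M.shape) * qmask[:, np.newaxis, :]  # drop padded query words
alphas = alphas / alphas.sum(axis=2, keepdims=True)         # renormalise rows
q_rep = T.batched_dot(alphas, q)                            # B x N x D

attend = theano.function([d, q, qmask], q_rep)
floatX = theano.config.floatX
print(attend(np.ones((2, 5, 4), floatX), np.ones((2, 3, 4), floatX),
             np.ones((2, 3), floatX)).shape)                # (2, 5, 4)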
def build_generator(input_var, noise_size, cond_var=None, n_conds=0, arch=0, with_BatchNorm=True, batch_size=None, n_steps=None): from lasagne.layers import InputLayer, ReshapeLayer, DenseLayer, concat from lasagne.layers import Upscale2DLayer, Conv2DLayer from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer from lasagne.nonlinearities import LeakyRectify, rectify from lasagne.init import GlorotUniform, Normal, Orthogonal # non_lin = LeakyRectify(0.01) non_lin = rectify # init = Orthogonal(np.sqrt(2/(1+0.01**2))) init = Normal(0.02, 0.0) # init = GlorotUniform() layer = InputLayer(shape=(batch_size, noise_size), input_var=input_var) if cond_var is not None: layer = BatchNorm(DenseLayer( layer, noise_size, nonlinearity=non_lin), with_BatchNorm) layer = concat([ layer, InputLayer(shape=(batch_size, n_conds), input_var=cond_var)]) if arch == 'dcgan': # DCGAN layer = BatchNorm(DenseLayer( layer, 1024*4*4, W=init, b=None, nonlinearity=non_lin)) layer = ReshapeLayer(layer, ([0], 1024, 4, 4)) layer = BatchNorm(Deconv2DLayer( layer, 512, 5, stride=2, crop=(2, 2), W=init, b=None, output_size=8, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 256, 5, stride=2, crop=(2, 2), W=init, b=None, output_size=16, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 128, 5, stride=2, crop=(2, 2), W=init, b=None, output_size=32, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 64, 5, stride=2, crop=(2, 2), W=init, b=None, output_size=64, nonlinearity=non_lin), with_BatchNorm) layer = Deconv2DLayer( layer, 1, 5, stride=2, crop=(2, 2), W=init, b=None, output_size=128, nonlinearity=tanh_temperature) elif arch == 'mnist': # Jan Schluechter MNIST generator # fully-connected layers layer = BatchNorm(DenseLayer( layer, 1024, W=init, b=None), with_BatchNorm) # project and reshape layer = BatchNorm(DenseLayer( layer, 1024*8*8, W=init, b=None), with_BatchNorm) layer = ReshapeLayer(layer, ([0], 1024, 8, 8)) # fractional-stride convolutions layer = BatchNorm(Deconv2DLayer( layer, 512, 5, stride=2, crop='same', W=init, b=None, output_size=16, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 256, 5, stride=2, crop='same', W=init, b=None, output_size=32, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 128, 5, stride=2, crop='same', W=init, b=None, output_size=64, nonlinearity=non_lin), with_BatchNorm) layer = Deconv2DLayer( layer, 1, 5, stride=2, crop='same', W=init, b=None, output_size=128, nonlinearity=tanh_temperature) elif 'cont-enc': # build generator from concatenated prefix and noise features layer = ReshapeLayer(layer, ([0], layer.output_shape[1], 1, 1)) layer = BatchNorm(Deconv2DLayer( layer, 1024, 4, stride=1, crop=0, W=init), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 512, 4, stride=2, crop=1, W=init), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 256, 4, stride=2, crop=1, W=init), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 128, 4, stride=2, crop=1, W=init), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 128, 4, stride=2, crop=1, W=init), with_BatchNorm) layer = Deconv2DLayer( layer, 1, 4, stride=2, crop=1, W=init, nonlinearity=tanh_temperature) elif 'lsgan': layer = batch_norm(DenseLayer(layer, 1024)) layer = batch_norm(DenseLayer(layer, 1024*8*8)) layer = ReshapeLayer(layer, ([0], 1024, 8, 8)) layer = batch_norm(Deconv2DLayer( layer, 256, 5, stride=2, crop='same', output_size=16)) layer = 
batch_norm(Deconv2DLayer( layer, 256, 5, stride=2, crop='same', output_size=32)) layer = batch_norm(Deconv2DLayer( layer, 256, 5, stride=2, crop='same', output_size=64)) layer = Deconv2DLayer( layer, 1, 5, stride=2, crop='same', output_size=128, nonlinearity=tanh_temperature) elif arch == 2: # non-overlapping transposed convolutions # fully-connected layers layer = BatchNorm(DenseLayer( layer, 1024, W=init, b=None), with_BatchNorm) layer = BatchNorm(DenseLayer( layer, 1024, W=init, b=None), with_BatchNorm) # project and reshape layer = BatchNorm(DenseLayer(layer, 256*36*36), with_BatchNorm) layer = ReshapeLayer(layer, ([0], 256, 36, 36)) # two fractional-stride convolutions layer = BatchNorm(Deconv2DLayer( layer, 256, 4, stride=2, crop='full', b=None, nonlinearity=non_lin), with_BatchNorm) layer = Deconv2DLayer( layer, 1, 8, stride=2, crop='full', b=None, nonlinearity=tanh_temperature) elif arch == 3: # resize-convolution, more full layer weights less convolutions # fully-connected layers layer = BatchNorm(DenseLayer( layer, 1024, W=init, b=None), with_BatchNorm) layer = BatchNorm(DenseLayer( layer, 1024, W=init, b=None), with_BatchNorm) # project and reshape layer = BatchNorm(DenseLayer(layer, 32*68*68), with_BatchNorm) layer = ReshapeLayer(layer, ([0], 32, 68, 68)) # resize-convolutions layer = BatchNorm(Conv2DLayer( layer, 256, 3, stride=1, pad='valid'), with_BatchNorm) layer = Upscale2DLayer(layer, (2, 2)) layer = Conv2DLayer( layer, 1, 5, stride=1, pad='valid', nonlinearity=tanh_temperature) elif arch == 4: # resize-convolution, less full layer weights more convolutions # fully-connected layers layer = BatchNorm(DenseLayer( layer, 1024, W=init, b=None), with_BatchNorm) layer = BatchNorm(DenseLayer( layer, 1024, W=init, b=None), with_BatchNorm) # project and reshape layer = BatchNorm(DenseLayer(layer, 128*18*18), with_BatchNorm) layer = ReshapeLayer(layer, ([0], 128, 18, 18)) # resize-convolutions layer = Upscale2DLayer(layer, (2, 2), mode='bilinear') layer = BatchNorm(Conv2DLayer( layer, 256, 3, stride=1, pad='valid', nonlinearity=non_lin), with_BatchNorm) layer = Upscale2DLayer(layer, (2, 2), mode='bilinear') layer = BatchNorm(Conv2DLayer( layer, 256, 3, stride=1, pad='valid', nonlinearity=non_lin), with_BatchNorm) layer = Upscale2DLayer(layer, (2, 2), mode='bilinear') layer = Conv2DLayer( layer, 1, 5, stride=1, pad='valid', nonlinearity=tanh_temperature) elif arch == 'crepe_up': # CREPE transposed with upscaling # fully-connected layers layer = BatchNorm(DenseLayer( layer, 1024, W=init, b=None), with_BatchNorm) layer = BatchNorm(DenseLayer( layer, 1024, W=init, b=None), with_BatchNorm) # project and reshape layer = BatchNorm(DenseLayer(layer, 2**15*1*3), with_BatchNorm) layer = ReshapeLayer(layer, ([0], 2**15, 1, 3)) # temporal convolutions layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = Upscale2DLayer(layer, (1, 3), mode='repeat') layer = BatchNorm(Deconv2DLayer( layer, 512, (1, 9), stride=1, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = Upscale2DLayer(layer, (1, 3), mode='repeat') layer = 
Deconv2DLayer( layer, 1, (128, 6), stride=1, crop=0, W=init, b=None, nonlinearity=tanh_temperature) elif arch == 'crepe_noup_a': # CREPE transposed no upscaling # fully-connected layer layer = BatchNorm(DenseLayer( layer, 1024, W=init, b=None), with_BatchNorm) # project and reshape layer = BatchNorm(DenseLayer( layer, 1024*1*3, W=init, b=None), with_BatchNorm) layer = ReshapeLayer(layer, ([0], 1024, 1, 3)) # temporal convolutions layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 512, (1, 7), stride=1, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = BatchNorm(Deconv2DLayer( layer, 1024, (128, 7), stride=3, crop=0, W=init, b=None, nonlinearity=non_lin), with_BatchNorm) layer = Deconv2DLayer( layer, 1, (1, 8), stride=1, crop=0, W=init, b=None, nonlinearity=tanh_temperature) elif arch == 'crepe_noup_b': # CREPE transposed no upscaling # fully-connected layer layer = BatchNorm(DenseLayer(layer, 1024)) # project and reshape layer = BatchNorm(DenseLayer(layer, 1024*1*3)) layer = ReshapeLayer(layer, ([0], 1024, 1, 3)) # temporal convolutions layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, nonlinearity=non_lin)) layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, nonlinearity=non_lin)) layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, nonlinearity=non_lin)) layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=1, crop=0, nonlinearity=non_lin)) layer = BatchNorm(Deconv2DLayer( layer, 256, (1, 3), stride=3, crop=0, nonlinearity=non_lin)) layer = Deconv2DLayer( layer, 512, (1, 9), stride=1, crop=0, nonlinearity=non_lin) layer = Deconv2DLayer( layer, 1, (128, 8), stride=3, crop=0, nonlinearity=tanh_temperature) else: return None print("Generator output:", layer.output_shape) return layer
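# Minimal sketch of the DCGAN-style upsampling used in several branches above:
# project noise to a small spatial map, then repeatedly double the resolution
# with stride-2 transposed convolutions. Toy sizes; lasagne only.
from lasagne.layers import InputLayer, DenseLayer, ReshapeLayer, batch_norm
from lasagne.layers import TransposedConv2DLayer as Deconv2DLayer
from lasagne.nonlinearities import tanh

noise = InputLayer((None, 100))
g = batch_norm(DenseLayer(noise, 256 * 4 * 4))
g = ReshapeLayer(g, ([0], 256, 4, 4))
g = batch_norm(Deconv2DLayer(g, 128, 5, stride=2, crop=2, output_size=8))
g = batch_norm(Deconv2DLayer(g, 64, 5, stride=2, crop=2, output_size=16))
g = Deconv2DLayer(g, 1, 5, stride=2, crop=2, output_size=32, nonlinearity=tanh)
print(g.output_shape)  # (None, 1, 32, 32)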
def reference_model(): net = {} net['data'] = InputLayer(shape=(None, 3, 227, 227)) # conv1 net['conv1'] = Conv2DLayer( net['data'], num_filters=96, filter_size=(11, 11), stride = 4, nonlinearity=lasagne.nonlinearities.rectify) # pool1 net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=(3, 3), stride=2) # norm1 net['norm1'] = LocalResponseNormalization2DLayer(net['pool1'], n=5, alpha=0.0001/5.0, beta = 0.75, k=1) # conv2 # The caffe reference model uses a parameter called group. # This parameter splits input to the convolutional layer. # The first half of the filters operate on the first half # of the input from the previous layer. Similarly, the # second half operate on the second half of the input. # # Lasagne does not have this group parameter, but we can # do it ourselves. # # see https://github.com/BVLC/caffe/issues/778 # also see https://code.google.com/p/cuda-convnet/wiki/LayerParams # before conv2 split the data net['conv2_data1'] = SliceLayer(net['norm1'], indices=slice(0, 48), axis=1) net['conv2_data2'] = SliceLayer(net['norm1'], indices=slice(48,96), axis=1) # now do the convolutions net['conv2_part1'] = Conv2DLayer(net['conv2_data1'], num_filters=128, filter_size=(5, 5), pad = 2) net['conv2_part2'] = Conv2DLayer(net['conv2_data2'], num_filters=128, filter_size=(5,5), pad = 2) # now combine net['conv2'] = concat((net['conv2_part1'],net['conv2_part2']),axis=1) # pool2 net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=(3, 3), stride = 2) # norm2 net['norm2'] = LocalResponseNormalization2DLayer(net['pool2'], n=5, alpha=0.0001/5.0, beta = 0.75, k=1) # conv3 # no group net['conv3'] = Conv2DLayer(net['norm2'], num_filters=384, filter_size=(3, 3), pad = 1) # conv4 # group = 2 net['conv4_data1'] = SliceLayer(net['conv3'], indices=slice(0, 192), axis=1) net['conv4_data2'] = SliceLayer(net['conv3'], indices=slice(192,384), axis=1) net['conv4_part1'] = Conv2DLayer(net['conv4_data1'], num_filters=192, filter_size=(3, 3), pad = 1) net['conv4_part2'] = Conv2DLayer(net['conv4_data2'], num_filters=192, filter_size=(3,3), pad = 1) net['conv4'] = concat((net['conv4_part1'],net['conv4_part2']),axis=1) # conv5 # group 2 net['conv5_data1'] = SliceLayer(net['conv4'], indices=slice(0, 192), axis=1) net['conv5_data2'] = SliceLayer(net['conv4'], indices=slice(192,384), axis=1) net['conv5_part1'] = Conv2DLayer(net['conv5_data1'], num_filters=128, filter_size=(3, 3), pad = 1) net['conv5_part2'] = Conv2DLayer(net['conv5_data2'], num_filters=128, filter_size=(3,3), pad = 1) net['conv5'] = concat((net['conv5_part1'],net['conv5_part2']),axis=1) # pool 5 net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=(3, 3), stride = 2) # fc6 net['fc6'] = DenseLayer( net['pool5'],num_units=4096, nonlinearity=lasagne.nonlinearities.rectify) # fc7 net['fc7'] = DenseLayer( net['fc6'], num_units=4096, nonlinearity=lasagne.nonlinearities.rectify) # fc8 net['fc8'] = DenseLayer( net['fc7'], num_units=1000, nonlinearity=lasagne.nonlinearities.softmax) return net
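# Sketch of the "group" convolution trick described in the comments above: slice
# the input channels in half, convolve each half with its own filters, then
# concatenate the two results along the channel axis. Toy sizes; lasagne only.
from lasagne.layers import InputLayer, Conv2DLayer, SliceLayer, concat

l_in = InputLayer((None, 96, 27, 27))
half1 = SliceLayer(l_in, indices=slice(0, 48), axis=1)
half2 = SliceLayer(l_in, indices=slice(48, 96), axis=1)
conv_part1 = Conv2DLayer(half1, num_filters=128, filter_size=(5, 5), pad=2)
conv_part2 = Conv2DLayer(half2, num_filters=128, filter_size=(5, 5), pad=2)
grouped = concat((conv_part1, conv_part2), axis=1)
print(grouped.output_shape)  # (None, 256, 27, 27)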
def get_params_internal(self, **tags):
    return L.get_all_params(
        L.concat(self._output_layers),
        **tags
    )  # optionally sort, e.g. key=lambda x: x.name
def build_model(x=None, layer='fc8', shape=(None, 3, 227, 227), up_scale=4): net = {'data': InputLayer(shape=shape, input_var=x)} net['data_s'] = Upscale2DLayer(net['data'], up_scale) net['conv1'] = Conv2DLayer(net['data_s'], num_filters=96, filter_size=(11, 11), stride=4, nonlinearity=lasagne.nonlinearities.rectify) if layer is 'conv1': return net # pool1 net['pool1'] = MaxPool2DLayer(net['conv1'], pool_size=(3, 3), stride=2) # norm1 net['norm1'] = LocalResponseNormalization2DLayer(net['pool1'], n=5, alpha=0.0001 / 5.0, beta=0.75, k=1) # conv2 # before conv2 split the data net['conv2_data1'] = SliceLayer(net['norm1'], indices=slice(0, 48), axis=1) net['conv2_data2'] = SliceLayer(net['norm1'], indices=slice(48, 96), axis=1) # now do the convolutions net['conv2_part1'] = Conv2DLayer(net['conv2_data1'], num_filters=128, filter_size=(5, 5), pad=2) net['conv2_part2'] = Conv2DLayer(net['conv2_data2'], num_filters=128, filter_size=(5, 5), pad=2) # now combine net['conv2'] = concat((net['conv2_part1'], net['conv2_part2']), axis=1) if layer is 'conv2': return net # pool2 net['pool2'] = MaxPool2DLayer(net['conv2'], pool_size=(3, 3), stride=2) # norm2 net['norm2'] = LocalResponseNormalization2DLayer(net['pool2'], n=5, alpha=0.0001 / 5.0, beta=0.75, k=1) # conv3 # no group net['conv3'] = Conv2DLayer(net['norm2'], num_filters=384, filter_size=(3, 3), pad=1) if layer is 'conv3': return net # conv4 net['conv4_data1'] = SliceLayer(net['conv3'], indices=slice(0, 192), axis=1) net['conv4_data2'] = SliceLayer(net['conv3'], indices=slice(192, 384), axis=1) net['conv4_part1'] = Conv2DLayer(net['conv4_data1'], num_filters=192, filter_size=(3, 3), pad=1) net['conv4_part2'] = Conv2DLayer(net['conv4_data2'], num_filters=192, filter_size=(3, 3), pad=1) net['conv4'] = concat((net['conv4_part1'], net['conv4_part2']), axis=1) if layer is 'conv4': return net # conv5 # group 2 net['conv5_data1'] = SliceLayer(net['conv4'], indices=slice(0, 192), axis=1) net['conv5_data2'] = SliceLayer(net['conv4'], indices=slice(192, 384), axis=1) net['conv5_part1'] = Conv2DLayer(net['conv5_data1'], num_filters=128, filter_size=(3, 3), pad=1) net['conv5_part2'] = Conv2DLayer(net['conv5_data2'], num_filters=128, filter_size=(3, 3), pad=1) net['conv5'] = concat((net['conv5_part1'], net['conv5_part2']), axis=1) if layer is 'conv5': return net # pool 5 net['pool5'] = MaxPool2DLayer(net['conv5'], pool_size=(3, 3), stride=2) # fc6 net['fc6'] = DenseLayer(net['pool5'], num_units=4096, nonlinearity=lasagne.nonlinearities.rectify) if layer is 'fc6': return net # fc7 net['fc7'] = DenseLayer(net['fc6'], num_units=4096, nonlinearity=lasagne.nonlinearities.rectify) if layer is 'fc7': return net # fc8 net['fc8'] = DenseLayer(net['fc7'], num_units=1000, nonlinearity=lasagne.nonlinearities.softmax) if layer is 'fc8': # st() return net
def build_critic(input_var=None, cond_var=None, n_conds=0, arch=0, with_BatchNorm=True, loss_type='wgan'): from lasagne.layers import ( InputLayer, Conv2DLayer, DenseLayer, MaxPool2DLayer, concat, dropout, flatten) from lasagne.nonlinearities import rectify, LeakyRectify from lasagne.init import GlorotUniform # Normal lrelu = LeakyRectify(0.2) layer = InputLayer( shape=(None, 1, 128, 128), input_var=input_var, name='d_in_data') # init = Normal(0.02, 0.0) init = GlorotUniform() if cond_var: # class: from data or from generator input layer_cond = InputLayer( shape=(None, n_conds), input_var=cond_var, name='d_in_condition') layer_cond = BatchNorm(DenseLayer( layer_cond, 1024, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) if arch == 'dcgan': # DCGAN inspired layer = BatchNorm(Conv2DLayer( layer, 32, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 64, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 128, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 256, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 512, 4, stride=2, pad=1, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) elif arch == 'cont-enc': # convolution layers layer = BatchNorm(Conv2DLayer( layer, 64, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 64, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 128, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 256, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 512, 4, stride=2, pad=1, W=init, nonlinearity=lrelu), with_BatchNorm) elif arch == 'mnist': # Jan Schluechter's MNIST discriminator # convolution layers layer = BatchNorm(Conv2DLayer( layer, 128, 5, stride=2, pad='same', W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 128, 5, stride=2, pad='same', W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 128, 5, stride=2, pad='same', W=init, b=None, nonlinearity=lrelu), with_BatchNorm) # layer = BatchNorm(Conv2DLayer( # layer, 128, 5, stride=2, pad='same', W=init, b=None, # nonlinearity=lrelu), with_BatchNorm) # fully-connected layer # layer = BatchNorm(DenseLayer( # layer, 1024, W=init, b=None, nonlinearity=lrelu), with_BatchNorm) elif arch == 'lsgan': layer = batch_norm(Conv2DLayer( layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu)) layer = batch_norm(Conv2DLayer( layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu)) layer = batch_norm(Conv2DLayer( layer, 256, 5, stride=2, pad='same', nonlinearity=lrelu)) elif arch == 'crepe': # CREPE # form words from sequence of characters layer = BatchNorm(Conv2DLayer( layer, 1024, (128, 7), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = MaxPool2DLayer(layer, (1, 3)) # temporal convolution, 7-gram layer = BatchNorm(Conv2DLayer( layer, 512, (1, 7), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = MaxPool2DLayer(layer, (1, 3)) # temporal convolution, 3-gram layer = BatchNorm(Conv2DLayer( layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) 
layer = BatchNorm(Conv2DLayer( layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = BatchNorm(Conv2DLayer( layer, 256, (1, 3), W=init, b=None, nonlinearity=lrelu), with_BatchNorm) layer = flatten(layer) # fully-connected layers layer = dropout(DenseLayer( layer, 1024, W=init, b=None, nonlinearity=rectify)) layer = dropout(DenseLayer( layer, 1024, W=init, b=None, nonlinearity=rectify)) else: raise Exception("Model architecture {} is not supported".format(arch)) # output layer (linear and without bias) if cond_var is not None: layer = DenseLayer(layer, 1024, nonlinearity=lrelu, b=None) layer = concat([layer, layer_cond]) layer = DenseLayer(layer, 1, b=None, nonlinearity=None) print("Critic output:", layer.output_shape) return layer
def get_char2word(self, ic, avg=False): suf = '_avg' if avg else '' ec = L.EmbeddingLayer( ic, self.args.vc, self.args.nc, name='ec' + suf, W=HeNormal() if not avg else Constant()) # (100, 24, 32, 16) ec.params[ec.W].remove('regularizable') if self.args.char_model == 'CNN': lds = L.dimshuffle(ec, (0, 3, 1, 2)) # (100, 16, 24, 32) ls = [] for n in self.args.ngrams: lconv = L.Conv2DLayer( lds, self.args.nf, (1, n), untie_biases=True, W=HeNormal('relu') if not avg else Constant(), name='conv_%d' % n + suf) # (100, 64/4, 24, 32-n+1) lpool = L.MaxPool2DLayer( lconv, (1, self.args.max_len - n + 1)) # (100, 64, 24, 1) lpool = L.flatten(lpool, outdim=3) # (100, 16, 24) lpool = L.dimshuffle(lpool, (0, 2, 1)) # (100, 24, 16) ls.append(lpool) xc = L.concat(ls, axis=2) # (100, 24, 64) return xc elif self.args.char_model == 'LSTM': ml = L.ExpressionLayer( ic, lambda x: T.neq(x, 0)) # mask layer (100, 24, 32) ml = L.reshape(ml, (-1, self.args.max_len)) # (2400, 32) gate_params = L.recurrent.Gate(W_in=Orthogonal(), W_hid=Orthogonal()) cell_params = L.recurrent.Gate(W_in=Orthogonal(), W_hid=Orthogonal(), W_cell=None, nonlinearity=tanh) lstm_in = L.reshape( ec, (-1, self.args.max_len, self.args.nc)) # (2400, 32, 16) lstm_f = L.LSTMLayer( lstm_in, self.args.nw / 2, mask_input=ml, grad_clipping=10., learn_init=True, peepholes=False, precompute_input=True, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, # unroll_scan=True, only_return_final=True, name='forward' + suf) # (2400, 64) lstm_b = L.LSTMLayer( lstm_in, self.args.nw / 2, mask_input=ml, grad_clipping=10., learn_init=True, peepholes=False, precompute_input=True, ingate=gate_params, forgetgate=gate_params, cell=cell_params, outgate=gate_params, # unroll_scan=True, only_return_final=True, backwards=True, name='backward' + suf) # (2400, 64) remove_reg(lstm_f) remove_reg(lstm_b) if avg: set_zero(lstm_f) set_zero(lstm_b) xc = L.concat([lstm_f, lstm_b], axis=1) # (2400, 128) xc = L.reshape(xc, (-1, self.args.sw, self.args.nw)) # (100, 24, 256) return xc
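# Sketch of the char-CNN branch above: embed characters, move the embedding
# dimension into the channel axis, run (1, n) convolutions along the character
# axis and max-pool over time to get one n-gram feature vector per word.
# All sizes are illustrative; lasagne only.
import lasagne.layers as L

n_words, max_len, n_chars, char_dim, n_filters = 24, 32, 50, 16, 64
ic = L.InputLayer((None, n_words, max_len))                  # integer char ids
ec = L.EmbeddingLayer(ic, input_size=n_chars, output_size=char_dim)  # (B, 24, 32, 16)
lds = L.dimshuffle(ec, (0, 3, 1, 2))                         # (B, 16, 24, 32)
features = []
for n in (3, 4, 5):
    conv = L.Conv2DLayer(lds, n_filters, (1, n))             # (B, 64, 24, 33 - n)
    pool = L.MaxPool2DLayer(conv, (1, max_len - n + 1))      # (B, 64, 24, 1)
    pool = L.dimshuffle(L.flatten(pool, outdim=3), (0, 2, 1))  # (B, 24, 64)
    features.append(pool)
xc = L.concat(features, axis=2)                              # (B, 24, 192)
print(xc.output_shape)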