def build_network(self, vocab_size, input_var, mask_var, W_init):

        l_in = L.InputLayer(shape=(None, None, 1), input_var=input_var)
        l_mask = L.InputLayer(shape=(None, None), input_var=mask_var)
        l_embed = L.EmbeddingLayer(l_in,
                                   input_size=vocab_size,
                                   output_size=EMBED_DIM,
                                   W=W_init)

        l_fwd_1 = L.LSTMLayer(l_embed,
                              NUM_HIDDEN,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True)
        l_bkd_1 = L.LSTMLayer(l_embed,
                              NUM_HIDDEN,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True,
                              backwards=True)

        l_all_1 = L.concat([l_fwd_1, l_bkd_1], axis=2)

        l_fwd_2 = L.LSTMLayer(l_all_1,
                              NUM_HIDDEN,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True)
        l_bkd_2 = L.LSTMLayer(l_all_1,
                              NUM_HIDDEN,
                              grad_clipping=GRAD_CLIP,
                              mask_input=l_mask,
                              gradient_steps=GRAD_STEPS,
                              precompute_input=True,
                              backwards=True)

        l_fwd_1_slice = L.SliceLayer(l_fwd_1, -1, 1)
        l_bkd_1_slice = L.SliceLayer(l_bkd_1, 0, 1)
        y_1 = L.ElemwiseSumLayer([l_fwd_1_slice, l_bkd_1_slice])

        l_fwd_2_slice = L.SliceLayer(l_fwd_2, -1, 1)
        l_bkd_2_slice = L.SliceLayer(l_bkd_2, 0, 1)
        y_2 = L.ElemwiseSumLayer([l_fwd_2_slice, l_bkd_2_slice])

        y = L.concat([y_1, y_2], axis=1)
        g = L.DenseLayer(y,
                         num_units=EMBED_DIM,
                         nonlinearity=lasagne.nonlinearities.tanh)
        l_out = L.DenseLayer(g,
                             num_units=vocab_size,
                             W=l_embed.W.T,
                             nonlinearity=lasagne.nonlinearities.softmax)

        return l_out
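
A minimal, hedged usage sketch (not part of the original class): compile a
cross-entropy training step from the l_out returned above. It assumes input_var
and mask_var are the Theano variables passed into build_network and that the
targets are integer answer indices; the Adam settings are illustrative, not the
author's.

import theano
import theano.tensor as T
import lasagne


def compile_train_fn(l_out, input_var, mask_var, learning_rate=1e-3):
    target_var = T.ivector('targets')                        # gold class per example
    prediction = lasagne.layers.get_output(l_out)            # (batch, vocab_size) softmax
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()
    params = lasagne.layers.get_all_params(l_out, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)
    return theano.function([input_var, mask_var, target_var], loss, updates=updates)
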
def build_autoencoder_network():
    input_var = T.tensor4('input_var');

    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var);
    layer = batch_norm(layers.Conv2DLayer(layer, 100,  filter_size=(5,5), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Conv2DLayer(layer, 120,  filter_size=(5,5), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Conv2DLayer(layer, 120,  filter_size=(1,1), stride=1, pad='same', nonlinearity=leaky_rectify));
    pool1 =            layers.MaxPool2DLayer(layer, (2, 2), 2);
    layer = batch_norm(layers.Conv2DLayer(pool1, 240,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Conv2DLayer(layer, 320,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Conv2DLayer(layer, 320,  filter_size=(1,1), stride=1, pad='same', nonlinearity=leaky_rectify));
    pool2 =            layers.MaxPool2DLayer(layer, (2, 2), 2);
    layer = batch_norm(layers.Conv2DLayer(pool2, 640,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    prely = batch_norm(layers.Conv2DLayer(layer, 1024, filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));

    featm    = batch_norm(layers.Conv2DLayer(prely, 640, filter_size=(1,1), nonlinearity=leaky_rectify));
    feat_map = batch_norm(layers.Conv2DLayer(featm, 100, filter_size=(1,1), nonlinearity=rectify, name="feat_map"));
    maskm    = batch_norm(layers.Conv2DLayer(prely, 100, filter_size=(1,1), nonlinearity=leaky_rectify));
    mask_rep = batch_norm(layers.Conv2DLayer(maskm, 1,   filter_size=(1,1), nonlinearity=None),   beta=None, gamma=None);
    mask_map = SoftThresPerc(mask_rep, perc=90.0, alpha=0.1, beta=init.Constant(0.5), tight=100.0, name="mask_map");
    layer    = ChInnerProdMerge(feat_map, mask_map, name="encoder");

    layer = batch_norm(layers.Deconv2DLayer(layer, 1024, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 640,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 320,  filter_size=(1,1), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer =            layers.InverseLayer(layer, pool2);
    layer = batch_norm(layers.Deconv2DLayer(layer, 320,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 320,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 120,  filter_size=(1,1), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer =            layers.InverseLayer(layer, pool1);
    layer = batch_norm(layers.Deconv2DLayer(layer, 120,  filter_size=(5,5), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 100,  filter_size=(5,5), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer =            layers.Deconv2DLayer(layer, 3,    filter_size=(1,1), stride=1, crop='same', nonlinearity=identity);

    glblf = batch_norm(layers.Conv2DLayer(prely, 128,  filter_size=(1,1), nonlinearity=leaky_rectify));
    glblf = layers.Pool2DLayer(glblf, pool_size=(5,5), stride=5, mode='average_inc_pad');
    glblf = batch_norm(layers.Conv2DLayer(glblf, 64,   filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Conv2DLayer(glblf, 5,    filter_size=(1,1), nonlinearity=rectify), name="global_feature");

    glblf = batch_norm(layers.Deconv2DLayer(glblf, 256, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(9,9), stride=5, crop=(2,2),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf =            layers.Deconv2DLayer(glblf, 3,   filter_size=(1,1), stride=1, crop='same', nonlinearity=identity);

    layer = layers.ElemwiseSumLayer([layer, glblf]);

    network = ReshapeLayer(layer, ([0], -1));
    mask_var = lasagne.layers.get_output(mask_map);
    output_var = lasagne.layers.get_output(network);

    return network, input_var, mask_var, output_var;
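
A hedged sketch (not in the original module) of compiling a plain squared-error
reconstruction step for the autoencoder above; PS and the custom layers
(SoftThresPerc, ChInnerProdMerge) are assumed to be defined as in the original
module, and the Adam settings are illustrative.

import theano
import lasagne


def compile_ae_train_fn(learning_rate=5e-4):
    network, input_var, mask_var, output_var = build_autoencoder_network()
    target = input_var.flatten(2)                  # matches the ([0], -1) reshape above
    loss = lasagne.objectives.squared_error(output_var, target).mean()
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)
    return theano.function([input_var], loss, updates=updates)
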
Example #3
 def build_network(self):
     l_char1_in = L.InputLayer(shape=(None, None, self.max_word_len),
                               input_var=self.inps[0])
     l_char2_in = L.InputLayer(shape=(None, None, self.max_word_len),
                               input_var=self.inps[1])
     l_mask1_in = L.InputLayer(shape=(None, None, self.max_word_len),
                               input_var=self.inps[2])
     l_mask2_in = L.InputLayer(shape=(None, None, self.max_word_len),
                               input_var=self.inps[3])
     l_char_in = L.ConcatLayer([l_char1_in, l_char2_in],
                               axis=1)  # B x (ND+NQ) x L
     l_char_mask = L.ConcatLayer([l_mask1_in, l_mask2_in], axis=1)
     shp = (self.inps[0].shape[0],
            self.inps[0].shape[1] + self.inps[1].shape[1],
            self.inps[1].shape[2])
     l_index_reshaped = L.ReshapeLayer(l_char_in,
                                       (shp[0] * shp[1], shp[2]))  # BN x L
     l_mask_reshaped = L.ReshapeLayer(l_char_mask,
                                      (shp[0] * shp[1], shp[2]))  # BN x L
     l_lookup = L.EmbeddingLayer(l_index_reshaped, self.num_chars,
                                 self.char_dim)  # BN x L x D
     l_fgru = L.GRULayer(l_lookup,
                         2 * self.char_dim,
                         grad_clipping=10,
                         gradient_steps=-1,
                         precompute_input=True,
                         only_return_final=True,
                         mask_input=l_mask_reshaped)
     l_bgru = L.GRULayer(l_lookup,
                         2 * self.char_dim,
                         grad_clipping=10,
                         gradient_steps=-1,
                         precompute_input=True,
                         backwards=True,
                         only_return_final=True,
                         mask_input=l_mask_reshaped)  # BN x 2D
     l_fwdembed = L.DenseLayer(l_fgru,
                               self.embed_dim // 2,
                               nonlinearity=None)  # BN x DE
     l_bckembed = L.DenseLayer(l_bgru,
                               self.embed_dim // 2,
                               nonlinearity=None)  # BN x DE
     l_embed = L.ElemwiseSumLayer([l_fwdembed, l_bckembed], coeffs=1)
     l_char_embed = L.ReshapeLayer(l_embed,
                                   (shp[0], shp[1], self.embed_dim // 2))
     l_embed1 = L.SliceLayer(l_char_embed,
                             slice(0, self.inps[0].shape[1]),
                             axis=1)
     l_embed2 = L.SliceLayer(l_char_embed,
                             slice(-self.inps[1].shape[1], None),
                             axis=1)
     return l_embed1, l_embed2
Example #4
def build_autoencoder_network():
    input_var = T.tensor4('input_var');

    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var);
    layer = batch_norm(layers.Conv2DLayer(layer, 100,  filter_size=(5,5), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Conv2DLayer(layer, 120,  filter_size=(5,5), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = layers.Pool2DLayer(layer, pool_size=(2,2), stride=2, mode='average_inc_pad');
    layer = batch_norm(layers.Conv2DLayer(layer, 240,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Conv2DLayer(layer, 320,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = layers.Pool2DLayer(layer, pool_size=(2,2), stride=2, mode='average_inc_pad');
    layer = batch_norm(layers.Conv2DLayer(layer, 640,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    prely = batch_norm(layers.Conv2DLayer(layer, 1024, filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));

    featm    = batch_norm(layers.Conv2DLayer(prely, 640, filter_size=(1,1), nonlinearity=leaky_rectify));
    feat_map = batch_norm(layers.Conv2DLayer(featm, 100, filter_size=(1,1), nonlinearity=rectify, name="feat_map"));
    layer    = feat_map;

    layer = batch_norm(layers.Deconv2DLayer(layer, 1024, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 640,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 640,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 320,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 320,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 240,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 120,  filter_size=(5,5), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 100,  filter_size=(5,5), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer =            layers.Deconv2DLayer(layer, 3,    filter_size=(1,1), stride=1, crop='same', nonlinearity=identity);

    glblf = batch_norm(layers.Conv2DLayer(prely, 128,  filter_size=(1,1), nonlinearity=leaky_rectify));
    glblf = layers.Pool2DLayer(glblf, pool_size=(5,5), stride=5, mode='average_inc_pad');
    glblf = batch_norm(layers.Conv2DLayer(glblf, 64,   filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Conv2DLayer(glblf, 5,    filter_size=(1,1), nonlinearity=rectify), name="global_feature");

    glblf = batch_norm(layers.Deconv2DLayer(glblf, 256, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(9,9), stride=5, crop=(2,2),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf =            layers.Deconv2DLayer(glblf, 3,   filter_size=(1,1), stride=1, crop='same', nonlinearity=identity);

    layer = layers.ElemwiseSumLayer([layer, glblf]);

    network = ReshapeLayer(layer, ([0], -1));
    output_var = lasagne.layers.get_output(network);

    return network, input_var, output_var;
    def get_conv_input(self, sidx, tidx, avg=False):
        suf = '_avg' if avg else ''

        feat_embs = [
            self.manager.feats[name].get_emb_layer(sidx, tidx, avg=avg)
            for name in self.args.source_feats
        ]

        # TODO: change the meaning
        if self.args.lex == 'mix':
            concat_emb = L.ElemwiseSumLayer(feat_embs)  # (100, 15, 256)
        else:
            concat_emb = L.concat(feat_embs, axis=2)  # (100, 15, 256+100)

        pos = np.array([0] * (self.args.window_size // 2) + [1] + [0] *
                       (self.args.window_size // 2)).astype(
                           theano.config.floatX)
        post = theano.shared(pos[np.newaxis, :, np.newaxis],
                             borrow=True)  # (1, 15, 1)
        posl = L.InputLayer(
            (None, self.args.window_size, 1),
            input_var=T.extra_ops.repeat(post, sidx.shape[0],
                                         axis=0))  # (100, 15, 1)
        conv_in = L.concat([concat_emb, posl], axis=2)  # (100, 15, 256+1)

        if self.args.pos_emb:
            posint = L.flatten(
                L.ExpressionLayer(posl,
                                  lambda x: T.cast(x, 'int64')))  # (100, 15)
            pos_emb = L.EmbeddingLayer(
                posint,
                self.args.window_size,
                8,
                name='epos' + suf,
                W=Normal(0.01) if not avg else Constant())  # (100, 15, 8)
            pos_emb.params[pos_emb.W].remove('regularizable')
            conv_in = L.concat([concat_emb, posl, pos_emb],
                               axis=2)  # (100, 15, 256+1+8)

        # # squeeze
        # if self.args.squeeze:
        #     conv_in = L.DenseLayer(conv_in, num_units=self.args.squeeze, name='squeeze'+suf, num_leading_axes=2,
        #                     W=HeNormal('relu')) # (100, 15, 256)

        conv_in = L.dimshuffle(conv_in, (0, 2, 1))  # (100, 256+1, 15)

        return conv_in
Example #6
def build_model(n_input,
                n_hidden,
                optimizer=adagrad,
                l2_weight=1e-4,
                l1_weight=1e-2):
    '''
    Build an NN model for estimating the model function.
    '''
    global LR

    input_A = L.InputLayer((None, n_input), name='A')
    layer_A = L.DenseLayer(input_A, n_hidden, b=None, nonlinearity=identity)

    input_B = L.InputLayer((None, n_input), name='B')
    layer_B = L.DenseLayer(input_B, n_hidden, b=None, nonlinearity=identity)

    merge_layer = L.ElemwiseSumLayer((layer_A, layer_B))

    output_layer = L.DenseLayer(merge_layer, 1, b=None,
                                nonlinearity=identity)  # output is scalar

    x1 = T.matrix('x1')
    x2 = T.matrix('x2')
    y = T.matrix('y')

    out = L.get_output(output_layer, {input_A: x1, input_B: x2})
    params = L.get_all_params(output_layer)
    loss = T.mean(squared_error(out, y))

    # add l1 penalty
    l1_penalty = regularize_layer_params([layer_A, layer_B, output_layer], l1)

    # add l2 penalty
    l2_penalty = regularize_layer_params([layer_A, layer_B, output_layer], l2)

    # get loss + penalties
    loss = loss + l1_penalty * l1_weight + l2_penalty * l2_weight

    updates_sgd = optimizer(loss, params, learning_rate=LR)
    updates = apply_momentum(updates_sgd, params, momentum=0.9)
    # updates = optimizer(loss,params,learning_rate=LR)

    f_train = theano.function([x1, x2, y], loss, updates=updates)
    f_test = theano.function([x1, x2, y], loss)
    f_out = theano.function([x1, x2], out)

    return f_train, f_test, f_out, output_layer
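
A hedged usage sketch, not part of the original code: fit the compiled functions
on random data. The sizes (n_input=4, n_hidden=8, batch of 32) are arbitrary, and
LR is the module-level learning rate that build_model reads via `global LR`.

import numpy as np
import theano

LR = 0.01
f_train, f_test, f_out, out_layer = build_model(n_input=4, n_hidden=8)
x1 = np.random.rand(32, 4).astype(theano.config.floatX)
x2 = np.random.rand(32, 4).astype(theano.config.floatX)
y = np.random.rand(32, 1).astype(theano.config.floatX)
for step in range(10):
    print(step, f_train(x1, x2, y))                # prints the regularized training loss
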
Example #7
    def _build_network(self, state_var, action_var):
        """Builds critic network:; inputs: (state, action), outputs: Q-val."""

        # States -> Hidden
        state_in = nn.InputLayer((None, ) + self.state_shape, state_var)
        states = nn.DenseLayer(state_in, 30, W_init, b_init, relu)
        states = nn.DenseLayer(states, 30, W_init, b_init, nonlinearity=None)

        # Actions -> Hidden
        action_in = nn.InputLayer((None, self.num_actions), action_var)
        actions = nn.DenseLayer(action_in,
                                30,
                                W_init,
                                b=None,
                                nonlinearity=None)

        # States_h + Actions_h -> Output
        net = nn.ElemwiseSumLayer([states, actions])
        net = nn.NonlinearityLayer(net, relu)
        return nn.DenseLayer(net, 1, W_out, b_out, nonlinearity=None)
Example #8
    def model(self, query_input, batch_size, query_vocab_size,
              context_vocab_size, emb_dim_size):
        l_input = L.InputLayer(shape=(batch_size, ), input_var=query_input)
        l_embed_continuous = L.EmbeddingLayer(l_input,
                                              input_size=query_vocab_size,
                                              output_size=emb_dim_size)
        l_values_discrete = L.EmbeddingLayer(l_input,
                                             input_size=query_vocab_size,
                                             output_size=emb_dim_size)
        l_probabilities_discrete = L.NonlinearityLayer(
            l_values_discrete, nonlinearity=lasagne.nonlinearities.softmax)
        l_embed_discrete = StochasticLayer(l_probabilities_discrete,
                                           estimator='MF')
        l_merge = L.ElemwiseSumLayer([l_embed_continuous, l_embed_discrete])
        l_out = L.DenseLayer(l_merge,
                             num_units=emb_dim_size,
                             nonlinearity=lasagne.nonlinearities.softmax)

        l_merge_2 = L.ElemwiseMergeLayer([l_out, l_embed_discrete],
                                         merge_function=T.mul)
        l_final_out = L.DenseLayer(l_merge_2, num_units=context_vocab_size)
        return l_values_discrete, l_final_out
Example #9
def build_segmenter_simple_absurd_res():
    sys.setrecursionlimit(1500)
    inp = ll.InputLayer(shape=(None, 1, None, None), name='input')
    n_layers = 64  # should get a 128 x 128 receptive field
    layers = [inp]
    for i in range(n_layers):
        # every 2 layers, add a skip connection
        layers.append(
            ll.Conv2DLayer(layers[-1],
                           num_filters=8,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=linear,
                           name='conv%d' % (i + 1)))
        layers.append(ll.BatchNormLayer(layers[-1], name='bn%i' % (i + 1)))
        if (i % 2 == 0) and (i != 0):
            layers.append(
                ll.ElemwiseSumLayer([
                    layers[-1],  # prev layer
                    layers[-6],  # 3 actual layers per block, skip the previous block
                ]))
        layers.append(ll.NonlinearityLayer(layers[-1], nonlinearity=rectify))

    # our output layer is also convolutional; remember that our Y is going to be the same exact size as the input
    conv_final = ll.Conv2DLayer(layers[-1],
                                num_filters=2,
                                filter_size=(3, 3),
                                pad='same',
                                W=Orthogonal(),
                                name='conv_final',
                                nonlinearity=linear)
    # we need to reshape it to be a (batch*n*m x 3), i.e. unroll s.t. the feature dimension is preserved
    softmax = Softmax4D(conv_final, name='4dsoftmax')

    return [softmax]
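
A hedged inference sketch (not in the original code): compile a pixel-wise
prediction function for the segmenter above. Softmax4D is the author's custom
layer; only standard theano/lasagne calls are added here.

import theano
import lasagne.layers as ll


def compile_segmenter_fn():
    net = build_segmenter_simple_absurd_res()[0]
    input_var = ll.get_all_layers(net)[0].input_var      # the InputLayer's auto-created variable
    pred = ll.get_output(net, deterministic=True)        # freeze batch-norm statistics
    return theano.function([input_var], pred)
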
Example #10
def build_autoencoder_network():
    input_var = T.tensor4('input_var')

    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var)
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           80,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           80,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           80,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           80,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           100,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           100,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           100,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    prely = batch_norm(
        layers.Conv2DLayer(layer,
                           100,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))

    featm = batch_norm(
        layers.Conv2DLayer(prely,
                           180,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    feat_map = batch_norm(
        layers.Conv2DLayer(featm,
                           120,
                           filter_size=(1, 1),
                           nonlinearity=rectify,
                           name="feat_map"))
    maskm = batch_norm(
        layers.Conv2DLayer(prely,
                           120,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    mask_rep = batch_norm(layers.Conv2DLayer(maskm,
                                             1,
                                             filter_size=(1, 1),
                                             nonlinearity=None),
                          beta=None,
                          gamma=None)
    mask_map = SoftThresPerc(mask_rep,
                             perc=99.9,
                             alpha=0.5,
                             beta=init.Constant(0.5),
                             tight=100.0,
                             name="mask_map")
    layer = ChInnerProdMerge(feat_map, mask_map, name="encoder")

    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             100,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             100,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             100,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             100,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             80,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             80,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             80,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             80,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = layers.Deconv2DLayer(layer,
                                 3,
                                 filter_size=(1, 1),
                                 stride=1,
                                 crop='same',
                                 nonlinearity=identity)

    glblf = batch_norm(
        layers.Conv2DLayer(prely,
                           100,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    glblf = layers.Pool2DLayer(glblf,
                               pool_size=(5, 5),
                               stride=5,
                               mode='average_inc_pad')
    glblf = batch_norm(
        layers.Conv2DLayer(glblf,
                           64,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    glblf = batch_norm(layers.Conv2DLayer(glblf,
                                          3,
                                          filter_size=(1, 1),
                                          nonlinearity=rectify),
                       name="global_feature")

    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(9, 9),
                             stride=5,
                             crop=(2, 2),
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             48,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             48,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             48,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = layers.Deconv2DLayer(glblf,
                                 3,
                                 filter_size=(1, 1),
                                 stride=1,
                                 crop='same',
                                 nonlinearity=identity)

    layer = layers.ElemwiseSumLayer([layer, glblf])

    network = ReshapeLayer(layer, ([0], -1))
    layers.set_all_param_values(network,
                                pickle.load(open(filename_model_ae, 'rb')))
    feat_var = lasagne.layers.get_output(feat_map, deterministic=True)
    mask_var = lasagne.layers.get_output(mask_map, deterministic=True)
    outp_var = lasagne.layers.get_output(network, deterministic=True)

    return network, input_var, feat_var, mask_var, outp_var
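
A hedged sketch (not from the original module) of compiling the deterministic
encoder outputs returned by the build_autoencoder_network defined directly above;
PS and filename_model_ae are assumed to be defined as in the original module.

import theano


def compile_encoder_fn():
    network, input_var, feat_var, mask_var, outp_var = build_autoencoder_network()
    # feat_var / mask_var / outp_var were built with deterministic=True above,
    # so the compiled function is intended for inference only.
    return theano.function([input_var], [feat_var, mask_var, outp_var])

# Illustrative call: feats, masks, recon = compile_encoder_fn()(batch), where batch
# is a float32 array of shape (n, 3, PS, PS).
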
def build_autoencoder_network():
    input_var = T.tensor4('input_var')

    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var)
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           100,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           120,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = layers.Pool2DLayer(layer,
                               pool_size=(2, 2),
                               stride=2,
                               mode='average_inc_pad')
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           240,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           320,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = layers.Pool2DLayer(layer,
                               pool_size=(2, 2),
                               stride=2,
                               mode='average_inc_pad')
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           640,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    prely = batch_norm(
        layers.Conv2DLayer(layer,
                           1024,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))

    featm = batch_norm(
        layers.Conv2DLayer(prely,
                           640,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    feat_map = batch_norm(
        layers.Conv2DLayer(featm,
                           100,
                           filter_size=(1, 1),
                           nonlinearity=rectify,
                           name="feat_map"))
    mask_map = SoftThresPerc(feat_map,
                             perc=98.4,
                             alpha=0.1,
                             beta=init.Constant(0.5),
                             tight=20.0,
                             name="mask_map")
    layer = mask_map

    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             1024,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             640,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             640,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             320,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             320,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             240,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             120,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             100,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = layers.Deconv2DLayer(layer,
                                 3,
                                 filter_size=(1, 1),
                                 stride=1,
                                 crop='same',
                                 nonlinearity=identity)

    glblf = batch_norm(
        layers.Conv2DLayer(prely,
                           128,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    glblf = layers.Pool2DLayer(glblf,
                               pool_size=(5, 5),
                               stride=5,
                               mode='average_inc_pad')
    glblf = batch_norm(
        layers.Conv2DLayer(glblf,
                           64,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    glblf = batch_norm(layers.Conv2DLayer(glblf,
                                          5,
                                          filter_size=(1, 1),
                                          nonlinearity=rectify),
                       name="global_feature")

    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             256,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(9, 9),
                             stride=5,
                             crop=(2, 2),
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = layers.Deconv2DLayer(glblf,
                                 3,
                                 filter_size=(1, 1),
                                 stride=1,
                                 crop='same',
                                 nonlinearity=identity)

    layer = layers.ElemwiseSumLayer([layer, glblf])

    network = ReshapeLayer(layer, ([0], -1))
    mask_var = lasagne.layers.get_output(mask_map)
    output_var = lasagne.layers.get_output(network)

    return network, input_var, mask_var, output_var
Example #12
def build_autoencoder_network():
    input_var = T.tensor4('input_var')

    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var)
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           80,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           80,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           80,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           80,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           100,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           100,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           100,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    prely = batch_norm(
        layers.Conv2DLayer(layer,
                           100,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))

    featm = batch_norm(
        layers.Conv2DLayer(prely,
                           180,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    feat_map = batch_norm(
        layers.Conv2DLayer(featm,
                           120,
                           filter_size=(1, 1),
                           nonlinearity=rectify,
                           name="feat_map"))
    maskm = batch_norm(
        layers.Conv2DLayer(prely,
                           100,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    mask_rep = batch_norm(layers.Conv2DLayer(maskm,
                                             1,
                                             filter_size=(1, 1),
                                             nonlinearity=None),
                          beta=None,
                          gamma=None)
    mask_map = SoftThresPerc(mask_rep,
                             perc=90.0,
                             alpha=0.5,
                             beta=init.Constant(0.1),
                             tight=100.0,
                             name="mask_map")
    layer = ChInnerProdMerge(feat_map, mask_map, name="encoder")

    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             100,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             100,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             100,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             100,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             80,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             80,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             80,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             80,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = layers.Deconv2DLayer(layer,
                                 3,
                                 filter_size=(1, 1),
                                 stride=1,
                                 crop='same',
                                 nonlinearity=identity)

    glblf = batch_norm(
        layers.Conv2DLayer(prely,
                           100,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    glblf = layers.Pool2DLayer(glblf,
                               pool_size=(20, 20),
                               stride=20,
                               mode='average_inc_pad')
    glblf = batch_norm(
        layers.Conv2DLayer(glblf,
                           64,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    glblf = batch_norm(layers.Conv2DLayer(glblf,
                                          3,
                                          filter_size=(1, 1),
                                          nonlinearity=rectify),
                       name="global_feature")

    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = layers.Upscale2DLayer(glblf, scale_factor=20)
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             48,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             48,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             48,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = layers.Deconv2DLayer(glblf,
                                 3,
                                 filter_size=(1, 1),
                                 stride=1,
                                 crop='same',
                                 nonlinearity=identity)

    layer = layers.ElemwiseSumLayer([layer, glblf])

    network = ReshapeLayer(layer, ([0], -1))
    mask_var = lasagne.layers.get_output(mask_map)
    output_var = lasagne.layers.get_output(network)

    return network, input_var, mask_var, output_var
Example #13
def build_model(vocab_size,
                doc_var,
                qry_var,
                doc_mask_var,
                qry_mask_var,
                W_init=lasagne.init.Normal()):

    l_doc_in = L.InputLayer(shape=(None, None, 1), input_var=doc_var)
    l_qry_in = L.InputLayer(shape=(None, None, 1), input_var=qry_var)

    l_doc_embed = L.EmbeddingLayer(l_doc_in, vocab_size, EMBED_DIM, W=W_init)
    l_qry_embed = L.EmbeddingLayer(l_qry_in,
                                   vocab_size,
                                   EMBED_DIM,
                                   W=l_doc_embed.W)

    l_doc_mask = L.InputLayer(shape=(None, None), input_var=doc_mask_var)
    l_qry_mask = L.InputLayer(shape=(None, None), input_var=qry_mask_var)

    l_doc_fwd = L.LSTMLayer(l_doc_embed,
                            NUM_HIDDEN,
                            grad_clipping=GRAD_CLIP,
                            mask_input=l_doc_mask,
                            gradient_steps=GRAD_STEPS,
                            precompute_input=True)
    l_doc_bkd = L.LSTMLayer(l_doc_embed,
                            NUM_HIDDEN,
                            grad_clipping=GRAD_CLIP,
                            mask_input=l_doc_mask,
                            gradient_steps=GRAD_STEPS,
                            precompute_input=True,
                            backwards=True)
    l_qry_fwd = L.LSTMLayer(l_qry_embed,
                            NUM_HIDDEN,
                            grad_clipping=GRAD_CLIP,
                            mask_input=l_qry_mask,
                            gradient_steps=GRAD_STEPS,
                            precompute_input=True)
    l_qry_bkd = L.LSTMLayer(l_qry_embed,
                            NUM_HIDDEN,
                            grad_clipping=GRAD_CLIP,
                            mask_input=l_qry_mask,
                            gradient_steps=GRAD_STEPS,
                            precompute_input=True,
                            backwards=True)

    l_doc_fwd_slice = L.SliceLayer(l_doc_fwd, -1, 1)
    l_doc_bkd_slice = L.SliceLayer(l_doc_bkd, 0, 1)
    l_qry_fwd_slice = L.SliceLayer(l_qry_fwd, -1, 1)
    l_qry_bkd_slice = L.SliceLayer(l_qry_bkd, 0, 1)

    r = L.DenseLayer(L.ElemwiseSumLayer([l_doc_fwd_slice, l_doc_bkd_slice]),
                     num_units=NUM_HIDDEN,
                     nonlinearity=lasagne.nonlinearities.tanh)
    u = L.DenseLayer(L.ElemwiseSumLayer([l_qry_fwd_slice, l_qry_bkd_slice]),
                     num_units=NUM_HIDDEN,
                     nonlinearity=lasagne.nonlinearities.tanh)

    g = L.DenseLayer(L.concat([r, u], axis=1),
                     num_units=EMBED_DIM,
                     W=lasagne.init.GlorotNormal(),
                     nonlinearity=lasagne.nonlinearities.tanh)

    l_out = L.DenseLayer(g,
                         num_units=vocab_size,
                         W=l_doc_embed.W.T,
                         nonlinearity=lasagne.nonlinearities.softmax,
                         b=None)

    return l_out
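
A hedged sketch (not part of the original code): compile an answer-probability
function for the build_model defined directly above; the four input variables are
the ones passed into build_model, and the module constants (EMBED_DIM, NUM_HIDDEN,
GRAD_CLIP, GRAD_STEPS) are assumed to be defined as in the original module.

import theano
import lasagne


def compile_predict_fn(vocab_size, doc_var, qry_var, doc_mask_var, qry_mask_var):
    l_out = build_model(vocab_size, doc_var, qry_var, doc_mask_var, qry_mask_var)
    probs = lasagne.layers.get_output(l_out, deterministic=True)   # (batch, vocab_size)
    return theano.function([doc_var, qry_var, doc_mask_var, qry_mask_var], probs)
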
Example #14
def build_network_from_ae(classn):
    input_var = T.tensor4('input_var');

    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var);
    layer = batch_norm(layers.Conv2DLayer(layer, 100,  filter_size=(5,5), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Conv2DLayer(layer, 120,  filter_size=(5,5), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = layers.Pool2DLayer(layer, pool_size=(2,2), stride=2, mode='average_inc_pad');
    layer = batch_norm(layers.Conv2DLayer(layer, 240,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Conv2DLayer(layer, 320,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = layers.Pool2DLayer(layer, pool_size=(2,2), stride=2, mode='average_inc_pad');
    layer = batch_norm(layers.Conv2DLayer(layer, 640,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    prely = batch_norm(layers.Conv2DLayer(layer, 1024, filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));

    featm    = batch_norm(layers.Conv2DLayer(prely, 640, filter_size=(1,1), nonlinearity=leaky_rectify));
    feat_map = batch_norm(layers.Conv2DLayer(featm, 100, filter_size=(1,1), nonlinearity=rectify, name="feat_map"));
    maskm    = batch_norm(layers.Conv2DLayer(prely, 100, filter_size=(1,1), nonlinearity=leaky_rectify));
    mask_rep = batch_norm(layers.Conv2DLayer(maskm, 1,   filter_size=(1,1), nonlinearity=None),   beta=None, gamma=None);
    mask_map = SoftThresPerc(mask_rep, perc=0.0, alpha=0.1, beta=init.Constant(0.5), tight=100.0, bias=-10, name="mask_map");
    enlyr    = ChInnerProdMerge(feat_map, mask_map, name="encoder");

    layer = batch_norm(layers.Deconv2DLayer(enlyr, 1024, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 640,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 640,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 320,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 320,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 240,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 120,  filter_size=(5,5), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 100,  filter_size=(5,5), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer =            layers.Deconv2DLayer(layer, 3,    filter_size=(1,1), stride=1, crop='same', nonlinearity=identity);

    glblf = batch_norm(layers.Conv2DLayer(prely, 128,  filter_size=(1,1), nonlinearity=leaky_rectify));
    glblf = layers.Pool2DLayer(glblf, pool_size=(5,5), stride=5, mode='average_inc_pad');
    glblf = batch_norm(layers.Conv2DLayer(glblf, 64,   filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    gllyr = batch_norm(layers.Conv2DLayer(glblf, 5,    filter_size=(1,1), nonlinearity=rectify), name="global_feature");

    glblf = batch_norm(layers.Deconv2DLayer(gllyr, 256, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(9,9), stride=5, crop=(2,2),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf =            layers.Deconv2DLayer(glblf, 3,   filter_size=(1,1), stride=1, crop='same', nonlinearity=identity);

    layer = layers.ElemwiseSumLayer([layer, glblf]);

    network = ReshapeLayer(layer, ([0], -1));
    layers.set_all_param_values(network, pickle.load(open(filename_model_ae, 'rb')));
    mask_map.beta.set_value(np.float32(-10.0*mask_map.beta.get_value()));

    # Adding more layers
    aug_var = T.matrix('aug_var');
    target_var = T.imatrix('targets');
    add_a = layers.Conv2DLayer(enlyr, 320, filter_size=(1,1), nonlinearity=leaky_rectify);
    add_b = layers.Conv2DLayer(add_a, 320, filter_size=(1,1), nonlinearity=leaky_rectify);
    add_c = layers.Conv2DLayer(add_b, 320, filter_size=(1,1), nonlinearity=leaky_rectify);
    add_d = layers.Conv2DLayer(add_c, 320, filter_size=(1,1), nonlinearity=leaky_rectify);
    add_0 = layers.Pool2DLayer(add_d, pool_size=(15,15), stride=15, mode='average_inc_pad');
    add_1 = layers.DenseLayer(add_0, 100, nonlinearity=leaky_rectify);

    add_2 = layers.DenseLayer(gllyr, 320, nonlinearity=leaky_rectify);
    add_3 = layers.DenseLayer(add_2, 320, nonlinearity=leaky_rectify);
    add_4 = layers.DenseLayer(add_3, 100, nonlinearity=leaky_rectify);

    aug_layer = layers.InputLayer(shape=(None, aug_fea_n), input_var=aug_var);

    cat_layer = lasagne.layers.ConcatLayer([add_1, add_4, aug_layer], axis=1);

    hidden_layer = layers.DenseLayer(cat_layer, 80, nonlinearity=leaky_rectify);
    network = layers.DenseLayer(hidden_layer, classn, nonlinearity=sigmoid);

    new_params = [add_a.W, add_a.b, add_b.W, add_b.b, add_c.W, add_c.b, add_d.W, add_d.b, add_1.W, add_1.b, add_2.W, add_2.b, add_3.W, add_3.b, add_4.W, add_4.b, hidden_layer.W, hidden_layer.b, network.W, network.b];

    return network, new_params, input_var, aug_var, target_var;
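
# Hedged fine-tuning sketch (an assumption, not part of the original example):
# only the freshly added head collected in new_params is updated, so the
# pretrained autoencoder weights stay frozen. Assumes the globals used above
# (PS, aug_fea_n, filename_model_ae) are defined and theano is imported.
classn = 12;  # illustrative number of output classes
network, new_params, input_var, aug_var, target_var = build_network_from_ae(classn);
prediction = layers.get_output(network);
loss = lasagne.objectives.binary_crossentropy(prediction, target_var).mean();
updates = lasagne.updates.nesterov_momentum(loss, new_params, learning_rate=5e-3, momentum=0.985);
train_fn = theano.function([input_var, aug_var, target_var], loss, updates=updates);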
def build_network_from_ae(classn):
    input_var = T.tensor4('input_var');

    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var);
    layer = batch_norm(layers.Conv2DLayer(layer, 100,  filter_size=(5,5), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Conv2DLayer(layer, 120,  filter_size=(5,5), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = layers.Pool2DLayer(layer, pool_size=(2,2), stride=2, mode='average_inc_pad');
    layer = batch_norm(layers.Conv2DLayer(layer, 240,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Conv2DLayer(layer, 320,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    layer = layers.Pool2DLayer(layer, pool_size=(2,2), stride=2, mode='average_inc_pad');
    layer = batch_norm(layers.Conv2DLayer(layer, 640,  filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    prely = batch_norm(layers.Conv2DLayer(layer, 1024, filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));

    featm    = batch_norm(layers.Conv2DLayer(prely, 640, filter_size=(1,1), nonlinearity=leaky_rectify));
    feat_map = batch_norm(layers.Conv2DLayer(featm, 100, filter_size=(1,1), nonlinearity=rectify, name="feat_map"));
    maskm    = batch_norm(layers.Conv2DLayer(prely, 100, filter_size=(1,1), nonlinearity=leaky_rectify));
    mask_rep = batch_norm(layers.Conv2DLayer(maskm, 1,   filter_size=(1,1), nonlinearity=None),   beta=None, gamma=None);
    mask_map = SoftThresPerc(mask_rep, perc=98.4, alpha=0.1, beta=init.Constant(0.5), tight=100.0, name="mask_map");
    enlyr    = ChInnerProdMerge(feat_map, mask_map, name="encoder");

    layer = batch_norm(layers.Deconv2DLayer(enlyr, 1024, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 640,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 640,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 320,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 320,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 240,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 120,  filter_size=(5,5), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer = batch_norm(layers.Deconv2DLayer(layer, 100,  filter_size=(5,5), stride=1, crop='same', nonlinearity=leaky_rectify));
    layer =            layers.Deconv2DLayer(layer, 3,    filter_size=(1,1), stride=1, crop='same', nonlinearity=identity);

    glblf = batch_norm(layers.Conv2DLayer(prely, 128,  filter_size=(1,1), nonlinearity=leaky_rectify));
    glblf = layers.Pool2DLayer(glblf, pool_size=(5,5), stride=5, mode='average_inc_pad');
    glblf = batch_norm(layers.Conv2DLayer(glblf, 64,   filter_size=(3,3), stride=1, pad='same', nonlinearity=leaky_rectify));
    gllyr = batch_norm(layers.Conv2DLayer(glblf, 5,    filter_size=(1,1), nonlinearity=rectify), name="global_feature");

    glblf = batch_norm(layers.Deconv2DLayer(gllyr, 256, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(9,9), stride=5, crop=(2,2),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 128, filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 64,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(4,4), stride=2, crop=(1,1),  nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf = batch_norm(layers.Deconv2DLayer(glblf, 32,  filter_size=(3,3), stride=1, crop='same', nonlinearity=leaky_rectify));
    glblf =            layers.Deconv2DLayer(glblf, 3,   filter_size=(1,1), stride=1, crop='same', nonlinearity=identity);

    layer = layers.ElemwiseSumLayer([layer, glblf]);

    network = ReshapeLayer(layer, ([0], -1));
    layers.set_all_param_values(network, pickle.load(open(filename_model_ae, 'rb')));
    old_params = layers.get_all_params(network, trainable=True);

    # Adding more layers
    aug_var = T.matrix('aug_var');
    target_var = T.imatrix('targets');
    add_a = batch_norm(layers.Conv2DLayer(enlyr, 320, filter_size=(1,1), nonlinearity=leaky_rectify));
    add_b = batch_norm(layers.Conv2DLayer(add_a, 320, filter_size=(1,1), nonlinearity=leaky_rectify));
    add_c = batch_norm(layers.Conv2DLayer(add_b, 320, filter_size=(1,1), nonlinearity=leaky_rectify));
    add_d = batch_norm(layers.Conv2DLayer(add_c, 320, filter_size=(1,1), nonlinearity=leaky_rectify));
    add_0 = layers.Pool2DLayer(add_d, pool_size=(25,25), stride=25, mode='average_inc_pad');
    add_1 = batch_norm(layers.DenseLayer(add_0, 100, nonlinearity=leaky_rectify));

    add_2 = batch_norm(layers.DenseLayer(gllyr, 320, nonlinearity=leaky_rectify));
    add_3 = batch_norm(layers.DenseLayer(add_2, 320, nonlinearity=leaky_rectify));
    add_4 = batch_norm(layers.DenseLayer(add_3, 100, nonlinearity=leaky_rectify));

    aug_layer = layers.InputLayer(shape=(None, aug_fea_n), input_var=aug_var);

    cat_layer = lasagne.layers.ConcatLayer([add_1, add_4, aug_layer], axis=1);

    hidden_layer = layers.DenseLayer(cat_layer, 80, nonlinearity=leaky_rectify);
    network = layers.DenseLayer(hidden_layer, classn, nonlinearity=sigmoid);

    layers.set_all_param_values(network, pickle.load(open('model_vals/deep_conv_classification_alt48_luad10_skcm10_lr0.py_e32_cv0.pkl', 'rb')));

    all_params = layers.get_all_params(network, trainable=True);
    new_params = [x for x in all_params if x not in old_params];

    return network, new_params, input_var, aug_var, target_var;
# Example #16
def build_1Dregression_v1(input_var=None,
                          input_width=None,
                          nin_units=12,
                          h_num_units=[64, 64],
                          h_grad_clip=1.0,
                          output_width=1):
    """
    A stacked bidirectional RNN for regression: a linear NIN layer first
    reduces the input dimensionality, then each stack runs forward and
    backward recurrent layers whose outputs pass through a dense layer and
    are summed, and the result is mean-pooled over the time dimension.

    Args:
        input_var (theano 3-tensor): minibatch of input sequence vectors
        input_width (int): length of input sequences
        nin_units (int): number of NIN features
        h_num_units (int list): no. of units in hidden layer in each stack
                                from bottom to top
        h_grad_clip (float): gradient clipping maximum value
        output_width (int): size of output layer (e.g. =1 for 1D regression)
    Returns:
        output layer (Lasagne layer object)
    """

    # Non-linearity hyperparameter
    nonlin = lasagne.nonlinearities.LeakyRectify(leakiness=0.15)

    # Input layer
    l_in = LL.InputLayer(shape=(None, 22, input_width), input_var=input_var)
    batchsize = l_in.input_var.shape[0]

    # NIN-layer
    l_in = LL.NINLayer(l_in,
                       num_units=nin_units,
                       nonlinearity=lasagne.nonlinearities.linear)

    l_in_1 = LL.DimshuffleLayer(l_in, (0, 2, 1))

    # RNN layers
    for h in h_num_units:
        # Forward layers
        l_forward_0 = LL.RecurrentLayer(l_in_1,
                                        nonlinearity=nonlin,
                                        num_units=h,
                                        backwards=False,
                                        learn_init=True,
                                        grad_clipping=h_grad_clip,
                                        unroll_scan=True,
                                        precompute_input=True)

        l_forward_0a = LL.ReshapeLayer(l_forward_0, (-1, h))
        l_forward_0b = LL.DenseLayer(l_forward_0a,
                                     num_units=h,
                                     nonlinearity=nonlin)
        l_forward_0c = LL.ReshapeLayer(l_forward_0b,
                                       (batchsize, input_width, h))

        # Backward layers
        l_backward_0 = LL.RecurrentLayer(l_in_1,
                                         nonlinearity=nonlin,
                                         num_units=h,
                                         backwards=True,
                                         learn_init=True,
                                         grad_clipping=h_grad_clip,
                                         unroll_scan=True,
                                         precompute_input=True)

        l_backward_0a = LL.ReshapeLayer(l_backward_0, (-1, h))
        l_backward_0b = LL.DenseLayer(l_backward_0a,
                                      num_units=h,
                                      nonlinearity=nonlin)
        l_backward_0c = LL.ReshapeLayer(l_backward_0b,
                                        (batchsize, input_width, h))

        l_in_1 = LL.ElemwiseSumLayer([l_forward_0c, l_backward_0c])

    # Output layers
    network_0a = LL.ReshapeLayer(l_in_1, (-1, h_num_units[-1]))
    network_0b = LL.DenseLayer(network_0a,
                               num_units=output_width,
                               nonlinearity=nonlin)
    network_0c = LL.ReshapeLayer(network_0b,
                                 (batchsize, input_width, output_width))

    output_net_1 = LL.FlattenLayer(network_0c, outdim=2)
    output_net_2 = LL.FeaturePoolLayer(output_net_1,
                                       pool_size=input_width,
                                       pool_function=T.mean)

    return output_net_2
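
# A minimal usage sketch (an assumption, not part of the original example):
# build the regression network for 128-step sequences and compile a
# deterministic prediction function. Assumes theano, T and LL are imported.
example_input = T.tensor3('example_input')       # (batch, 22 channels, time)
example_net = build_1Dregression_v1(input_var=example_input, input_width=128)
example_output = LL.get_output(example_net, deterministic=True)
example_predict = theano.function([example_input], example_output)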
# Example #17
def run_experiment(args):
    import os
    # set environment variables for theano
    os.environ['THEANO_FLAGS'] = "lib.cnmem=" + str(args.mem) + ",device=gpu" + str(args.gpu)

    import threading
    import Queue
    import inspect
    import shutil
    import time
    import logging
    import six
    import collections
    import itertools
    import random
    import numpy as np
    import scipy
    import theano
    import theano.tensor as T
    import lasagne
    import lasagne.layers as ll
    import lasagne.nonlinearities as ln
    import parmesan

    import layers
    import utils
    import cfdataset

#----------------------------------------------------------------
# Arguments and Settings
    floatX = theano.config.floatX
    logger = logging.getLogger()
    np.random.seed(args.seed)

    # copy file for reproducibility
    dirname = utils.setup_logging(args.message, args.loglv)
    script_src = os.path.abspath(inspect.getfile(inspect.currentframe()))
    script_dst = os.path.join(dirname, os.path.split(script_src)[1])
    shutil.copyfile(script_src, script_dst)

    # print arguments
    args_dict = collections.OrderedDict(sorted(vars(args).items()))
    for k, v in six.iteritems(args_dict):
        logger.info("  %20s: %s" % (k, v))

    # get arguments
    D_u, D_v = args.D_u, args.D_v
    lr = args.lr
    weight_decay = args.weight_decay
    lookahead = args.lookahead
    max_epoch = args.max_epoch
    batch_size_u, batch_size_v = args.batch_size_u, args.batch_size_v
    nonlin_enc = layers.get_nonlin(args.nonlin_enc)
    nonlin_dec = layers.get_nonlin(args.nonlin_dec)
    negative_ratio = args.negative_ratio

#----------------------------------------------------------------
# Dataset
    dataset = cfdataset.CF_implicit_data(name=args.dataset)

    N_u, N_v = dataset.N_users, dataset.N_items
    T_matrix = dataset.T_matrix.astype(floatX)
    R_matrix = dataset.R_matrix.astype(floatX)
    R_negative_matrix = 1 - R_matrix
    assert np.all(R_matrix == (T_matrix > 0.5))
    assert np.all((R_negative_matrix == 1) == (T_matrix == 0))

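    # Leave-one-out split: each user's latest interaction (R_latest) becomes
    # the test item and the second latest (R_2nd_latest) the validation item;
    # both are zeroed out of the training matrices below, while
    # R_matrix_for_test keeps the validation item for conditioning at test time.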
    R_test = dataset.R_latest
    T_matrix[np.arange(N_u), R_test] = 0
    R_matrix[np.arange(N_u), R_test] = 0
    assert np.all(R_matrix == (T_matrix > 0.5))

    R_matrix_for_test = R_matrix.copy()

    R_valid = dataset.R_2nd_latest
    T_matrix[np.arange(N_u), R_valid] = 0
    R_matrix[np.arange(N_u), R_valid] = 0
    assert np.all(R_matrix == (T_matrix > 0.5))

    N_interaction = dataset.N_interaction - N_u * 2

    assert np.all(R_valid != R_test)
    assert np.all(R_matrix_for_test[np.arange(N_u), R_valid] == 1)
    assert np.all(R_matrix_for_test[np.arange(N_u), R_test] == 0)
    assert np.all(R_matrix[np.arange(N_u), R_valid] == 0)
    assert np.all(R_matrix[np.arange(N_u), R_test] == 0)
    assert np.all(T_matrix[np.arange(N_u), R_valid] == 0)
    assert np.all(T_matrix[np.arange(N_u), R_test] == 0)
    assert N_interaction == np.count_nonzero(R_matrix)
    assert N_interaction + N_u == np.count_nonzero(R_matrix_for_test)

    logger.info("%d users, %d items, %d training interactions (%d total, 2 * %d held out for validation and test)." % (N_u, N_v, N_interaction, dataset.N_interaction, N_u))

#----------------------------------------------------------------
# numpy variables
    # encoded vectors
    np_enc_u_h = np.zeros((N_u, D_u), dtype=floatX)
    np_enc_v_h = np.zeros((N_v, D_v), dtype=floatX)

#----------------------------------------------------------------
# Symbolic variables
    sym_lr = T.fscalar('lr')

    sym_Ru_pos = T.fmatrix('Ru_pos')
    sym_dr_Ru_pos = T.fscalar('dr_Ru_pos')
    sym_uid_origin_pos = T.ivector('uid_origin_pos')
    sym_uid_minibatch_pos = T.ivector('uid_minibatch_pos')

    sym_Ru_neg = T.fmatrix('Ru_neg')
    sym_dr_Ru_neg = T.fscalar('dr_Ru_neg')
    sym_uid_origin_neg = T.ivector('uid_origin_neg')
    sym_uid_minibatch_neg = T.ivector('uid_minibatch_neg')

    sym_Rv = T.fmatrix('Rv')
    sym_dr_Rv = T.fscalar('dr_Rv')
    sym_vid_origin_pos = T.ivector('vid_origin_pos')
    sym_vid_minibatch_pos = T.ivector('vid_minibatch_pos')
    sym_vid_origin_neg = T.ivector('vid_origin_neg')
    sym_vid_minibatch_neg = T.ivector('vid_minibatch_neg')

    sym_R_minibatch = T.fvector('R_minibatch')

#----------------------------------------------------------------
# Model setup (training model)
    logger.info("Setting up model ...")

    # Input layers
    l_in_Ru_pos = ll.InputLayer((None, N_v), input_var=sym_Ru_pos, name='l_in_Ru_pos')
    l_in_uid_origin_pos = ll.InputLayer((None,), input_var=sym_uid_origin_pos, name='l_in_uid_origin_pos')
    l_in_uid_minibatch_pos = ll.InputLayer((None,), input_var=sym_uid_minibatch_pos, name='l_in_uid_minibatch_pos')

    l_in_Ru_neg = ll.InputLayer((None, N_v), input_var=sym_Ru_neg, name='l_in_Ru_neg')
    l_in_uid_origin_neg = ll.InputLayer((None,), input_var=sym_uid_origin_neg, name='l_in_uid_origin_neg')
    l_in_uid_minibatch_neg = ll.InputLayer((None,), input_var=sym_uid_minibatch_neg, name='l_in_uid_minibatch_neg')

    l_in_Rv = ll.InputLayer((None, N_u), input_var=sym_Rv, name='l_in_Rv')
    l_in_vid_origin_pos = ll.InputLayer((None,), input_var=sym_vid_origin_pos, name='l_in_vid_origin_pos')
    l_in_vid_minibatch_pos = ll.InputLayer((None,), input_var=sym_vid_minibatch_pos, name='l_in_vid_minibatch_pos')
    l_in_vid_origin_neg = ll.InputLayer((None,), input_var=sym_vid_origin_neg, name='l_in_vid_origin_neg')
    l_in_vid_minibatch_neg = ll.InputLayer((None,), input_var=sym_vid_minibatch_neg, name='l_in_vid_minibatch_neg')

    # Dropout layers
    l_in_Ru_pos = ll.DropoutLayer(l_in_Ru_pos, p=sym_dr_Ru_pos, rescale=False, name='Dropout-l_in_Ru_pos')
    l_in_Ru_neg = ll.DropoutLayer(l_in_Ru_neg, p=sym_dr_Ru_neg, rescale=False, name='Dropout-l_in_Ru_neg')
    l_in_Rv = ll.DropoutLayer(l_in_Rv, p=sym_dr_Rv, rescale=False, name='Dropout-l_in_Rv')

    # User encoder model h(Ru)
    l_enc_u_h_pos = ll.DenseLayer(l_in_Ru_pos, num_units=D_u, nonlinearity=nonlin_enc, name='l_enc_u_h_pos')
    l_enc_u_h_neg = ll.DenseLayer(l_in_Ru_neg, num_units=D_u, nonlinearity=nonlin_enc, W=l_enc_u_h_pos.W, b=l_enc_u_h_pos.b, name='l_enc_u_h_neg')

    # Item encoder model h(Rv)
    l_enc_v_h = ll.DenseLayer(l_in_Rv, num_units=D_v, nonlinearity=nonlin_enc, name='l_enc_v_h')

    # User decoder model s(h(Ru))
    l_dec_u_s_pos = layers.SimpleDecodeLayer([l_enc_u_h_pos, l_in_vid_origin_pos, l_in_uid_minibatch_pos], num_units=N_v, nonlinearity=None, name='l_dec_u_s_pos')
    l_dec_u_s_neg = layers.SimpleDecodeLayer([l_enc_u_h_neg, l_in_vid_origin_neg, l_in_uid_minibatch_neg], num_units=N_v, V=l_dec_u_s_pos.V, Q=l_dec_u_s_pos.Q, b=l_dec_u_s_pos.b, nonlinearity=None, name='l_dec_u_s_neg')
    l_dec_u_s_all = ll.ConcatLayer([l_dec_u_s_pos, l_dec_u_s_neg], axis=0)

    # Item decoder model s(h(Rv))
    l_dec_v_s_pos = layers.SimpleDecodeLayer([l_enc_v_h, l_in_uid_origin_pos, l_in_vid_minibatch_pos], num_units=N_u, nonlinearity=None, name='l_dec_v_s_pos')
    l_dec_v_s_neg = layers.SimpleDecodeLayer([l_enc_v_h, l_in_uid_origin_neg, l_in_vid_minibatch_neg], num_units=N_u, V=l_dec_v_s_pos.V, Q=l_dec_v_s_pos.Q, b=l_dec_v_s_pos.b, nonlinearity=None, name='l_dec_v_s_neg')
    l_dec_v_s_all = ll.ConcatLayer([l_dec_v_s_pos, l_dec_v_s_neg], axis=0)

    # Likelihood model p(R)
    l_uv_s_train = ll.ElemwiseSumLayer([l_dec_u_s_all, l_dec_v_s_all], name='l_uv_s_train')
    l_r_train = ll.NonlinearityLayer(l_uv_s_train, nonlinearity=ln.sigmoid, name='l_r_train')
    l_uv_s_test = ll.ElemwiseSumLayer([l_dec_u_s_pos, l_dec_v_s_pos], name='l_uv_s_test')
    l_r_test = ll.NonlinearityLayer(l_uv_s_test, nonlinearity=ln.sigmoid, name='l_r_test')

#----------------------------------------------------------------
# Likelihood and RMSE
    # training
    p_r_train, = ll.get_output([l_r_train], deterministic=False)

    log_p_r = T.mean(parmesan.distributions.log_bernoulli(sym_R_minibatch, p_r_train, eps=1e-6))
    regularization = lasagne.regularization.regularize_network_params([l_r_train], lasagne.regularization.l2)
    cost_function = - log_p_r + weight_decay * regularization

    SE_train = T.sum(T.sqr(sym_R_minibatch - p_r_train))

    # test
    sym_enc_u_h = T.fmatrix('enc_u_h')
    sym_enc_v_h = T.fmatrix('enc_v_h')
    enc_u_h_out, enc_v_h_out = ll.get_output([l_enc_u_h_pos, l_enc_v_h], deterministic=True)
    p_r_test, = ll.get_output([l_r_test], inputs={l_enc_u_h_pos:sym_enc_u_h, l_enc_v_h:sym_enc_v_h}, deterministic=True)
    test_scores = p_r_test.reshape((-1, 101))  # per user: 1 positive (column 0) + 100 negatives
    # rank of the positive item in descending score order: its column index, 0,
    # is the minimum entry of the reversed argsort
    ranking = test_scores.argsort()[:, ::-1].argmin(axis=1)

#----------------------------------------------------------------
# Gradients
    clip_grad = 1
    max_norm = 5

    params = ll.get_all_params([l_r_train,], trainable=True)
    for p in params:
        logger.debug("%s: %s" % (p, p.get_value().shape))

    grads = T.grad(cost_function, params)
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]

    #updates = lasagne.updates.adam(cgrads, params, beta1=0.9, beta2=0.999, epsilon=1e-4, learning_rate=sym_lr)
    updates, sym_vars_list = utils.adam(cgrads, params, beta1=0.9, beta2=0.999, epsilon=1e-4, learning_rate=sym_lr)

    # moving average
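    # (each shadow variable tracks an exponential moving average of its
    # parameter with decay 0.99; avg_givens later substitutes these averages
    # for the raw parameters in the "avg model" evaluation functions)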
    params_avg = []
    for param in params:
        value = param.get_value(borrow=True)
        params_avg.append(theano.shared(np.zeros(value.shape, dtype=value.dtype),
                              broadcastable=param.broadcastable,
                              name=param.name + '_avg'))
    avg_updates = [(a, a + 0.01 * (p - a)) for p, a in zip(params, params_avg)]
    avg_givens = [(p, a) for p, a in zip(params, params_avg)]
    all_updates = updates.items() + avg_updates

#----------------------------------------------------------------
# Compile
    # training function
    logger.info("Compiling train_model ...")
    train_model = theano.function(
            inputs=[sym_lr,
                sym_uid_origin_pos, sym_uid_minibatch_pos, sym_vid_origin_pos, sym_vid_minibatch_pos,
                sym_uid_origin_neg, sym_uid_minibatch_neg, sym_vid_origin_neg, sym_vid_minibatch_neg,
                sym_Ru_pos, sym_Ru_neg, sym_Rv,
                sym_R_minibatch, sym_dr_Ru_pos, sym_dr_Ru_neg, sym_dr_Rv],
            outputs=[log_p_r, SE_train],
            updates=all_updates,
            )

    # encoders
    logger.info("Compiling encode_model ...")
    u_encode_model = theano.function(inputs=[sym_Ru_pos], outputs=enc_u_h_out)
    v_encode_model = theano.function(inputs=[sym_Rv], outputs=enc_v_h_out)

    u_encode_avg_model = theano.function(inputs=[sym_Ru_pos], outputs=enc_u_h_out, givens=avg_givens, on_unused_input='ignore')
    v_encode_avg_model = theano.function(inputs=[sym_Rv], outputs=enc_v_h_out, givens=avg_givens, on_unused_input='ignore')

    # test function
    logger.info("Compiling test_model ...")
    test_model = theano.function(
            inputs=[sym_uid_origin_pos, sym_uid_minibatch_pos, sym_vid_origin_pos, sym_vid_minibatch_pos, sym_enc_u_h, sym_enc_v_h],
            outputs=[ranking],
            )

    test_avg_model = theano.function(
            inputs=[sym_uid_origin_pos, sym_uid_minibatch_pos, sym_vid_origin_pos, sym_vid_minibatch_pos, sym_enc_u_h, sym_enc_v_h],
            outputs=[ranking],
            givens=avg_givens, on_unused_input='ignore',
            )

#----------------------------------------------------------------
# Predict function
    def compute_hidden_for(for_which_set='test', avg_model=False):
        assert for_which_set in ['valid', 'test']
        if for_which_set == 'valid':
            R_matrix_cond = R_matrix
        else:
            R_matrix_cond = R_matrix_for_test

        # precompute hidden representations
        u_end = 0
        while u_end < N_u:
            u_start, u_end = u_end, min(u_end + batch_size_u, N_u)
            # create user mini-batch
            u_batch_ids = np.arange(u_start, u_end).astype('int32')
            # create conditionals
            Ru_minibatch = R_matrix_cond[u_batch_ids,:]
            # encode
            if avg_model:
                np_enc_u_h[u_batch_ids] = u_encode_avg_model(Ru_minibatch)
            else:
                np_enc_u_h[u_batch_ids] = u_encode_model(Ru_minibatch)

        v_end = 0
        while v_end < N_v:
            v_start, v_end = v_end, min(v_end + batch_size_v, N_v)
            # create item mini-batch
            v_batch_ids = np.arange(v_start, v_end).astype('int32')
            # create conditionals
            Rv_minibatch = R_matrix_cond[:,v_batch_ids].T
            # encode
            if avg_model:
                np_enc_v_h[v_batch_ids] = v_encode_avg_model(Rv_minibatch)
            else:
                np_enc_v_h[v_batch_ids] = v_encode_model(Rv_minibatch)

    def predict_once(which_set='test', avg_model=False):
        assert which_set in ['valid', 'test']
        if which_set == 'valid':
            R_predict = R_valid
        else:
            R_predict = R_test

        # test statistics
        rankings = []

        # loop users
        u_end = 0
        while u_end < N_u:
            u_start, u_end = u_end, min(u_end + batch_size_u, N_u)

            # create user mini-batch and item mini-batch
            u_batch_ids = np.arange(u_start, u_end).astype('int32')

            vid_negative = np.asarray([np.random.choice(np.where(row)[0], 100, replace=False) for row in R_negative_matrix[u_batch_ids]], dtype='int32')
            vid = np.concatenate([R_predict[u_batch_ids].reshape(-1,1), vid_negative], axis=1).flatten()
            uid_origin = np.repeat(u_batch_ids, 101)
            uid_minibatch = uid_origin - u_start

            # get encoded vectors
            Ru_encoded = np_enc_u_h[u_batch_ids]

            if avg_model:
                rankings_minibatch, = test_avg_model(uid_origin, uid_minibatch, vid, vid, Ru_encoded, np_enc_v_h)
            else:
                rankings_minibatch, = test_model(uid_origin, uid_minibatch, vid, vid, Ru_encoded, np_enc_v_h)
            rankings.append(rankings_minibatch)

        rankings = np.concatenate(rankings)
        HR = np.mean(rankings < 10)                               # Hit Ratio@10
        NDCG = np.mean((rankings < 10) / np.log2(rankings + 2))   # NDCG@10

        return HR, NDCG

    def predict(which_set='test', avg=10, avg_model=False):
        compute_hidden_for(for_which_set=which_set, avg_model=avg_model)
        HR_list = []
        NDCG_list = []
        for i in range(avg):
            hr, ndcg = predict_once(which_set=which_set, avg_model=avg_model)
            HR_list.append(hr)
            NDCG_list.append(ndcg)
        HR_mean = np.mean(HR_list)
        NDCG_mean = np.mean(NDCG_list)
        HR_std = np.std(HR_list)
        NDCG_std = np.std(NDCG_list)
        # print info after test finished
        eval_msg = which_set if not avg_model else which_set + ' (avg model)'
        logger.critical("%-20s HR = %.3f +- %.3f, NDCG = %.3f +- %.3f." % (eval_msg, HR_mean, HR_std, NDCG_mean, NDCG_std))
        return HR_mean, NDCG_mean

#----------------------------------------------------------------
# Training
    best_valid_result = - np.inf
    best_model = None
    best_auxiliary = None
    n_epocs_without_improvement = 0

    minibatch_queue = Queue.Queue(maxsize=10)

    # function for preparing minibatches
    def prepare_minibatch(minibatch_list):
        # loop mini-batches
        for u_batch_ids, v_batch_ids in minibatch_list:
            Rv_minibatch = R_matrix[:,v_batch_ids].T
            Rv_minibatch[:,u_batch_ids] = 0
            Ru_minibatch_neg = R_matrix[u_batch_ids,:]
            #Ru_minibatch_neg[:,v_batch_ids] = 0

            # create training samples mini-batch
            T_matrix_minibatch = T_matrix[np.ix_(u_batch_ids, v_batch_ids)]
            T_matrix_minibatch_sparse = scipy.sparse.coo_matrix(T_matrix_minibatch)
            n_interactions_minibatch = T_matrix_minibatch_sparse.count_nonzero()
            Ru_minibatch_pos = ((T_matrix[u_batch_ids[T_matrix_minibatch_sparse.row]] < T_matrix_minibatch_sparse.data.reshape(n_interactions_minibatch, 1)) & (T_matrix[u_batch_ids[T_matrix_minibatch_sparse.row]] > 0)).astype(floatX)

            uid_minibatch_pos = np.arange(n_interactions_minibatch).astype('int32')
            uid_origin_pos = u_batch_ids[T_matrix_minibatch_sparse.row]
            vid_minibatch_pos = T_matrix_minibatch_sparse.col
            vid_origin_pos = v_batch_ids[vid_minibatch_pos]

            R_matrix_negative_minibatch = 1 - R_matrix[np.ix_(u_batch_ids, v_batch_ids)]
            R_matrix_negative_minibatch_sparse = scipy.sparse.coo_matrix(R_matrix_negative_minibatch)
            n_negative_total = R_matrix_negative_minibatch_sparse.count_nonzero()
            assert n_negative_total + n_interactions_minibatch == u_batch_ids.size * v_batch_ids.size
            choice_negative = np.random.choice(n_negative_total, min(n_negative_total, np.int(n_interactions_minibatch * negative_ratio)), replace=False)

            uid_minibatch_neg = R_matrix_negative_minibatch_sparse.row[choice_negative]
            uid_origin_neg = u_batch_ids[uid_minibatch_neg]
            vid_minibatch_neg = R_matrix_negative_minibatch_sparse.col[choice_negative]
            vid_origin_neg = v_batch_ids[vid_minibatch_neg]

            R_minibatch = np.concatenate([np.ones_like(T_matrix_minibatch_sparse.data), R_matrix_negative_minibatch_sparse.data[choice_negative] * 0])

            n_pred_step = R_minibatch.shape[0]
            if n_pred_step == 0:
                raise ValueError('No interactions in this minibatch.')

            dr_Ru_pos = min(max(1 - 2 * np.random.rand(), 0), 0.8)
            dr_Ru_neg = 0.2
            dr_Rv = min(max(1 - 2 * np.random.rand(), 0), 0.8)

            # package everything into a tuple
            data_minibatch_package = (
                    uid_origin_pos, uid_minibatch_pos, vid_origin_pos, vid_minibatch_pos,
                    uid_origin_neg, uid_minibatch_neg, vid_origin_neg, vid_minibatch_neg,
                    Ru_minibatch_pos, Ru_minibatch_neg, Rv_minibatch,
                    R_minibatch, dr_Ru_pos, dr_Ru_neg, dr_Rv)

            # enqueue
            minibatch_queue.put((n_pred_step, data_minibatch_package))

    logger.warning("Training started.")
    # loop epoch
    for epoch in range(1, 1+max_epoch):
        epoch_start_time = time.time()

        # training statistics
        LL_epoch, SE_epoch = 0, 0
        n_pred_epoch = 0

        u_order = np.array_split(np.random.permutation(N_u).astype('int32'), N_u // batch_size_u + 1)
        v_order = np.array_split(np.random.permutation(N_v).astype('int32'), N_v // batch_size_v + 1)
        minibatch_order = list(itertools.product(u_order, v_order))
        random.shuffle(minibatch_order)

        n_threads = 5
        n_minibatch_thread = len(minibatch_order) // n_threads + 1
        for t in range(n_threads):
            thr = threading.Thread(target=prepare_minibatch, args=(minibatch_order[t*n_minibatch_thread:(t+1)*n_minibatch_thread],))
            thr.setDaemon(True)
            thr.start()

        for step in range(len(minibatch_order)):
            n_pred_step, data_minibatch_package = minibatch_queue.get()
            # update parameters and calculate likelihood and RMSE
            LL_step, SE_step = train_model(lr, *data_minibatch_package)
            minibatch_queue.task_done()
            LL_epoch += LL_step * n_pred_step
            SE_epoch += SE_step
            n_pred_epoch += n_pred_step

        assert minibatch_queue.qsize() == 0

        # print info after epoch finished
        LL_epoch /= n_pred_epoch
        RMSE_epoch = np.sqrt(SE_epoch/n_pred_epoch)

        epoch_end_time = time.time()
        logger.info("Epoch %d, training RMSE = %f, LL = %f (%d training ratings). Elapsed time %.1fs." % (epoch, RMSE_epoch, LL_epoch, n_pred_epoch, epoch_end_time-epoch_start_time))

        # validation
        HR_valid, NDCG_valid = predict('valid')
        HR_test, NDCG_test = predict('test')
        HR_test, NDCG_test = predict('test', avg_model=True)

        # termination
        #if NDCG_valid > best_valid_result:
        if HR_valid > best_valid_result:
            n_epocs_without_improvement = 0
            #best_valid_result = NDCG_valid
            best_valid_result = HR_valid
            best_model = ll.get_all_param_values([l_r_train,], trainable=True)
            best_auxiliary = utils.get_all_shvar_values(sym_vars_list)
            logger.debug("New best model found!")
        else:
            n_epocs_without_improvement += 1
            if n_epocs_without_improvement >= lookahead:
                ll.set_all_param_values([l_r_train,], best_model, trainable=True)
                utils.set_all_shvar_values(sym_vars_list, best_auxiliary)
                if lr > 1e-5:
                    n_epocs_without_improvement = 0
                    lr /= 4
                    logger.error("Learning rate = %f now." % lr)
                else:
                    logger.error("Training finished.")
                    break

#----------------------------------------------------------------
# Test
    HR_test, NDCG_test = predict('test')
    HR_test, NDCG_test = predict('test', avg_model=True)

#----------------------------------------------------------------
# Summarization
    for k, v in six.iteritems(args_dict):
        logger.info("  %20s: %s" % (k, v))
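
# Hedged driver sketch (an assumption, not part of the original example):
# run_experiment reads its hyper-parameters from an argparse-style namespace.
# The attribute names match the ones read above; all values are illustrative
# and the dataset name is a placeholder.
if __name__ == '__main__':
    import argparse
    args = argparse.Namespace(
        mem=0.95, gpu=0, seed=123, message='baseline', loglv='info',
        D_u=256, D_v=256, lr=1e-3, weight_decay=2e-5, lookahead=20,
        max_epoch=1000, batch_size_u=256, batch_size_v=256,
        nonlin_enc='tanh', nonlin_dec='tanh', negative_ratio=5.0,
        dataset='ml-1m')
    run_experiment(args)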
# Example #18
def build_network_from_ae(classn):
    input_var = T.tensor4('input_var')

    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var)
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           100,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           120,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = layers.Pool2DLayer(layer,
                               pool_size=(2, 2),
                               stride=2,
                               mode='average_inc_pad')
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           240,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           320,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = layers.Pool2DLayer(layer,
                               pool_size=(2, 2),
                               stride=2,
                               mode='average_inc_pad')
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           640,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    prely = batch_norm(
        layers.Conv2DLayer(layer,
                           1024,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))

    featm = batch_norm(
        layers.Conv2DLayer(prely,
                           640,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    feat_map = batch_norm(
        layers.Conv2DLayer(featm,
                           100,
                           filter_size=(1, 1),
                           nonlinearity=rectify,
                           name="feat_map"))
    mask_map = feat_map
    enlyr = feat_map
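    # Unlike the earlier builders, this variant has no SoftThresPerc mask and
    # no ChInnerProdMerge: the rectified feature map itself is used as the
    # encoder output.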

    layer = batch_norm(
        layers.Deconv2DLayer(enlyr,
                             1024,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             640,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             640,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             320,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             320,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             240,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             120,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             100,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = layers.Deconv2DLayer(layer,
                                 3,
                                 filter_size=(1, 1),
                                 stride=1,
                                 crop='same',
                                 nonlinearity=identity)

    glblf = batch_norm(
        layers.Conv2DLayer(prely,
                           128,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    glblf = layers.Pool2DLayer(glblf,
                               pool_size=(5, 5),
                               stride=5,
                               mode='average_inc_pad')
    glblf = batch_norm(
        layers.Conv2DLayer(glblf,
                           64,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    gllyr = batch_norm(layers.Conv2DLayer(glblf,
                                          5,
                                          filter_size=(1, 1),
                                          nonlinearity=rectify),
                       name="global_feature")

    glblf = batch_norm(
        layers.Deconv2DLayer(gllyr,
                             256,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(9, 9),
                             stride=5,
                             crop=(2, 2),
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = layers.Deconv2DLayer(glblf,
                                 3,
                                 filter_size=(1, 1),
                                 stride=1,
                                 crop='same',
                                 nonlinearity=identity)

    layer = layers.ElemwiseSumLayer([layer, glblf])

    network = ReshapeLayer(layer, ([0], -1))
    layers.set_all_param_values(network,
                                pickle.load(open(filename_model_ae, 'rb')))
    old_params = layers.get_all_params(network, trainable=True)

    # Adding more layers
    aug_var = T.matrix('aug_var')
    target_var = T.imatrix('targets')
    add_a = batch_norm(
        layers.Conv2DLayer(enlyr,
                           320,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    add_b = batch_norm(
        layers.Conv2DLayer(add_a,
                           320,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    add_c = batch_norm(
        layers.Conv2DLayer(add_b,
                           320,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    add_d = batch_norm(
        layers.Conv2DLayer(add_c,
                           320,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    add_0 = layers.Pool2DLayer(add_d,
                               pool_size=(15, 15),
                               stride=15,
                               mode='average_inc_pad')
    add_1 = batch_norm(
        layers.DenseLayer(add_0, 100, nonlinearity=leaky_rectify))

    add_2 = batch_norm(
        layers.DenseLayer(gllyr, 320, nonlinearity=leaky_rectify))
    add_3 = batch_norm(
        layers.DenseLayer(add_2, 320, nonlinearity=leaky_rectify))
    add_4 = batch_norm(
        layers.DenseLayer(add_3, 100, nonlinearity=leaky_rectify))

    aug_layer = layers.InputLayer(shape=(None, aug_fea_n), input_var=aug_var)

    cat_layer = lasagne.layers.ConcatLayer([add_1, add_4, aug_layer], axis=1)

    hidden_layer = layers.DenseLayer(cat_layer, 80, nonlinearity=leaky_rectify)
    network = layers.DenseLayer(hidden_layer, classn, nonlinearity=sigmoid)

    all_params = layers.get_all_params(network, trainable=True)
    new_params = [x for x in all_params if x not in old_params]

    return network, new_params, input_var, aug_var, target_var
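
# Hedged evaluation sketch (an assumption, not part of the original example):
# compile a deterministic forward pass so batch normalisation uses its stored
# statistics. Assumes the globals used above (PS, aug_fea_n, filename_model_ae)
# are defined and theano is imported.
network, new_params, input_var, aug_var, target_var = build_network_from_ae(classn=12)
val_prediction = layers.get_output(network, deterministic=True)
val_fn = theano.function([input_var, aug_var], val_prediction)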
    def build_network(self, K, vocab_size, W_init):

        l_docin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[0])
        l_doctokin = L.InputLayer(shape=(None, None), input_var=self.inps[1])
        l_qin = L.InputLayer(shape=(None, None, 1), input_var=self.inps[2])
        l_qtokin = L.InputLayer(shape=(None, None), input_var=self.inps[3])
        l_docmask = L.InputLayer(shape=(None, None), input_var=self.inps[6])
        l_qmask = L.InputLayer(shape=(None, None), input_var=self.inps[7])
        l_tokin = L.InputLayer(shape=(None, MAX_WORD_LEN),
                               input_var=self.inps[8])
        l_tokmask = L.InputLayer(shape=(None, MAX_WORD_LEN),
                                 input_var=self.inps[9])
        l_featin = L.InputLayer(shape=(None, None), input_var=self.inps[11])

        doc_shp = self.inps[1].shape
        qry_shp = self.inps[3].shape

        l_docembed = L.EmbeddingLayer(l_docin,
                                      input_size=vocab_size,
                                      output_size=self.embed_dim,
                                      W=W_init)  # B x N x 1 x DE
        l_doce = L.ReshapeLayer(
            l_docembed, (doc_shp[0], doc_shp[1], self.embed_dim))  # B x N x DE
        l_qembed = L.EmbeddingLayer(l_qin,
                                    input_size=vocab_size,
                                    output_size=self.embed_dim,
                                    W=l_docembed.W)
        l_qembed = L.ReshapeLayer(
            l_qembed, (qry_shp[0], qry_shp[1], self.embed_dim))  # B x N x DE
        l_fembed = L.EmbeddingLayer(l_featin, input_size=2,
                                    output_size=2)  # B x N x 2

        if self.train_emb == 0:
            l_docembed.params[l_docembed.W].remove('trainable')

        # char embeddings
        if self.use_chars:
            l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars,
                                        2 * self.char_dim)  # T x L x D
            l_fgru = L.GRULayer(l_lookup,
                                self.char_dim,
                                grad_clipping=GRAD_CLIP,
                                mask_input=l_tokmask,
                                gradient_steps=GRAD_STEPS,
                                precompute_input=True,
                                only_return_final=True)
            l_bgru = L.GRULayer(l_lookup,
                                2 * self.char_dim,
                                grad_clipping=GRAD_CLIP,
                                mask_input=l_tokmask,
                                gradient_steps=GRAD_STEPS,
                                precompute_input=True,
                                backwards=True,
                                only_return_final=True)  # T x 2D
            l_fwdembed = L.DenseLayer(l_fgru,
                                      self.embed_dim / 2,
                                      nonlinearity=None)  # T x DE/2
            l_bckembed = L.DenseLayer(l_bgru,
                                      self.embed_dim / 2,
                                      nonlinearity=None)  # T x DE/2
            l_embed = L.ElemwiseSumLayer([l_fwdembed, l_bckembed], coeffs=1)
            l_docchar_embed = IndexLayer([l_doctokin, l_embed])  # B x N x DE/2
            l_qchar_embed = IndexLayer([l_qtokin, l_embed])  # B x Q x DE/2

            l_doce = L.ConcatLayer([l_doce, l_docchar_embed], axis=2)
            l_qembed = L.ConcatLayer([l_qembed, l_qchar_embed], axis=2)

        l_fwd_q = L.GRULayer(l_qembed,
                             self.nhidden,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True,
                             only_return_final=False)
        l_bkd_q = L.GRULayer(l_qembed,
                             self.nhidden,
                             grad_clipping=GRAD_CLIP,
                             mask_input=l_qmask,
                             gradient_steps=GRAD_STEPS,
                             precompute_input=True,
                             backwards=True,
                             only_return_final=False)

        l_q = L.ConcatLayer([l_fwd_q, l_bkd_q], axis=2)  # B x Q x 2D
        q = L.get_output(l_q)  # B x Q x 2D
        q = q[T.arange(q.shape[0]), self.inps[12], :]  # B x 2D

        l_qs = [l_q]
        for i in range(K - 1):
            l_fwd_doc_1 = L.GRULayer(l_doce,
                                     self.nhidden,
                                     grad_clipping=GRAD_CLIP,
                                     mask_input=l_docmask,
                                     gradient_steps=GRAD_STEPS,
                                     precompute_input=True)
            l_bkd_doc_1 = L.GRULayer(l_doce,
                                     self.nhidden,
                                     grad_clipping=GRAD_CLIP,
                                     mask_input=l_docmask,
                                     gradient_steps=GRAD_STEPS,
                                     precompute_input=True,
                                     backwards=True)

            l_doc_1 = L.concat([l_fwd_doc_1, l_bkd_doc_1],
                               axis=2)  # B x N x DE

            l_fwd_q_1 = L.GRULayer(l_qembed,
                                   self.nhidden,
                                   grad_clipping=GRAD_CLIP,
                                   mask_input=l_qmask,
                                   gradient_steps=GRAD_STEPS,
                                   precompute_input=True)
            l_bkd_q_1 = L.GRULayer(l_qembed,
                                   self.nhidden,
                                   grad_clipping=GRAD_CLIP,
                                   mask_input=l_qmask,
                                   gradient_steps=GRAD_STEPS,
                                   precompute_input=True,
                                   backwards=True)

            l_q_c_1 = L.ConcatLayer([l_fwd_q_1, l_bkd_q_1],
                                    axis=2)  # B x Q x DE
            l_qs.append(l_q_c_1)

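            # Gated-attention hop: match scores M = D Q^T are softmax-normalised
            # over query tokens, masked by the query mask (self.inps[7]) and
            # renormalised; the attended query q_rep then gates the document
            # states by element-wise multiplication before the next layer.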
            qd = L.get_output(l_q_c_1)  # B x Q x DE
            dd = L.get_output(l_doc_1)  # B x N x DE
            M = T.batched_dot(dd, qd.dimshuffle((0, 2, 1)))  # B x N x Q
            alphas = T.nnet.softmax(
                T.reshape(M, (M.shape[0] * M.shape[1], M.shape[2])))
            alphas_r = T.reshape(alphas, (M.shape[0], M.shape[1], M.shape[2])) * \
                self.inps[7][:, np.newaxis, :]  # B x N x Q
            alphas_r = alphas_r / alphas_r.sum(axis=2)[:, :, np.newaxis]  # B x N x Q
            q_rep = T.batched_dot(alphas_r, qd)  # B x N x DE

            l_q_rep_in = L.InputLayer(shape=(None, None, 2 * self.nhidden),
                                      input_var=q_rep)
            l_doc_2_in = L.ElemwiseMergeLayer([l_doc_1, l_q_rep_in], T.mul)
            l_doce = L.dropout(l_doc_2_in, p=self.dropout)  # B x N x DE

        if self.use_feat:
            l_doce = L.ConcatLayer([l_doce, l_fembed], axis=2)  # B x N x DE+2
        l_fwd_doc = L.GRULayer(l_doce,
                               self.nhidden,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True)
        l_bkd_doc = L.GRULayer(l_doce,
                               self.nhidden,
                               grad_clipping=GRAD_CLIP,
                               mask_input=l_docmask,
                               gradient_steps=GRAD_STEPS,
                               precompute_input=True,
                               backwards=True)

        l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)

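        # Attention-sum readout: token scores d.q are softmax-normalised,
        # masked by self.inps[10] and renormalised, then aggregated through a
        # batched dot with self.inps[4]; `final` uses the stochastic (dropout)
        # pass and `final_v` the deterministic one.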
        d = L.get_output(l_doc)  # B x N x 2D
        p = T.batched_dot(d, q)  # B x N
        pm = T.nnet.softmax(p) * self.inps[10]
        pm = pm / pm.sum(axis=1)[:, np.newaxis]
        final = T.batched_dot(pm, self.inps[4])

        dv = L.get_output(l_doc, deterministic=True)  # B x N x 2D
        p = T.batched_dot(dv, q)  # B x N
        pm = T.nnet.softmax(p) * self.inps[10]
        pm = pm / pm.sum(axis=1)[:, np.newaxis]
        final_v = T.batched_dot(pm, self.inps[4])

        return final, final_v, l_doc, l_qs, l_docembed.W
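
# Usage sketch (an assumption, not part of the original class): `final` is a batch
# of probability distributions over answer candidates, so a negative
# log-likelihood objective against gold candidate indices could be compiled along
# these lines. `inps` stands in for the list of symbolic input tensors used above;
# `answers` is a hypothetical integer vector of gold candidate indices.
def compile_reader_trainer_sketch(final, l_doc, l_qs, inps, learning_rate=5e-4):
    import theano
    import theano.tensor as T
    import lasagne
    import lasagne.layers as L
    answers = T.ivector('answers')
    loss = T.nnet.categorical_crossentropy(final, answers).mean()
    params = L.get_all_params([l_doc] + l_qs, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)
    return theano.function(inps + [answers], loss, updates=updates)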
Example #20
def build_model(hyparams,
                vmap,
                log,
                nclasses=2,
                batchsize=None,
                invar=None,
                maskvar=None,
                maxlen=MAXLEN):

    embedding_dim = hyparams.embedding_dim
    nhidden = hyparams.nhidden
    bidirectional = hyparams.bidirectional
    pool = hyparams.pool
    grad_clip = hyparams.grad_clip
    init = hyparams.init

    net = OrderedDict()

    V = len(vmap)
    W = lasagne.init.Normal()

    gate_params = layer.recurrent.Gate(W_in=lasagne.init.Orthogonal(),
                                       W_hid=lasagne.init.Orthogonal(),
                                       b=lasagne.init.Constant(0.))
    cell_params = layer.recurrent.Gate(
        W_in=lasagne.init.Orthogonal(),
        W_hid=lasagne.init.Orthogonal(),
        W_cell=None,
        b=lasagne.init.Constant(0.),
        nonlinearity=lasagne.nonlinearities.tanh)

    net['input'] = layer.InputLayer((batchsize, maxlen), input_var=invar)
    net['mask'] = layer.InputLayer((batchsize, maxlen), input_var=maskvar)
    ASSUME = {net['input']: (200, 140), net['mask']: (200, 140)}
    net['emb'] = layer.EmbeddingLayer(net['input'],
                                      input_size=V,
                                      output_size=embedding_dim,
                                      W=W)
    net['fwd1'] = layer.LSTMLayer(net['emb'],
                                  num_units=nhidden,
                                  grad_clipping=grad_clip,
                                  nonlinearity=lasagne.nonlinearities.tanh,
                                  mask_input=net['mask'],
                                  ingate=gate_params,
                                  forgetgate=gate_params,
                                  cell=cell_params,
                                  outgate=gate_params,
                                  learn_init=True)
    if bidirectional:
        net['bwd1'] = layer.LSTMLayer(net['emb'],
                                      num_units=nhidden,
                                      grad_clipping=grad_clip,
                                      nonlinearity=lasagne.nonlinearities.tanh,
                                      mask_input=net['mask'],
                                      ingate=gate_params,
                                      forgetgate=gate_params,
                                      cell=cell_params,
                                      outgate=gate_params,
                                      learn_init=True,
                                      backwards=True)
        if pool == 'mean':

            def tmean(a, b):
                agg = theano.tensor.add(a, b)
                agg /= 2.
                return agg

            net['pool'] = layer.ElemwiseMergeLayer([net['fwd1'], net['bwd1']],
                                                   tmean)
        elif pool == 'sum':
            net['pool'] = layer.ElemwiseSumLayer([net['fwd1'], net['bwd1']])
        else:
            net['pool'] = layer.ConcatLayer([net['fwd1'], net['bwd1']])
    else:
        net['pool'] = layer.ConcatLayer([net['fwd1']])
    net['dropout1'] = layer.DropoutLayer(net['pool'], p=0.5)
    if init == 'identity':
        gate_params2 = layer.recurrent.Gate(W_in=np.eye(nhidden,
                                                        dtype=np.float32),
                                            W_hid=np.eye(nhidden,
                                                         dtype=np.float32),
                                            b=lasagne.init.Constant(0.))
        cell_params2 = layer.recurrent.Gate(
            W_in=np.eye(nhidden, dtype=np.float32),
            W_hid=np.eye(nhidden, dtype=np.float32),
            W_cell=None,
            b=lasagne.init.Constant(0.),
            nonlinearity=lasagne.nonlinearities.rectify)
        net['fwd2'] = layer.LSTMLayer(net['dropout1'],
                                      num_units=nhidden,
                                      grad_clipping=grad_clip,
                                      nonlinearity=lasagne.nonlinearities.tanh,
                                      mask_input=net['mask'],
                                      ingate=gate_params2,
                                      forgetgate=gate_params2,
                                      cell=cell_params2,
                                      outgate=gate_params2,
                                      learn_init=True,
                                      only_return_final=True)
    else:
        net['fwd2'] = layer.LSTMLayer(net['dropout1'],
                                      num_units=nhidden,
                                      grad_clipping=grad_clip,
                                      nonlinearity=lasagne.nonlinearities.tanh,
                                      mask_input=net['mask'],
                                      ingate=gate_params,
                                      forgetgate=gate_params,
                                      cell=cell_params,
                                      outgate=gate_params,
                                      learn_init=True,
                                      only_return_final=True)
    net['dropout2'] = layer.DropoutLayer(net['fwd2'], p=0.6)
    net['softmax'] = layer.DenseLayer(
        net['dropout2'],
        num_units=nclasses,
        nonlinearity=lasagne.nonlinearities.softmax)
    logstr = '========== MODEL ========== \n'
    logstr += 'vocab size: %d\n' % V
    logstr += 'embedding dim: %d\n' % embedding_dim
    logstr += 'nhidden: %d\n' % nhidden
    logstr += 'pooling: %s\n' % pool
    for lname, lyr in net.items():
        logstr += '%s %s\n' % (lname, str(get_output_shape(lyr, ASSUME)))
    logstr += '=========================== \n'
    print logstr
    log.write(logstr)
    log.flush()
    return net
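
# Usage sketch (an assumption, not the author's training code): one way to turn the
# returned `net` dict into a training function, assuming `invar`, `maskvar` and an
# integer label vector `targets` are the symbolic inputs of the model above.
def compile_trainer_sketch(net, invar, maskvar, learning_rate=1e-3):
    import theano
    import theano.tensor as T
    import lasagne
    targets = T.ivector('targets')  # hypothetical gold label per sequence
    probs = lasagne.layers.get_output(net['softmax'])
    loss = lasagne.objectives.categorical_crossentropy(probs, targets).mean()
    params = lasagne.layers.get_all_params(net['softmax'], trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)
    return theano.function([invar, maskvar, targets], loss, updates=updates)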
Example #21
def build_rnn_net(input_var=None,
                  input_width=None,
                  input_dim=None,
                  nin_units=80,
                  h_num_units=[64, 64],
                  h_grad_clip=1.0,
                  output_width=1):
    """
    A stacked bidirectional RNN network for regression, alternating
    with dense layers and merging of the two directions, followed by
    a feature mean pooling in the time direction, with a linear
    dim-reduction layer at the start
    add dropout for generalizations
    
    Args:
        input_var (theano 3-tensor): minibatch of input sequence vectors
        input_width (int): length of input sequences
        nin_units (list): number of NIN features
        h_num_units (int list): no. of units in hidden layer in each stack
                                from bottom to top
        h_grad_clip (float): gradient clipping maximum value 
        output_width (int): size of output layer (e.g. =1 for 1D regression)
    Returns:
        output layer (Lasagne layer object)
    """

    # Non-linearity hyperparameter
    leaky_ratio = 0.3
    nonlin = lasagne.nonlinearities.LeakyRectify(leakiness=leaky_ratio)

    # Input layer
    l_in = LL.InputLayer(shape=(None, input_width, input_dim),
                         input_var=input_var)
    batchsize = l_in.input_var.shape[0]

    # NIN-layer
    #l_in_1 = LL.NINLayer(l_in, num_units=nin_units,
    #nonlinearity=lasagne.nonlinearities.linear)
    l_in_1 = l_in
    # l_in_d = LL.DropoutLayer(l_in, p=0.8)  # input dropout is disabled for now, since the first RNN layer is 256 units wide

    # currently, we do not drop input features
    # RNN layers
    # dropout is applied in the first two layers (of three total) or the first three (of five total)
    counter = -1
    drop_ends = 2
    for h in h_num_units:
        counter += 1
        # Forward layers
        l_forward_0 = LL.RecurrentLayer(
            l_in_1,
            nonlinearity=nonlin,
            num_units=h,
            W_in_to_hid=lasagne.init.Normal(0.01, 0),
            #W_in_to_hid=lasagne.init.He(initializer, math.sqrt(2/(1+0.15**2))),
            W_hid_to_hid=lasagne.init.Orthogonal(
                math.sqrt(2 / (1 + leaky_ratio**2))),
            backwards=False,
            learn_init=True,
            grad_clipping=h_grad_clip,
            #gradient_steps = 20,
            unroll_scan=True,
            precompute_input=True)

        l_forward_0a = LL.ReshapeLayer(l_forward_0, (-1, h))

        if (counter < drop_ends and counter % 2 != 0):
            l_forward_0a = LL.DropoutLayer(l_forward_0a, p=0.2)
        else:
            l_forward_0a = l_forward_0a

        l_forward_0b = LL.DenseLayer(l_forward_0a,
                                     num_units=h,
                                     nonlinearity=nonlin)
        l_forward_0c = LL.ReshapeLayer(l_forward_0b,
                                       (batchsize, input_width, h))

        l_forward_out = l_forward_0c

        # Backward layers
        l_backward_0 = LL.RecurrentLayer(
            l_in_1,
            nonlinearity=nonlin,
            num_units=h,
            W_in_to_hid=lasagne.init.Normal(0.01, 0),
            #W_in_to_hid=lasagne.init.He(initializer, math.sqrt(2/(1+0.15**2))),
            W_hid_to_hid=lasagne.init.Orthogonal(
                math.sqrt(2 / (1 + leaky_ratio**2))),
            backwards=True,
            learn_init=True,
            grad_clipping=h_grad_clip,
            #gradient_steps = 20,
            unroll_scan=True,
            precompute_input=True)

        l_backward_0a = LL.ReshapeLayer(l_backward_0, (-1, h))

        if (counter < drop_ends and counter % 2 == 0):
            l_backward_0a = LL.DropoutLayer(l_backward_0a, p=0.2)
        else:
            l_backward_0a = l_backward_0a

        l_backward_0b = LL.DenseLayer(l_backward_0a,
                                      num_units=h,
                                      nonlinearity=nonlin)
        l_backward_0c = LL.ReshapeLayer(l_backward_0b,
                                        (batchsize, input_width, h))

        l_backward_out = l_backward_0c

        l_in_1 = LL.ElemwiseSumLayer([l_forward_out, l_backward_out])

    # Output layers
    network_0a = LL.DenseLayer(l_in_1,
                               num_units=1,
                               num_leading_axes=2,
                               nonlinearity=nonlin)

    output_net = LL.FlattenLayer(network_0a, outdim=2)

    return output_net
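
# Usage sketch (assumed, not from the original source): compiling a squared-error
# objective for the regression network returned above; `targets` is a hypothetical
# (batch, input_width) matrix of per-timestep regression targets.
def compile_rnn_regressor_sketch(input_var, output_layer, learning_rate=1e-3):
    import theano
    import theano.tensor as T
    import lasagne
    targets = T.matrix('targets')
    prediction = lasagne.layers.get_output(output_layer)  # (batch, input_width)
    loss = lasagne.objectives.squared_error(prediction, targets).mean()
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=learning_rate)
    return theano.function([input_var, targets], loss, updates=updates)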
Example #22
    def build_RNN(self,
                  n_hidden_list=(100, ),
                  bidirectional=False,
                  addDenseLayers=False,
                  seed=int(time.time()),
                  debug=False,
                  logger=logger_RNNtools):
        # some inspiration from http://colinraffel.com/talks/hammer2015recurrent.pdf

        # if debug:
        #     logger_RNNtools.debug('\nInputs:');
        #     logger_RNNtools.debug('  X.shape:    %s', self.X[0].shape)
        #     logger_RNNtools.debug('  X[0].shape: %s %s %s \n%s', self.X[0][0].shape, type(self.X[0][0]),
        #                           type(self.X[0][0][0]), self.X[0][0][:5])
        #
        #     logger_RNNtools.debug('Targets: ');
        #     logger_RNNtools.debug('  Y.shape:    %s', self.Y.shape)
        #     logger_RNNtools.debug('  Y[0].shape: %s %s %s \n%s', self.Y[0].shape, type(self.Y[0]), type(self.Y[0][0]),
        #                           self.Y[0][:5])
        #     logger_RNNtools.debug('Layers: ')

        # fix these at initialization because it allows for compiler optimizations
        num_output_units = self.num_output_units
        num_features = self.num_features
        batch_size = self.batch_size

        audio_inputs = self.audio_inputs_var
        audio_masks = self.audio_masks_var  # use a float matrix, not an imatrix; otherwise all mask calculations run on the CPU and everything slows down by ~2x (this also applies in general_tools.generate_masks())
        valid_indices = self.audio_valid_indices_var

        net = {}
        # net['l1_in_valid'] = L.InputLayer(shape=(batch_size, None), input_var=valid_indices)

        # shape = (batch_size, batch_max_seq_length, num_features)
        net['l1_in'] = L.InputLayer(shape=(batch_size, None, num_features),
                                    input_var=audio_inputs)
        # We could do this and set all input_vars to None, but that is slower -> fix batch_size and num_features at initialization
        # batch_size, n_time_steps, n_features = net['l1_in'].input_var.shape

        # This input will be used to provide the network with masks.
        # Masks are matrices of shape (batch_size, n_time_steps);
        net['l1_mask'] = L.InputLayer(shape=(batch_size, None),
                                      input_var=audio_masks)

        if debug:
            get_l_in = L.get_output(net['l1_in'])
            l_in_val = get_l_in.eval({net['l1_in'].input_var: self.X})
            # logger_RNNtools.debug(l_in_val)
            logger_RNNtools.debug('  l_in size: %s', l_in_val.shape)

            get_l_mask = L.get_output(net['l1_mask'])
            l_mask_val = get_l_mask.eval(
                {net['l1_mask'].input_var: self.masks})
            # logger_RNNtools.debug(l_in_val)
            logger_RNNtools.debug('  l_mask size: %s', l_mask_val.shape)

            n_batch, n_time_steps, n_features = net['l1_in'].input_var.shape
            logger_RNNtools.debug(
                "  n_batch: %s | n_time_steps: %s | n_features: %s", n_batch,
                n_time_steps, n_features)

        ## LSTM parameters
        # All gates have initializers for the input-to-gate and hidden state-to-gate
        # weight matrices, the cell-to-gate weight vector, the bias vector, and the nonlinearity.
        # The convention is that gates use the standard sigmoid nonlinearity,
        # which is the default for the Gate class.
        gate_parameters = L.recurrent.Gate(W_in=lasagne.init.Orthogonal(),
                                           W_hid=lasagne.init.Orthogonal(),
                                           b=lasagne.init.Constant(0.))
        cell_parameters = L.recurrent.Gate(
            W_in=lasagne.init.Orthogonal(),
            W_hid=lasagne.init.Orthogonal(),
            # Setting W_cell to None denotes that no cell connection will be used.
            W_cell=None,
            b=lasagne.init.Constant(0.),
            # By convention, the cell nonlinearity is tanh in an LSTM.
            nonlinearity=lasagne.nonlinearities.tanh)

        # generate layers of stacked LSTMs, possibly bidirectional
        net['l2_lstm'] = []

        for i in range(len(n_hidden_list)):
            n_hidden = n_hidden_list[i]

            if i == 0: input = net['l1_in']
            else: input = net['l2_lstm'][i - 1]

            nextForwardLSTMLayer = L.recurrent.LSTMLayer(
                input,
                n_hidden,
                # We need to specify a separate input for masks
                mask_input=net['l1_mask'],
                # Here, we supply the gate parameters for each gate
                ingate=gate_parameters,
                forgetgate=gate_parameters,
                cell=cell_parameters,
                outgate=gate_parameters,
                # We'll learn the initialization and use gradient clipping
                learn_init=True,
                grad_clipping=100.)
            net['l2_lstm'].append(nextForwardLSTMLayer)

            if bidirectional:
                input = net['l2_lstm'][-1]
                # Use backward LSTM
                # The "backwards" layer is the same as the first,
                # except that the backwards argument is set to True.
                nextBackwardLSTMLayer = L.recurrent.LSTMLayer(
                    input,
                    n_hidden,
                    ingate=gate_parameters,
                    mask_input=net['l1_mask'],
                    forgetgate=gate_parameters,
                    cell=cell_parameters,
                    outgate=gate_parameters,
                    learn_init=True,
                    grad_clipping=100.,
                    backwards=True)
                net['l2_lstm'].append(nextBackwardLSTMLayer)

                # if debug:
                #     # Backwards LSTM
                #     get_l_lstm_back = theano.function([net['l1_in'].input_var, net['l1_mask'].input_var],
                #                                       L.get_output(net['l2_lstm'][-1]))
                #     l_lstmBack_val = get_l_lstm_back(self.X, self.masks)
                #     logger_RNNtools.debug('  l_lstm_back size: %s', l_lstmBack_val.shape)

                # We'll combine the forward and backward layer output by summing.
                # Merge layers take in lists of layers to merge as input.
                # The output of l_sum will be of shape (n_batch, max_n_time_steps, n_features)
                net['l2_lstm'].append(
                    L.ElemwiseSumLayer(
                        [net['l2_lstm'][-2], net['l2_lstm'][-1]]))

        # we need to convert (batch_size, seq_length, num_features) to (batch_size * seq_length, num_features) because Dense networks can't deal with 2 unknown sizes
        net['l3_reshape'] = L.ReshapeLayer(net['l2_lstm'][-1],
                                           (-1, n_hidden_list[-1]))

        # if debug:
        #     get_l_reshape = theano.function([net['l1_in'].input_var, net['l1_mask'].input_var],
        #                                     L.get_output(net['l3_reshape']))
        #     l_reshape_val = get_l_reshape(self.X, self.masks)
        #     logger.debug('  l_reshape size: %s', l_reshape_val.shape)
        #
        # if debug:
        #     # Forwards LSTM
        #     get_l_lstm = theano.function([net['l1_in'].input_var, net['l1_mask'].input_var],
        #                                  L.get_output(net['l2_lstm'][-1]))
        #     l_lstm_val = get_l_lstm(self.X, self.masks)
        #     logger_RNNtools.debug('  l2_lstm size: %s', l_lstm_val.shape);

        if addDenseLayers:
            net['l4_dense'] = L.DenseLayer(
                net['l3_reshape'],
                nonlinearity=lasagne.nonlinearities.rectify,
                num_units=256)
            dropoutLayer = L.DropoutLayer(net['l4_dense'], p=0.3)
            net['l5_dense'] = L.DenseLayer(
                dropoutLayer,
                nonlinearity=lasagne.nonlinearities.rectify,
                num_units=64)
            # Now we can apply feed-forward layers as usual for classification
            net['l6_dense'] = L.DenseLayer(
                net['l5_dense'],
                num_units=num_output_units,
                nonlinearity=lasagne.nonlinearities.softmax)
        else:
            # Now we can apply feed-forward layers as usual for classification
            net['l6_dense'] = L.DenseLayer(
                net['l3_reshape'],
                num_units=num_output_units,
                nonlinearity=lasagne.nonlinearities.softmax)

        # # Now, the shape will be (n_batch * n_timesteps, num_output_units). We can then reshape to
        # # n_batch to get num_output_units values for each timestep from each sequence
        net['l7_out_flattened'] = L.ReshapeLayer(net['l6_dense'],
                                                 (-1, num_output_units))
        net['l7_out'] = L.ReshapeLayer(net['l6_dense'],
                                       (batch_size, -1, num_output_units))

        net['l7_out_valid_basic'] = L.SliceLayer(net['l7_out'],
                                                 indices=valid_indices,
                                                 axis=1)
        net['l7_out_valid'] = L.ReshapeLayer(
            net['l7_out_valid_basic'], (batch_size, -1, num_output_units))
        net['l7_out_valid_flattened'] = L.ReshapeLayer(
            net['l7_out_valid_basic'], (-1, num_output_units))

        if debug:
            get_l_out = theano.function(
                [net['l1_in'].input_var, net['l1_mask'].input_var],
                L.get_output(net['l7_out']))
            l_out = get_l_out(self.X, self.masks)

            # this only works for batch_size == 1
            get_l_out_valid = theano.function(
                [audio_inputs, audio_masks, valid_indices],
                L.get_output(net['l7_out_valid']))
            try:
                l_out_valid = get_l_out_valid(self.X, self.masks,
                                              self.valid_frames)
                logger_RNNtools.debug('\n\n\n  l_out: %s  | l_out_valid: %s',
                                      l_out.shape, l_out_valid.shape)
            except:
                logger_RNNtools.warning(
                    "batchsize not 1, get_valid not working")

        if debug: self.print_network_structure(net)
        self.network_lout = net['l7_out_flattened']
        self.network_lout_batch = net['l7_out']
        self.network_lout_valid = net['l7_out_valid']
        self.network_lout_valid_flattened = net['l7_out_valid_flattened']

        self.network = net
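
# Usage sketch (an assumption, not part of the original class): a masked framewise
# cross-entropy on the flattened softmax output, with `targets` as a hypothetical
# (batch, time) integer matrix of per-frame labels.
def compile_framewise_loss_sketch(net, audio_inputs, audio_masks):
    import theano
    import theano.tensor as T
    import lasagne
    import lasagne.layers as L
    targets = T.imatrix('targets')
    probs = L.get_output(net['l7_out_flattened'])  # (batch * time, classes)
    ce = lasagne.objectives.categorical_crossentropy(probs, targets.flatten())
    mask = audio_masks.flatten()
    loss = T.sum(ce * mask) / T.sum(mask)          # average over valid frames only
    return theano.function([audio_inputs, audio_masks, targets], loss)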
Example #23
def resblock(net_in,
             filters,
             kernel_size,
             stride=1,
             num_groups=1,
             preactivated=True):

    # Preactivation
    net_pre = batch_norm(net_in)
    net_pre = l.NonlinearityLayer(net_pre,
                                  nonlinearity=nonlinearity(cfg.NONLINEARITY))

    # Preactivated shortcut?
    if preactivated:
        net_sc = net_pre
    else:
        net_sc = net_in

    # Stride size
    if cfg.MAX_POOLING:
        s = 1
    else:
        s = stride

    # First Convolution (always has a preactivated input)
    net = batch_norm(
        l.Conv2DLayer(net_pre,
                      num_filters=filters,
                      filter_size=kernel_size,
                      pad='same',
                      stride=s,
                      num_groups=num_groups,
                      W=initialization(cfg.NONLINEARITY),
                      nonlinearity=nonlinearity(cfg.NONLINEARITY)))

    # Optional pooling layer
    if cfg.MAX_POOLING and stride > 1:
        net = l.MaxPool2DLayer(net, pool_size=stride)

    # Dropout Layer (we support different types of dropout)
    if cfg.DROPOUT_TYPE == 'channels' and cfg.DROPOUT > 0.0:
        net = l.dropout_channels(net, p=cfg.DROPOUT)
    elif cfg.DROPOUT_TYPE == 'location' and cfg.DROPOUT > 0.0:
        net = l.dropout_location(net, p=cfg.DROPOUT)
    elif cfg.DROPOUT > 0.0:
        net = l.DropoutLayer(net, p=cfg.DROPOUT)

    # Second Convolution
    net = l.Conv2DLayer(net,
                        num_filters=filters,
                        filter_size=kernel_size,
                        pad='same',
                        stride=1,
                        num_groups=num_groups,
                        W=initialization(cfg.NONLINEARITY),
                        nonlinearity=None)

    # Shortcut Layer
    if not l.get_output_shape(net) == l.get_output_shape(net_sc):
        shortcut = l.Conv2DLayer(net_sc,
                                 num_filters=filters,
                                 filter_size=1,
                                 pad='same',
                                 stride=s,
                                 W=initialization(cfg.NONLINEARITY),
                                 nonlinearity=None,
                                 b=None)

        # Optional pooling layer
        if cfg.MAX_POOLING and stride > 1:
            shortcut = l.MaxPool2DLayer(shortcut, pool_size=stride)
    else:
        shortcut = net_sc

    # Merge Layer
    out = l.ElemwiseSumLayer([net, shortcut])

    return out
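
# Usage sketch (assumed): stacking a few of the residual blocks defined above into
# a small backbone; the input shape and filter counts are illustrative only, and
# the cfg settings are whatever the surrounding module defines.
def build_resblock_stack_sketch(input_var=None):
    import lasagne.layers as l
    net = l.InputLayer(shape=(None, 1, 128, 256), input_var=input_var)
    net = resblock(net, filters=32, kernel_size=3, stride=2, preactivated=True)
    net = resblock(net, filters=64, kernel_size=3, stride=2)
    net = resblock(net, filters=128, kernel_size=3, stride=2)
    return net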
Example #24
def make_model():
    image = ll.InputLayer((BS, CH, IH, IW), name='step1.image')

    h_read_init = ll.InputLayer(
        (HS, ),
        lasagne.utils.create_param(li.Uniform(), (HS, ),
                                   name='step1.tensor.h_read_init'),
        name='step1.h_read_init')
    h_read_init.add_param(h_read_init.input_var, (HS, ))

    h_write_init = ll.InputLayer(
        (HS, ),
        lasagne.utils.create_param(li.Uniform(), (HS, ),
                                   name='step1.tensor.h_write_init'),
        name='step1.h_write_init')
    h_write_init.add_param(h_write_init.input_var, (HS, ))

    h_read = ll.ExpressionLayer(h_read_init,
                                lambda t: T.tile(T.reshape(t, (1, HS)),
                                                 (BS, 1)), (BS, HS),
                                name='step1.h_read')

    h_write = ll.ExpressionLayer(h_write_init,
                                 lambda t: T.tile(T.reshape(t, (1, HS)),
                                                  (BS, 1)), (BS, HS),
                                 name='step1.h_write')

    canvas = ll.InputLayer(
        (BS, CH, IH, IW),
        lasagne.utils.create_param(li.Constant(0.0), (BS, CH, IH, IW),
                                   name='step1.tensor.canvas'),
        name='step1.canvas')

    image_prev = ll.NonlinearityLayer(canvas,
                                      ln.sigmoid,
                                      name='step1.image_prev')

    image_error = ll.ElemwiseSumLayer([image, image_prev],
                                      coeffs=[1, -1],
                                      name='step1.image_error')
    image_stack = ll.ConcatLayer([image, image_error],
                                 name='step1.image_stack')

    read_params = ll.DenseLayer(h_write,
                                6,
                                nonlinearity=None,
                                name='step1.read_params')
    read_window = advanced_layers.AttentionLayer([read_params, image_stack],
                                                 (WH, WW),
                                                 name='step1.read_window')

    read_flat = ll.FlattenLayer(read_window, name='step1.read_flat')
    read_code = ll.ConcatLayer([read_flat, h_write], name='step1.read_code')

    read_code_sequence = ll.ReshapeLayer(read_code,
                                         (BS, 1, read_code.output_shape[-1]),
                                         name='step1.read_code_sequence')

    read_rnn = ll.GRULayer(
        read_code_sequence,
        HS,
        only_return_final=True,
        hid_init=h_read,
        name='step1.read_rnn',
    )

    sample_mean = ll.DenseLayer(read_rnn,
                                ENC_NDIM,
                                nonlinearity=None,
                                name='step1.sample_mean')
    sample_logvar2 = ll.DenseLayer(read_rnn,
                                   ENC_NDIM,
                                   nonlinearity=None,
                                   name='step1.sample_logvar2')
    sample = advanced_layers.SamplingLayer([sample_mean, sample_logvar2],
                                           ENC_VAR,
                                           name='step1.sample')

    write_code = ll.DenseLayer(sample, HS, name='step1.write_code')
    write_code_sequence = ll.ReshapeLayer(write_code,
                                          (BS, 1, write_code.output_shape[-1]),
                                          name='step1.write_code_sequence')
    write_rnn = ll.GRULayer(
        write_code_sequence,
        HS,
        only_return_final=True,
        hid_init=h_write,
        name='step1.write_rnn',
    )
    write_window_flat = ll.DenseLayer(write_rnn,
                                      CH * WH * WW,
                                      name='step1.write_window_flat')
    write_window = ll.ReshapeLayer(write_window_flat, (BS, CH, WH, WW),
                                   name='step1.write_window')

    write_params = ll.DenseLayer(h_write,
                                 6,
                                 nonlinearity=None,
                                 name='step1.write_params')
    write_image = advanced_layers.AttentionLayer([write_params, write_window],
                                                 (IH, IW),
                                                 name='step1.write_image')
    canvas_next = ll.ElemwiseSumLayer([canvas, write_image],
                                      name='step1.canvas_next')

    def rename(name):
        if name is None:
            return None
        step, real_name = name.split('.', 1)
        step = int(step[4:])
        return 'step%d.%s' % (step + 1, real_name)

    for step in xrange(1, TIME_ROUNDS):
        sample_random_variable_next = sample.random_stream.normal(
            sample.input_shapes[0],
            std=sample.variation_coeff,
        )
        sample_random_variable_next.name = 'step%d.sample.random_variable' % \
            (step + 1)

        canvas, canvas_next = (canvas_next,
                               utils.modified_copy(
                                   canvas_next,
                                   modify={
                                       h_read:
                                       read_rnn,
                                       h_write:
                                       write_rnn,
                                       canvas:
                                       canvas_next,
                                       sample.random_stream:
                                       sample.random_stream,
                                       sample.random_variable:
                                       sample_random_variable_next,
                                   },
                                   rename=rename,
                               ))

        h_read = read_rnn
        h_write = write_rnn
        read_rnn = utils.layer_by_name(canvas_next,
                                       'step%d.read_rnn' % (step + 1))
        write_rnn = utils.layer_by_name(canvas_next,
                                        'step%d.write_rnn' % (step + 1))
        sample = utils.layer_by_name(canvas_next, 'step%d.sample' % (step + 1))

    output = ll.NonlinearityLayer(canvas_next, ln.sigmoid, name='output')

    return output
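
# Usage sketch (an assumption, not the original objective): a pixelwise
# reconstruction loss for the model above, assuming the caller also keeps a handle
# on the 'step1.image' InputLayer; the KL term of the variational objective is
# omitted here.
def compile_reconstruction_loss_sketch(output_layer, image_input_layer):
    import theano
    import lasagne
    import lasagne.layers as ll
    target = image_input_layer.input_var          # the 'step1.image' input tensor
    reconstruction = ll.get_output(output_layer)  # sigmoid canvas, same shape
    loss = lasagne.objectives.binary_crossentropy(reconstruction, target).mean()
    return theano.function([target], loss)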
Example #25
def resblock(net_in, filters, kernel_size, stride=1, preactivated=True, block_id=1, name=''):

    # Show input shape
    #log.p(("\t\t" + name + " IN SHAPE:", l.get_output_shape(net_in)), new_line=False)

    # Pre-activation
    if block_id > 1:
        net_pre = l.NonlinearityLayer(net_in, nonlinearity=nl.rectify)
    else:
        net_pre = net_in

    # Pre-activated shortcut?
    if preactivated:
        net_in = net_pre

    # Bottleneck Convolution
    if stride > 1:
        net_pre = l.batch_norm(l.Conv2DLayer(net_pre,
                                            num_filters=l.get_output_shape(net_pre)[1],
                                            filter_size=1,
                                            pad='same',
                                            stride=1,
                                            nonlinearity=nl.rectify))
    
    # First Convolution     
    net = l.batch_norm(l.Conv2DLayer(net_pre,
                                   num_filters=l.get_output_shape(net_pre)[1],
                                   filter_size=kernel_size,
                                   pad='same',
                                   stride=1,
                                   nonlinearity=nl.rectify))

    # Pooling layer
    if stride > 1:
        net = l.MaxPool2DLayer(net, pool_size=(stride, stride))

    # Dropout Layer
    net = l.DropoutLayer(net)        

    # Second Convolution
    net = l.batch_norm(l.Conv2DLayer(net,
                                     num_filters=filters,
                                     filter_size=kernel_size,
                                     pad='same',
                                     stride=1,
                                     nonlinearity=None))

    # Shortcut Layer
    if not l.get_output_shape(net) == l.get_output_shape(net_in):

        # Average pooling
        shortcut = l.Pool2DLayer(net_in, pool_size=(stride, stride), stride=stride, mode='average_exc_pad')

        # Shortcut convolution
        shortcut = l.batch_norm(l.Conv2DLayer(shortcut,
                                 num_filters=filters,
                                 filter_size=1,
                                 pad='same',
                                 stride=1,
                                 nonlinearity=None))        
        
    else:

        # Shortcut = input
        shortcut = net_in
    
    # Merge Layer
    out = l.ElemwiseSumLayer([net, shortcut])

    # Show output shape
    #log.p(("OUT SHAPE:", l.get_output_shape(out), "LAYER:", len(l.get_all_layers(out)) - 1))

    return out
Example #26
    def get_actor(self, avg=False):
        suf = '_avg' if avg else ''
        iw = L.InputLayer(shape=(None, self.args.sw))  # (100, 24)
        ew = L.EmbeddingLayer(
            iw,
            self.args.vw,
            self.args.nw,
            name='ew' + suf,
            W=HeNormal() if not avg else Constant())  # (100, 24, 256)
        ew.params[ew.W].remove('regularizable')
        if 'w' in self.args.freeze:
            ew.params[ew.W].remove('trainable')
        # for access from outside
        if not avg:
            self.Ew = ew.W

        # char embedding with CNN/LSTM
        ic = L.InputLayer(shape=(None, self.args.sw,
                                 self.args.max_len))  # (100, 24, 32)
        ec = self.get_char2word(ic, avg)  # (100, 24, 256)

        it = L.InputLayer(shape=(None, self.args.st))
        et = L.EmbeddingLayer(it,
                              self.args.vt,
                              self.args.nt,
                              name='et' + suf,
                              W=HeNormal() if not avg else Constant())
        et.params[et.W].remove('regularizable')

        il = L.InputLayer(shape=(None, self.args.sl))
        el = L.EmbeddingLayer(il,
                              self.args.vl,
                              self.args.nl,
                              name='el' + suf,
                              W=HeNormal() if not avg else Constant())
        el.params[el.W].remove('regularizable')

        to_concat = []
        if self.args.type == 'word':
            to_concat.append(ew)
        elif self.args.type == 'char':
            to_concat.append(ec)
        elif self.args.type == 'both':
            to_concat += [ew, ec]
        elif self.args.type == 'mix':
            to_concat.append(L.ElemwiseSumLayer([ew, ec]))

        if not self.args.untagged:
            to_concat.append(et)
        if not self.args.unlabeled:
            to_concat.append(el)

        x = L.concat(to_concat, axis=2)  # (100, 24, 64+16+16)

        # additional:
        # get the more compact representation of each token by its word, tag and label,
        # before putting into the hidden layer
        if self.args.squeeze:
            x = L.DenseLayer(
                x,
                num_units=self.args.squeeze,
                name='h0' + suf,
                num_leading_axes=2,
                W=HeNormal('relu') if not avg else Constant())  # (100, 24, 64)

        h1 = L.DenseLayer(
            x,
            num_units=self.args.nh1,
            name='h1' + suf,
            W=HeNormal('relu') if not avg else Constant())  # (100, 512)
        h1 = L.dropout(h1, self.args.p1)
        h2 = L.DenseLayer(
            h1,
            num_units=self.args.nh2,
            name='h2' + suf,
            W=HeNormal('relu') if not avg else Constant())  # (100, 256)
        h2 = L.dropout(h2, self.args.p2)
        h3 = L.DenseLayer(h2,
                          num_units=self.args.nh3,
                          name='h3' + suf,
                          W=HeNormal() if not avg else Constant(),
                          nonlinearity=softmax)  # (100, 125) num of actions

        return iw, ic, it, il, h3
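
# Usage sketch (assumed, not from the original class): compiling a deterministic
# action-probability function from the layers returned by get_actor().
def compile_actor_fn_sketch(iw, ic, it, il, h3):
    import theano
    import lasagne.layers as L
    probs = L.get_output(h3, deterministic=True)  # (batch, num_actions), dropout off
    inputs = [iw.input_var, ic.input_var, it.input_var, il.input_var]
    return theano.function(inputs, probs)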
Example #27
def build_segmenter_jet_2():
    # downsample down to a small region, then upsample all the way back up, using jet architecture
    # recreate basic FCN-8s structure (though more aptly 1s here since we upsample back to the original input size)
    # this jet will have another conv layer in the final upsample
    inp = ll.InputLayer(shape=(None, 1, None, None), name='input')
    conv1 = ll.Conv2DLayer(inp,
                           num_filters=32,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1_1')
    bn1 = ll.BatchNormLayer(conv1, name='bn1')
    conv2 = ll.Conv2DLayer(bn1,
                           num_filters=64,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv1_2')
    bn2 = ll.BatchNormLayer(conv2, name='bn2')
    mp1 = ll.MaxPool2DLayer(bn2, 2, stride=2, name='mp1')  # 2x downsample
    conv3 = ll.Conv2DLayer(mp1,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2_1')
    bn3 = ll.BatchNormLayer(conv3, name='bn3')
    conv4 = ll.Conv2DLayer(bn3,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv2_2')
    bn4 = ll.BatchNormLayer(conv4, name='bn4')
    mp2 = ll.MaxPool2DLayer(bn4, 2, stride=2, name='mp2')  # 4x downsample
    conv5 = ll.Conv2DLayer(mp2,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3_1')
    bn5 = ll.BatchNormLayer(conv5, name='bn5')
    conv6 = ll.Conv2DLayer(bn5,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv3_2')
    bn6 = ll.BatchNormLayer(conv6, name='bn6')
    mp3 = ll.MaxPool2DLayer(bn6, 2, stride=2, name='mp3')  # 8x downsample
    conv7 = ll.Conv2DLayer(mp3,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4_1')
    bn7 = ll.BatchNormLayer(conv7, name='bn7')
    conv8 = ll.Conv2DLayer(bn7,
                           num_filters=128,
                           filter_size=(3, 3),
                           pad='same',
                           W=Orthogonal(),
                           nonlinearity=rectify,
                           name='conv4_2')
    bn8 = ll.BatchNormLayer(conv8, name='bn8')
    # receptive field ~68, total stride 8 at this point
    # now start the upsample
    ## FIRST UPSAMPLE PREDICTION (akin to FCN-32s)
    conv_f8 = ll.Conv2DLayer(bn8,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_8xpred')
    softmax_8 = Softmax4D(conv_f8, name='4dsoftmax_8x')
    up8 = ll.Upscale2DLayer(
        softmax_8, 8,
        name='upsample_8x')  # take loss here, 8x upsample from 8x downsample

    ## COMBINE BY UPSAMPLING SOFTMAX 8 AND PRED ON CONV 6
    softmax_4up = ll.Upscale2DLayer(softmax_8, 2,
                                    name='upsample_4x_pre')  # 4x downsample
    conv_f6 = ll.Conv2DLayer(bn6,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_4xpred')
    softmax_4 = Softmax4D(conv_f6, name='4dsoftmax_4x')  # 4x downsample
    softmax_4_merge = ll.ElemwiseSumLayer([softmax_4, softmax_4up],
                                          coeffs=0.5,
                                          name='softmax_4_merge')

    up4 = ll.Upscale2DLayer(
        softmax_4_merge, 4,
        name='upsample_4x')  # take loss here, 4x upsample from 4x downsample

    ## COMBINE BY UPSAMPLING SOFTMAX_4_MERGE AND CONV 4
    softmax_2up = ll.Upscale2DLayer(softmax_4_merge, 2,
                                    name='upsample_2x_pre')  # 2x downsample
    conv_f4 = ll.Conv2DLayer(bn4,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_2xpred')

    softmax_2 = Softmax4D(conv_f4, name='4dsoftmax_2x')
    softmax_2_merge = ll.ElemwiseSumLayer([softmax_2, softmax_2up],
                                          coeffs=0.5,
                                          name='softmax_2_merge')

    up2 = ll.Upscale2DLayer(
        softmax_2_merge, 2, name='upsample_2x'
    )  # final loss here, 2x upsample from a 2x downsample

    ## COMBINE BY UPSAMPLING SOFTMAX_2_MERGE AND CONV 2
    softmax_1up = ll.Upscale2DLayer(
        softmax_2_merge, 2,
        name='upsample_1x_pre')  # 1x downsample (i.e. no downsample)
    conv_f2 = ll.Conv2DLayer(bn2,
                             num_filters=2,
                             filter_size=(3, 3),
                             pad='same',
                             W=Orthogonal(),
                             nonlinearity=linear,
                             name='conv_1xpred')

    softmax_1 = Softmax4D(conv_f2, name='4dsoftmax_1x')
    softmax_1_merge = ll.ElemwiseSumLayer([softmax_1, softmax_1up],
                                          coeffs=0.5,
                                          name='softmax_1_merge')

    # an up1 layer would go here, but it would be a no-op: softmax_1_merge is already at the input resolution
    return [up8, up4, up2, softmax_1_merge]
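
# Usage sketch (an assumption, not the original training code): combining the four
# multi-scale softmax outputs into one weighted pixelwise cross-entropy; `targets`
# is a hypothetical one-hot tensor of shape (batch, 2, H, W) at input resolution.
def multiscale_loss_sketch(output_layers, weights=(0.25, 0.25, 0.25, 0.25)):
    import theano.tensor as T
    import lasagne.layers as ll
    targets = T.tensor4('targets')
    preds = ll.get_output(output_layers)  # list of (batch, 2, H, W) probability maps
    losses = [-T.mean(T.sum(targets * T.log(p + 1e-8), axis=1)) for p in preds]
    return sum(w * loss for w, loss in zip(weights, losses)), targets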
Example #28
def run_experiment(args):
    import os
    # set environment variables for theano
    os.environ['THEANO_FLAGS'] = "lib.cnmem=" + str(
        args.mem) + ",device=gpu" + str(args.gpu)

    import inspect
    import shutil
    import time
    import logging
    import six
    import collections
    import numpy as np
    import scipy
    import theano
    import theano.tensor as T
    import lasagne
    import lasagne.layers as ll
    import lasagne.nonlinearities as ln
    import parmesan

    import layers
    import utils
    import cfdataset

    #----------------------------------------------------------------
    # Arguments and Settings
    floatX = theano.config.floatX
    logger = logging.getLogger()
    np.random.seed(args.seed)

    # copy file for reproducibility
    dirname = utils.setup_logging(args.message, args.loglv)
    script_src = os.path.abspath(inspect.getfile(inspect.currentframe()))
    script_dst = os.path.join(dirname, os.path.split(script_src)[1])
    shutil.copyfile(script_src, script_dst)

    # print arguments
    args_dict = collections.OrderedDict(sorted(vars(args).items()))
    for k, v in six.iteritems(args_dict):
        logger.info("  %20s: %s" % (k, v))

    # get arguments
    D_u, D_v = args.D_u, args.D_v
    J_u, J_v = args.J_u, args.J_v
    lr = args.lr
    alpha = args.alpha
    weight_decay = args.weight_decay
    n_step = args.n_step
    lookahead = args.lookahead
    max_epoch = args.max_epoch
    batch_size_u, batch_size_v = args.batch_size_u, args.batch_size_v
    share_params = not args.no_share_params
    nonlin_enc = layers.get_nonlin(args.nonlin_enc)
    nonlin_dec = layers.get_nonlin(args.nonlin_dec)

    #----------------------------------------------------------------
    # Dataset
    dataset = cfdataset.CFdata(name=args.dataset, split=args.split)

    N_stars = dataset.N_stars

    N_u, N_v = dataset.N_users, dataset.N_items
    R_train = dataset.R_train  # int (3 * N_train_rating)
    R_test = dataset.R_test  # int (3 * N_test_rating)

    n_valid_split = np.int(dataset.N_train_rating / 20)
    train_valid_perm = np.random.permutation(dataset.N_train_rating)
    R_valid = R_train[:, train_valid_perm[:n_valid_split]]
    R_train = R_train[:, train_valid_perm[n_valid_split:]]

    R_matrix = dict()
    R_matrix['train'] = scipy.sparse.coo_matrix(
        (R_train[2], (R_train[0], R_train[1])),
        shape=(N_u, N_v)).toarray().astype('int32')
    R_matrix['valid'] = scipy.sparse.coo_matrix(
        (R_valid[2], (R_valid[0], R_valid[1])),
        shape=(N_u, N_v)).toarray().astype('int32')
    R_matrix['test'] = scipy.sparse.coo_matrix(
        (R_test[2], (R_test[0], R_test[1])),
        shape=(N_u, N_v)).toarray().astype('int32')
    N_rating = dict()
    N_rating['train'] = dataset.N_train_rating - n_valid_split
    N_rating['valid'] = n_valid_split
    N_rating['test'] = dataset.N_test_rating

    logger.info("%d users, %d items" % (N_u, N_v))
    logger.info("%d training ratings, %d validation ratings, %d test ratings" %
                (N_rating['train'], N_rating['valid'], N_rating['test']))
    logger.info("%d-star scale" % N_stars)

    #----------------------------------------------------------------
    # numpy variables
    # encoded vectors
    np_enc_u_h = np.zeros((N_u, D_u), dtype=floatX)
    np_enc_v_h = np.zeros((N_v, D_v), dtype=floatX)

    #----------------------------------------------------------------
    # Symbolic variables
    sym_lr = T.fscalar('lr')
    sym_Ru = T.imatrix('Ru')
    sym_Rv = T.imatrix('Rv')
    sym_dr_Ru = T.fscalar('dr_Ru')
    sym_dr_Rv = T.fscalar('dr_Rv')
    sym_uid_origin = T.ivector('uid_origin')
    sym_uid_minibatch = T.ivector('uid_minibatch')
    sym_vid_origin = T.ivector('vid_origin')
    sym_vid_minibatch = T.ivector('vid_minibatch')
    sym_R_minibatch = T.ivector('R_minibatch')

    #----------------------------------------------------------------
    # Model setup (training model)
    logger.info("Setting up model ...")

    # Input layers
    l_in_Ru = ll.InputLayer((None, N_v), input_var=sym_Ru, name='l_in_Ru')
    l_in_Rv = ll.InputLayer((None, N_u), input_var=sym_Rv, name='l_in_Rv')
    l_in_uid_origin = ll.InputLayer((None, ),
                                    input_var=sym_uid_origin,
                                    name='l_in_uid_origin')
    l_in_vid_origin = ll.InputLayer((None, ),
                                    input_var=sym_vid_origin,
                                    name='l_in_vid_origin')
    l_in_uid_minibatch = ll.InputLayer((None, ),
                                       input_var=sym_uid_minibatch,
                                       name='l_in_uid_minibatch')
    l_in_vid_minibatch = ll.InputLayer((None, ),
                                       input_var=sym_vid_minibatch,
                                       name='l_in_vid_minibatch')

    # Dropout layers
    l_in_Ru = ll.DropoutLayer(l_in_Ru,
                              p=sym_dr_Ru,
                              rescale=False,
                              name='Dropout-l_in_Ru')
    l_in_Rv = ll.DropoutLayer(l_in_Rv,
                              p=sym_dr_Rv,
                              rescale=False,
                              name='Dropout-l_in_Rv')

    # User encoder model h(Ru)
    l_enc_u_h = layers.OneHotEncodeLayer(l_in_Ru,
                                         num_units=D_u,
                                         rank=J_u,
                                         num_hots=N_stars,
                                         share_params=share_params,
                                         nonlinearity=None,
                                         name='Dense-l_enc_u_h')
    l_enc_u_h = ll.NonlinearityLayer(l_enc_u_h,
                                     nonlinearity=nonlin_enc,
                                     name='Nonlin-l_enc_u_h')

    # Item encoder model h(Rv)
    l_enc_v_h = layers.OneHotEncodeLayer(l_in_Rv,
                                         num_units=D_v,
                                         rank=J_v,
                                         num_hots=N_stars,
                                         share_params=share_params,
                                         nonlinearity=None,
                                         name='Dense-l_enc_v_h')
    l_enc_v_h = ll.NonlinearityLayer(l_enc_v_h,
                                     nonlinearity=nonlin_enc,
                                     name='Nonlin-l_enc_v_h')

    # User decoder model s(h(Ru))
    l_dec_u_s = layers.OneHotDecodeLayer(
        [l_enc_u_h, l_in_vid_origin, l_in_uid_minibatch],
        num_units=N_v,
        rank=J_u,
        num_hots=N_stars,
        share_params=share_params,
        nonlinearity=None,
        name='Dense-l_dec_u_s')

    # Item decoder model s(h(Rv))
    l_dec_v_s = layers.OneHotDecodeLayer(
        [l_enc_v_h, l_in_uid_origin, l_in_vid_minibatch],
        num_units=N_u,
        rank=J_v,
        num_hots=N_stars,
        share_params=share_params,
        nonlinearity=None,
        name='Dense-l_dec_v_s')

    # Likelihood model p(R)
    l_uv_s = ll.ElemwiseSumLayer([l_dec_u_s, l_dec_v_s], name='l_uv_s')
    l_r = ll.NonlinearityLayer(l_uv_s, nonlinearity=ln.softmax, name='l_r')
    l_r_ordinal = ll.NonlinearityLayer(l_uv_s,
                                       nonlinearity=layers.log_ordinal_softmax,
                                       name='l_r_ordinal')

    #----------------------------------------------------------------
    # Likelihood and RMSE
    # training
    p_r_train, log_p_r_ordinal_train = ll.get_output([l_r, l_r_ordinal],
                                                     deterministic=False)

    log_p_r = T.mean(
        parmesan.distributions.log_multinomial(sym_R_minibatch - 1, p_r_train))
    R_minibatch_one_hot = lasagne.utils.one_hot(sym_R_minibatch,
                                                m=N_stars + 1)[:, 1:]
    log_p_r_ordinal = T.mean(
        T.sum(log_p_r_ordinal_train * R_minibatch_one_hot, axis=1))
    regularization = lasagne.regularization.regularize_network_params(
        [l_r], lasagne.regularization.l2)
    cost_function = -(
        1.0 - alpha
    ) * log_p_r - alpha * log_p_r_ordinal + weight_decay * regularization

    predicts_train = T.sum(p_r_train *
                           T.shape_padleft(T.arange(1, 1 + N_stars)),
                           axis=1)
    SE_train = T.sum(T.sqr(T.cast(sym_R_minibatch, floatX) - predicts_train))

    # test
    sym_enc_u_h = T.fmatrix('enc_u_h')
    sym_enc_v_h = T.fmatrix('enc_v_h')
    enc_u_h_out, enc_v_h_out = ll.get_output([l_enc_u_h, l_enc_v_h],
                                             deterministic=True)
    p_r_test, = ll.get_output([l_r],
                              inputs={
                                  l_enc_u_h: sym_enc_u_h,
                                  l_enc_v_h: sym_enc_v_h
                              },
                              deterministic=True)

    predicts_test = T.sum(p_r_test * T.shape_padleft(T.arange(1, 1 + N_stars)),
                          axis=1)
    SE_test = T.sum(T.sqr(T.cast(sym_R_minibatch, floatX) - predicts_test))

    #----------------------------------------------------------------
    # Gradients
    clip_grad = 1
    max_norm = 5

    params = ll.get_all_params([
        l_r,
    ], trainable=True)
    for p in params:
        logger.debug("%s: %s" % (p, p.get_value().shape))

    grads = T.grad(cost_function, params)
    mgrads = lasagne.updates.total_norm_constraint(grads, max_norm=max_norm)
    cgrads = [T.clip(g, -clip_grad, clip_grad) for g in mgrads]

    updates = lasagne.updates.adam(cgrads,
                                   params,
                                   beta1=0.9,
                                   beta2=0.999,
                                   epsilon=1e-4,
                                   learning_rate=sym_lr)

    #----------------------------------------------------------------
    # Compile
    # training function
    logger.info("Compiling train_model ...")
    train_model = theano.function(
        inputs=[
            sym_lr, sym_uid_origin, sym_uid_minibatch, sym_vid_origin,
            sym_vid_minibatch, sym_R_minibatch, sym_Ru, sym_Rv, sym_dr_Ru,
            sym_dr_Rv
        ],
        outputs=[log_p_r, SE_train],
        updates=updates,
    )

    # encoders
    logger.info("Compiling encode_model ...")
    u_encode_model = theano.function(inputs=[sym_Ru], outputs=enc_u_h_out)
    v_encode_model = theano.function(inputs=[sym_Rv], outputs=enc_v_h_out)

    # test function
    logger.info("Compiling test_model ...")
    test_model = theano.function(
        inputs=[
            sym_uid_origin, sym_uid_minibatch, sym_vid_origin,
            sym_vid_minibatch, sym_R_minibatch, sym_enc_u_h, sym_enc_v_h
        ],
        outputs=[SE_test],
    )

    #----------------------------------------------------------------
    # Predict function
    def predict(which_set='test'):
        assert which_set in ['valid', 'test']
        if which_set == 'valid':
            R_matrix_cond = R_matrix['train']
        else:
            R_matrix_cond = R_matrix['train'] + R_matrix['valid']

        # test statistics
        SE_epoch = 0
        n_pred_epoch = 0

        # precompute hidden representations
        u_end = 0
        while u_end < N_u:
            u_start, u_end = u_end, min(u_end + batch_size_u, N_u)
            # create user mini-batch
            u_batch_ids = np.arange(u_start, u_end).astype('int32')
            # create conditionals
            Ru_minibatch = R_matrix_cond[u_batch_ids, :]
            # encode
            np_enc_u_h[u_batch_ids] = u_encode_model(Ru_minibatch)

        v_end = 0
        while v_end < N_v:
            v_start, v_end = v_end, min(v_end + batch_size_v, N_v)
            # create item mini-batch
            v_batch_ids = np.arange(v_start, v_end).astype('int32')
            # create conditionals
            Rv_minibatch = R_matrix_cond[:, v_batch_ids].T
            # encode
            np_enc_v_h[v_batch_ids] = v_encode_model(Rv_minibatch)

        # loop mini-batches
        u_end = 0
        while u_end < N_u:
            u_start, u_end = u_end, min(u_end + batch_size_u, N_u)
            v_end = 0
            while v_end < N_v:
                v_start, v_end = v_end, min(v_end + batch_size_v, N_v)
                # create user mini-batch and item mini-batch
                u_batch_ids = np.arange(u_start, u_end).astype('int32')
                v_batch_ids = np.arange(v_start, v_end).astype('int32')

                # get encoded vectors
                Ru_encoded = np_enc_u_h[u_batch_ids, :]
                Rv_encoded = np_enc_v_h[v_batch_ids, :]

                # create test samples mini-batch
                R_matrix_minibatch = R_matrix[which_set][np.ix_(
                    u_batch_ids, v_batch_ids)]
                R_matrix_minibatch_sparse = scipy.sparse.coo_matrix(
                    R_matrix_minibatch)

                # prepare user and item IDs needed
                uid_minibatch = R_matrix_minibatch_sparse.row
                vid_minibatch = R_matrix_minibatch_sparse.col
                R_minibatch = R_matrix_minibatch_sparse.data

                n_pred_step = R_minibatch.shape[0]
                if n_pred_step == 0:
                    continue

                uid_origin = u_batch_ids[uid_minibatch]
                vid_origin = v_batch_ids[vid_minibatch]

                SE_step, = test_model(uid_origin, uid_minibatch, vid_origin,
                                      vid_minibatch, R_minibatch, Ru_encoded,
                                      Rv_encoded)

                SE_epoch += SE_step
                n_pred_epoch += n_pred_step

        # print info after test finished
        assert n_pred_epoch == N_rating[which_set]
        RMSE_epoch = np.sqrt(SE_epoch / n_pred_epoch) / (N_stars / 5.0)
        logger.critical("Estimated  %s  RMSE = %f (%d %s ratings)" %
                        (which_set, RMSE_epoch, n_pred_epoch, which_set))
        return RMSE_epoch

    #----------------------------------------------------------------
    # Training

    best_valid_result = np.inf
    best_model = None
    n_epocs_without_improvement = 0

    logger.warning("Training started.")
    # loop epoch
    for epoch in range(1, 1 + max_epoch):
        epoch_start_time = time.time()

        # training statistics
        LL_epoch_train, SE_epoch_train = 0, 0
        n_pred_epoch_train = 0

        # loop mini-batches
        for step in range(n_step):
            # sample i and j
            #i = np.random.randint(N_u)
            #j = np.random.randint(N_v)
            threshold_u = int(0.2 * N_u)
            threshold_v = int(0.2 * N_v)
            i = np.random.randint(low=threshold_u,
                                  high=N_u - min(threshold_u, batch_size_u))
            j = np.random.randint(low=threshold_v,
                                  high=N_v - min(threshold_v, batch_size_v))

            # calculate mini-batch size
            Bi = min(batch_size_u, N_u - i)
            Bj = min(batch_size_v, N_v - j)
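            # note: i and j only determine the mini-batch sizes Bi/Bj and the
            # dropout rates below; the actual user/item ids are drawn uniformly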

            # sample user mini-batch and item mini-batch
            u_batch_ids_train = np.random.choice(N_u, Bi,
                                                 replace=False).astype('int32')
            v_batch_ids_train = np.random.choice(N_v, Bj,
                                                 replace=False).astype('int32')

            # create conditionals
            Ru_minibatch_train = R_matrix['train'][u_batch_ids_train, :]
            Rv_minibatch_train = R_matrix['train'][:, v_batch_ids_train].T
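            # zero out the ratings that will be predicted in this step so the
            # encoders never condition on their own targets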
            Ru_minibatch_train[:, v_batch_ids_train] = 0
            Rv_minibatch_train[:, u_batch_ids_train] = 0

            # calculate dropout rate
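            # the rates depend on the sampled (i, j), so each update conditions
            # on a different fraction of the rating history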
            dr_Ru = 1.0 - 1.0 * j / (N_v - Bj)
            dr_Rv = 1.0 - 1.0 * i / (N_u - Bi)

            # create training samples mini-batch
            R_matrix_minibatch_train = R_matrix['train'][np.ix_(
                u_batch_ids_train, v_batch_ids_train)]
            R_matrix_minibatch_sparse_train = scipy.sparse.coo_matrix(
                R_matrix_minibatch_train)

            # prepare user and item IDs needed
            uid_minibatch_train = R_matrix_minibatch_sparse_train.row
            vid_minibatch_train = R_matrix_minibatch_sparse_train.col
            R_minibatch_train = R_matrix_minibatch_sparse_train.data

            n_pred_step_train = R_minibatch_train.shape[0]
            if n_pred_step_train == 0:
                logger.warning(
                    'No training samples in current mini-batch (i=%d, j=%d).' %
                    (i, j))
                continue

            uid_origin_train = u_batch_ids_train[uid_minibatch_train]
            vid_origin_train = v_batch_ids_train[vid_minibatch_train]

            # update parameters and calculate likelihood and RMSE
            LL_step_train, SE_step_train = train_model(
                lr, uid_origin_train, uid_minibatch_train, vid_origin_train,
                vid_minibatch_train, R_minibatch_train, Ru_minibatch_train,
                Rv_minibatch_train, dr_Ru, dr_Rv)
            LL_epoch_train += LL_step_train * n_pred_step_train
            SE_epoch_train += SE_step_train
            n_pred_epoch_train += n_pred_step_train

        # report statistics after the epoch finishes
        LL_epoch_train /= n_pred_epoch_train
        RMSE_epoch_train = np.sqrt(
            SE_epoch_train / n_pred_epoch_train) / (N_stars / 5.0)

        epoch_end_time = time.time()
        logger.info(
            "Epoch %d, Estimated training RMSE = %f, LL = %f (%d training ratings). Elapsed time %fs."
            % (epoch, RMSE_epoch_train, LL_epoch_train, n_pred_epoch_train,
               epoch_end_time - epoch_start_time))

        # validation
        RMSE_valid = predict('valid')

        # termination
        if RMSE_valid < best_valid_result:
            n_epocs_without_improvement = 0
            best_valid_result = RMSE_valid
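            # snapshot the trainable parameters so they can be restored when
            # validation stops improving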
            best_model = ll.get_all_param_values([
                l_r,
            ], trainable=True)
            logger.debug("New best model found!")
        else:
            n_epocs_without_improvement += 1
            if n_epocs_without_improvement >= lookahead:
                ll.set_all_param_values([
                    l_r,
                ], best_model, trainable=True)
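                # after restoring the best parameters, anneal the learning rate
                # and stop once it can no longer be reduced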
                if lr > 1e-5:
                    n_epocs_without_improvement = 0
                    lr /= 4
                    logger.warning("Learning rate = %f now." % lr)
                else:
                    logger.warning("Training finished.")
                    break

    #----------------------------------------------------------------
    # Test
    RMSE_test = predict('test')

    #----------------------------------------------------------------
    # Summarization
    for k, v in six.iteritems(args_dict):
        logger.info("  %20s: %s" % (k, v))
def build_network_from_ae(classn):
    input_var = T.tensor4('input_var')

    layer = layers.InputLayer(shape=(None, 3, PS, PS), input_var=input_var)
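    # encoder: batch-normalised 'same' convolutions with two stride-2 average
    # pooling stages, shrinking the PS x PS input by a factor of 4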
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           100,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           120,
                           filter_size=(5, 5),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = layers.Pool2DLayer(layer,
                               pool_size=(2, 2),
                               stride=2,
                               mode='average_inc_pad')
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           240,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           320,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    layer = layers.Pool2DLayer(layer,
                               pool_size=(2, 2),
                               stride=2,
                               mode='average_inc_pad')
    layer = batch_norm(
        layers.Conv2DLayer(layer,
                           640,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    prely = batch_norm(
        layers.Conv2DLayer(layer,
                           1024,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))

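    # attention branch: a 100-channel feature map is modulated by a
    # single-channel soft mask (SoftThresPerc and ChInnerProdMerge are custom
    # layers defined elsewhere in this project); their merge is the encoder output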
    featm = batch_norm(
        layers.Conv2DLayer(prely,
                           640,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    feat_map = batch_norm(
        layers.Conv2DLayer(featm,
                           100,
                           filter_size=(1, 1),
                           nonlinearity=rectify,
                           name="feat_map"))
    maskm = batch_norm(
        layers.Conv2DLayer(prely,
                           100,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    mask_rep = batch_norm(layers.Conv2DLayer(maskm,
                                             1,
                                             filter_size=(1, 1),
                                             nonlinearity=None),
                          beta=None,
                          gamma=None)
    mask_map = SoftThresPerc(mask_rep,
                             perc=97.0,
                             alpha=0.1,
                             beta=init.Constant(0.5),
                             tight=100.0,
                             name="mask_map")
    enlyr = ChInnerProdMerge(feat_map, mask_map, name="encoder")

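    # local decoder: transposed convolutions mirror the encoder and upsample
    # the encoded map back to the 3 x PS x PS input resolution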
    layer = batch_norm(
        layers.Deconv2DLayer(enlyr,
                             1024,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             640,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             640,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             320,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             320,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             240,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             120,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = batch_norm(
        layers.Deconv2DLayer(layer,
                             100,
                             filter_size=(5, 5),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    layer = layers.Deconv2DLayer(layer,
                                 3,
                                 filter_size=(1, 1),
                                 stride=1,
                                 crop='same',
                                 nonlinearity=identity)

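    # global branch: compress prely into a small "global_feature" map, then
    # decode it back to input resolution with a separate deconvolution stack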
    glblf = batch_norm(
        layers.Conv2DLayer(prely,
                           128,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    glblf = layers.Pool2DLayer(glblf,
                               pool_size=(5, 5),
                               stride=5,
                               mode='average_inc_pad')
    glblf = batch_norm(
        layers.Conv2DLayer(glblf,
                           64,
                           filter_size=(3, 3),
                           stride=1,
                           pad='same',
                           nonlinearity=leaky_rectify))
    gllyr = batch_norm(layers.Conv2DLayer(glblf,
                                          5,
                                          filter_size=(1, 1),
                                          nonlinearity=rectify),
                       name="global_feature")

    glblf = batch_norm(
        layers.Deconv2DLayer(gllyr,
                             256,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(9, 9),
                             stride=5,
                             crop=(2, 2),
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             128,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             64,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(4, 4),
                             stride=2,
                             crop=(1, 1),
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = batch_norm(
        layers.Deconv2DLayer(glblf,
                             32,
                             filter_size=(3, 3),
                             stride=1,
                             crop='same',
                             nonlinearity=leaky_rectify))
    glblf = layers.Deconv2DLayer(glblf,
                                 3,
                                 filter_size=(1, 1),
                                 stride=1,
                                 crop='same',
                                 nonlinearity=identity)

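    # reconstruction output: sum the local and global branches and flatten to
    # one vector per sample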
    layer = layers.ElemwiseSumLayer([layer, glblf])

    network = ReshapeLayer(layer, ([0], -1))

    mask_map.beta.set_value(np.float32(0.9 * mask_map.beta.get_value()))
    old_params = layers.get_all_params(network, trainable=True)
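    # parameters created so far belong to the pretrained autoencoder; they are
    # excluded from new_params below so that only the classifier head is new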

    # Adding more layers
    aug_var = T.matrix('aug_var')
    target_var = T.imatrix('targets')
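    # classification head: 1x1 convolutions on the encoder output plus dense
    # layers on the global feature, concatenated with the auxiliary features
    # (aug_var) before the final sigmoid layer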
    add_a = batch_norm(
        layers.Conv2DLayer(enlyr,
                           320,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    add_b = batch_norm(
        layers.Conv2DLayer(add_a,
                           320,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    add_c = batch_norm(
        layers.Conv2DLayer(add_b,
                           320,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    add_d = batch_norm(
        layers.Conv2DLayer(add_c,
                           320,
                           filter_size=(1, 1),
                           nonlinearity=leaky_rectify))
    add_0 = layers.Pool2DLayer(add_d,
                               pool_size=(25, 25),
                               stride=25,
                               mode='average_inc_pad')
    add_1 = batch_norm(
        layers.DenseLayer(add_0, 100, nonlinearity=leaky_rectify))

    add_2 = batch_norm(
        layers.DenseLayer(gllyr, 320, nonlinearity=leaky_rectify))
    add_3 = batch_norm(
        layers.DenseLayer(add_2, 320, nonlinearity=leaky_rectify))
    add_4 = batch_norm(
        layers.DenseLayer(add_3, 100, nonlinearity=leaky_rectify))

    aug_layer = layers.InputLayer(shape=(None, aug_fea_n), input_var=aug_var)

    cat_layer = lasagne.layers.ConcatLayer([add_1, add_4, aug_layer], axis=1)

    hidden_layer = layers.DenseLayer(cat_layer, 80, nonlinearity=leaky_rectify)
    network = layers.DenseLayer(hidden_layer, classn, nonlinearity=sigmoid)

    all_params = layers.get_all_params(network, trainable=True)
    new_params = [x for x in all_params if x not in old_params]

    return network, new_params, input_var, aug_var, target_var
    def build_network(self, K, vocab_size, W_init):

        l_docin = L.InputLayer(shape=(None,None,1), input_var=self.inps[0])
        l_doctokin = L.InputLayer(shape=(None,None), input_var=self.inps[1])
        l_qin = L.InputLayer(shape=(None,None,1), input_var=self.inps[2])
        l_qtokin = L.InputLayer(shape=(None,None), input_var=self.inps[3])
        l_docmask = L.InputLayer(shape=(None,None), input_var=self.inps[6])
        l_qmask = L.InputLayer(shape=(None,None), input_var=self.inps[7])
        l_tokin = L.InputLayer(shape=(None,MAX_WORD_LEN), input_var=self.inps[8])
        l_tokmask = L.InputLayer(shape=(None,MAX_WORD_LEN), input_var=self.inps[9])
        l_featin = L.InputLayer(shape=(None,None), input_var=self.inps[11])

        doc_shp = self.inps[1].shape
        qry_shp = self.inps[3].shape

        l_docembed = L.EmbeddingLayer(l_docin, input_size=vocab_size, 
                output_size=self.embed_dim, W=W_init) # B x N x 1 x DE
        l_doce = L.ReshapeLayer(l_docembed, 
                (doc_shp[0],doc_shp[1],self.embed_dim)) # B x N x DE
        l_qemb = L.EmbeddingLayer(l_qin, input_size=vocab_size, 
                output_size=self.embed_dim, W=l_docembed.W)
        l_qembed = L.ReshapeLayer(l_qemb, 
                (qry_shp[0],qry_shp[1],self.embed_dim)) # B x Q x DE
        l_fembed = L.EmbeddingLayer(l_featin, input_size=2, output_size=2) # B x N x 2

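        # optionally freeze the word embeddings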
        if self.train_emb==0: 
            l_docembed.params[l_docembed.W].remove('trainable')
            l_qemb.params[l_qemb.W].remove('trainable')

        # char embeddings
        if self.use_chars:
            l_lookup = L.EmbeddingLayer(l_tokin, self.num_chars, self.char_dim) # T x L x D
            l_fgru = L.GRULayer(l_lookup, self.char_dim, grad_clipping=GRAD_CLIP, 
                    mask_input=l_tokmask, gradient_steps=GRAD_STEPS, precompute_input=True,
                    only_return_final=True)
            l_bgru = L.GRULayer(l_lookup, self.char_dim, grad_clipping=GRAD_CLIP, 
                    mask_input=l_tokmask, gradient_steps=GRAD_STEPS, precompute_input=True, 
                    backwards=True, only_return_final=True) # T x 2D
            l_fwdembed = L.DenseLayer(l_fgru, self.embed_dim//2, nonlinearity=None) # T x DE/2
            l_bckembed = L.DenseLayer(l_bgru, self.embed_dim//2, nonlinearity=None) # T x DE/2
            l_embed = L.ElemwiseSumLayer([l_fwdembed, l_bckembed], coeffs=1)
            l_docchar_embed = IndexLayer([l_doctokin, l_embed]) # B x N x DE/2
            l_qchar_embed = IndexLayer([l_qtokin, l_embed]) # B x Q x DE/2

            l_doce = L.ConcatLayer([l_doce, l_docchar_embed], axis=2)
            l_qembed = L.ConcatLayer([l_qembed, l_qchar_embed], axis=2)

        attentions = []
        if self.save_attn:
            l_m = PairwiseInteractionLayer([l_doce,l_qembed])
            attentions.append(L.get_output(l_m, deterministic=True))

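        # K-1 gated-attention hops: each hop runs bidirectional GRUs over the
        # document and the query, scores their pairwise interactions, and gates
        # the document representation with the resulting query-aware attention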
        for i in range(K-1):
            l_fwd_doc_1 = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP, 
                    mask_input=l_docmask, gradient_steps=GRAD_STEPS, precompute_input=True)
            l_bkd_doc_1 = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP, 
                    mask_input=l_docmask, gradient_steps=GRAD_STEPS, precompute_input=True, \
                            backwards=True)

            l_doc_1 = L.concat([l_fwd_doc_1, l_bkd_doc_1], axis=2) # B x N x 2D

            l_fwd_q_1 = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP, 
                    mask_input=l_qmask, 
                    gradient_steps=GRAD_STEPS, precompute_input=True)
            l_bkd_q_1 = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP, 
                    mask_input=l_qmask, 
                    gradient_steps=GRAD_STEPS, precompute_input=True, backwards=True)

            l_q_c_1 = L.ConcatLayer([l_fwd_q_1, l_bkd_q_1], axis=2) # B x Q x 2D

            l_m = PairwiseInteractionLayer([l_doc_1, l_q_c_1])
            l_doc_2_in = GatedAttentionLayer([l_doc_1, l_q_c_1, l_m], 
                    gating_fn=self.gating_fn, 
                    mask_input=self.inps[7])
            l_doce = L.dropout(l_doc_2_in, p=self.dropout) # B x N x DE
            if self.save_attn: 
                attentions.append(L.get_output(l_m, deterministic=True))

        if self.use_feat: l_doce = L.ConcatLayer([l_doce, l_fembed], axis=2) # B x N x DE+2

        # final layer
        l_fwd_doc = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP, 
                mask_input=l_docmask, gradient_steps=GRAD_STEPS, precompute_input=True)
        l_bkd_doc = L.GRULayer(l_doce, self.nhidden, grad_clipping=GRAD_CLIP, 
                mask_input=l_docmask, gradient_steps=GRAD_STEPS, precompute_input=True, \
                        backwards=True)
        l_doc = L.concat([l_fwd_doc, l_bkd_doc], axis=2)

        l_fwd_q = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP, mask_input=l_qmask, 
                gradient_steps=GRAD_STEPS, precompute_input=True, only_return_final=False)
        l_bkd_q = L.GRULayer(l_qembed, self.nhidden, grad_clipping=GRAD_CLIP, mask_input=l_qmask, 
                gradient_steps=GRAD_STEPS, precompute_input=True, backwards=True, 
                only_return_final=False)
        l_q = L.ConcatLayer([l_fwd_q, l_bkd_q], axis=2) # B x Q x 2D

        if self.save_attn:
            l_m = PairwiseInteractionLayer([l_doc, l_q])
            attentions.append(L.get_output(l_m, deterministic=True))

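        # attention-sum readout over the candidate answer positions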
        l_prob = AttentionSumLayer([l_doc,l_q], self.inps[4], self.inps[12], 
                mask_input=self.inps[10])
        final = L.get_output(l_prob)
        final_v = L.get_output(l_prob, deterministic=True)

        return final, final_v, l_prob, l_docembed.W, attentions