Example 1
    def __init__(self, nf, nout, kwargs):

        for pname in DNN.param_names:
            setattr(self, pname, kwargs[pname])

        if self.activation == "elu":
            nonlin = lambda x: T.switch(x >= 0, x, T.exp(x) - 1)
        else:
            self.activation = "rectify" if self.activation == "relu" else self.activation
            nonlin = getattr(lasagne.nonlinearities, self.activation)
        self.opt = getattr(lasagne.updates, self.opt)

        l_in = lasagne.layers.InputLayer(shape=(None, nf))
        cur_layer = batch_norm(l_in) if self.bnorm else l_in
        cur_layer = lasagne.layers.DropoutLayer(cur_layer, p=self.drates[0]) if self.drates[0] > 0 else cur_layer

        self.layers = [cur_layer]
        for n_hidden, drate in zip(self.n_hidden, self.drates[1:]):
            l_betw = lasagne.layers.DenseLayer(self.layers[-1], num_units=n_hidden, nonlinearity=nonlin)
            cur_layer = batch_norm(l_betw) if self.bnorm else l_betw
            cur_layer = lasagne.layers.DropoutLayer(cur_layer, p=drate) if drate > 0 else cur_layer
            self.layers.append(cur_layer)

        l_out = lasagne.layers.DenseLayer(self.layers[-1], num_units=nout, nonlinearity=None)

        target_output = T.matrix("target_output")

        # cost_train = T.mean(lasagne.objectives.squared_error(lasagne.layers.get_output(l_out, deterministic=False), target_output))
        cost_train = T.mean(
            T.sum(
                lasagne.objectives.squared_error(lasagne.layers.get_output(l_out, deterministic=False), target_output),
                axis=1,
            )
            / 2
        )
        cost_eval = T.mean(
            T.sum(
                lasagne.objectives.squared_error(lasagne.layers.get_output(l_out, deterministic=True), target_output),
                axis=1,
            )
            / 2
        )
        # cost_eval = T.mean((lasagne.layers.get_output(l_out, deterministic=True)-target_output)**2)

        all_params = lasagne.layers.get_all_params(l_out, trainable=True)
        all_grads = T.grad(cost_train, all_params)

        all_grads, total_norm = lasagne.updates.total_norm_constraint(all_grads, self.norm, return_norm=True)
        # all_grads = [T.switch(T.or_(T.isnan(total_norm), T.isinf(total_norm)), p*0.01 , g) for g,p in zip(all_grads, all_params)]
        updates = self.opt(all_grads, all_params, self.lr)

        self.train_model = theano.function(
            inputs=[l_in.input_var, target_output], outputs=cost_train, updates=updates, allow_input_downcast=True
        )
        self.predict_model = theano.function(
            inputs=[l_in.input_var, target_output],
            outputs=[cost_eval, lasagne.layers.get_output(l_out, deterministic=True)],
            allow_input_downcast=True,
        )
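
A minimal usage sketch for the class above (not from the original source): it assumes DNN.param_names lists exactly the keys read in __init__ (activation, opt, bnorm, drates, n_hidden, norm, lr) and uses random placeholder data.

import numpy as np

# Hypothetical hyper-parameters; the keys are inferred from the attributes used above.
params = {
    "activation": "relu",       # remapped to lasagne.nonlinearities.rectify
    "opt": "adam",              # looked up in lasagne.updates
    "bnorm": True,              # wrap layers in batch_norm
    "drates": [0.2, 0.5, 0.5],  # input dropout plus one rate per hidden layer
    "n_hidden": [128, 128],
    "norm": 5.0,                # gradient-norm clipping threshold
    "lr": 1e-3,
}
X = np.random.rand(32, 100).astype("float32")  # 32 samples, 100 features
y = np.random.rand(32, 10).astype("float32")   # 10 regression targets
net = DNN(nf=100, nout=10, kwargs=params)      # kwargs is a plain dict passed positionally
train_cost = net.train_model(X, y)
eval_cost, predictions = net.predict_model(X, y)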
Example 2
def net_multi_base_named_dilated(X, nfilt, doBatchNorm, trainPhase,
                                 pool_stride, pool_size, conf):
    inDim = X.get_shape()[3]
    with tf.variable_scope('layer1'):
        conv1 = conv_relu(X, [5, 5, inDim, 48], 0.01, 0, doBatchNorm,
                          trainPhase)
        norm1 = norm('norm1', conv1, lsize=2)

    with tf.variable_scope('layer2'):
        weights = tf.get_variable(
            "weights", [3, 3, 48, nfilt],
            initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable("biases",
                                 nfilt,
                                 initializer=tf.constant_initializer(1))
        conv2 = tf.nn.convolution(norm1,
                                  weights,
                                  strides=[1, 1],
                                  padding='SAME',
                                  dilation_rate=[4, 4])
        if doBatchNorm:
            conv2 = batch_norm(conv2, trainPhase)
        conv2 = tf.nn.relu(conv2 + biases)
        norm2 = norm('norm2', conv2, lsize=4)

    with tf.variable_scope('layer3'):
        weights = tf.get_variable(
            "weights", [3, 3, nfilt, nfilt],
            initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable("biases",
                                 nfilt,
                                 initializer=tf.constant_initializer(1))
        conv3 = tf.nn.convolution(norm2,
                                  weights,
                                  strides=[1, 1],
                                  padding='SAME',
                                  dilation_rate=[2, 2])
        if doBatchNorm:
            conv3 = batch_norm(conv3, trainPhase)
        conv3 = tf.nn.relu(conv3 + biases)

    with tf.variable_scope('layer4'):
        conv4 = conv_relu(conv3, [3, 3, nfilt, nfilt], 0.01, 1, doBatchNorm,
                          trainPhase)
    with tf.variable_scope('layer5'):
        conv5 = conv_relu(conv4, [3, 3, nfilt, nfilt], 0.01, 1, doBatchNorm,
                          trainPhase)

    out_dict = {
        'conv1': conv1,
        'conv2': conv2,
        'conv3': conv3,
        'conv4': conv4,
        'conv5': conv5,
        'norm1': norm1,
        'norm2': norm2,
    }
    return conv5, out_dict
Example 4
def ModelSimple(X, is_training):
    h_1 = lrelu(batch_norm(conv2d(X, 32, name='conv1'),
                           is_training, scope='bn1'), name='lrelu1')
    h_2 = lrelu(batch_norm(conv2d(h_1, 64, name='conv2'),
                           is_training, scope='bn2'), name='lrelu2')
    h_3 = lrelu(batch_norm(conv2d(h_2, 64, name='conv3'),
                           is_training, scope='bn3'), name='lrelu3')
    h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4])
    return linear(h_3_flat, 10)
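
A minimal sketch of wiring ModelSimple into a graph, assuming 28x28 single-channel inputs and that the conv2d/linear/lrelu helpers plus a functional batch_norm(x, is_training, scope=...) are in scope; the placeholder names below are illustrative, not from the original source.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 28, 28, 1])
y = tf.placeholder(tf.float32, [None, 10])
is_training = tf.placeholder(tf.bool, name='is_training')

logits = ModelSimple(x, is_training)                      # returns the 10-way linear output
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)  # feed is_training=True when training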
Example 5
def ConvBNRelu(input, kernelSize, outputSize, is_training):
    inputSize = input.get_shape()[3].value
    # print type(inputSize)
    weights.append(CreateWeight(kernelSize, inputSize, outputSize))
    conv = tf.nn.conv2d(input, weights[-1], strides=[1, 1, 1, 1], padding='SAME')
    # conv = tf.nn.batch_normalization(conv, 0.001, 1.0, 0, 1, 0.0001)
    conv = batch_norm(conv, is_training)
    return tf.nn.relu(conv)
def bn_relu_dropout_conv(input_layer, filter_shape, strides, bn_param, keep_prob, device):
    layer = [batch_norm(input_layer, bn_param, device=device)]
    layer.append(tf.nn.relu(layer[-1]))

    if FLAGS.keep_prob is not None:
        layer.append(tf.nn.dropout(layer[-1], keep_prob[0]))

    layer.append(convolution_layer(layer[-1], shape=filter_shape, strides=strides, bias=False, layer_name='conv', device=device))
    return layer[-1]
Example 8
def conv_relu(X, kernel_shape, conv_std, bias_val, doBatchNorm, trainPhase):
    weights = tf.get_variable("weights", kernel_shape,
        initializer=tf.random_normal_initializer(stddev=conv_std))
    biases = tf.get_variable("biases", kernel_shape[-1],
        initializer=tf.constant_initializer(bias_val))
    conv = tf.nn.conv2d(X, weights,
        strides=[1, 1, 1, 1], padding='SAME')
    if doBatchNorm:
        conv = batch_norm(conv, trainPhase)
    return tf.nn.relu(conv + biases)
Example 9
def conv_relu(X, kernel_shape, conv_std, bias_val, doBatchNorm, trainPhase):
    weights = tf.get_variable(
        "weights",
        kernel_shape,
        initializer=tf.random_normal_initializer(stddev=conv_std))
    biases = tf.get_variable("biases",
                             kernel_shape[-1],
                             initializer=tf.constant_initializer(bias_val))
    conv = tf.nn.conv2d(X, weights, strides=[1, 1, 1, 1], padding='SAME')
    if doBatchNorm:
        conv = batch_norm(conv, trainPhase)
    return tf.nn.relu(conv + biases)
Example 10
def net_multi_base_named_dilated(X, nfilt, doBatchNorm, trainPhase, pool_stride, pool_size, conf):
    inDim = X.get_shape()[3]
    with tf.variable_scope('layer1'):
        conv1 = conv_relu(X, [5, 5, inDim, 48], 0.01, 0, doBatchNorm, trainPhase)
        norm1 = norm('norm1', conv1, lsize=2)

    with tf.variable_scope('layer2'):
        weights = tf.get_variable("weights", [3,3,48,nfilt],
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable("biases", nfilt,
                                 initializer=tf.constant_initializer(1))
        conv2 = tf.nn.convolution(norm1, weights,
                            strides=[1, 1], padding='SAME',dilation_rate=[4,4])
        if doBatchNorm:
            conv2 = batch_norm(conv2, trainPhase)
        conv2 = tf.nn.relu(conv2 + biases)
        norm2 = norm('norm2',conv2 , lsize=4)

    with tf.variable_scope('layer3'):
        weights = tf.get_variable("weights", [3,3,nfilt,nfilt],
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable("biases",nfilt,
                                 initializer=tf.constant_initializer(1))
        conv3 = tf.nn.convolution(norm2, weights,
                                  strides=[1, 1], padding='SAME', dilation_rate=[2, 2])
        if doBatchNorm:
            conv3 = batch_norm(conv3, trainPhase)
        conv3 = tf.nn.relu(conv3 + biases)

    with tf.variable_scope('layer4'):
        conv4 = conv_relu(conv3, [3, 3, nfilt, nfilt], 0.01, 1, doBatchNorm, trainPhase)
    with tf.variable_scope('layer5'):
        conv5 = conv_relu(conv4, [3, 3, nfilt, nfilt], 0.01, 1, doBatchNorm, trainPhase)

    out_dict = {'conv1': conv1, 'conv2': conv2, 'conv3': conv3,
                'conv4': conv4, 'conv5': conv5, 'norm1': norm1, 'norm2': norm2,
                }
    return conv5, out_dict
Example 11
    def _act(x, name="bn_act"):
        """Batch-normalized activation function.

    Args:
        x: Input tensor.
        name: Name for the output tensor.

    Returns:
        normed: Output tensor.
    """
        n_out = x.get_shape()[-1]
        with tf.variable_scope("bn_params"):
            if affine:
                beta = nn.weight_variable([n_out],
                                          init_method="constant",
                                          dtype=dtype,
                                          init_param={"val": 0.0},
                                          name="beta")
                gamma = nn.weight_variable([n_out],
                                           init_method="constant",
                                           dtype=dtype,
                                           init_param={"val": 1.0},
                                           name="gamma")
            else:
                beta = None
                gamma = None
            if learn_sigma:
                sigma = nn.weight_variable([1],
                                           init_method="constant",
                                           dtype=dtype,
                                           init_param={"val": sigma_init},
                                           name="sigma")
            else:
                sigma = sigma_init
            eps = sigma**2
        x_normed, x_mean = batch_norm(x,
                                      n_out,
                                      is_training,
                                      gamma=gamma,
                                      beta=beta,
                                      eps=eps,
                                      axes=axes,
                                      scope=scope,
                                      name=name,
                                      return_mean=True)
        if l1_reg > 0.0:
            l1_collection.append(l1_loss(x, x_mean=x_mean, alpha=l1_reg))
        return act(x_normed)
def inference(input_tensor_batch, bn_param, keep_prob, n, k, num_classes, device):
    layers = []
    with tf.variable_scope('group1'):
        conv0 = convolution_layer(input_tensor_batch, shape=[3, 3, 3, 16], strides=[1, 1, 1, 1], bias=False, layer_name='conv0', device=device)
        layers.append(conv0)

    for i in range(n):
        with tf.variable_scope('group2_block%d' % i):
            if i == 0 and k != 1:
                conv1 = first_residual_block(layers[-1], 16*k, bn_param, keep_prob, down_sample=False, device=device)
            else:
                conv1 = residual_block(layers[-1], 16*k, bn_param, keep_prob, device=device)
            layers.append(conv1)

    for i in range(n):
        with tf.variable_scope('group3_block%d' % i):
            if i == 0:
                conv2 = first_residual_block(layers[-1], 32*k, bn_param, keep_prob, down_sample=True, device=device)
            else:
                conv2 = residual_block(layers[-1], 32*k, bn_param, keep_prob, device=device)
            layers.append(conv2)

    for i in range(n):
        with tf.variable_scope('group4_block%d' % i):
            if i == 0:
                conv3 = first_residual_block(layers[-1], 64*k, bn_param, keep_prob, down_sample=True, device=device)
            else:
                conv3 = residual_block(layers[-1], 64*k, bn_param, keep_prob, device=device)
            layers.append(conv3)
        assert conv3.get_shape().as_list()[1:] == [8, 8, 64*k]

    with tf.variable_scope('fc'):
        bn_layer = batch_norm(layers[-1], bn_param, device=device)
        relu_layer = tf.nn.relu(bn_layer)
        global_pool = tf.reduce_mean(relu_layer, [1, 2])

        assert global_pool.get_shape().as_list()[-1:] == [64*k]

        shape=[global_pool.get_shape().as_list()[-1], num_classes]
        output = full_connection_layer(global_pool, shape=shape, bias=True, layer_name='output', device=device)

        layers.append(output)

    return layers[-1]
Example 13
def net_multi_conv(X0, X1, X2, _dropout, conf, doBatchNorm, trainPhase):
    imsz = conf.imsz
    rescale = conf.rescale
    pool_scale = conf.pool_scale
    nfilt = conf.nfilt
    pool_stride = conf.pool_stride
    pool_size = conf.pool_size

    #     conv5_0,base_dict_0 = net_multi_base(X0,_weights['base0'])
    #     conv5_1,base_dict_1 = net_multi_base(X1,_weights['base1'])
    #     conv5_2,base_dict_2 = net_multi_base(X2,_weights['base2'])
    if conf.dilation_rate == 4:
        net_to_use = net_multi_base_named_dilated
    else:
        net_to_use = net_multi_base_named

    with tf.variable_scope('scale0'):
        conv5_0, base_dict_0 = net_to_use(X0, nfilt, doBatchNorm, trainPhase,
                                          pool_stride, pool_size, conf)
    with tf.variable_scope('scale1'):
        conv5_1, base_dict_1 = net_to_use(X1, nfilt, doBatchNorm, trainPhase,
                                          pool_stride, pool_size, conf)
    with tf.variable_scope('scale2'):
        conv5_2, base_dict_2 = net_to_use(X2, nfilt, doBatchNorm, trainPhase,
                                          pool_stride, pool_size, conf)

    sz0 = int(math.ceil(float(imsz[0]) / pool_scale / rescale))
    sz1 = int(math.ceil(float(imsz[1]) / pool_scale / rescale))
    conv5_1_up = upscale('5_1', conv5_1, [sz0, sz1])
    conv5_2_up = upscale('5_2', conv5_2, [sz0, sz1])

    # crop lower res layers to match higher res size
    conv5_0_sz = tf.Tensor.get_shape(conv5_0).as_list()
    conv5_1_sz = tf.Tensor.get_shape(conv5_1_up).as_list()
    crop_0 = int(old_div((sz0 - conv5_0_sz[1]), 2))
    crop_1 = int(old_div((sz1 - conv5_0_sz[2]), 2))

    curloc = [0, crop_0, crop_1, 0]
    patchsz = tf.to_int32([-1, conv5_0_sz[1], conv5_0_sz[2], -1])
    conv5_1_up = tf.slice(conv5_1_up, curloc, patchsz)
    conv5_2_up = tf.slice(conv5_2_up, curloc, patchsz)
    conv5_1_final_sz = tf.Tensor.get_shape(conv5_1_up).as_list()
    #     print("Initial lower res layer size %s"%(', '.join(map(str,conv5_1_sz))))
    #     print("Initial higher res layer size %s"%(', '.join(map(str,conv5_0_sz))))
    #     print("Crop start lower res layer at %s"%(', '.join(map(str,curloc))))
    #     print("Final size of lower res layer %s"%(', '.join(map(str,conv5_1_final_sz))))

    conv5_cat = tf.concat([conv5_0, conv5_1_up, conv5_2_up], 3)

    # Reshape conv5 output to fit dense layer input
    #     conv6 = conv2d('conv6',conv5_cat,_weights['wd1'],_weights['bd1'])
    #     conv6 = tf.nn.dropout(conv6,_dropout)
    #     conv7 = conv2d('conv7',conv6,_weights['wd2'],_weights['bd2'])
    #     conv7 = tf.nn.dropout(conv7,_dropout)

    with tf.variable_scope('layer6'):
        if hasattr(conf, 'dilation_rate'):
            dilation_rate = [conf.dilation_rate, conf.dilation_rate]
        else:
            dilation_rate = [1, 1]
        weights = tf.get_variable(
            "weights",
            [conf.psz, conf.psz, conf.numscale * nfilt, conf.nfcfilt],
            initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable("biases",
                                 conf.nfcfilt,
                                 initializer=tf.constant_initializer(1))
        conv6 = tf.nn.convolution(conv5_cat,
                                  weights,
                                  strides=[1, 1],
                                  padding='SAME',
                                  dilation_rate=dilation_rate)
        if doBatchNorm:
            conv6 = batch_norm(conv6, trainPhase)
        conv6 = tf.nn.relu(conv6 + biases)
        conv6 = tf.nn.dropout(conv6, _dropout,
                              [conf.batch_size, 1, 1, conf.nfcfilt])

    with tf.variable_scope('layer7'):
        conv7 = conv_relu(conv6, [1, 1, conf.nfcfilt, conf.nfcfilt], 0.005, 1,
                          doBatchNorm, trainPhase)
        # if not doBatchNorm:
        conv7 = tf.nn.dropout(conv7, _dropout,
                              [conf.batch_size, 1, 1, conf.nfcfilt])

    # Output, class prediction
    #     out = tf.nn.bias_add(tf.nn.conv2d(
    #             conv7, _weights['wd3'],
    #             strides=[1, 1, 1, 1], padding='SAME'),_weights['bd3'])

    with tf.variable_scope('layer8'):
        l8_weights = tf.get_variable(
            "weights", [1, 1, conf.nfcfilt, conf.n_classes],
            initializer=tf.random_normal_initializer(stddev=0.01))
        l8_biases = tf.get_variable("biases",
                                    conf.n_classes,
                                    initializer=tf.constant_initializer(0))
        out = tf.nn.conv2d(
            conv7, l8_weights, strides=[1, 1, 1, 1],
            padding='SAME') + l8_biases


    #   No batch norm for the output layer.

    out_dict = {
        'base_dict_0': base_dict_0,
        'base_dict_1': base_dict_1,
        'base_dict_2': base_dict_2,
        'conv6': conv6,
        'conv7': conv7,
    }

    return out, out_dict
def bn_relu_conv(input_layer, filter_shape, strides, bn_param, device):
    bn = batch_norm(input_layer, bn_param, device=device)
    relu = tf.nn.relu(bn)
    conv = convolution_layer(relu, shape=filter_shape, strides=strides, bias=False, layer_name='conv', device=device)
    return conv, relu
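
bn_relu_conv applies the pre-activation ordering (batch norm, then ReLU, then convolution). As an illustration only, a sketch of how such a block is commonly composed into an identity residual unit, assuming the same batch_norm/convolution_layer helpers; this is not the repository's residual_block.

def residual_block_sketch(input_layer, out_channels, bn_param, device):
    # Two pre-activation conv layers followed by an identity skip connection.
    in_channels = input_layer.get_shape().as_list()[-1]
    conv1, _ = bn_relu_conv(input_layer,
                            filter_shape=[3, 3, in_channels, out_channels],
                            strides=[1, 1, 1, 1], bn_param=bn_param, device=device)
    conv2, _ = bn_relu_conv(conv1,
                            filter_shape=[3, 3, out_channels, out_channels],
                            strides=[1, 1, 1, 1], bn_param=bn_param, device=device)
    return conv2 + input_layer  # valid only when in_channels == out_channels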
Example 15
def inference(input_tensor, train, regularizer):
    # Layer 1: convolutional layer
    with tf.variable_scope('layer1-conv1'):

        conv1_weights_g = tf.get_variable(
            "weight_g",
            shape=[CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases_g = tf.get_variable(
            "biases_g",
            shape=[CONV1_DEEP],
            initializer=tf.constant_initializer(0.0))

        conv1_weights_o = tf.get_variable(
            "weight_o",
            shape=[CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        #tf.assign(conv1_weights_o , gutils.unit(conv1_weights_o))

        conv1_biases_o = tf.get_variable(
            "biases_o",
            shape=[CONV1_DEEP],
            initializer=tf.constant_initializer(0.0))

        conv1_weights_g_tmp = tf.get_variable(
            "weight_g_tmp",
            shape=[CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=1))
        conv1_weights_o_tmp = tf.get_variable(
            "weight_o_tmp",
            shape=[CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=1))

        conv1_biases_g_tmp = tf.get_variable(
            "biases_g_tmp",
            shape=[CONV1_DEEP],
            initializer=tf.constant_initializer(0.0))
        conv1_biases_o_tmp = tf.get_variable(
            "biases_o_tmp",
            shape=[CONV1_DEEP],
            initializer=tf.constant_initializer(0.0))

        # Forward pass of the conv layer: stride 1 with all-zero (SAME) padding, so the output is a 28*28*32 tensor; the filter weights are the second argument.
        conv1_g = tf.nn.conv2d(input_tensor,
                               conv1_weights_g,
                               strides=[1, 1, 1, 1],
                               padding='SAME')
        conv1_batch_g = batch_norm.batch_norm(conv1_g, scale=None)
        relu1_g = tf.nn.relu(tf.nn.bias_add(conv1_batch_g, conv1_biases_g))

        conv1_o = tf.nn.conv2d(input_tensor,
                               conv1_weights_o,
                               strides=[1, 1, 1, 1],
                               padding='SAME')
        conv1_batch_o = batch_norm.batch_norm(conv1_o, scale=None)
        relu1_o = tf.nn.relu(tf.nn.bias_add(conv1_batch_o, conv1_biases_o))

    # Layer 2: max-pooling layer, stride 2, all-zero padding, 2x2 window
    with tf.name_scope('layer2-pool1'):
        pool1_g = tf.nn.max_pool(relu1_g,
                                 ksize=[1, 2, 2, 1],
                                 strides=[1, 2, 2, 1],
                                 padding='SAME')  # the second argument is the window size (ksize), the third is the stride
        pool1_batch_g = batch_norm.batch_norm(pool1_g, scale=None)

        pool1_o = tf.nn.max_pool(relu1_o,
                                 ksize=[1, 2, 2, 1],
                                 strides=[1, 2, 2, 1],
                                 padding='SAME')  # the second argument is the window size (ksize), the third is the stride
        pool1_batch_o = batch_norm.batch_norm(pool1_o, scale=None)
    # Output is 14*14*32; pooling does not change the number of channels
    # Layer 3: convolutional layer, 5x5 kernel, stride 1, depth 64, all-zero padding, output 14*14*64
    with tf.variable_scope('layer3-conv2'):
        conv2_weights_g = tf.get_variable(
            'weight_g',
            shape=[CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases_g = tf.get_variable(
            'biases_g',
            shape=[CONV2_DEEP],
            initializer=tf.constant_initializer(0.0))

        conv2_weights_o = tf.get_variable(
            'weight_o',
            shape=[CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        #tf.assign(conv2_weights_o , gutils.unit(conv2_weights_o))

        conv2_biases_o = tf.get_variable(
            'biases_o',
            shape=[CONV2_DEEP],
            initializer=tf.constant_initializer(0.0))

        conv2_weights_g_tmp = tf.get_variable(
            "weight_g_tmp",
            shape=[CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=1))
        conv2_weights_o_tmp = tf.get_variable(
            "weight_o_tmp",
            shape=[CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=1))

        conv2_biases_g_tmp = tf.get_variable(
            'biases_g_tmp',
            shape=[CONV2_DEEP],
            initializer=tf.constant_initializer(0.0))
        conv2_biases_o_tmp = tf.get_variable(
            'biases_o_tmp',
            shape=[CONV2_DEEP],
            initializer=tf.constant_initializer(0.0))

        # Forward pass of the conv layer
        conv2_g = tf.nn.conv2d(pool1_batch_g,
                               conv2_weights_g,
                               strides=[1, 1, 1, 1],
                               padding='SAME')
        conv2_batch_g = batch_norm.batch_norm(conv2_g, scale=None)
        relu2_g = tf.nn.relu(tf.nn.bias_add(conv2_batch_g, conv2_biases_g))

        conv2_o = tf.nn.conv2d(pool1_batch_o,
                               conv2_weights_o,
                               strides=[1, 1, 1, 1],
                               padding='SAME')
        conv2_batch_o = batch_norm.batch_norm(conv2_o, scale=None)
        relu2_o = tf.nn.relu(tf.nn.bias_add(conv2_batch_o, conv2_biases_o))

    # Layer 4: pooling layer with the same structure as layer 2
    with tf.name_scope('layer4-pool2'):
        pool2_g = tf.nn.max_pool(relu2_g,
                                 ksize=[1, 2, 2, 1],
                                 strides=[1, 2, 2, 1],
                                 padding='SAME')  # the second argument is the window size (ksize), the third is the stride
        pool2_batch_g = batch_norm.batch_norm(pool2_g, scale=None)

        pool2_o = tf.nn.max_pool(relu2_o,
                                 ksize=[1, 2, 2, 1],
                                 strides=[1, 2, 2, 1],
                                 padding='SAME')  # the second argument is the window size (ksize), the third is the stride
        pool2_batch_o = batch_norm.batch_norm(pool2_o, scale=None)

    # Layer 5: fully connected layer with dropout
    # The pooling output is 7*7*64
    # Layer 5 is a fully connected network with 512 output nodes
    with tf.variable_scope('layer5-fc1'):
        pool_shape = pool2_batch_g.get_shape().as_list()
        # pool_shape[0] is the batch size
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
        # Flatten the input into a vector for the fully connected layer
        reshaped_g = tf.reshape(pool2_batch_g, [-1, nodes])
        reshaped_o = tf.reshape(pool2_batch_o, [-1, nodes])
        fc1_weights_g = tf.get_variable(
            'weight_g',
            shape=[nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        fc1_weights_o = tf.get_variable(
            'weight_o',
            shape=[nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        #tf.assign(fc1_weights_o , gutils.unit(fc1_weights_o))

        fc1_weights_g_tmp = tf.get_variable(
            'weight_g_tmp',
            shape=[nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        fc1_weights_o_tmp = tf.get_variable(
            'weight_o_tmp',
            shape=[nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        # Regularize only the fully connected weights
        if regularizer is not None:
            tf.add_to_collection('losses_g', regularizer(fc1_weights_g))
            tf.add_to_collection('losses_o', regularizer(fc1_weights_o))
        fc1_biases_g = tf.get_variable(
            'biases_g',
            shape=[FC_SIZE],
            initializer=tf.constant_initializer(0.0))
        fc1_biases_o = tf.get_variable(
            'biases_o',
            shape=[FC_SIZE],
            initializer=tf.constant_initializer(0.0))

        fc1_biases_g_tmp = tf.get_variable(
            'biases_g_tmp',
            shape=[FC_SIZE],
            initializer=tf.constant_initializer(0.0))
        fc1_biases_o_tmp = tf.get_variable(
            'biases_o_tmp',
            shape=[FC_SIZE],
            initializer=tf.constant_initializer(0.0))

        fc1_g = tf.nn.relu(tf.matmul(reshaped_g, fc1_weights_g) + fc1_biases_g)
        fc1_o = tf.nn.relu(tf.matmul(reshaped_o, fc1_weights_o) + fc1_biases_o)

        if train:
            fc1_g = tf.nn.dropout(fc1_g, 0.5)
            fc1_o = tf.nn.dropout(fc1_o, 0.5)
    # Layer 6: output layer
    with tf.variable_scope('layer6-fc2'):
        fc2_weights_g = tf.get_variable(
            'weight_g',
            shape=[FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        fc2_weights_o = tf.get_variable(
            'weight_o',
            shape=[FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        #tf.assign(fc2_weights_o , gutils.unit(fc2_weights_o))

        fc2_weights_g_tmp = tf.get_variable(
            'weight_g_tmp',
            shape=[FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        fc2_weights_o_tmp = tf.get_variable(
            'weight_o_tmp',
            shape=[FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        if regularizer is not None:
            tf.add_to_collection('losses_g', regularizer(fc2_weights_g))
            tf.add_to_collection('losses_o', regularizer(fc2_weights_o))

        fc2_biases_g = tf.get_variable(
            'biases_g',
            shape=[NUM_LABELS],
            initializer=tf.constant_initializer(0.0))
        fc2_biases_o = tf.get_variable(
            'biases_o',
            shape=[NUM_LABELS],
            initializer=tf.constant_initializer(0.0))

        fc2_biases_g_tmp = tf.get_variable(
            'biases_g_tmp',
            shape=[NUM_LABELS],
            initializer=tf.constant_initializer(0.0))
        fc2_biases_o_tmp = tf.get_variable(
            'biases_o_tmp',
            shape=[NUM_LABELS],
            initializer=tf.constant_initializer(0.0))

        logit_g = tf.matmul(fc1_g, fc2_weights_g) + fc2_biases_g
        logit_o = tf.matmul(fc1_o, fc2_weights_o) + fc2_biases_o

        return logit_g, logit_o
Example 16
    def __init__(self, nf, nout, kwargs):

        for pname in DNN.param_names:
            setattr(self, pname, kwargs[pname])

        if self.activation == 'elu':
            nonlin = lambda x: T.switch(x >= 0, x, T.exp(x) - 1)
        else:
            self.activation = 'rectify' if self.activation == 'relu' else self.activation
            nonlin = getattr(lasagne.nonlinearities, self.activation)
        self.opt = getattr(lasagne.updates, self.opt)

        l_in = lasagne.layers.InputLayer(shape=(None, nf))
        cur_layer = batch_norm(l_in) if self.bnorm else l_in
        cur_layer = lasagne.layers.DropoutLayer(
            cur_layer, p=self.drates[0]) if self.drates[0] > 0 else cur_layer

        self.layers = [cur_layer]
        for n_hidden, drate in zip(self.n_hidden, self.drates[1:]):
            l_betw = lasagne.layers.DenseLayer(self.layers[-1],
                                               num_units=n_hidden,
                                               nonlinearity=nonlin)
            cur_layer = batch_norm(l_betw) if self.bnorm else l_betw
            cur_layer = lasagne.layers.DropoutLayer(
                cur_layer, p=drate) if drate > 0 else cur_layer
            self.layers.append(cur_layer)

        l_out = lasagne.layers.DenseLayer(self.layers[-1],
                                          num_units=nout,
                                          nonlinearity=None)

        target_output = T.matrix('target_output')

        # cost_train = T.mean(lasagne.objectives.squared_error(lasagne.layers.get_output(l_out, deterministic=False), target_output))
        cost_train = T.mean(
            T.sum(lasagne.objectives.squared_error(
                lasagne.layers.get_output(l_out, deterministic=False),
                target_output),
                  axis=1) / 2)
        cost_eval = T.mean(
            T.sum(lasagne.objectives.squared_error(
                lasagne.layers.get_output(l_out, deterministic=True),
                target_output),
                  axis=1) / 2)
        # cost_eval = T.mean((lasagne.layers.get_output(l_out, deterministic=True)-target_output)**2)

        all_params = lasagne.layers.get_all_params(l_out, trainable=True)
        all_grads = T.grad(cost_train, all_params)

        all_grads, total_norm = lasagne.updates.total_norm_constraint(
            all_grads, self.norm, return_norm=True)
        # all_grads = [T.switch(T.or_(T.isnan(total_norm), T.isinf(total_norm)), p*0.01 , g) for g,p in zip(all_grads, all_params)]
        updates = self.opt(all_grads, all_params, self.lr)

        self.train_model = theano.function(
            inputs=[l_in.input_var, target_output],
            outputs=cost_train,
            updates=updates,
            allow_input_downcast=True)
        self.predict_model = theano.function(
            inputs=[l_in.input_var, target_output],
            outputs=[
                cost_eval,
                lasagne.layers.get_output(l_out, deterministic=True)
            ],
            allow_input_downcast=True)
import tensorflow as tf
from batch_norm import batch_norm
from activations import lrelu
from connections import conv2d, linear
from datasets import MNIST


# %% Setup input to the network and true output label.  These are
# simply placeholders which we'll fill in later.
mnist = MNIST()
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
x_tensor = tf.reshape(x, [-1, 28, 28, 1])

# %% Define the network:
bn1 = batch_norm(-1, name='bn1')
bn2 = batch_norm(-1, name='bn2')
bn3 = batch_norm(-1, name='bn3')
h_1 = lrelu(bn1(conv2d(x_tensor, 32, name='conv1')), name='lrelu1')
h_2 = lrelu(bn2(conv2d(h_1, 64, name='conv2')), name='lrelu2')
h_3 = lrelu(bn3(conv2d(h_2, 64, name='conv3')), name='lrelu3')
h_3_flat = tf.reshape(h_3, [-1, 64 * 4 * 4])
h_4 = linear(h_3_flat, 10)
y_pred = tf.nn.softmax(h_4)

# %% Define loss/eval/training functions
cross_entropy = -tf.reduce_sum(y * tf.log(y_pred))
train_step = tf.train.AdamOptimizer().minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
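
The fragment above stops after defining train_step and accuracy. A sketch of the session loop that would typically follow, assuming the MNIST wrapper exposes train/validation splits with a next_batch method, as in the standard TensorFlow tutorial dataset:

# Hypothetical training loop; the dataset attributes below are assumptions.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

batch_size = 100
for epoch in range(10):
    for batch_i in range(mnist.train.num_examples // batch_size):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys})
    print(sess.run(accuracy, feed_dict={x: mnist.validation.images,
                                        y: mnist.validation.labels}))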
Example 18
    def __init__(
        self, sess, height, width, phi_length, n_actions, name,
        optimizer='RMS', learning_rate=0.00025, epsilon=0.01, decay=0.95, momentum=0.,
        slow=False, tau=0.001, verbose=False, path='', folder='_networks', l2_decay=0.001):
        """ Initialize network """
        super(DqnNetClass, self).__init__(sess, name=name)
        self.slow = slow
        self.tau = tau
        self.name = name
        self.sess = sess
        self.path = path
        self.folder = folder

        self.observation = tf.placeholder(tf.float32, [None, height, width, phi_length], name=self.name + '_observation')
        self.actions = tf.placeholder(tf.float32, shape=[None, n_actions], name=self.name + "_actions")

        self.is_training = tf.placeholder(tf.bool, [])

        with tf.name_scope("Conv1") as scope:
            kernel_shape = [8, 8, phi_length, 32]
            self.W_conv1 = self.weight_variable(kernel_shape, 'conv1')
            #self.b_conv1 = self.bias_variable(kernel_shape, 'conv1')
            self.h_conv1_bn = batch_norm(self.conv2d(self.observation, self.W_conv1, 4), 32, self.is_training, self.sess, slow=self.slow, tau=self.tau)
            self.h_conv1 = tf.nn.relu(self.h_conv1_bn.bnorm, name=self.name + '_conv1_activations')
            tf.add_to_collection('conv_weights', self.W_conv1)
            tf.add_to_collection('conv_output', self.h_conv1)
            tf.add_to_collection('transfer_params', self.W_conv1)
            tf.add_to_collection('transfer_params', self.h_conv1_bn.scale)
            tf.add_to_collection('transfer_params', self.h_conv1_bn.beta)
            tf.add_to_collection('transfer_params', self.h_conv1_bn.pop_mean)
            tf.add_to_collection('transfer_params', self.h_conv1_bn.pop_var)

        with tf.name_scope("Conv2") as scope:
            kernel_shape = [4, 4, 32, 64]
            self.W_conv2 = self.weight_variable(kernel_shape, 'conv2')
            #self.b_conv2 = self.bias_variable(kernel_shape, 'conv2')
            self.h_conv2_bn = batch_norm(self.conv2d(self.h_conv1, self.W_conv2, 2), 64, self.is_training, self.sess, slow=self.slow, tau=self.tau)
            self.h_conv2 = tf.nn.relu(self.h_conv2_bn.bnorm, name=self.name + '_conv2_activations')
            tf.add_to_collection('conv_weights', self.W_conv2)
            tf.add_to_collection('conv_output', self.h_conv2)
            tf.add_to_collection('transfer_params', self.W_conv2)
            tf.add_to_collection('transfer_params', self.h_conv2_bn.scale)
            tf.add_to_collection('transfer_params', self.h_conv2_bn.beta)
            tf.add_to_collection('transfer_params', self.h_conv2_bn.pop_mean)
            tf.add_to_collection('transfer_params', self.h_conv2_bn.pop_var)

        with tf.name_scope("Conv3") as scope:
            kernel_shape = [3, 3, 64, 64]
            self.W_conv3 = self.weight_variable(kernel_shape, 'conv3')
            #self.b_conv3 = self.bias_variable(kernel_shape, 'conv3')
            self.h_conv3_bn = batch_norm(self.conv2d(self.h_conv2, self.W_conv3, 1), 64, self.is_training, self.sess, slow=self.slow, tau=self.tau)
            self.h_conv3 = tf.nn.relu(self.h_conv3_bn.bnorm, name=self.name + '_conv3_activations')
            tf.add_to_collection('conv_weights', self.W_conv3)
            tf.add_to_collection('conv_output', self.h_conv3)
            tf.add_to_collection('transfer_params', self.W_conv3)
            tf.add_to_collection('transfer_params', self.h_conv3_bn.scale)
            tf.add_to_collection('transfer_params', self.h_conv3_bn.beta)
            tf.add_to_collection('transfer_params', self.h_conv3_bn.pop_mean)
            tf.add_to_collection('transfer_params', self.h_conv3_bn.pop_var)

        self.h_conv3_flat = tf.reshape(self.h_conv3, [-1, 3136])

        with tf.name_scope("FullyConnected1") as scope:
            kernel_shape = [3136, 512]
            self.W_fc1 = self.weight_variable(kernel_shape, 'fc1')
            #self.b_fc1 = self.bias_variable(kernel_shape, 'fc1')
            self.h_fc1_bn = batch_norm(tf.matmul(self.h_conv3_flat, self.W_fc1), 512, self.is_training, self.sess, slow=self.slow, tau=self.tau, linear=True)
            self.h_fc1 = tf.nn.relu(self.h_fc1_bn.bnorm, name=self.name + '_fc1_activations')
            tf.add_to_collection('transfer_params', self.W_fc1)
            tf.add_to_collection('transfer_params', self.h_fc1_bn.scale)
            tf.add_to_collection('transfer_params', self.h_fc1_bn.beta)
            tf.add_to_collection('transfer_params', self.h_fc1_bn.pop_mean)
            tf.add_to_collection('transfer_params', self.h_fc1_bn.pop_var)

        with tf.name_scope("FullyConnected2") as scope:
            kernel_shape = [512, n_actions]
            self.W_fc2 = self.weight_variable_last_layer(kernel_shape, 'fc2')
            self.b_fc2 = self.bias_variable_last_layer(kernel_shape, 'fc2')
            self.action_output = tf.add(tf.matmul(self.h_fc1, self.W_fc2), self.b_fc2, name=self.name + '_fc1_outputs')
            tf.add_to_collection('transfer_params', self.W_fc2)
            tf.add_to_collection('transfer_params', self.b_fc2)

        if verbose:
            self.init_verbosity()

        # cost of q network
        with tf.name_scope("Entropy") as scope:
            self.cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(
                    _sentinel=None,
                    labels=self.actions,
                    logits=self.action_output)) #+ \
                    #l2_decay*tf.nn.l2_loss(self.W_fc2) + l2_decay*tf.nn.l2_loss(self.b_fc2))
            ce_summ = tf.summary.scalar("cross_entropy", self.cross_entropy)
        # self.parameters = [
        #     self.W_conv1, self.h_conv1_bn.scale, self.h_conv1_bn.beta,
        #     self.W_conv2, self.h_conv2_bn.scale, self.h_conv2_bn.beta,
        #     self.W_conv3, self.h_conv3_bn.scale, self.h_conv3_bn.beta,
        #     self.W_fc1, self.h_fc1_bn.scale, self.h_fc1_bn.beta,
        #     self.W_fc2, self.b_fc2,
        # ]
        with tf.name_scope("Train") as scope:
            if optimizer == "Adam":
                self.opt = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=epsilon)
            else:
                self.opt = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=momentum, epsilon=epsilon)
            self.grads_vars = self.opt.compute_gradients(self.cross_entropy)
            grads = []
            params = []
            for p in self.grads_vars:
                if p[0] is None:
                    continue
                grads.append(p[0])
                params.append(p[1])

            grads = tf.clip_by_global_norm(grads, 1)[0]
            self.grads_vars_updates = zip(grads, params)
            self.train_step = self.opt.apply_gradients(self.grads_vars_updates)
            # for grad, var in self.grads_vars:
            #     if grad == None:
            #         continue
            #     tf.summary.histogram(var.op.name + '/gradients', grad)
        with tf.name_scope("Evaluating") as scope:
            correct_prediction = tf.equal(tf.argmax(self.action_output,1), tf.argmax(self.actions,1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            accuracy_summary = tf.summary.scalar("accuracy", self.accuracy)
        # initialize all tensor variable parameters
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        self.merged = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter(self.path + self.folder + '/log_tb', self.sess.graph)
Example 19
def inference(input_tensor, train, regularizer):

    with tf.variable_scope('layer1-conv1_grassmann'):
        conv1_weights_g = tf.get_variable(
            "weight_g",
            shape=[CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1, seed=1))
        conv1_biases_g = tf.get_variable(
            "biases_g",
            shape=[CONV1_DEEP],
            initializer=tf.constant_initializer(0))

        conv1_weights_g_tmp = tf.get_variable(
            "weight_g_tmp",
            shape=[CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=1))

        conv1_biases_g_tmp = tf.get_variable(
            "biases_g_tmp",
            shape=[CONV1_DEEP],
            initializer=tf.constant_initializer(0.0))

        # Forward pass of the conv layer: stride 1 with all-zero (SAME) padding, so the output is a 28*28*32 tensor; the filter weights are the second argument.
        conv1_g = tf.nn.conv2d(input_tensor,
                               conv1_weights_g,
                               strides=[1, 1, 1, 1],
                               padding='SAME')
        conv1_batch_g = batch_norm.batch_norm(conv1_g, scale=None)
        relu1_g_grassmann = tf.nn.relu(
            tf.nn.bias_add(conv1_batch_g, conv1_biases_g))

    with tf.name_scope('layer2-pool1_grassmann'):
        pool1_g = tf.nn.max_pool(relu1_g_grassmann,
                                 ksize=[1, 2, 2, 1],
                                 strides=[1, 2, 2, 1],
                                 padding='SAME')  # the second argument is the window size (ksize), the third is the stride
        pool1_batch_g_grassmann = batch_norm.batch_norm(pool1_g, scale=None)

    with tf.variable_scope('layer3-conv2_grassmann'):
        conv2_weights_g = tf.get_variable(
            'weight_g',
            shape=[CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1, seed=3))
        conv2_biases_g = tf.get_variable(
            'biases_g',
            shape=[CONV2_DEEP],
            initializer=tf.constant_initializer(0.0))

        conv2_weights_g_tmp = tf.get_variable(
            "weight_g_tmp",
            shape=[CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=1))

        conv2_biases_g_tmp = tf.get_variable(
            "biases_g_tmp",
            shape=[CONV2_DEEP],
            initializer=tf.constant_initializer(0.0))
        # Forward pass of the conv layer
        conv2_g = tf.nn.conv2d(pool1_batch_g_grassmann,
                               conv2_weights_g,
                               strides=[1, 1, 1, 1],
                               padding='SAME')
        conv2_batch_g = batch_norm.batch_norm(conv2_g, scale=None)
        relu2_g_grassmann = tf.nn.relu(
            tf.nn.bias_add(conv2_batch_g, conv2_biases_g))

    with tf.name_scope('layer4-pool2_grassmann'):
        pool2_g = tf.nn.max_pool(relu2_g_grassmann,
                                 ksize=[1, 2, 2, 1],
                                 strides=[1, 2, 2, 1],
                                 padding='SAME')  # the second argument is the window size (ksize), the third is the stride
        pool2_batch_g_grassmann = batch_norm.batch_norm(pool2_g, scale=None)

    with tf.variable_scope('layer5-fc1_grassmann'):
        pool_shape = pool2_batch_g_grassmann.get_shape().as_list()
        # pool_shape[0] is the batch size
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
        # Flatten the input into a vector for the fully connected layer
        reshaped_g = tf.reshape(pool2_batch_g_grassmann, [-1, nodes])
        fc1_weights_g = tf.get_variable(
            'weight_g',
            shape=[nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1, seed=5))

        fc1_weights_g_tmp = tf.get_variable(
            'weight_g_tmp',
            shape=[nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        # Regularize only the fully connected weights
        if regularizer is not None:
            tf.add_to_collection('losses_g_grassmann',
                                 regularizer(fc1_weights_g))
        fc1_biases_g = tf.get_variable(
            'biases_g',
            shape=[FC_SIZE],
            initializer=tf.constant_initializer(0.0))

        fc1_biases_g_tmp = tf.get_variable(
            "biases_g_tmp",
            shape=[FC_SIZE],
            initializer=tf.constant_initializer(0.0))

        fc1_g_grassmann = tf.nn.relu(
            tf.matmul(reshaped_g, fc1_weights_g) + fc1_biases_g)

        if train:
            fc1_g_grassmann = tf.nn.dropout(fc1_g_grassmann, 0.5)

    with tf.variable_scope('layer6-fc2_grassmann'):
        fc2_weights_g = tf.get_variable(
            'weight_g',
            shape=[FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1, seed=5))

        fc2_weights_g_tmp = tf.get_variable(
            'weight_g_tmp',
            shape=[FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))

        if regularizer is not None:
            tf.add_to_collection('losses_g_grassmann',
                                 regularizer(fc2_weights_g))

        fc2_biases_g = tf.get_variable(
            'biases_g',
            shape=[NUM_LABELS],
            initializer=tf.constant_initializer(0.0))

        fc2_biases_g_tmp = tf.get_variable(
            "biases_g_tmp",
            shape=[NUM_LABELS],
            initializer=tf.constant_initializer(0.0))

        logit_g_grassmann = tf.matmul(fc1_g_grassmann,
                                      fc2_weights_g) + fc2_biases_g

        return logit_g_grassmann
def fc_bn_relu(inputTensor, shape, layer_name, bn_param, device):
  fc = full_connection_layer(inputTensor, shape, bias=False, layer_name=layer_name, device=device)
  bn = batch_norm(fc, bn_param=bn_param, scale=False, name=layer_name, device=device)
  return tf.nn.relu(bn)
Example 21
    def __init__(self,
                 sess,
                 height,
                 width,
                 phi_length,
                 n_actions,
                 name,
                 gamma=0.99,
                 copy_interval=4,
                 optimizer='RMS',
                 learning_rate=0.00025,
                 epsilon=0.01,
                 decay=0.95,
                 momentum=0.,
                 l2_decay=0.0001,
                 error_clip=1.0,
                 slow=False,
                 tau=0.01,
                 verbose=False,
                 path='',
                 folder='_networks',
                 decay_learning_rate=False,
                 transfer=False):
        """ Initialize network """
        Network.__init__(self, sess, name=name)
        self.gamma = gamma
        self.slow = slow
        self.tau = tau
        self.name = name
        self.sess = sess
        self.path = path
        self.folder = folder
        self.copy_interval = copy_interval
        self.update_counter = 0
        self.decay_learning_rate = decay_learning_rate

        self.observation = tf.placeholder(tf.float32,
                                          [None, height, width, phi_length],
                                          name=self.name + '_observation')
        self.actions = tf.placeholder(tf.float32,
                                      shape=[None, n_actions],
                                      name=self.name +
                                      "_actions")  # one-hot matrix
        self.next_observation = tf.placeholder(
            tf.float32, [None, height, width, phi_length],
            name=self.name + '_t_next_observation')
        self.rewards = tf.placeholder(tf.float32,
                                      shape=[None],
                                      name=self.name + "_rewards")
        self.terminals = tf.placeholder(tf.float32,
                                        shape=[None],
                                        name=self.name + "_terminals")

        self.slow_learnrate_vars = []
        self.fast_learnrate_vars = []

        self.observation_n = tf.div(self.observation, 255.)
        self.next_observation_n = tf.div(self.next_observation, 255.)

        # q network model:
        self.is_training = tf.placeholder(tf.bool, [])

        with tf.name_scope("Conv1") as scope:
            kernel_shape = [8, 8, phi_length, 32]
            self.W_conv1 = self.weight_variable(phi_length, kernel_shape,
                                                'conv1')
            #self.b_conv1 = self.bias_variable(kernel_shape, 'conv1')
            self.h_conv1_bn = batch_norm(self.conv2d(self.observation_n,
                                                     self.W_conv1, 4),
                                         32,
                                         self.is_training,
                                         self.sess,
                                         slow=self.slow,
                                         tau=self.tau)
            self.h_conv1 = tf.nn.relu(self.h_conv1_bn.bnorm,
                                      name=self.name + '_conv1_activations')
            tf.add_to_collection('conv_weights', self.W_conv1)
            tf.add_to_collection('conv_output', self.h_conv1)
            if transfer:
                self.slow_learnrate_vars.append(self.W_conv1)
                self.slow_learnrate_vars.append(self.h_conv1_bn.scale)
                self.slow_learnrate_vars.append(self.h_conv1_bn.beta)

        with tf.name_scope("Conv2") as scope:
            kernel_shape = [4, 4, 32, 64]
            self.W_conv2 = self.weight_variable(32, kernel_shape, 'conv2')
            #self.b_conv2 = self.bias_variable(kernel_shape, 'conv2')
            self.h_conv2_bn = batch_norm(self.conv2d(self.h_conv1,
                                                     self.W_conv2, 2),
                                         64,
                                         self.is_training,
                                         self.sess,
                                         slow=self.slow,
                                         tau=self.tau)
            self.h_conv2 = tf.nn.relu(self.h_conv2_bn.bnorm,
                                      name=self.name + '_conv2_activations')
            tf.add_to_collection('conv_weights', self.W_conv2)
            tf.add_to_collection('conv_output', self.h_conv2)
            if transfer:
                self.slow_learnrate_vars.append(self.W_conv2)
                self.slow_learnrate_vars.append(self.h_conv2_bn.scale)
                self.slow_learnrate_vars.append(self.h_conv2_bn.beta)

        with tf.name_scope("Conv3") as scope:
            kernel_shape = [3, 3, 64, 64]
            self.W_conv3 = self.weight_variable(64, kernel_shape, 'conv3')
            #self.b_conv3 = self.bias_variable(kernel_shape, 'conv3')
            self.h_conv3_bn = batch_norm(self.conv2d(self.h_conv2,
                                                     self.W_conv3, 1),
                                         64,
                                         self.is_training,
                                         self.sess,
                                         slow=self.slow,
                                         tau=self.tau)
            self.h_conv3 = tf.nn.relu(self.h_conv3_bn.bnorm,
                                      name=self.name + '_conv3_activations')
            tf.add_to_collection('conv_weights', self.W_conv3)
            tf.add_to_collection('conv_output', self.h_conv3)
            if transfer:
                self.slow_learnrate_vars.append(self.W_conv3)
                self.slow_learnrate_vars.append(self.h_conv3_bn.scale)
                self.slow_learnrate_vars.append(self.h_conv3_bn.beta)

        self.h_conv3_flat = tf.reshape(self.h_conv3, [-1, 3136])

        with tf.name_scope("FullyConnected1") as scope:
            kernel_shape = [3136, 512]
            self.W_fc1 = self.weight_variable_linear(kernel_shape, 'fc1')
            #self.b_fc1 = self.bias_variable(kernel_shape, 'fc1')
            self.h_fc1_bn = batch_norm(tf.matmul(self.h_conv3_flat,
                                                 self.W_fc1),
                                       512,
                                       self.is_training,
                                       self.sess,
                                       slow=self.slow,
                                       tau=self.tau,
                                       linear=True)
            self.h_fc1 = tf.nn.relu(self.h_fc1_bn.bnorm,
                                    name=self.name + '_fc1_activations')
            if transfer:
                self.fast_learnrate_vars.append(self.W_fc1)
                self.fast_learnrate_vars.append(self.h_fc1_bn.scale)
                self.fast_learnrate_vars.append(self.h_fc1_bn.beta)

        with tf.name_scope("FullyConnected2") as scope:
            kernel_shape = [512, n_actions]
            self.W_fc2 = self.weight_variable_linear(kernel_shape, 'fc2')
            self.b_fc2 = self.bias_variable_linear(kernel_shape, 'fc2')
            self.q_value = tf.add(tf.matmul(self.h_fc1, self.W_fc2),
                                  self.b_fc2,
                                  name=self.name + '_fc1_outputs')
            if transfer:
                self.fast_learnrate_vars.append(self.W_fc2)
                self.fast_learnrate_vars.append(self.b_fc2)

        if transfer:
            self.load_transfer_model(optimizer=optimizer.lower())
            # Scale down the last layer
            W_fc2_scaled = tf.scalar_mul(0.01, self.W_fc2)
            b_fc2_scaled = tf.scalar_mul(0.01, self.b_fc2)
            self.sess.run([
                self.W_fc2.assign(W_fc2_scaled),
                self.b_fc2.assign(b_fc2_scaled)
            ])

        if verbose:
            self.init_verbosity()

        # target q network model:
        self.t_is_training = tf.placeholder(tf.bool, [])
        with tf.name_scope("TConv1") as scope:
            kernel_shape = [8, 8, phi_length, 32]
            self.t_W_conv1 = self.weight_variable(phi_length, kernel_shape,
                                                  't_conv1')
            #self.t_b_conv1 = self.bias_variable(kernel_shape, 't_conv1')
            self.t_h_conv1_bn = batch_norm(self.conv2d(self.next_observation_n,
                                                       self.t_W_conv1, 4),
                                           32,
                                           self.t_is_training,
                                           self.sess,
                                           parForTarget=self.h_conv1_bn,
                                           slow=self.slow,
                                           tau=self.tau)
            self.t_h_conv1 = tf.nn.relu(self.t_h_conv1_bn.bnorm,
                                        name=self.name +
                                        '_t_conv1_activations')

        with tf.name_scope("TConv2") as scope:
            kernel_shape = [4, 4, 32, 64]
            self.t_W_conv2 = self.weight_variable(32, kernel_shape, 't_conv2')
            #self.t_b_conv2 = self.bias_variable(kernel_shape, 't_conv2')
            self.t_h_conv2_bn = batch_norm(self.conv2d(self.t_h_conv1,
                                                       self.t_W_conv2, 2),
                                           64,
                                           self.t_is_training,
                                           self.sess,
                                           parForTarget=self.h_conv2_bn,
                                           slow=self.slow,
                                           tau=self.tau)
            self.t_h_conv2 = tf.nn.relu(self.t_h_conv2_bn.bnorm,
                                        name=self.name +
                                        '_t_conv2_activations')

        with tf.name_scope("TConv3") as scope:
            kernel_shape = [3, 3, 64, 64]
            self.t_W_conv3 = self.weight_variable(64, kernel_shape, 't_conv3')
            #self.t_b_conv3 = self.bias_variable(kernel_shape, 't_conv3')
            self.t_h_conv3_bn = batch_norm(self.conv2d(self.t_h_conv2,
                                                       self.t_W_conv3, 1),
                                           64,
                                           self.t_is_training,
                                           self.sess,
                                           parForTarget=self.h_conv3_bn,
                                           slow=self.slow,
                                           tau=self.tau)
            self.t_h_conv3 = tf.nn.relu(self.t_h_conv3_bn.bnorm,
                                        name=self.name +
                                        '_t_conv3_activations')

        self.t_h_conv3_flat = tf.reshape(self.t_h_conv3, [-1, 3136])

        with tf.name_scope("TFullyConnected1") as scope:
            kernel_shape = [3136, 512]
            self.t_W_fc1 = self.weight_variable_linear(kernel_shape, 't_fc1')
            #self.t_b_fc1 = self.bias_variable(kernel_shape, 't_fc1')
            self.t_h_fc1_bn = batch_norm(tf.matmul(self.t_h_conv3_flat,
                                                   self.t_W_fc1),
                                         512,
                                         self.t_is_training,
                                         self.sess,
                                         parForTarget=self.h_fc1_bn,
                                         slow=self.slow,
                                         tau=self.tau,
                                         linear=True)
            self.t_h_fc1 = tf.nn.relu(self.t_h_fc1_bn.bnorm,
                                      name=self.name + '_t_fc1_activations')

        with tf.name_scope("TFullyConnected2") as scope:
            kernel_shape = [512, n_actions]
            self.t_W_fc2 = self.weight_variable_linear(kernel_shape, 't_fc2')
            self.t_b_fc2 = self.bias_variable_linear(kernel_shape, 't_fc2')
            self.t_q_value = tf.add(tf.matmul(self.t_h_fc1, self.t_W_fc2),
                                    self.t_b_fc2,
                                    name=self.name + '_t_fc1_outputs')

        if transfer:
            # only initialize tensor variables that are not loaded from the transfer model
            #self.sess.run(tf.variables_initializer(fast_learnrate_vars))
            self._global_vars_temp = set(tf.global_variables())

        # cost of q network
        #self.l2_regularizer_loss = l2_decay * (tf.reduce_sum(tf.pow(self.W_conv1, 2)) + tf.reduce_sum(tf.pow(self.W_conv2, 2)) + tf.reduce_sum(tf.pow(self.W_conv3, 2))  + tf.reduce_sum(tf.pow(self.W_fc1, 2)) + tf.reduce_sum(tf.pow(self.W_fc2, 2)))
        self.cost = self.build_loss(error_clip,
                                    n_actions)  #+ self.l2_regularizer_loss
        # self.parameters = [
        #     self.W_conv1, self.h_conv1_bn.scale, self.h_conv1_bn.beta,
        #     self.W_conv2, self.h_conv2_bn.scale, self.h_conv2_bn.beta,
        #     self.W_conv3, self.h_conv3_bn.scale, self.h_conv3_bn.beta,
        #     self.W_fc1, self.h_fc1_bn.scale, self.h_fc1_bn.beta,
        #     self.W_fc2, self.b_fc2,
        # ]
        with tf.name_scope("Train") as scope:
            if optimizer == "Graves":
                # Nature RMSOptimizer
                self.train_step, self.grads_vars = graves_rmsprop_optimizer(
                    self.cost, learning_rate, decay, epsilon, 1)
            else:
                if optimizer == "Adam":
                    self.opt = tf.train.AdamOptimizer(
                        learning_rate=learning_rate, epsilon=epsilon)
                elif optimizer == "RMS":
                    # Tensorflow RMSOptimizer
                    self.opt = tf.train.RMSPropOptimizer(learning_rate,
                                                         decay=decay,
                                                         momentum=momentum,
                                                         epsilon=epsilon)
                else:
                    print(colored("Unknown Optimizer!", "red"))
                    sys.exit()

                self.grads_vars = self.opt.compute_gradients(self.cost)
                grads = []
                params = []
                for p in self.grads_vars:
                    if p[0] is None:
                        continue
                    grads.append(p[0])
                    params.append(p[1])
                #grads = tf.clip_by_global_norm(grads, 1)[0]
                self.grads_vars_updates = zip(grads, params)
                self.train_step = self.opt.apply_gradients(
                    self.grads_vars_updates)

            # for grad, var in self.grads_vars:
            #     if grad == None:
            #         continue
            #     tf.summary.histogram(var.op.name + '/gradients', grad)

        if transfer:
            vars_diff = set(tf.global_variables()) - self._global_vars_temp
            self.sess.run(tf.variables_initializer(vars_diff))
            self.sess.run(
                tf.variables_initializer([
                    self.t_h_conv1_bn.pop_mean, self.t_h_conv1_bn.pop_var,
                    self.t_h_conv2_bn.pop_mean, self.t_h_conv2_bn.pop_var,
                    self.t_h_conv3_bn.pop_mean, self.t_h_conv3_bn.pop_var,
                    self.t_h_fc1_bn.pop_mean, self.t_h_fc1_bn.pop_var
                ]))
        else:
            # initialize all tensor variable parameters
            self.sess.run(tf.global_variables_initializer())

        # Make sure the q and target networks start from identical parameters by copying them over
        self.sess.run([
            self.t_W_conv1.assign(
                self.W_conv1),  #self.t_b_conv1.assign(self.b_conv1),
            self.t_W_conv2.assign(
                self.W_conv2),  #self.t_b_conv2.assign(self.b_conv2),
            self.t_W_conv3.assign(
                self.W_conv3),  #self.t_b_conv3.assign(self.b_conv3),
            self.t_W_fc1.assign(self.W_fc1),  #self.t_b_fc1.assign(self.b_fc1),
            self.t_W_fc2.assign(self.W_fc2),
            self.t_b_fc2.assign(self.b_fc2),
            self.t_h_conv1_bn.scale.assign(self.h_conv1_bn.scale),
            self.t_h_conv1_bn.beta.assign(self.h_conv1_bn.beta),
            self.t_h_conv2_bn.scale.assign(self.h_conv2_bn.scale),
            self.t_h_conv2_bn.beta.assign(self.h_conv2_bn.beta),
            self.t_h_conv3_bn.scale.assign(self.h_conv3_bn.scale),
            self.t_h_conv3_bn.beta.assign(self.h_conv3_bn.beta),
            self.t_h_fc1_bn.scale.assign(self.h_fc1_bn.scale),
            self.t_h_fc1_bn.beta.assign(self.h_fc1_bn.beta)
        ])

        if self.slow:
            self.update_target_op = [
                self.t_W_conv1.assign(self.tau * self.W_conv1 +
                                      (1 - self.tau) * self.t_W_conv1
                                      ),  #self.t_b_conv1.assign(self.b_conv1),
                self.t_W_conv2.assign(self.tau * self.W_conv2 +
                                      (1 - self.tau) * self.t_W_conv2
                                      ),  #self.t_b_conv2.assign(self.b_conv2),
                self.t_W_conv3.assign(self.tau * self.W_conv3 +
                                      (1 - self.tau) * self.t_W_conv3
                                      ),  #self.t_b_conv3.assign(self.b_conv3),
                self.t_W_fc1.assign(self.tau * self.W_fc1 +
                                    (1 - self.tau) * self.t_W_fc1
                                    ),  #self.t_b_fc1.assign(self.b_fc1),
                self.t_W_fc2.assign(self.tau * self.W_fc2 +
                                    (1 - self.tau) * self.t_W_fc2),
                self.t_b_fc2.assign(self.tau * self.b_fc2 +
                                    (1 - self.tau) * self.t_b_fc2),
                self.t_h_conv1_bn.updateTarget,
                self.t_h_conv2_bn.updateTarget,
                self.t_h_conv3_bn.updateTarget,
                self.t_h_fc1_bn.updateTarget
            ]
        else:
            self.update_target_op = [
                self.t_W_conv1.assign(
                    self.W_conv1),  #self.t_b_conv1.assign(self.b_conv1),
                self.t_W_conv2.assign(
                    self.W_conv2),  #self.t_b_conv2.assign(self.b_conv2),
                self.t_W_conv3.assign(
                    self.W_conv3),  #self.t_b_conv3.assign(self.b_conv3),
                self.t_W_fc1.assign(
                    self.W_fc1),  #self.t_b_fc1.assign(self.b_fc1),
                self.t_W_fc2.assign(self.W_fc2),
                self.t_b_fc2.assign(self.b_fc2),
                self.t_h_conv1_bn.updateTarget,
                self.t_h_conv2_bn.updateTarget,
                self.t_h_conv3_bn.updateTarget,
                self.t_h_fc1_bn.updateTarget
            ]

        self.saver = tf.train.Saver()
        self.merged = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter(
            self.path + self.folder + '/log_tb', self.sess.graph)
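# Hedged aside: the self.slow branch above is a soft (Polyak-averaged) target
# update, theta_target <- tau * theta_online + (1 - tau) * theta_target, while
# the else branch copies the online weights outright. A minimal standalone
# sketch of the same pattern, with made-up variable names and a hypothetical tau:
import tensorflow as tf

tau = 0.001  # hypothetical mixing coefficient, analogous to self.tau above
w_online = tf.Variable(tf.random_normal([512, 4]), name='w_online')
w_target = tf.Variable(tf.random_normal([512, 4]), name='w_target')

soft_update = w_target.assign(tau * w_online + (1.0 - tau) * w_target)  # "slow" update
hard_update = w_target.assign(w_online)                                 # direct copy

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(hard_update)   # start both sets of weights identical
    sess.run(soft_update)   # afterwards, track the online weights slowly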
def conv_bn_relu(inputTensor, shape, bn_param, device):
  conv = convolution_layer(inputTensor, shape, strides=[1,1,1,1], bias=False, layer_name='conv', device=device)
  bn = batch_norm(conv, bn_param=bn_param, scale=False, device = device)
  return tf.nn.relu(bn)
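# Hedged usage sketch for conv_bn_relu above; the input tensor, filter shape and
# bn_param value are made up, and convolution_layer / batch_norm are assumed to
# be the project's own wrappers with the signatures used in the helper.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 32, 32, 3], name='images')
bn_param = {'is_training': True}   # assumed structure of bn_param
features = conv_bn_relu(images, shape=[3, 3, 3, 64], bn_param=bn_param, device='/gpu:0')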
Example No. 23
def net_multi_conv(X0,X1,X2,_dropout,conf,doBatchNorm,trainPhase):
    imsz = conf.imsz; rescale = conf.rescale
    pool_scale = conf.pool_scale
    nfilt = conf.nfilt
    pool_stride = conf.pool_stride
    pool_size = conf.pool_size

    #     conv5_0,base_dict_0 = net_multi_base(X0,_weights['base0'])
    #     conv5_1,base_dict_1 = net_multi_base(X1,_weights['base1'])
    #     conv5_2,base_dict_2 = net_multi_base(X2,_weights['base2'])
    if conf.dilation_rate == 4:
        net_to_use = net_multi_base_named_dilated
    else:
        net_to_use = net_multi_base_named

    with tf.variable_scope('scale0'):
        conv5_0,base_dict_0 = net_to_use(X0,nfilt,doBatchNorm,trainPhase,
                                                   pool_stride,pool_size,conf)
    with tf.variable_scope('scale1'):
        conv5_1,base_dict_1 = net_to_use(X1,nfilt,doBatchNorm,trainPhase,
                                                   pool_stride,pool_size,conf)
    with tf.variable_scope('scale2'):
        conv5_2,base_dict_2 = net_to_use(X2,nfilt,doBatchNorm,trainPhase,
                                                   pool_stride,pool_size,conf)

    sz0 = int(math.ceil(float(imsz[0])/pool_scale/rescale))
    sz1 = int(math.ceil(float(imsz[1])/pool_scale/rescale))
    conv5_1_up = upscale('5_1',conv5_1,[sz0,sz1])
    conv5_2_up = upscale('5_2',conv5_2,[sz0,sz1])

    # crop lower res layers to match higher res size
    conv5_0_sz = tf.Tensor.get_shape(conv5_0).as_list()
    conv5_1_sz = tf.Tensor.get_shape(conv5_1_up).as_list()
    crop_0 = int(old_div((sz0-conv5_0_sz[1]),2))
    crop_1 = int(old_div((sz1-conv5_0_sz[2]),2))

    curloc = [0,crop_0,crop_1,0]
    patchsz = tf.to_int32([-1,conv5_0_sz[1],conv5_0_sz[2],-1])
    conv5_1_up = tf.slice(conv5_1_up,curloc,patchsz)
    conv5_2_up = tf.slice(conv5_2_up,curloc,patchsz)
    conv5_1_final_sz = tf.Tensor.get_shape(conv5_1_up).as_list()
#     print("Initial lower res layer size %s"%(', '.join(map(str,conv5_1_sz))))
#     print("Initial higher res layer size %s"%(', '.join(map(str,conv5_0_sz))))
#     print("Crop start lower res layer at %s"%(', '.join(map(str,curloc))))
#     print("Final size of lower res layer %s"%(', '.join(map(str,conv5_1_final_sz))))


    conv5_cat = tf.concat([conv5_0,conv5_1_up,conv5_2_up],3)
    
    # Reshape conv5 output to fit dense layer input
#     conv6 = conv2d('conv6',conv5_cat,_weights['wd1'],_weights['bd1']) 
#     conv6 = tf.nn.dropout(conv6,_dropout)
#     conv7 = conv2d('conv7',conv6,_weights['wd2'],_weights['bd2']) 
#     conv7 = tf.nn.dropout(conv7,_dropout)

    with tf.variable_scope('layer6'):
        if hasattr(conf, 'dilation_rate'):
            dilation_rate = [conf.dilation_rate, conf.dilation_rate]
        else:
            dilation_rate = [1, 1]
        weights = tf.get_variable("weights", [conf.psz,conf.psz,conf.numscale*nfilt,conf.nfcfilt],
                                  initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable("biases", conf.nfcfilt,
                                 initializer=tf.constant_initializer(1))
        conv6 = tf.nn.convolution(conv5_cat, weights,
                            strides=[1, 1], padding='SAME',dilation_rate=dilation_rate)
        if doBatchNorm:
            conv6 = batch_norm(conv6, trainPhase)
        conv6 = tf.nn.relu(conv6 + biases)
        conv6 = tf.nn.dropout(conv6,_dropout,
                          [conf.batch_size,1,1,conf.nfcfilt])

    with tf.variable_scope('layer7'):
        conv7 = conv_relu(conv6,[1,1,conf.nfcfilt,conf.nfcfilt],
                          0.005,1,doBatchNorm,trainPhase) 
        # if not doBatchNorm:
        conv7 = tf.nn.dropout(conv7,_dropout,
                              [conf.batch_size,1,1,conf.nfcfilt])

# Output, class prediction
#     out = tf.nn.bias_add(tf.nn.conv2d(
#             conv7, _weights['wd3'], 
#             strides=[1, 1, 1, 1], padding='SAME'),_weights['bd3'])

    with tf.variable_scope('layer8'):
        l8_weights = tf.get_variable("weights", [1,1,conf.nfcfilt,conf.n_classes],
            initializer=tf.random_normal_initializer(stddev=0.01))
        l8_biases = tf.get_variable("biases", conf.n_classes,
            initializer=tf.constant_initializer(0))
        out = tf.nn.conv2d(conv7, l8_weights,
            strides=[1, 1, 1, 1], padding='SAME') + l8_biases
#   No batch norm for the output layer.

    out_dict = {'base_dict_0':base_dict_0,
                'base_dict_1':base_dict_1,
                'base_dict_2':base_dict_2,
                'conv6':conv6,
                'conv7':conv7,
               }
    
    return out,out_dict
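# Hedged illustration of the dilated convolution used in 'layer6' above: with
# dilation_rate=[4, 4] the psz x psz kernel is applied on a dilated grid, which
# enlarges the receptive field without adding parameters. Shapes below are made
# up for the sketch.
import tensorflow as tf

demo_x = tf.placeholder(tf.float32, [1, 64, 64, 48])
demo_w = tf.get_variable('demo_weights', [5, 5, 48, 48],
                         initializer=tf.contrib.layers.xavier_initializer())
demo_y = tf.nn.convolution(demo_x, demo_w, strides=[1, 1], padding='SAME',
                           dilation_rate=[4, 4])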
Example No. 24
def build_model(input_var):
    network = lasagne.layers.InputLayer(shape=(None, 3, 32, 32),
                                        input_var=input_var)
    network = batch_norm(lasagne.layers.Conv2DLayer(
            network, num_filters=96, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.HeNormal(), pad='same'))
    #network = lasagne.layers.dropout(network, p=0.4)
    network = batch_norm(lasagne.layers.Conv2DLayer(
            network, num_filters=96, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.HeNormal(), pad='same'))
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    network = batch_norm(lasagne.layers.Conv2DLayer(
            network, num_filters=192, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.HeNormal(), pad='same'))
    #network = lasagne.layers.dropout(network, p=0.4)
    network = batch_norm(lasagne.layers.Conv2DLayer(
            network, num_filters=192, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.HeNormal(), pad='same'))        
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
            
    network = batch_norm(lasagne.layers.Conv2DLayer(
            network, num_filters=256, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.HeNormal(), pad='same'))
    #network = lasagne.layers.dropout(network, p=0.4)
    network = batch_norm(lasagne.layers.Conv2DLayer(
            network, num_filters=256, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.HeNormal(), pad='same'))
    #network = lasagne.layers.dropout(network, p=0.4)
    #network = batch_norm(lasagne.layers.Conv2DLayer(
            #network, num_filters=256, filter_size=(3, 3),
            #nonlinearity=lasagne.nonlinearities.rectify,
            #W=lasagne.init.HeNormal(), pad='same'))
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    
    network = batch_norm(lasagne.layers.Conv2DLayer(
            network, num_filters=512, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.HeNormal(), pad='same'))
    #network = lasagne.layers.dropout(network, p=0.4)
    network = batch_norm(lasagne.layers.Conv2DLayer(
            network, num_filters=512, filter_size=(3, 3),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.HeNormal(), pad='same'))
    #network = lasagne.layers.dropout(network, p=0.4)
    #network = batch_norm(lasagne.layers.Conv2DLayer(
            #network, num_filters=512, filter_size=(3, 3),
            #nonlinearity=lasagne.nonlinearities.rectify,
            #W=lasagne.init.HeNormal(), pad='same'))
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    
    #network = batch_norm(lasagne.layers.Conv2DLayer(
            #network, num_filters=512, filter_size=(3, 3),
            #nonlinearity=lasagne.nonlinearities.rectify,
            #W=lasagne.init.HeNormal(), pad='same'))
    ##network = lasagne.layers.dropout(network, p=0.4)
    #network = batch_norm(lasagne.layers.Conv2DLayer(
            #network, num_filters=512, filter_size=(3, 3),
            #nonlinearity=lasagne.nonlinearities.rectify,
            #W=lasagne.init.HeNormal(), pad='same'))
    ##network = lasagne.layers.dropout(network, p=0.4)
    #network = batch_norm(lasagne.layers.Conv2DLayer(
            #network, num_filters=512, filter_size=(3, 3),
            #nonlinearity=lasagne.nonlinearities.rectify,
            #W=lasagne.init.HeNormal(), pad='same'))
    #network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
    ##network = lasagne.layers.dropout(network, p=0.5)
    #network = batch_norm(lasagne.layers.DenseLayer(
            #network, num_units=512,
            #nonlinearity=lasagne.nonlinearities.rectify, 
            #W=lasagne.init.HeNormal()))
       
    #network = lasagne.layers.dropout(network, p=0.5)
    network = lasagne.layers.DenseLayer(network,
            num_units=200, W=lasagne.init.HeNormal(), 
            nonlinearity=lasagne.nonlinearities.rectify)
    
    network = lasagne.layers.DenseLayer(network, num_units=10, 
                                        W=lasagne.init.HeNormal(), 
                                        nonlinearity=None)
    
    return network
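# Hedged note on lasagne.layers.batch_norm as used above: as far as I
# understand, wrapping a layer with batch_norm is roughly equivalent to
# building it without bias and nonlinearity, inserting a BatchNormLayer, and
# re-applying the nonlinearity afterwards. 'incoming' below stands for any
# previous layer.
import lasagne

conv = lasagne.layers.Conv2DLayer(incoming, num_filters=96, filter_size=(3, 3),
                                  nonlinearity=None, b=None,
                                  W=lasagne.init.HeNormal(), pad='same')
bn = lasagne.layers.BatchNormLayer(conv)
out = lasagne.layers.NonlinearityLayer(bn, lasagne.nonlinearities.rectify)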
deconv32 = tf.nn.conv2d_transpose(
  value=net['relu7'],
  filter=tf.Variable(tf.truncated_normal(shape=(32, 32, 1, 512), mean=1.0)),
  output_shape=tf.pack((tf.shape(net['relu7'])[0],tf.shape(input_image)[1],tf.shape(input_image)[2],1)),
  strides=(1, 32, 32, 1), padding='SAME') + tf.Variable(tf.truncated_normal(shape=(1,),stddev=0.1), dtype=tf.float32)
  
#Concatenate them, one deconvolution per channel
deconvs = tf.concat(3,(deconv8, deconv16, deconv32))

#One last convolution to rule them all
conv    = tf.nn.conv2d(deconvs, tf.Variable(tf.truncated_normal(shape=(1,1,3,21), mean=1.0), dtype=tf.float32),
  strides=(1,1,1,1), padding="SAME") + tf.Variable(tf.truncated_normal(shape=(21,),stddev=0.1), dtype=tf.float32)
  
#Batch normalization
from batch_norm import batch_norm
bn = batch_norm(conv, scale=True, is_training=True)
  
#Network estimate
exp = tf.exp(bn)
norm = tf.reduce_sum(exp, reduction_indices=3, keep_dims=True)
y_hat = tf.div(exp, norm)

##########################################
########TRAIN DECONVOLUTION LAYERS########
##########################################

#Test data
indices = tf.placeholder(tf.int64, shape=(None,None,None))
targets = tf.one_hot(indices=indices, depth=21, on_value=1.0, off_value=0.0, axis=-1)

#Loss function (cross-entropy)
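# The snippet is cut off here; a plausible, purely hypothetical completion of
# the cross-entropy loss over the one-hot targets and the channel-softmax y_hat
# defined above might be:
cross_entropy = -tf.reduce_mean(
  tf.reduce_sum(targets * tf.log(y_hat + 1e-8), reduction_indices=3))
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)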
Example No. 26
def model18(w=32, h=32, c=1,
            nb_filters=64,
            size_filters_enc=5,
            size_filters_dec=5,
            nb_hidden=100,
            sparsity=True,
            use_batch_norm=False,
            nb_filters_mul=2,
            nonlin=rectify,
            stride=2,
            nb_layers_enc=2,
            nb_layers_dec=2):
    """
    standard conv autoencoder without any sparsity
    """
    s = size_filters_enc
    l_in = layers.InputLayer((None, c, w, h), name="input")
    l_conv = l_in
    l_convs = []
    for i in range(nb_layers_enc):
        l_conv = layers.Conv2DLayer(
                l_conv,
                num_filters=nb_filters * (nb_filters_mul**i),
                filter_size=(s, s),
                nonlinearity=nonlin,
                W=init.GlorotUniform(),
                stride=stride,
                name="conv{}".format(i))
        if use_batch_norm:
            l_conv = batch_norm(l_conv)
        print(l_conv.output_shape)
        l_convs.append(l_conv)

    lastconv_num_units = np.prod(l_conv.output_shape[1:])
    lastconv_shape = l_conv.output_shape[1:]

    z_mean = layers.DenseLayer(
        l_conv,
        num_units=nb_hidden,
        nonlinearity=linear,
        name="z_mean")
    z_log_sigma = layers.DenseLayer(
        l_conv,
        num_units=nb_hidden,
        nonlinearity=linear,
        name="z_log_sigma")

    encoder = [l_in] + l_convs + [z_mean, z_log_sigma]

    z_in = layers.InputLayer((None, nb_hidden), name="input")
    l_unconv = layers.DenseLayer(z_in, num_units=lastconv_num_units, name="unconv0")
    l_unconvs = [l_unconv]
    l_unconv = layers.ReshapeLayer(l_unconv, ([0],) +lastconv_shape, name="unconv0")
    s = size_filters_dec
    for i in range(nb_layers_dec):
        print(l_unconv.output_shape)
        if i == nb_layers_dec - 1:
            if sparsity:
                l_unconv = layers.NonlinearityLayer(l_unconv, wta_spatial, name="wta_spatial")
                l_unconv = layers.NonlinearityLayer(l_unconv, wta_channel, name="wta_channel")
            nonlin_cur = linear
            nb = c
            name = "output"
        else:
            nonlin_cur = nonlin
            nb = nb_filters * 2**(nb_layers_dec - i - 1)
            name = "unconv{}".format(i + 1)
        if stride==1:
            l_unconv = layers.Conv2DLayer(
                l_unconv,
                num_filters=nb,
                filter_size=(s, s),
                nonlinearity=nonlin_cur,
                W=init.GlorotUniform(),
                pad='full',
                name=name)
        else:
            l_unconv = Deconv2DLayer(
                    l_unconv,
                    num_filters=nb,
                    filter_size=(s, s),
                    nonlinearity=nonlin_cur,
                    W=init.GlorotUniform(),
                    stride=stride,
                    name=name)
        if use_batch_norm:
            l_unconv = batch_norm(l_unconv)
        print(l_unconv.output_shape)
        l_unconvs.append(l_unconv)
    decoder = [z_in] + l_unconvs
    return encoder , decoder
Example No. 27
def build_model(d_params, g_params, s_params, options):

    trng = RandomStreams(SEED)
    x = tensor.matrix('x', dtype='int32')  # n_samples * n_steps matrix of word indices in [0, n_words)
    if options['debug']:
        x.tag.test_value = np.random.randint(2, size=(64, 40)).astype(
            'int32')  # batchsize * sent_len(n_word)  item: 0-voc_size
    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    # generative model part
    z = tensor.matrix('z', dtype='float32')  # n_batch * n_feature
    n_z = z.shape[0]

    n_samples = options['batch_size']
    n_words = options['n_words']
    n_x = d_params['Wemb'].shape[1]  #embeding dim
    if options['shareLSTM']:
        h_decoder = decoder_g(g_params,
                              z,
                              options,
                              max_step=options['max_step'],
                              prefix='decoder_0')
    else:
        z_code = tensor.cast(z[:, 0], dtype='int32')
        h_decoder = tensor.zeros(
            [options['max_step'], n_samples, options['n_h']])

        h_temp = []
        for idx in range(options['n_codes']):
            temp_idx = tensor.eq(z_code, idx).nonzero()[0]
            if options['sharedEmb']:
                h_decoder_temp = decoder_emb_from_d(
                    g_params,
                    d_params,
                    z[:, 1:],
                    options,
                    max_step=options['max_step'],
                    prefix=_p('decoder', idx))
            else:
                h_decoder_temp = decoder_g(g_params,
                                           z[:, 1:],
                                           options,
                                           max_step=options['max_step'],
                                           prefix=_p('decoder', idx))
            h_temp.append(h_decoder_temp)
            h_decoder = tensor.inc_subtensor(h_decoder[:, temp_idx, :],
                                             h_temp[idx][:, temp_idx, :])

    #h_decoder = dropout(h_decoder, trng, use_noise)
    # reconstruct the original sentence
    shape_w = h_decoder.shape  # n_step, n_sample , n_h
    h_decoder = h_decoder.reshape((shape_w[0] * shape_w[1], shape_w[2]))

    # pred_w: (n_steps * n_samples) * n_words
    if options['sharedEmb']:
        Vhid = tensor.dot(g_params['Vhid'], d_params['Wemb'].T)
    else:
        Vhid = tensor.dot(g_params['Vhid'], g_params['Wemb'].T)
    pred_w = tensor.dot(h_decoder, Vhid) + g_params['bhid']
    n_steps = shape_w[0]

    #  nondifferentiable
    if options['delta'] > 1e-10:
        pred_w = tensor.switch(tensor.ge(pred_w, options['delta']), pred_w, 0)
    #pred_w = tensor.nnet.softmax(pred_w*options['L'])
    max_w = tensor.max(pred_w, axis=1, keepdims=True)
    e0 = tensor.exp((pred_w - max_w) * options['L'])
    pred_w = e0 / tensor.sum(e0, axis=1, keepdims=True)

    max_print = tensor.max(pred_w, axis=1)
    max_print = max_print.reshape((n_steps, n_samples)).dimshuffle(1, 0)

    pred_w = pred_w.reshape(
        (n_steps, n_samples,
         n_words)).dimshuffle(1, 0, 2)  # reshape need parenthesis

    if options['force_cut'] == 'cut':
        rng_temp = tensor.minimum(
            -tensor.sum(tensor.log(trng.uniform(
                (n_samples, 6))), axis=1) * 3.3, options['max_step'] - 5)
        rng_length = tensor.floor(rng_temp).astype('int32')  #gamma(6,3.3)
        # pred_mask = tensor.zeros(pred_w.shape)
        period = options['period']
        # should use set values
        for i in xrange(n_samples):
            pred_w = tensor.set_subtensor(pred_w[i, rng_length[i]:, :], 0)
            pred_w = tensor.set_subtensor(pred_w[i, rng_length[i], period], 1)
            pred_w = tensor.set_subtensor(pred_w[i, (rng_length[i] + 1):, 0],
                                          1)
    elif options['force_cut'] == 'strip':
        for i in xrange(n_samples):
            pred_w = tensor.set_subtensor(
                pred_w[i, options['max_step'] - 1, 0], 1)
            idx_end = theano.tensor.eq(tensor.argmax(pred_w[i, :, :], axis=1),
                                       0).nonzero()[0][0]
            pred_w = tensor.set_subtensor(pred_w[i, (idx_end + 1):, 0], 1)
            pred_w = tensor.set_subtensor(pred_w[i, (idx_end + 1):, 1:], 0)

    pad = max(options['filter_hs']) - 1
    end_mat = tensor.concatenate([
        tensor.ones([n_samples, pad, 1]),
        tensor.zeros([n_samples, pad, n_words - 1])
    ],
                                 axis=2)
    pred_w = tensor.concatenate([end_mat, pred_w, end_mat], axis=1)

    n_steps = n_steps + 2 * pad
    pred_w = pred_w.reshape((n_steps * n_samples, n_words))

    # should be d's embeding
    fake_input = tensor.dot(pred_w, d_params['Wemb'])

    #  real[ 64   1  68 300] fake[ 64   1  41 300]
    fake_input = fake_input.reshape(
        (n_samples, 1, n_steps, d_params['Wemb'].shape[1]))  #(64,1,  )
    use_noise2 = theano.shared(numpy_floatX(0.))
    fake_input = dropout(fake_input, trng, use_noise2)

    # fake feature output
    fake_outputs1 = []
    for i in xrange(len(options['filter_hs'])):
        filter_shape = options['filter_shapes'][i]
        pool_size = options['pool_sizes'][i]
        conv_layer = encoder(d_params,
                             fake_input,
                             filter_shape,
                             pool_size,
                             options,
                             prefix=_p('cnn_d', i))
        fake_output1 = conv_layer
        fake_outputs1.append(fake_output1)

    fake_output1 = tensor.concatenate(fake_outputs1, 1)  # should be 64*900
    if options['batch_norm']:
        fake_output1 = batch_norm(d_params,
                                  fake_output1,
                                  options,
                                  prefix='fake')

    if options['cnn_activation'] == 'tanh':
        fake_pred = mlp_layer_linear(d_params, fake_output1, prefix='dis_d')
    elif options['cnn_activation'] == 'linear':
        fake_pred = mlp_layer_linear(d_params,
                                     tensor.tanh(fake_output1),
                                     prefix='dis_d')  #

    if not options['wgan']:
        fake_pred = tensor.nnet.sigmoid(fake_pred) * (
            1 - 2 * options['label_smoothing']) + options['label_smoothing']

    # for reverse model
    # if options['reverse']:
    fake_recon = mlp_layer_tanh(d_params, fake_output1, prefix='recon')
    r_t = fake_recon / 2.0 + .5
    z_t = z / 2.0 + .5
    r_cost = (-z_t * tensor.log(r_t + 0.0001) -
              (1. - z_t) * tensor.log(1.0001 - r_t)).sum() / n_samples / n_z

    # Proposal nets (for infogan)
    fake_outputs2 = []
    for i in xrange(len(options['filter_hs'])):
        filter_shape = options['filter_shapes'][i]
        pool_size = options['pool_sizes'][i]
        conv_layer = encoder(g_params,
                             fake_input,
                             filter_shape,
                             pool_size,
                             options,
                             prefix=_p('cnn_d', i))
        fake_output2 = conv_layer
        fake_outputs2.append(fake_output2)
    fake_output2 = tensor.concatenate(
        fake_outputs2, 1)  # should be 64*900     # why it is 64*0???

    # check whether to use softmax or tanh
    fake_propose = mlp_layer_tanh(g_params, fake_output2, prefix='dis_q')
    fake_propose = (fake_propose + 1) / 2
    fake_propose = tensor.log(fake_propose)
    z_code = tensor.cast(z[:, 0], dtype='int32')
    z_index = tensor.arange(n_z)
    fake_logent = fake_propose[z_index, z_code]
    l_I = tensor.sum(fake_logent)

    # Wemb: voc_size(n_words) * n_emb       64* 1* 40 *48
    real_input = d_params['Wemb'][tensor.cast(
        x.flatten(), dtype='int32')].reshape(
            (x.shape[0], 1, x.shape[1],
             d_params['Wemb'].shape[1]))  # n_sample,1,n_length,n_emb
    real_input = dropout(real_input, trng, use_noise2)

    real_outputs = []
    for i in xrange(len(options['filter_hs'])):
        filter_shape = options['filter_shapes'][i]
        pool_size = options['pool_sizes'][i]
        conv_layer2 = encoder(d_params,
                              real_input,
                              filter_shape,
                              pool_size,
                              options,
                              prefix=_p('cnn_d', i))
        real_output = conv_layer2
        real_outputs.append(real_output)
    real_output = tensor.concatenate(real_outputs, 1)

    if options['batch_norm']:
        real_output = batch_norm(d_params, real_output, options, prefix='real')

    if options['cnn_activation'] == 'tanh':
        real_pred = mlp_layer_linear(d_params, real_output, prefix='dis_d')
    elif options['cnn_activation'] == 'linear':
        real_pred = mlp_layer_linear(d_params,
                                     tensor.tanh(real_output),
                                     prefix='dis_d')

    if not options['wgan']:
        real_pred = tensor.nnet.sigmoid(real_pred) * (
            1 - 2 * options['label_smoothing']) + options['label_smoothing']

    #Compute for KDE
    mu = real_output
    X = fake_output1
    KDE = cal_nkde(X, mu, options['kde_sigma'])

    #calculate KDE on real_input and fake_input
    X_i = fake_input.reshape((n_samples, n_steps * d_params['Wemb'].shape[1]))
    mu_i = real_input.reshape((n_samples, n_steps * d_params['Wemb'].shape[1]))
    KDE_input = cal_nkde(X_i, mu_i, options['kde_sigma'])

    # sufficient statistics
    cur_size = s_params['seen_size'] * 1.0
    identity = tensor.eye(options['n_z']) * options['diag']
    fake_mean = tensor.mean(fake_output1, axis=0)
    real_mean = tensor.mean(real_output, axis=0)
    fake_xx = tensor.dot(fake_output1.T, fake_output1)
    real_xx = tensor.dot(real_output.T, real_output)
    acc_fake_xx = (s_params['acc_fake_xx'] * cur_size + fake_xx) / (cur_size +
                                                                    n_samples)
    acc_real_xx = (s_params['acc_real_xx'] * cur_size + real_xx) / (cur_size +
                                                                    n_samples)
    acc_fake_mean = (s_params['acc_fake_mean'] * cur_size +
                     fake_mean * n_samples) / (cur_size + n_samples)
    acc_real_mean = (s_params['acc_real_mean'] * cur_size +
                     real_mean * n_samples) / (cur_size + n_samples)

    cov_fake = acc_fake_xx - tensor.dot(acc_fake_mean.dimshuffle(0, 'x'),
                                        acc_fake_mean.dimshuffle(
                                            0, 'x').T) + identity
    cov_real = acc_real_xx - tensor.dot(acc_real_mean.dimshuffle(0, 'x'),
                                        acc_real_mean.dimshuffle(
                                            0, 'x').T) + identity

    cov_fake_inv = tensor.nlinalg.matrix_inverse(cov_fake)
    cov_real_inv = tensor.nlinalg.matrix_inverse(cov_real)

    if options['feature_match'] == 'moment':
        temp1 = ((fake_mean - real_mean)**2).sum()
        fake_obj = temp1

    elif options['feature_match'] == 'JSD_acc':

        temp1 = tensor.nlinalg.trace(
            tensor.dot(cov_fake_inv, cov_real) +
            tensor.dot(cov_real_inv, cov_fake))
        temp2 = tensor.dot(
            tensor.dot((acc_fake_mean - acc_real_mean),
                       (cov_fake_inv + cov_real_inv)),
            (acc_fake_mean - acc_real_mean).T)

        fake_obj = temp1 + temp2

    elif options['feature_match'] == 'mmd':
        #### too many nodes, use scan ####
        kxx, kxy, kyy = 0, 0, 0
        dividend = 1
        dist_x, dist_y = fake_output1 / dividend, real_output / dividend
        x_sq = tensor.sum(dist_x**2, axis=1).dimshuffle(0, 'x')  #  64*1
        y_sq = tensor.sum(dist_y**2, axis=1).dimshuffle(0, 'x')  #  64*1
        tempxx = -2 * tensor.dot(dist_x,
                                 dist_x.T) + x_sq + x_sq.T  # (xi -xj)**2
        tempxy = -2 * tensor.dot(dist_x,
                                 dist_y.T) + x_sq + y_sq.T  # (xi -yj)**2
        tempyy = -2 * tensor.dot(dist_y,
                                 dist_y.T) + y_sq + y_sq.T  # (yi -yj)**2

        for sigma in options['sigma_range']:
            kxx += tensor.mean(tensor.exp(-tempxx / 2 / (sigma**2)))
            kxy += tensor.mean(tensor.exp(-tempxy / 2 / (sigma**2)))
            kyy += tensor.mean(tensor.exp(-tempyy / 2 / (sigma**2)))

        fake_obj = tensor.sqrt(kxx + kyy - 2 * kxy)

    elif options['feature_match'] == 'mmd_cov':
        kxx, kxy, kyy = 0, 0, 0
        cov_sum = (cov_fake + cov_real) / 2
        cov_sum_inv = tensor.nlinalg.matrix_inverse(cov_sum)

        dividend = 1
        dist_x, dist_y = fake_output1 / dividend, real_output / dividend
        cov_inv_mat = cov_sum_inv
        x_sq = tensor.sum(tensor.dot(dist_x, cov_inv_mat) * dist_x,
                          axis=1).dimshuffle(0, 'x')
        y_sq = tensor.sum(tensor.dot(dist_y, cov_inv_mat) * dist_y,
                          axis=1).dimshuffle(0, 'x')

        tempxx = -2 * tensor.dot(tensor.dot(dist_x, cov_inv_mat),
                                 dist_x.T) + x_sq + x_sq.T  # (xi -xj)**2
        tempxy = -2 * tensor.dot(tensor.dot(dist_x, cov_inv_mat),
                                 dist_y.T) + x_sq + y_sq.T  # (xi -yj)**2
        tempyy = -2 * tensor.dot(tensor.dot(dist_y, cov_inv_mat),
                                 dist_y.T) + y_sq + y_sq.T  # (yi -yj)**2

        for sigma in options['sigma_range']:
            kxx += tensor.mean(tensor.exp(-tempxx / 2 / (sigma**2)))
            kxy += tensor.mean(tensor.exp(-tempxy / 2 / (sigma**2)))
            kyy += tensor.mean(tensor.exp(-tempyy / 2 / (sigma**2)))
        fake_obj = tensor.sqrt(kxx + kyy - 2 * kxy)

    elif options['feature_match'] == 'mmd_ld':

        kxx, kxy, kyy = 0, 0, 0
        real_mmd = mlp_layer_tanh(d_params, real_output, prefix='dis_mmd')
        fake_mmd = mlp_layer_tanh(d_params, fake_output1, prefix='dis_mmd')

        dividend = options['dim_mmd']  # for numerical stability & scale with
        dist_x, dist_y = fake_mmd / dividend, real_mmd / dividend
        x_sq = tensor.sum(dist_x**2, axis=1).dimshuffle(0, 'x')  #  64*1
        y_sq = tensor.sum(dist_y**2, axis=1).dimshuffle(0, 'x')  #  64*1
        tempxx = -2 * tensor.dot(dist_x,
                                 dist_x.T) + x_sq + x_sq.T  # (xi -xj)**2
        tempxy = -2 * tensor.dot(dist_x,
                                 dist_y.T) + x_sq + y_sq.T  # (xi -yj)**2
        tempyy = -2 * tensor.dot(dist_y,
                                 dist_y.T) + y_sq + y_sq.T  # (yi -yj)**2

        for sigma in options['sigma_range']:
            kxx += tensor.exp(-tempxx / 2 / sigma).sum()
            kxy += tensor.exp(-tempxy / 2 / sigma).sum()
            kyy += tensor.exp(-tempyy / 2 / sigma).sum()

        fake_obj = tensor.sqrt(kxx + kyy - 2 * kxy)

    elif options['feature_match'] == 'mmd_h':
        #### too many nodes, use scan ####

        kxx, kxy, kyy = 0, 0, 0

        if options['cnn_activation'] == 'tanh':
            fake_mmd = middle_layer(d_params, fake_output1, prefix='dis_d')
        elif options['cnn_activation'] == 'linear':
            fake_mmd = middle_layer(d_params,
                                    tensor.tanh(fake_output1),
                                    prefix='dis_d')  #

        if options['cnn_activation'] == 'tanh':
            real_mmd = middle_layer(d_params, real_output, prefix='dis_d')
        elif options['cnn_activation'] == 'linear':
            real_mmd = middle_layer(d_params,
                                    tensor.tanh(real_output),
                                    prefix='dis_d')  #

        dividend = 1
        dist_x, dist_y = fake_mmd / dividend, real_mmd / dividend
        x_sq = tensor.sum(dist_x**2, axis=1).dimshuffle(0, 'x')  #  64*1
        y_sq = tensor.sum(dist_y**2, axis=1).dimshuffle(0, 'x')  #  64*1
        tempxx = -2 * tensor.dot(dist_x,
                                 dist_x.T) + x_sq + x_sq.T  # (xi -xj)**2
        tempxy = -2 * tensor.dot(dist_x,
                                 dist_y.T) + x_sq + y_sq.T  # (xi -yj)**2
        tempyy = -2 * tensor.dot(dist_y,
                                 dist_y.T) + y_sq + y_sq.T  # (yi -yj)**2

        for sigma in options['sigma_range']:
            kxx += tensor.mean(tensor.exp(-tempxx / 2 / (sigma**2)))
            kxy += tensor.mean(tensor.exp(-tempxy / 2 / (sigma**2)))
            kyy += tensor.mean(tensor.exp(-tempyy / 2 / (sigma**2)))
        fake_obj = tensor.sqrt(kxx + kyy - 2 * kxy)

    else:
        fake_obj = -tensor.log(fake_pred + 1e-6).sum() / n_z

    if options['wgan']:
        gan_cost_d = fake_pred.sum() / n_z - real_pred.sum() / n_samples
        gan_cost_g = -fake_pred.sum() / n_z + 0 * (
            (fake_mean - acc_real_mean)**2).sum()
    else:
        gan_cost_d = -tensor.log(1 - fake_pred + 1e-6).sum(
        ) / n_z - tensor.log(real_pred + 1e-6).sum() / n_samples
        gan_cost_g = fake_obj

    #result4 = fake_obj
    d_cost = gan_cost_d - options['lambda_fm'] * fake_obj + options[
        'lambda_recon'] * r_cost + options['lambda_q'] * l_I / n_z
    g_cost = gan_cost_g - options['lambda_q'] * l_I / n_z

    #result1, result2, result4, result5, result6 = x_sq, y_sq, tempxx, tempxy, tempyy

    result1 = tensor.mean(real_pred)  # goes to nan
    result2 = tensor.mean(fake_pred)  # goes to nan
    result3 = tensor.argmax(pred_w, axis=1).reshape([n_samples, n_steps])
    result4 = tensor.nlinalg.trace(
        tensor.dot(cov_fake_inv, cov_real) +
        tensor.dot(cov_real_inv, cov_fake))
    result5 = max_print[
        0]  #mu  #tensor.dot( tensor.dot((acc_fake_mean - acc_real_mean) , (cov_fake_inv + cov_real_inv)), (acc_fake_mean - acc_real_mean).T)
    result6 = ((fake_mean - real_mean)**2).sum()

    return use_noise, use_noise2, x, z, d_cost, g_cost, r_cost, fake_recon, acc_fake_xx, acc_real_xx, acc_fake_mean, acc_real_mean, result1, result2, result3, result4, result5, result6, KDE, KDE_input
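# Hedged NumPy sketch of the multi-bandwidth RBF MMD computed in the 'mmd'
# branch above: MMD = sqrt( mean k(x,x') + mean k(y,y') - 2 * mean k(x,y) ),
# summed over the bandwidths in sigma_range. Array names are illustrative only.
import numpy as np

def rbf_mmd(fake, real, sigma_range=(1.0, 2.0, 4.0)):
    # pairwise squared distances ||a_i - b_j||^2
    def sq_dists(a, b):
        a_sq = (a ** 2).sum(axis=1)[:, None]
        b_sq = (b ** 2).sum(axis=1)[:, None]
        return -2.0 * a.dot(b.T) + a_sq + b_sq.T
    kxx = kxy = kyy = 0.0
    for sigma in sigma_range:
        kxx += np.exp(-sq_dists(fake, fake) / (2.0 * sigma ** 2)).mean()
        kxy += np.exp(-sq_dists(fake, real) / (2.0 * sigma ** 2)).mean()
        kyy += np.exp(-sq_dists(real, real) / (2.0 * sigma ** 2)).mean()
    return np.sqrt(kxx + kyy - 2.0 * kxy)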
Example No. 28
def VAE(input_shape=[None, 784],
        n_filters=[64, 64, 64],
        filter_sizes=[4, 4, 4],
        n_hidden=32,
        n_code=2,
        activation=tf.nn.tanh,
        dropout=False,
        denoising=False,
        convolutional=False,
        variational=False,
        on_cloud=0):
    """(Variational) (Convolutional) (Denoising) Autoencoder.

    Uses tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network. e.g. for MNIST: [None, 784].
    n_filters : list, optional
        Number of filters for each layer.
        If convolutional=True, this refers to the total number of output
        filters to create for each layer, with each layer's number of output
        filters as a list.
        If convolutional=False, then this refers to the total number of neurons
        for each layer in a fully connected network.
    filter_sizes : list, optional
        Only applied when convolutional=True.  This refers to the ksize (height
        and width) of each convolutional layer.
    n_hidden : int, optional
        Only applied when variational=True.  This refers to the first fully
        connected layer prior to the variational embedding, directly after
        the encoding.  After the variational embedding, another fully connected
        layer is created with the same size prior to decoding.  Set to 0 to
        not use an additional hidden layer.
    n_code : int, optional
        Only applied when variational=True.  This refers to the number of
        latent Gaussians to sample for creating the inner most encoding.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu
    dropout : bool, optional
        Whether or not to apply dropout.  If using dropout, you must feed a
        value for 'keep_prob', as returned in the dictionary.  1.0 means no
        dropout is used.  0.0 means every connection is dropped.  Sensible
        values are between 0.5-0.8.
    denoising : bool, optional
        Whether or not to apply denoising.  If using denoising, you must feed a
        value for 'corrupt_prob', as returned in the dictionary.  1.0 means no
        corruption is used.  0.0 means every feature is corrupted.  Sensible
        values are between 0.5-0.8.
    convolutional : bool, optional
        Whether or not to use a convolutional network or else a fully connected
        network will be created.  This effects the n_filters parameter's
        meaning.
    variational : bool, optional
        Whether or not to create a variational embedding layer.  This will
        create a fully connected layer after the encoding, if `n_hidden` is
        greater than 0, then will create a multivariate gaussian sampling
        layer, then another fully connected layer.  The size of the fully
        connected layers are determined by `n_hidden`, and the size of the
        sampling layer is determined by `n_code`.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the encoder.
            'x': Input Placeholder
            'z': Inner most encoding Tensor (latent features)
            'y': Reconstruction of the Decoder
            'keep_prob': Amount to keep when using Dropout
            'corrupt_prob': Amount to corrupt when using Denoising
            'train': Set to True when training/Applies to Batch Normalization.
        }
    """
    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    corrupt_prob = tf.placeholder(tf.float32, [1])

    # apply noise if denoising
    x_ = (utils.corrupt(x) * corrupt_prob + x *
          (1 - corrupt_prob)) if denoising else x

    # 2d -> 4d if convolution
    x_tensor = utils.to_tensor(x_) if convolutional else x_
    current_input = x_tensor

    Ws = []
    shapes = []

    # Build the encoder
    for layer_i, n_output in enumerate(n_filters):
        with tf.variable_scope('encoder/{}'.format(layer_i)):
            shapes.append(current_input.get_shape().as_list())
            if convolutional:
                h, W = utils.conv2d(x=current_input,
                                    n_output=n_output,
                                    k_h=filter_sizes[layer_i],
                                    k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            Ws.append(W)
            current_input = h

    shapes.append(current_input.get_shape().as_list())

    with tf.variable_scope('variational'):
        if variational:
            dims = current_input.get_shape().as_list()
            flattened = utils.flatten(current_input)

            if n_hidden:
                h = utils.linear(flattened, n_hidden, name='W_fc')[0]
                h = activation(batch_norm(h, phase_train, 'fc/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = flattened

            z_mu = utils.linear(h, n_code, name='mu')[0]
            z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0]

            # Sample from noise distribution p(eps) ~ N(0, 1)
            epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_code]))

            # Sample from posterior
            z = z_mu + tf.multiply(epsilon, tf.exp(z_log_sigma))

            if n_hidden:
                h = utils.linear(z, n_hidden, name='fc_t')[0]
                h = activation(batch_norm(h, phase_train, 'fc_t/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = z

            size = dims[1] * dims[2] * dims[3] if convolutional else dims[1]
            h = utils.linear(h, size, name='fc_t2')[0]
            current_input = activation(batch_norm(h, phase_train, 'fc_t2/bn'))
            if dropout:
                current_input = tf.nn.dropout(current_input, keep_prob)

            if convolutional:
                current_input = tf.reshape(
                    current_input,
                    tf.stack([
                        tf.shape(current_input)[0], dims[1], dims[2], dims[3]
                    ]))
        else:
            z = current_input

    shapes.reverse()
    n_filters.reverse()
    Ws.reverse()

    n_filters += [input_shape[-1]]

    # %%
    # Decoding layers
    for layer_i, n_output in enumerate(n_filters[1:]):
        with tf.variable_scope('decoder/{}'.format(layer_i)):
            shape = shapes[layer_i + 1]
            if convolutional:
                h, W = utils.deconv2d(x=current_input,
                                      n_output_h=shape[1],
                                      n_output_w=shape[2],
                                      n_output_ch=shape[3],
                                      n_input_ch=shapes[layer_i][3],
                                      k_h=filter_sizes[layer_i],
                                      k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'dec/bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            current_input = h

    y = current_input
    x_flat = utils.flatten(x)
    y_flat = utils.flatten(y)

    # l2 loss
    loss_x = tf.reduce_sum(tf.squared_difference(x_flat, y_flat), 1)

    if variational:
        # variational lower bound, kl-divergence
        loss_z = -0.5 * tf.reduce_sum(
            1.0 + 2.0 * z_log_sigma - tf.square(z_mu) -
            tf.exp(2.0 * z_log_sigma), 1)

        # add l2 loss
        cost = tf.reduce_mean(loss_x + loss_z)
    else:
        # just optimize l2 loss
        cost = tf.reduce_mean(loss_x)

    return {
        'cost': cost,
        'Ws': Ws,
        'x': x,
        'z': z,
        'y': y,
        'keep_prob': keep_prob,
        'corrupt_prob': corrupt_prob,
        'train': phase_train
    }
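# Hedged usage sketch for the VAE above; the data batch and hyperparameters are
# made up, and the utils.* helpers referenced inside VAE are assumed to be
# available on the import path.
import numpy as np
import tensorflow as tf

model = VAE(input_shape=[None, 784], convolutional=True, variational=True)
train_op = tf.train.AdamOptimizer(1e-3).minimize(model['cost'])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.rand(64, 784).astype('float32')   # stand-in for real data
    _, cost = sess.run([train_op, model['cost']],
                       feed_dict={model['x']: batch,
                                  model['train']: True,
                                  model['keep_prob']: 1.0,        # 1.0 = no dropout
                                  model['corrupt_prob']: [1.0]})  # 1.0 = no corruption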
Example No. 29
 def __call__(self,
              inputs,
              state,
              scope=None,
              is_training=True,
              reuse=None,
              reuse_bn=None):
     self.unroll_count += 1
     with tf.variable_scope(scope or type(self).__name__):
         if self._state_is_tuple:
             c, h = state
         else:
             c, h = nn.split(state, 2, 1)
         with tf.variable_scope("LSTM_weights", reuse=reuse):
             print("reuse is ", reuse)
             i2h = _linear([inputs],
                           4 * self._num_units,
                           True,
                           scope="LinearI",
                           init_scale=self.init_scale)
             h2h = _linear([h],
                           4 * self._num_units,
                           True,
                           scope="LinearH",
                           init_scale=self.init_scale)
             beta_i = nn.weight_variable([4 * self._num_units],
                                         init_method="constant",
                                         init_param={"val": 0.0},
                                         name="beta_i")
             gamma_i = nn.weight_variable([4 * self._num_units],
                                          init_method="constant",
                                          init_param={"val": 0.1},
                                          name="gamma_i")
             beta_h = nn.weight_variable([4 * self._num_units],
                                         init_method="constant",
                                         init_param={"val": 0.0},
                                         name="beta_h")
             gamma_h = nn.weight_variable([4 * self._num_units],
                                          init_method="constant",
                                          init_param={"val": 0.1},
                                          name="gamma_h")
             beta_c = nn.weight_variable([self._num_units],
                                         init_method="constant",
                                         init_param={"val": 0.0},
                                         name="beta_c")
             gamma_c = nn.weight_variable([self._num_units],
                                          init_method="constant",
                                          init_param={"val": 0.1},
                                          name="gamma_c")
         i2h_norm, mean_i = batch_norm(i2h,
                                       self._num_units * 4,
                                       is_training,
                                       reuse=reuse_bn,
                                       gamma=gamma_i,
                                       beta=beta_i,
                                       axes=[0],
                                       eps=self.eps,
                                       scope="bn_i_{}".format(
                                           self.unroll_count),
                                       return_mean=True)
         # if self.l1_reg > 0.0:
         # tf.add_to_collection(L1_REG_KEY,
         # self.l1_reg * tf.reduce_mean(tf.abs(i2h - mean_i)))
         h2h_norm, mean_h = batch_norm(h2h,
                                       self._num_units * 4,
                                       is_training,
                                       reuse=reuse_bn,
                                       gamma=gamma_h,
                                       beta=beta_h,
                                       axes=[0],
                                       eps=self.eps,
                                       scope="bn_h_{}".format(
                                           self.unroll_count),
                                       return_mean=True)
         # if self.l1_reg > 0.0:
         # tf.add_to_collection(L1_REG_KEY,
         #                      self.l1_reg * tf.reduce_mean(tf.abs(h2h - mean_h)))
         i, j, f, o = nn.split(i2h_norm + h2h_norm, 4, 1)
         new_c = (c * self.gate_activation(f + self._forget_bias) +
                  self.gate_activation(i) * self.state_activation(j))
         new_c_norm, mean_c = batch_norm(new_c,
                                         self._num_units,
                                         is_training,
                                         reuse=reuse_bn,
                                         gamma=gamma_c,
                                         beta=beta_c,
                                         axes=[0],
                                         eps=self.eps,
                                         scope="bn_c_{}".format(
                                             self.unroll_count),
                                         return_mean=True)
         # if self.l1_reg > 0.0:
         # tf.add_to_collection(L1_REG_KEY, self.l1_reg *
         #                      tf.reduce_mean(tf.abs(new_c - mean_c)))
         new_h = self.state_activation(new_c_norm) * self.gate_activation(o)
         if self._state_is_tuple:
             new_state = LSTMStateTuple(new_c_norm, new_h)
         else:
             new_state = nn.concat([new_c_norm, new_h], 1)
     return new_h, new_state
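# Hedged sketch of unrolling the batch-normalised LSTM cell above. The scopes
# bn_i_/bn_h_/bn_c_{unroll_count} keep separate batch-norm statistics for every
# time step, while reuse=True shares the LSTM weights after the first step.
# `cell`, `inputs`, `batch_size` and `num_steps` are illustrative names, and
# zero_state is assumed to behave like the standard RNNCell method.
import tensorflow as tf

state = cell.zero_state(batch_size, tf.float32)
outputs = []
for t in range(num_steps):
    out, state = cell(inputs[:, t, :], state,
                      is_training=True,
                      reuse=(t > 0),    # share LSTM_weights after the first step
                      reuse_bn=None)    # fresh BN statistics for each time step
    outputs.append(out)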