Example #1
def batch_normalized_linear_layer(state_below, scope_name, n_inputs, n_outputs, stddev, wd, eps=.00001, test=False):
    """
    A linear layer with batch normalization
    """
    with tf.variable_scope(scope_name) as scope:
        weight = _variable_with_weight_decay(
            "weights", shape=[n_inputs, n_outputs],
            stddev=stddev, wd=wd
        )
        act = tf.matmul(state_below, weight)
        # get moments
        act_mean, act_variance = tf.nn.moments(act, [0])
        # get mean and variance variables
        mean = _variable_on_cpu('bn_mean', [n_outputs], tf.constant_initializer(0.0), trainable=False)
        variance = _variable_on_cpu('bn_variance', [n_outputs], tf.constant_initializer(1.0), trainable=False)
        # assign the moments

        if not test:
            assign_mean = mean.assign(act_mean)
            assign_variance = variance.assign(act_variance)
            act_bn = tf.multiply((act - act_mean), tf.rsqrt(act_variance + eps), name=scope.name+"_bn")
        else:
            act_bn = tf.multiply((act - mean), tf.rsqrt(variance + eps), name=scope.name+"_bn")

        beta = _variable_on_cpu("beta", [n_outputs], tf.constant_initializer(0.0))
        gamma = _variable_on_cpu("gamma", [n_outputs], tf.constant_initializer(1.0))
        bn = tf.add(tf.multiply(act_bn, gamma), beta)
        # output = tf.nn.relu(bn, name=scope.name)
        output = randomized_relu(bn, .1, name=scope.name, is_training=(not test))
        if not test:
            output = control_flow_ops.with_dependencies(dependencies=[assign_mean, assign_variance], output_tensor=output)
        _activation_summary(output)
    return output
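
A minimal call sketch for the layer above (not from the original source; it assumes TensorFlow 1.x and that the helpers _variable_with_weight_decay, _variable_on_cpu, randomized_relu and _activation_summary are defined; the images placeholder and layer sizes are hypothetical):

images = tf.placeholder(tf.float32, shape=[None, 784], name="images")
# build the training graph, then rebuild the same layer with test=True,
# reusing the variables so the stored bn_mean/bn_variance are applied
fc1_train = batch_normalized_linear_layer(images, "fc1", n_inputs=784,
                                          n_outputs=256, stddev=0.04, wd=1e-4)
with tf.variable_scope(tf.get_variable_scope(), reuse=True):
    fc1_test = batch_normalized_linear_layer(images, "fc1", n_inputs=784,
                                             n_outputs=256, stddev=0.04,
                                             wd=1e-4, test=True)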
Example #2
def dense_layer(feed,
                input_dim,
                output_dim,
                dropout=False,
                keep_prob=None,
                batch_norm=False,
                weight_decay=None):
    weights = _variable_with_weight_decay('weights',
                                          shape=[input_dim, output_dim],
                                          stddev=0.04,
                                          wd=weight_decay)
    biases = _variable_on_cpu('biases', [output_dim],
                              tf.constant_initializer(0.1))
    intermediate = tf.matmul(feed, weights)
    if batch_norm:
        mean, variance = tf.nn.moments(intermediate, axes=[0])
        epsilon = 1e-5
        gamma = _variable_on_cpu('gammas', [output_dim],
                                 tf.constant_initializer(1.0))
        pre_activation = tf.nn.batch_normalization(intermediate, mean,
                                                   variance, biases, gamma,
                                                   epsilon)
    else:
        pre_activation = intermediate + biases
    if dropout:
        pre_activation = tf.nn.dropout(pre_activation,
                                       keep_prob=keep_prob,
                                       name="dropout")
    after_activation = tf.nn.relu(pre_activation, name='activated_out')
    _activation_summary(after_activation)

    return after_activation
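
A hypothetical usage sketch for dense_layer (the flat feature tensor and the sizes are assumptions, not from the source); feeding keep_prob lets the same graph run with and without dropout:

keep_prob = tf.placeholder(tf.float32, name="keep_prob")
with tf.variable_scope("fc1"):  # dense_layer opens no variable scope of its own
    fc1 = dense_layer(flat, input_dim=2048, output_dim=512,
                      dropout=True, keep_prob=keep_prob,
                      batch_norm=False, weight_decay=4e-3)
# feed keep_prob=0.5 while training and keep_prob=1.0 while evaluating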
Example #3
def batch_normalized_conv_layer(state_below, scope_name, n_inputs, n_outputs, filter_shape, stddev, wd, eps=.00001, test=False):
    """
    Convolutional layer with batch normalization
    """
    with tf.variable_scope(scope_name) as scope:
        kernel = _variable_with_weight_decay(
            "weights", shape=[filter_shape[0], filter_shape[1], n_inputs, n_outputs],
            stddev=stddev, wd=wd
        )
        conv = tf.nn.conv2d(state_below, kernel, [1, 1, 1, 1], padding='SAME')
        # get moments
        conv_mean, conv_variance = tf.nn.moments(conv, [0, 1, 2])
        # get mean and variance variables
        mean = _variable_on_cpu("bn_mean", [n_outputs], tf.constant_initializer(0.0), False)
        variance = _variable_on_cpu("bn_variance", [n_outputs], tf.constant_initializer(1.0), False)
        # assign the moments

        if not test:
            assign_mean = mean.assign(conv_mean)
            assign_variance = variance.assign(conv_variance)
            conv_bn = tf.multiply((conv - conv_mean), tf.rsqrt(conv_variance + eps), name=scope.name+"_bn")
        else:
            conv_bn = tf.multiply((conv - mean), tf.rsqrt(variance + eps), name=scope.name+"_bn")

        beta = _variable_on_cpu("beta", [n_outputs], tf.constant_initializer(0.0))
        gamma = _variable_on_cpu("gamma", [n_outputs], tf.constant_initializer(1.0))
        bn = tf.add(tf.multiply(conv_bn, gamma), beta)
        # output = tf.nn.relu(bn, name=scope.name)
        output = randomized_relu(bn, .1, name=scope.name, is_training=(not test))
        if not test:
            output = control_flow_ops.with_dependencies(dependencies=[assign_mean, assign_variance], output_tensor=output)
        _activation_summary(output)

    return output
Example #4
def conv2d_stack(feed,
                 kernel_list,
                 stride_list,
                 padding_list,
                 batch_norm=False):
    if not ((len(kernel_list) == len(stride_list)) and
            (len(stride_list) == len(padding_list))):
        return
    inputs = []
    inputs.append(feed)
    for i in range(len(kernel_list)):
        with tf.variable_scope('conv%d' % (i + 1)) as scope:
            kernel = _variable_with_weight_decay('weights',
                                                 shape=kernel_list[i],
                                                 stddev=5e-2,
                                                 wd=None)
            conv = conv2d(inputs[-1],
                          kernel,
                          stride_list[i],
                          padding=padding_list[i])
            biases = _variable_on_cpu('biases', kernel_list[i][-1],
                                      tf.constant_initializer(0.0))
            if batch_norm:
                mean, variance = tf.nn.moments(conv, axes=[0])
                epsilon = 1e-5
                gamma = _variable_on_cpu('gammas', kernel_list[i][-1],
                                         tf.constant_initializer(1.0))
                pre_activation = tf.nn.batch_normalization(
                    conv, mean, variance, biases, gamma, epsilon)
            else:
                pre_activation = tf.nn.bias_add(conv, biases)
            after_activation = tf.nn.relu(pre_activation, name='activated_out')
            _activation_summary(after_activation)
            inputs.append(after_activation)
    return inputs[-1]
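
A hypothetical call for the stack above; the three lists must be the same length (otherwise the function silently returns None), each kernel is [height, width, in_channels, out_channels], and the stride format is whatever the conv2d helper expects:

features = conv2d_stack(images,
                        kernel_list=[[3, 3, 3, 32], [3, 3, 32, 64]],
                        stride_list=[[1, 1, 1, 1], [1, 1, 1, 1]],
                        padding_list=['SAME', 'SAME'],
                        batch_norm=True)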
Example #5
def batch_normalized_conv_layer(state_below,
                                scope_name,
                                n_inputs,
                                n_outputs,
                                filter_shape,
                                stddev,
                                wd,
                                eps=.00001,
                                test=False):
    """
    Convolutional layer with batch normalization
    """
    with tf.variable_scope(scope_name) as scope:
        kernel = _variable_with_weight_decay(
            "weights",
            shape=[filter_shape[0], filter_shape[1], n_inputs, n_outputs],
            stddev=stddev,
            wd=wd)
        conv = tf.nn.conv2d(state_below, kernel, [1, 1, 1, 1], padding='SAME')
        # get moments
        conv_mean, conv_variance = tf.nn.moments(conv, [0, 1, 2])
        # get mean and variance variables
        mean = _variable_on_cpu("bn_mean", [n_outputs],
                                tf.constant_initializer(0.0), False)
        variance = _variable_on_cpu("bn_variance", [n_outputs],
                                    tf.constant_initializer(1.0), False)
        # assign the moments

        if not test:
            assign_mean = mean.assign(conv_mean)
            assign_variance = variance.assign(conv_variance)
            conv_bn = tf.multiply((conv - conv_mean),
                                  tf.rsqrt(conv_variance + eps),
                                  name=scope.name + "_bn")
        else:
            conv_bn = tf.multiply((conv - mean),
                                  tf.rsqrt(variance + eps),
                                  name=scope.name + "_bn")

        beta = _variable_on_cpu("beta", [n_outputs],
                                tf.constant_initializer(0.0))
        gamma = _variable_on_cpu("gamma", [n_outputs],
                                 tf.constant_initializer(1.0))
        bn = tf.add(tf.multiply(conv_bn, gamma), beta)
        # output = tf.nn.relu(bn, name=scope.name)
        output = randomized_relu(bn,
                                 .1,
                                 name=scope.name,
                                 is_training=(not test))
        if not test:
            output = control_flow_ops.with_dependencies(
                dependencies=[assign_mean, assign_variance],
                output_tensor=output)
        _activation_summary(output)

    return output
Example #6
def batch_normalized_linear_layer(state_below,
                                  scope_name,
                                  n_inputs,
                                  n_outputs,
                                  stddev,
                                  wd,
                                  eps=.00001,
                                  test=False):
    """
    A linear layer with batch normalization
    """
    with tf.variable_scope(scope_name) as scope:
        weight = _variable_with_weight_decay("weights",
                                             shape=[n_inputs, n_outputs],
                                             stddev=stddev,
                                             wd=wd)
        act = tf.matmul(state_below, weight)
        # get moments
        act_mean, act_variance = tf.nn.moments(act, [0])
        # get mean and variance variables
        mean = _variable_on_cpu('bn_mean', [n_outputs],
                                tf.constant_initializer(0.0),
                                trainable=False)
        variance = _variable_on_cpu('bn_variance', [n_outputs],
                                    tf.constant_initializer(1.0),
                                    trainable=False)
        # assign the moments

        if not test:
            assign_mean = mean.assign(act_mean)
            assign_variance = variance.assign(act_variance)
            act_bn = tf.multiply((act - act_mean),
                                 tf.rsqrt(act_variance + eps),
                                 name=scope.name + "_bn")
        else:
            act_bn = tf.multiply((act - mean),
                                 tf.rsqrt(variance + eps),
                                 name=scope.name + "_bn")

        beta = _variable_on_cpu("beta", [n_outputs],
                                tf.constant_initializer(0.0))
        gamma = _variable_on_cpu("gamma", [n_outputs],
                                 tf.constant_initializer(1.0))
        bn = tf.add(tf.multiply(act_bn, gamma), beta)
        # output = tf.nn.relu(bn, name=scope.name)
        output = randomized_relu(bn,
                                 .1,
                                 name=scope.name,
                                 is_training=(not test))
        if not test:
            output = control_flow_ops.with_dependencies(
                dependencies=[assign_mean, assign_variance],
                output_tensor=output)
        _activation_summary(output)
    return output
Example #7
def batch_norm_for_conv(x, phase_train, scope='bn'):
    channels = x.shape.as_list()[3]
    with tf.variable_scope(scope):
        gamma = _variable_on_cpu('gamma', [
            channels,
        ],
                                 tf.constant_initializer(1.0),
                                 dtype='float32')
        beta = _variable_on_cpu('beta', [
            channels,
        ],
                                tf.constant_initializer(0.0),
                                dtype='float32')
        moving_mean = _variable_on_cpu('moving_mean', [
            channels,
        ],
                                       dtype='float32',
                                       initializer=tf.zeros_initializer(),
                                       trainable=False)
        moving_variance = _variable_on_cpu('moving_variance', [
            channels,
        ],
                                           dtype='float32',
                                           initializer=tf.zeros_initializer(),
                                           trainable=False)
        tf.add_to_collection('params', gamma)
        tf.add_to_collection('params', beta)
        tf.add_to_collection('params', moving_mean)
        tf.add_to_collection('params', moving_variance)

        if not phase_train:
            normed_x, _, _ = tf.nn.fused_batch_norm(x,
                                                    gamma,
                                                    beta,
                                                    mean=moving_mean,
                                                    variance=moving_variance,
                                                    is_training=False,
                                                    epsilon=cfg.bn_eps)
        else:
            normed_x, batch_mean, batch_var = tf.nn.fused_batch_norm(
                x, gamma, beta, is_training=True, epsilon=cfg.bn_eps)

            update_moving_mean = moving_averages.assign_moving_average(
                moving_mean, batch_mean, cfg.bn_momentum)
            update_moving_variance = moving_averages.assign_moving_average(
                moving_variance, batch_var, cfg.bn_momentum)

            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_moving_mean)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS,
                                 update_moving_variance)

        return normed_x, [x, moving_mean, moving_variance, beta, gamma]
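
Because the moving-average updates above are only added to tf.GraphKeys.UPDATE_OPS, a caller has to run them together with the training step. A minimal sketch of that standard TF 1.x pattern (loss and the learning rate are assumptions):

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)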
Example #8
def linear_layer(state_below,
                 scope_name,
                 n_inputs,
                 n_outputs,
                 stddev,
                 wd,
                 use_nonlinearity=True):
    """
    Standard linear neural network layer
    """
    with tf.variable_scope(scope_name) as scope:
        weights = _variable_with_weight_decay('weights', [n_inputs, n_outputs],
                                              stddev=stddev,
                                              wd=wd)
        biases = _variable_on_cpu('biases', [n_outputs],
                                  tf.constant_initializer(0.0))
        activation = tf.nn.xw_plus_b(state_below,
                                     weights,
                                     biases,
                                     name="activation")
        if use_nonlinearity:
            output = tf.nn.relu(activation, name=scope.name)
        else:
            output = activation
        _activation_summary(output)
    return output
Example #9
def linear_layer(state_below, scope_name, n_inputs, n_outputs, stddev, wd):
    """
    Standard linear neural network layer
    """
    with tf.variable_scope(scope_name) as scope:
        weights = _variable_with_weight_decay(
            'weights', [n_inputs, n_outputs],
            stddev=stddev, wd=wd
        )
        biases = _variable_on_cpu(
            'biases', [n_outputs], tf.constant_initializer(0.0)
        )
        output = tf.nn.xw_plus_b(state_below, weights, biases, name=scope.name)
        _activation_summary(output)
    return output
Example #10
def conv_layer(state_below, scope_name, n_inputs, n_outputs, filter_shape, stddev, wd):
    """
    A standard convolutional layer
    """
    with tf.variable_scope(scope_name) as scope:
        kernel = _variable_with_weight_decay(
            "weights", shape=[filter_shape[0], filter_shape[1], n_inputs, n_outputs],
            stddev=stddev, wd=wd
        )
        conv = tf.nn.conv2d(state_below, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu("biases", [n_outputs], tf.constant_initializer(0.0))
        bias = tf.add(conv, biases)
        output = tf.nn.relu(bias, name=scope.name)
        _activation_summary(output)
    return output
Example #11
def conv_layer(state_below, scope_name, n_inputs, n_outputs, filter_shape,
               stddev, wd):
    """
    A standard convolutional layer
    """
    with tf.variable_scope(scope_name) as scope:
        kernel = _variable_with_weight_decay(
            "weights",
            shape=[filter_shape[0], filter_shape[1], n_inputs, n_outputs],
            stddev=stddev,
            wd=wd)
        conv = tf.nn.conv2d(state_below, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu("biases", [n_outputs],
                                  tf.constant_initializer(0.0))
        bias = tf.add(conv, biases)
        output = tf.nn.relu(bias, name=scope.name)
        _activation_summary(output)
    return output
Example #12
    def __init__(self,
                 input_dim=None,
                 output_dim=1,
                 init_path=None,
                 opt_algo='gd',
                 learning_rate=1e-2,
                 l2_weight=0,
                 sync=False,
                 workers=20):
        Model.__init__(self)

        #self.graph = tf.Graph()
        #with self.graph.as_default():
        with tf.device('/cpu:0'):
            self.X = tf.sparse_placeholder(dtype)
            self.y = tf.placeholder(dtype)

        init_vars = [('w', [input_dim, output_dim], 'xavier', dtype),
                     ('b', [output_dim], 'zero', dtype)]
        self.vars = utils.init_var_map(init_vars, init_path)
        w = self.vars['w']
        b = self.vars['b']

        xw = tf.sparse_tensor_dense_matmul(self.X, w)
        logits = tf.reshape(xw + b, [-1])
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.y, logits=logits)) + \
                        l2_weight * tf.nn.l2_loss(xw)

        self.global_step = _variable_on_cpu(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate,
                                                      workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)

        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step)
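
A hypothetical training step for this model (model, indices, values, dense_shape and labels are placeholders for real objects; the sparse placeholder self.X is fed a tf.SparseTensorValue):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {model.X: tf.SparseTensorValue(indices, values, dense_shape),
            model.y: labels}
    _, loss_val = sess.run([model.train_op, model.loss], feed_dict=feed)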
Example #13
def conv_layer_with_bn(inputT, shape, train_phase, activation=True, name=None):
    in_channel = shape[2]
    out_channel = shape[3]
    k_size = shape[0]
    with tf.variable_scope(name) as scope:
        kernel = _variable_with_weight_decay(
            'ort_weights',
            shape=shape,
            initializer=orthogonal_initializer(),
            wd=None)
        conv = tf.nn.conv2d(inputT, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [out_channel],
                                  tf.constant_initializer(0.0))
        bias = tf.nn.bias_add(conv, biases)
        if activation is True:
            conv_out = tf.nn.relu(
                batch_norm_layer(bias, train_phase, scope.name))
        else:
            conv_out = batch_norm_layer(bias, train_phase, scope.name)
    return conv_out
Example #14
    def setup_graph(
        self, images, phase_train
    ):  # formerly inference(); batch_size is obtained at run time below
        # rather than being fixed when the graph is set up:
        batchsize = tf.shape(images)[0]  # yike !!!
        print('GGG')
        print(images.get_shape())
        # norm1
        norm1 = tf.nn.lrn(images,
                          depth_radius=5,
                          bias=1.0,
                          alpha=0.0001,
                          beta=0.75,
                          name='norm1')
        print(norm1.get_shape())
        # conv1
        conv1 = conv_layer_with_bn(
            norm1, [7, 7, images.get_shape().as_list()[3], 64],
            phase_train,
            name="conv1")  # yike: 7 too large? how about 3?
        print(conv1.get_shape())
        # pool1
        pool1, pool1_indices = tf.nn.max_pool_with_argmax(conv1,
                                                          ksize=[1, 2, 2, 1],
                                                          strides=[1, 2, 2, 1],
                                                          padding='SAME',
                                                          name='pool1')
        print('111111')
        print(pool1.get_shape())
        print(pool1_indices.get_shape())
        # conv2
        conv2 = conv_layer_with_bn(pool1, [7, 7, 64, 64],
                                   phase_train,
                                   name="conv2")

        # pool2
        pool2, pool2_indices = tf.nn.max_pool_with_argmax(conv2,
                                                          ksize=[1, 2, 2, 1],
                                                          strides=[1, 2, 2, 1],
                                                          padding='SAME',
                                                          name='pool2')
        print('22222')
        print(pool2.get_shape())
        print(pool2_indices.get_shape())

        # conv3
        conv3 = conv_layer_with_bn(pool2, [7, 7, 64, 64],
                                   phase_train,
                                   name="conv3")

        # pool3
        pool3, pool3_indices = tf.nn.max_pool_with_argmax(conv3,
                                                          ksize=[1, 2, 2, 1],
                                                          strides=[1, 2, 2, 1],
                                                          padding='SAME',
                                                          name='pool3')

        print('33333')
        print(pool3.get_shape())
        print(pool3_indices.get_shape())

        # conv4
        conv4 = conv_layer_with_bn(pool3, [7, 7, 64, 64],
                                   phase_train,
                                   name="conv4")

        # pool4
        pool4, pool4_indices = tf.nn.max_pool_with_argmax(conv4,
                                                          ksize=[1, 2, 2, 1],
                                                          strides=[1, 2, 2, 1],
                                                          padding='SAME',
                                                          name='pool4')
        print('44444')
        print(pool4.get_shape())
        print(pool4_indices.get_shape())
        """ End of encoder """
        """ start upsample """
        # upsample4
        # Need to change when using different dataset out_w, out_h
        # upsample4 = upsample_with_pool_indices(pool4, pool4_indices, pool4.get_shape(), out_w=45, out_h=60, scale=2, name='upsample4')
        pool3_shape = pool3.get_shape()
        upsample4 = deconv_layer(
            pool4, [2, 2, 64, 64],
            tf.stack([batchsize, pool3_shape[1], pool3_shape[2],
                      64]), 2, "up4")  #45, 60,
        #concat 4 yike
        #combined4=tf.concat(axis=3,values=(upsample4,pool3))
        combined4 = tf.concat(axis=3, values=(upsample4, conv4))

        #print(tf.stack([batchsize, 45, 60, 64]))
        # decode 4
        conv_decode4 = conv_layer_with_bn(combined4, [7, 7, 128, 64],
                                          phase_train,
                                          False,
                                          name="conv_decode4")
        print('d4444444')
        print(conv_decode4.get_shape())
        # upsample 3
        # upsample3 = upsample_with_pool_indices(conv_decode4, pool3_indices, conv_decode4.get_shape(), scale=2, name='upsample3')
        pool2_shape = pool2.get_shape()
        upsample3 = deconv_layer(
            conv_decode4, [2, 2, 64, 64],
            tf.stack([batchsize, pool2_shape[1], pool2_shape[2],
                      64]), 2, "up3")  #90, 120
        #concat 3 yike
        #       combined3=tf.concat(axis=3,values=(upsample3,pool2))
        combined3 = tf.concat(axis=3, values=(upsample3, conv3))

        # decode 3
        conv_decode3 = conv_layer_with_bn(combined3, [7, 7, 128, 64],
                                          phase_train,
                                          False,
                                          name="conv_decode3")
        print('d333333')
        print(conv_decode3.get_shape())
        # upsample2
        # upsample2 = upsample_with_pool_indices(conv_decode3, pool2_indices, conv_decode3.get_shape(), scale=2, name='upsample2')
        pool1_shape = pool1.get_shape()
        upsample2 = deconv_layer(
            conv_decode3, [2, 2, 64, 64],
            tf.stack([batchsize, pool1_shape[1], pool1_shape[2],
                      64]), 2, "up2")  #180, 240
        #concat 2 yike
        #combined2=tf.concat(axis=3,values=(upsample2,pool1))
        combined2 = tf.concat(axis=3, values=(upsample2, conv2))
        # decode 2
        conv_decode2 = conv_layer_with_bn(combined2, [7, 7, 128, 64],
                                          phase_train,
                                          False,
                                          name="conv_decode2")
        print('d22222')
        print(conv_decode2.get_shape())
        # upsample1
        # upsample1 = upsample_with_pool_indices(conv_decode2, pool1_indices, conv_decode2.get_shape(), scale=2, name='upsample1')
        upsample1 = deconv_layer(
            conv_decode2, [2, 2, 64, 64],
            tf.stack([batchsize, self.args.image_h, self.args.image_w,
                      64]), 2, "up1"
        )  # IMAGE_HEIGHT, IMAGE_WIDTH yike !!!! deconv_layer(conv_decode2, [2, 2, 64, 64], [batch_size, 360, 480, 64], 2, "up1")

        #concat 1 yike
        #combined2=tf.concat(axis=3,values=(upsample2,pool1))
        combined1 = tf.concat(axis=3, values=(upsample1, conv1))

        # decode 1
        conv_decode1 = conv_layer_with_bn(combined1, [7, 7, 128, 64],
                                          phase_train,
                                          False,
                                          name="conv_decode1")
        print('d111111')
        print(conv_decode1.get_shape())
        """ end of Decode """
        """ Start Classify """
        # output predicted class number (6)
        with tf.variable_scope('conv_classifier') as scope:
            kernel = _variable_with_weight_decay(
                'weights',
                shape=[1, 1, 64, self.num_classes],
                initializer=msra_initializer(1, 64),
                wd=0.0005)
            conv = tf.nn.conv2d(conv_decode1,
                                kernel, [1, 1, 1, 1],
                                padding='SAME')
            print('cv')
            print(conv.get_shape())
            biases = _variable_on_cpu('biases', [self.num_classes],
                                      tf.constant_initializer(0.0))
            print(biases.get_shape())
            logit = tf.nn.bias_add(conv, biases, name=scope.name)
            #conv_classifier = tf.nn.bias_add(conv, biases, name=scope.name)
            #print(conv_classifier.get_shape())
            #logit = conv_classifier
            #print('LLL')
            #print(labels)
            #print(conv_classifier)

            #loss = cal_loss(conv_classifier, labels)
            print(logit.get_shape())

        return logit  # loss
Example #15
    def __init__(self,
                 data_dir=None,
                 summary_dir=None,
                 eval_dir=None,
                 batch_size=None,
                 input_dim=None,
                 output_dim=1,
                 layer_sizes=None,
                 layer_acts=None,
                 drop_out=None,
                 layer_l2=None,
                 kernel_l2=None,
                 l2_w=0,
                 init_path=None,
                 opt_algo='gd',
                 learning_rate=1e-2,
                 sync=False,
                 workers=20):
        Model.__init__(self)

        eprint("------- create graph ---------------")

        init_vars = []
        num_inputs = len(layer_sizes[0])
        factor_order = layer_sizes[1]
        for i in range(num_inputs):
            layer_input = layer_sizes[0][i]
            layer_output = factor_order
            init_vars.append(('w0_%d' % i, [layer_input,
                                            layer_output], 'tnormal', dtype))
            init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype))

        init_vars.append(('w1', [num_inputs * factor_order,
                                 layer_sizes[2]], 'tnormal', dtype))
        init_vars.append(('k1', [num_inputs,
                                 layer_sizes[2]], 'tnormal', dtype))
        init_vars.append(('b1', [layer_sizes[2]], 'zero', dtype))

        for i in range(2, len(layer_sizes) - 1):
            layer_input = layer_sizes[i]
            layer_output = layer_sizes[i + 1]
            init_vars.append(('w%d' % i, [layer_input,
                                          layer_output], 'tnormal', dtype))
            init_vars.append(('b%d' % i, [layer_output], 'zero', dtype))

        with tf.name_scope('input_%d' % FLAGS.task_index) as scope:
            self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
            self.B = tf.sparse_placeholder(tf.float32, name='B')
            self.y = tf.placeholder(dtype)

        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)

        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)]
        b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)]
        xw = [
            tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
            for i in range(num_inputs)
        ]
        x = tf.concat([xw[i] + b0[i] for i in range(num_inputs)], 1)
        l = tf.nn.dropout(utils.activate(x, layer_acts[0]),
                          self.layer_keeps[0])

        w1 = self.vars['w1']
        k1 = self.vars['k1']
        b1 = self.vars['b1']
        p = tf.reduce_sum(
            tf.reshape(
                tf.matmul(
                    tf.reshape(
                        tf.transpose(
                            tf.reshape(l, [-1, num_inputs, factor_order]),
                            [0, 2, 1]), [-1, num_inputs]), k1),
                [-1, factor_order, layer_sizes[2]]), 1)
        l = tf.nn.dropout(
            utils.activate(tf.matmul(l, w1) + b1 + p, layer_acts[1]),
            self.layer_keeps[1])

        for i in range(2, len(layer_sizes) - 1):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])

        ## logits
        l = tf.reshape(l, [-1])
        self.y_prob = tf.sigmoid(l)

        self.loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=l, labels=self.y))

        if layer_l2 is not None:
            self.loss += layer_l2[0] * tf.nn.l2_loss(tf.concat(xw, 1))
            for i in range(1, len(layer_sizes) - 1):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)
        if kernel_l2 is not None:
            self.loss += kernel_l2 * tf.nn.l2_loss(k1)

        self.global_step = _variable_on_cpu(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate,
                                                      workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)

        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step)

        self.summary_op = tf.summary.merge_all()
Example #16
    def __init__(self,
                 data_dir=None,
                 summary_dir=None,
                 eval_dir=None,
                 batch_size=None,
                 input_dim=None,
                 output_dim=1,
                 layer_sizes=None,
                 layer_acts=None,
                 drop_out=None,
                 init_path=None,
                 opt_algo='gd',
                 learning_rate=1e-2,
                 l2_w=0,
                 layer_l2=None,
                 sync=False,
                 workers=20):
        Model.__init__(self)

        eprint("-------- create graph ----------")

        init_vars = []

        # linear part
        init_vars.append(('linear', [input_dim, output_dim], 'xavier', dtype))
        init_vars.append(('bias', [output_dim], 'zero', dtype))

        num_inputs = len(layer_sizes[0])
        factor_order = layer_sizes[1]
        for i in range(num_inputs):
            layer_input = layer_sizes[0][i]
            layer_output = factor_order
            # layer_sizes[0][i] (the field sizes) stores the number of features in the i-th field
            init_vars.append(('w0_%d' % i, [layer_input,
                                            layer_output], 'xavier', dtype))
            init_vars.append(('b0_%d' % i, [layer_output], 'zero', dtype))

        # full connection
        node_in = num_inputs * factor_order
        init_vars.append(('w1', [node_in, layer_sizes[2]], 'xavier', dtype))
        init_vars.append(('b1', [layer_sizes[2]], 'zero', dtype))
        for i in range(2, len(layer_sizes) - 1):
            layer_input = layer_sizes[i]
            layer_output = layer_sizes[i + 1]
            init_vars.append(('w%d' % i, [layer_input,
                                          layer_output], 'xavier', dtype))
            init_vars.append(('b%d' % i, [layer_output], 'zero', dtype))

        #self.graph = tf.Graph()
        #with self.graph.as_default():
        #with tf.device('/cpu:0'):
        with tf.name_scope('input_%d' % FLAGS.task_index) as scope:
            self.X = [tf.sparse_placeholder(dtype) for i in range(num_inputs)]
            self.B = tf.sparse_placeholder(tf.float32, name='B')
            self.y = tf.placeholder(dtype)

        self.keep_prob_train = 1 - np.array(drop_out)
        self.keep_prob_test = np.ones_like(drop_out)
        self.layer_keeps = tf.placeholder(dtype)

        self.vars = utils.init_var_map(init_vars, init_path)
        w0 = [self.vars['w0_%d' % i] for i in range(num_inputs)]
        b0 = [self.vars['b0_%d' % i] for i in range(num_inputs)]
        xw = [
            tf.sparse_tensor_dense_matmul(self.X[i], w0[i])
            for i in range(num_inputs)
        ]
        x = tf.concat([xw[i] + b0[i] for i in range(num_inputs)], 1)

        ## normalize
        fmX = tf.sparse_add(self.X[0], self.X[1])
        for i in range(2, num_inputs):
            fmX = tf.sparse_add(fmX, self.X[i])
        Xnorm = tf.reshape(1.0 / tf.sparse_reduce_sum(fmX, 1),
                           [-1, output_dim])

        l = tf.nn.dropout(utils.activate(x, layer_acts[0]),
                          self.layer_keeps[0])

        for i in range(1, len(layer_sizes) - 1):
            wi = self.vars['w%d' % i]
            bi = self.vars['b%d' % i]
            eprint(l.get_shape(), wi.get_shape(), bi.get_shape())
            l = tf.nn.dropout(
                utils.activate(tf.matmul(l, wi) + bi, layer_acts[i]),
                self.layer_keeps[i])

        ## FM linear part
        fmb = self.vars['bias']
        fmw = self.vars['linear']
        Xw = tf.sparse_tensor_dense_matmul(self.B, fmw)
        ## cross term
        # XV, shape: input_dim*k
        fmXV = tf.add_n(xw)
        XV_square = tf.square(fmXV)
        eprint(XV_square.get_shape())
        # X^2 * V^2, shape: input_dim*k
        fmX2 = [
            tf.SparseTensor(self.X[i].indices, tf.square(self.X[i].values),
                            tf.to_int64(tf.shape(self.X[i])))
            for i in range(num_inputs)
        ]
        fmV2 = [tf.square(w0[i]) for i in range(num_inputs)]
        fmX2V2 = [
            tf.sparse_tensor_dense_matmul(fmX2[i], fmV2[i])
            for i in range(num_inputs)
        ]
        X2V2 = tf.add_n(fmX2V2)
        eprint(X2V2.get_shape())

        # 1/2 * row_sum(XV_square - X2V2), shape: input_dim*1
        p = 0.5 * Xnorm * tf.reshape(tf.reduce_sum(XV_square - X2V2, 1),
                                     [-1, output_dim])

        ## logits
        logits = tf.reshape(l + Xw + fmb + p, [-1])
        ## predict
        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
                l2_w * tf.nn.l2_loss(Xw)
        if layer_l2 is not None:
            self.loss += layer_l2[0] * tf.nn.l2_loss(tf.concat(xw, 1))
            for i in range(1, len(layer_sizes) - 1):
                wi = self.vars['w%d' % i]
                self.loss += layer_l2[i] * tf.nn.l2_loss(wi)

        self.global_step = _variable_on_cpu(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate,
                                                      workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)

        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step)
        self.summary_op = tf.summary.merge_all()
Example #17
    def __init__(self,
                 data_dir=None,
                 summary_dir=None,
                 eval_dir=None,
                 batch_size=None,
                 input_dim=None,
                 output_dim=1,
                 factor_order=10,
                 init_path=None,
                 opt_algo='gd',
                 learning_rate=1e-2,
                 l2_w=0,
                 sync=False,
                 workers=20):
        Model.__init__(self)

        eprint("-------- create graph ----------")
        with tf.name_scope('input_%d' % FLAGS.task_index) as scope:
            self.X = tf.sparse_placeholder(tf.float32, name='X')
            self.B = tf.sparse_placeholder(tf.float32, name='B')
            self.y = tf.placeholder(tf.float32, shape=[None], name='y')

        init_vars = [('linear', [input_dim, output_dim], 'xavier', dtype),
                     ('U', [input_dim, factor_order], 'xavier', dtype),
                     ('V', [input_dim, factor_order], 'xavier', dtype),
                     ('bias', [output_dim], 'zero', dtype)]

        self.vars = utils.init_var_map(init_vars, None)
        w = self.vars['linear']
        U = self.vars['U']
        V = self.vars['V']
        b = self.vars['bias']

        ## normalize
        Xnorm = tf.reshape(1.0 / tf.sparse_reduce_sum(self.X, 1),
                           [-1, output_dim])

        ## linear term
        Xw = tf.sparse_tensor_dense_matmul(self.B, w, name="Xw")

        ## cross term
        XU = tf.sparse_tensor_dense_matmul(self.X, U, name="XU")
        XV = tf.sparse_tensor_dense_matmul(self.X, V, name="XV")
        X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values),
                                   tf.to_int64(tf.shape(self.X)))
        p = 0.5 * Xnorm * tf.reshape(
            tf.reduce_sum(
                XU * XV - tf.sparse_tensor_dense_matmul(X_square, U * V), 1),
            [-1, output_dim])

        logits = tf.reshape(b + Xw + p, [-1])

        self.y_prob = tf.sigmoid(logits)
        #
        self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
                        l2_w * tf.nn.l2_loss(Xw)

        self.global_step = _variable_on_cpu(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate,
                                                      workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)

        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step)
        self.summary_op = tf.summary.merge_all()
Example #18
    def __init__(self,
                 data_dir=None,
                 eval_dir=None,
                 summary_dir=None,
                 num_epochs=1,
                 batch_size=None,
                 input_dim=None,
                 output_dim=1,
                 factor_order=10,
                 init_path=None,
                 opt_algo='gd',
                 learning_rate=1e-2,
                 l2_w=0,
                 l2_v=0,
                 sync=False,
                 workers=20):
        Model.__init__(self)

        data_file_list = tf.gfile.ListDirectory(data_dir)
        data_file_list = [x for x in data_file_list if '.tf' in x]
        data_file_list = [os.path.join(data_dir, x) for x in data_file_list]
        data_file_list.sort()
        eprint("input files:", data_file_list)
        input_files = data_file_list

        eprint("-------- create graph ----------")
        #self.graph = tf.Graph()
        #with self.graph.as_default():
        with tf.device('/cpu:0'):
            self.X = tf.sparse_placeholder(tf.float32, name='X')
            self.B = tf.sparse_placeholder(tf.float32, name='B')
            self.y = tf.placeholder(tf.float32, shape=[None], name='y')

        init_vars = [('linear', [input_dim, output_dim], 'xavier', dtype),
                     ('V', [input_dim, factor_order], 'xavier', dtype),
                     ('bias', [output_dim], 'zero', dtype)]

        self.vars = utils.init_var_map(init_vars, None)
        w = self.vars['linear']
        V = self.vars['V']
        b = self.vars['bias']

        ## linear term
        Xw = tf.sparse_tensor_dense_matmul(self.B, w)

        ## cross term
        # X^2
        X_square = tf.SparseTensor(self.X.indices, tf.square(self.X.values),
                                   tf.to_int64(tf.shape(self.X)))
        # XV, shape: input_dim*k
        XV_square = tf.square(tf.sparse_tensor_dense_matmul(self.X, V))
        # X^2 * V^2, shape: input_dim*k
        X2V2 = tf.sparse_tensor_dense_matmul(X_square, tf.square(V))

        ## normalize
        Xnorm = tf.reshape(1.0 / tf.sparse_reduce_sum(self.X, 1),
                           [-1, output_dim])
        # 1/2 * row_sum(XV_square - X2V2), shape: input_dim*1
        p = 0.5 * Xnorm * tf.reshape(tf.reduce_sum(XV_square - X2V2, 1),
                                     [-1, output_dim])

        logits = tf.reshape(b + Xw + p, [-1])

        self.y_prob = tf.sigmoid(logits)

        self.loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=self.y)) + \
                        l2_w * tf.nn.l2_loss(Xw)
        self.global_step = _variable_on_cpu(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        if sync:
            self.optimizer = utils.get_sync_optimizer(opt_algo, learning_rate,
                                                      workers)
        else:
            self.optimizer = utils.get_optimizer(opt_algo, learning_rate)

        self.train_op = self.optimizer.minimize(self.loss,
                                                global_step=self.global_step)
        self.summary_op = tf.summary.merge_all()
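
For reference, the cross term computed above is the usual factorization-machine identity, scaled per row by Xnorm (a restatement of the code's comments, not from the source):

# p[i] = 0.5 * Xnorm[i] * sum_k ( (X @ V)[i, k]**2 - (X**2 @ V**2)[i, k] )
#      = Xnorm[i] * sum_{j < l} <V[j], V[l]> * X[i, j] * X[i, l]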
Example #19
def inception_v1_module(feed,
                        feed_dim=256,
                        map_size=(128, 192, 96, 64),
                        reduce1x1_size=64,
                        batch_norm=False):
    """
    :param feed: input tensor (NHWC) fed into the Inception module
    :param map_size: number of feature maps output by each tower (1x1, 3x3, 5x5, and the 1x1 after pooling) inside the Inception module
    :param reduce1x1_size: number of feature maps output by each 1x1 convolution that precedes a larger convolution
    :return: concatenation of the tower outputs after the activation
    """
    def conv2d_s1(x, W):
        return conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_3x3_s1(x):
        return tf.nn.max_pool(x,
                              ksize=[1, 3, 3, 1],
                              strides=[1, 1, 1, 1],
                              padding='SAME')

    # follows input
    W_conv_1x1_1 = _variable_with_weight_decay(
        'W_conv_1x1_1',
        shape=[1, 1, feed_dim, map_size[0]],
        stddev=5e-2,
        wd=None)
    b_conv_1x1_1 = _variable_on_cpu('b_conv_1x1_1', [map_size[0]],
                                    tf.constant_initializer(0.0))

    # follows input
    W_conv_1x1_2 = _variable_with_weight_decay(
        'W_conv_1x1_2',
        shape=[1, 1, feed_dim, reduce1x1_size],
        stddev=5e-2,
        wd=None)
    b_conv_1x1_2 = _variable_on_cpu('b_conv_1x1_2', [reduce1x1_size],
                                    tf.constant_initializer(0.0))

    # follows input
    W_conv_1x1_3 = _variable_with_weight_decay(
        'W_conv_1x1_3',
        shape=[1, 1, feed_dim, reduce1x1_size],
        stddev=5e-2,
        wd=None)
    b_conv_1x1_3 = _variable_on_cpu('b_conv_1x1_3', [reduce1x1_size],
                                    tf.constant_initializer(0.0))

    # follows 1x1_2
    # note the shape parameters here
    W_conv_3x3 = _variable_with_weight_decay(
        'W_conv_3x3',
        shape=[3, 3, reduce1x1_size, map_size[1]],
        stddev=5e-2,
        wd=None)
    b_conv_3x3 = _variable_on_cpu('b_conv_3x3', [map_size[1]],
                                  tf.constant_initializer(0.0))

    # follows 1x1_3
    W_conv_5x5 = _variable_with_weight_decay(
        'W_conv_5x5',
        shape=[5, 5, reduce1x1_size, map_size[2]],
        stddev=5e-2,
        wd=None)
    b_conv_5x5 = _variable_on_cpu('b_conv_5x5', [map_size[2]],
                                  tf.constant_initializer(0.0))

    # follows max pooling
    W_conv_1x1_4 = _variable_with_weight_decay(
        'W_conv_1x1_4',
        shape=[1, 1, feed_dim, map_size[3]],
        stddev=5e-2,
        wd=None)
    b_conv_1x1_4 = _variable_on_cpu('b_conv_1x1_4', [map_size[3]],
                                    tf.constant_initializer(0.0))

    # Inception Module
    conv_1x1_1 = conv2d_s1(feed, W_conv_1x1_1) + b_conv_1x1_1
    conv_1x1_2 = tf.nn.relu(conv2d_s1(feed, W_conv_1x1_2) + b_conv_1x1_2)
    conv_1x1_3 = tf.nn.relu(conv2d_s1(feed, W_conv_1x1_3) + b_conv_1x1_3)
    conv_3x3 = conv2d_s1(conv_1x1_2, W_conv_3x3) + b_conv_3x3
    conv_5x5 = conv2d_s1(conv_1x1_3, W_conv_5x5) + b_conv_5x5
    maxpool1 = max_pool_3x3_s1(feed)
    conv_1x1_4 = conv2d_s1(maxpool1, W_conv_1x1_4) + b_conv_1x1_4

    # concatenate all the feature maps and hit them with a relu
    concat = tf.concat([conv_1x1_1, conv_3x3, conv_5x5, conv_1x1_4], 3)
    if batch_norm:
        biases = _variable_on_cpu('biases', sum(map_size),
                                  tf.constant_initializer(0.0))
        mean, variance = tf.nn.moments(concat, axes=[0])
        epsilon = 1e-5
        gamma = _variable_on_cpu('gammas', sum(map_size),
                                 tf.constant_initializer(1.0))
        pre_activation = tf.nn.batch_normalization(concat, mean, variance,
                                                   biases, gamma, epsilon)
    else:
        pre_activation = concat
    after_activation = tf.nn.relu(pre_activation, name='activated_out')
    _activation_summary(after_activation)

    return after_activation
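
A hypothetical call for the module above; the input's channel count must equal feed_dim, and the output has sum(map_size) channels (480 here). The features tensor is an assumption:

mixed = inception_v1_module(features, feed_dim=256,
                            map_size=(128, 192, 96, 64),
                            reduce1x1_size=64, batch_norm=False)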
Example #20
def conv_bn_relu(x,
                 out_channels,
                 ksize,
                 stride=1,
                 groups=1,
                 qweight=False,
                 qactivation=False,
                 padding='SAME',
                 scale=None,
                 has_bn=True,
                 has_relu=True,
                 phase_train=False,
                 scope=None):
    node = {'input': x, 'output': None, 'W': None, 'b': None}

    cfg_node = {
        'name': scope,
        'type': 'Conv2D',
        'out': out_channels,
        'in': 0,
        'ksize': ksize,
        'stride': stride,
        'groups': groups,
        'padding': padding,
        'active': has_relu
    }

    with tf.variable_scope(scope):
        in_channels = x.shape.as_list()[3]
        cfg_node['in'] = in_channels

        assert in_channels % groups == 0 and out_channels % groups == 0
        shape = [ksize, ksize, in_channels // groups, out_channels]
        kernel = _variable_with_weight_decay('W', shape)
        tf.add_to_collection('params', kernel)
        node['W'] = kernel
        if qweight:
            kernel = int_quantize(kernel,
                                  scale[scope]['W'],
                                  num_bits=8,
                                  phase_train=phase_train)

        if groups == 1:
            f = tf.nn.conv2d(x,
                             kernel, [1, stride, stride, 1],
                             padding=padding)
        else:
            if out_channels == groups and in_channels == groups:
                f = tf.nn.depthwise_conv2d(x,
                                           tf.transpose(kernel, (0, 1, 3, 2)),
                                           [1, stride, stride, 1],
                                           padding=padding)
            else:
                kernel_list = tf.split(kernel, groups, axis=3)
                x_list = tf.split(x, groups, axis=3)
                f = tf.concat([
                    tf.nn.conv2d(x_list[i],
                                 kernel_list[i], [1, stride, stride, 1],
                                 padding=padding) for i in range(groups)
                ],
                              axis=3)

        if has_bn:
            f, bn_info = batch_norm_for_conv(f, phase_train)
            _, moving_mean, moving_variance, beta, gamma = bn_info
            s = gamma / tf.sqrt(moving_variance + cfg.bn_eps)
            node['W'] = kernel * tf.reshape(s, (1, 1, 1, -1))
            node['b'] = beta - s * moving_mean
        else:
            biases = _variable_on_cpu('b', out_channels,
                                      tf.constant_initializer(0.0))
            tf.add_to_collection('params', biases)
            node['b'] = biases

            f = tf.nn.bias_add(f, biases)

        if has_relu:
            f = tf.nn.relu6(f)
        node['output'] = f
        print(scope, f.shape)

        tf.add_to_collection('nodes', node)
        tf.add_to_collection('cfg_nodes', cfg_node)

        if qactivation:
            f = int_quantize(f,
                             scale[scope]['output'],
                             num_bits=8,
                             phase_train=phase_train)
        return f