Ejemplo n.º 1
0
 def __variable_summaries(self, var):
     """Attach min/max/mean/stddev scalar summaries and a histogram to ``var``.

     Args:
         var: Tensor whose values should be summarised.
     """
     average = tf.reduce_mean(var)
     # Population standard deviation: root of the mean squared deviation.
     spread = tf.sqrt(tf.reduce_mean(tf.square(var - average)))
     scalar_stats = (
         ('min', tf.reduce_min(var)),
         ('max', tf.reduce_max(var)),
         ('mean', average),
         ('stddev', spread),
     )
     for tag, value in scalar_stats:
         tf.summary.scalar(tag, value)
     tf.summary.histogram('histogram', var)
Ejemplo n.º 2
0
def norm(x, scope, *, axis=-1, epsilon=1e-5):
    """Standardise ``x`` along ``axis``, then apply a learned scale and shift.

    Args:
        x: Input tensor; the size of its last dimension sets the length of
            the scale (``g``) and shift (``b``) variables.
        scope: Variable scope under which ``g`` and ``b`` are created.
        axis: Axis over which the mean and variance are computed.
        epsilon: Added to the variance before the reciprocal square root.

    Returns:
        The normalised tensor multiplied by ``g`` and offset by ``b``.
    """
    with tf.variable_scope(scope):
        width = x.shape[-1]
        gain = tf.get_variable('g', [width],
                               initializer=tf.constant_initializer(1))
        shift = tf.get_variable('b', [width],
                                initializer=tf.constant_initializer(0))
        centre = tf.reduce_mean(x, axis=axis, keepdims=True)
        deviation = x - centre
        variance = tf.reduce_mean(tf.square(deviation),
                                  axis=axis,
                                  keepdims=True)
        standardised = deviation * tf.rsqrt(variance + epsilon)
        return standardised * gain + shift
Ejemplo n.º 3
0
 def learn(self):
     """Sample rows from the replay buffer and build two loss tensors.

     ``loss1`` is the mean squared difference between ``self.rs_p`` and the
     last column of the sampled rows; ``loss2`` is the mean softmax
     cross-entropy between ``self.state_hat`` and one-hot labels produced by
     ``utils.onehot_mat``.
     """
     # Indices are drawn below memory_size once memory_count exceeds it,
     # otherwise below memory_count.
     upper_bound = (self.memory_size if self.memory_count > self.memory_size
                    else self.memory_count)
     sample_index = np.random.choice(upper_bound, size=batch_size)
     train_data_sets = self.replay_buffer[sample_index, :]

     last_column = train_data_sets[:, -1]
     loss1 = tf.reduce_mean(tf.squared_difference(self.rs_p, last_column))

     # NOTE(review): `[:, ]` selects every column -- confirm that a column
     # range was not lost here.
     onehot_labels = utils.onehot_mat(train_data_sets[:, ])
     loss2 = tf.reduce_mean(
         tf.nn.softmax_cross_entropy_with_logits(labels=onehot_labels,
                                                 logits=self.state_hat))
Ejemplo n.º 4
0
 def build(self, guidence, newNet):
     """Build a bidirectional LSTM whose initial states come from ``guidence``.

     Stores the concatenated forward/backward outputs in ``self.outputs`` and
     their mean over axis 1 in ``self.RNNState``.

     Args:
         guidence: Tensor multiplied with the "Fw1"/"Fw2"/"Bw1"/"Bw2" weight
             matrices to produce the initial LSTM states.
         newNet: Not used by this method.
     """

     def guided_state(key):
         # ReLU of the affine map of `guidence` selected by `key`.
         return tf.nn.relu(
             tf.matmul(guidence, self.weights[key]) + self.biases[key])

     def dropout_lstm():
         # LSTM cell with dropout on its inputs only (outputs are kept).
         return tf.compat.v1.nn.rnn_cell.DropoutWrapper(
             tf.nn.rnn_cell.BasicLSTMCell(self.nHidden),
             input_keep_prob=self.pKeep,
             output_keep_prob=1.0)

     with tf.variable_scope("training_variable"):
         inputEmb = tf.nn.embedding_lookup(self.embedding, self.X)

         initFw = tf.nn.rnn_cell.LSTMStateTuple(guided_state("Fw1"),
                                                guided_state("Fw2"))
         initBw = tf.nn.rnn_cell.LSTMStateTuple(guided_state("Bw1"),
                                                guided_state("Bw2"))

         rnnCellFw = dropout_lstm()
         rnnCellBw = dropout_lstm()

         outputs, _ = tf.nn.bidirectional_dynamic_rnn(
             cell_fw=rnnCellFw,
             cell_bw=rnnCellBw,
             inputs=inputEmb,
             initial_state_fw=initFw,
             initial_state_bw=initBw,
             dtype=tf.float32)

         # Join the forward and backward outputs along axis 2.
         self.outputs = tf.concat(outputs, axis=2)
         self.RNNState = tf.reduce_mean(self.outputs, axis=1)
Ejemplo n.º 5
0
    def _build_net(self):
        """Create the evaluation network, its loss/train op, and the target network.

        Both networks share the same three-layer ReLU architecture; their
        variables live under the 'eval_net' and 'target_net' scopes and are
        additionally registered in the 'eval_net_params' and
        'target_net_params' collections respectively.
        """

        def build_layer(s, c_names, n_l1, n_l2, w_initializer, b_initializer):
            """Map ``s`` through three ReLU dense layers (scopes l1, l2, l3)."""

            def relu_dense(inputs, scope, w_name, b_name, n_in, n_out):
                with tf.variable_scope(scope):
                    weights = tf.get_variable(w_name, [n_in, n_out],
                                              initializer=w_initializer,
                                              collections=c_names)
                    offsets = tf.get_variable(b_name, [1, n_out],
                                              initializer=b_initializer,
                                              collections=c_names)
                    return tf.nn.relu(tf.matmul(inputs, weights) + offsets)

            hidden1 = relu_dense(s, 'l1', 'w1', 'b1', self.n_features, n_l1)
            hidden2 = relu_dense(hidden1, 'l2', 'w2', 'b2', n_l1, n_l2)
            return relu_dense(hidden2, 'l3', 'w3', 'b3', n_l2,
                              self.n_actions)

        # Inputs of the evaluation network: current state and expected output.
        self.state = tf.placeholder(tf.float32, [None, self.n_features],
                                    name='state')
        self.q_target = tf.placeholder(tf.float32, [None, self.n_actions],
                                       name='q_target')

        # Hyper-parameters shared by both networks.
        n_l1 = n_l2 = 64
        w_initializer = tf.random_normal_initializer(0.0, 0.3)
        b_initializer = tf.random_normal_initializer(0., 0.3)

        with tf.variable_scope('eval_net'):
            eval_collections = [
                'eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES
            ]
            self.q_eval = build_layer(self.state, eval_collections, n_l1,
                                      n_l2, w_initializer, b_initializer)

        with tf.variable_scope('loss'):
            squared_error = tf.squared_difference(self.q_target, self.q_eval)
            self.loss = tf.reduce_mean(squared_error)

        with tf.variable_scope('train'):
            rmsprop = tf.train.RMSPropOptimizer(self.learning_rate)
            self._train_op = rmsprop.minimize(self.loss)

        # Target network: same architecture, fed with the next state.
        self.state_ = tf.placeholder(tf.float32, [None, self.n_features],
                                     name='state_')
        with tf.variable_scope('target_net'):
            target_collections = [
                'target_net_params', tf.GraphKeys.GLOBAL_VARIABLES
            ]
            self.q_next = build_layer(self.state_, target_collections, n_l1,
                                      n_l2, w_initializer, b_initializer)
Ejemplo n.º 6
0
 def predicting(self, rate):
     """Build the two-layer prediction head with its loss and accuracy.

     Args:
         rate: Passed as ``pos_weight`` to the weighted cross-entropy.

     Returns:
         Tuple ``(loss, accuracy, predictPossibility)``: the mean weighted
         cross-entropy plus the L2 penalty, the fraction of sigmoid outputs
         thresholded at 0.5 that equal ``self.y``, and the sigmoid outputs.
     """
     pre_activation = (tf.matmul(self.concatInput, self.weights["MLP1"]) +
                       self.biases["MLP1"])
     hidden = tf.nn.relu(pre_activation)
     logits = tf.matmul(hidden, self.weights["MLP2"]) + self.biases["MLP2"]
     predictPossibility = tf.nn.sigmoid(logits)

     # Threshold the probabilities at 0.5 and compare with the labels.
     hard_labels = tf.cast(predictPossibility > 0.5, tf.float32)
     hits = tf.cast(tf.equal(hard_labels, self.y), tf.float32)
     accuracy = tf.reduce_mean(hits)

     data_loss = tf.reduce_mean(
         tf.nn.weighted_cross_entropy_with_logits(targets=self.y,
                                                  logits=logits,
                                                  pos_weight=rate))

     # L2 penalty over the global variables in the 'training_variable' scope.
     scoped_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     'training_variable')
     penalty = self.l2_para * tf.reduce_sum(
         [tf.nn.l2_loss(v) for v in scoped_vars])

     return data_loss + penalty, accuracy, predictPossibility
Ejemplo n.º 7
0
def pca(x, dim=2):
    """Project ``x`` onto its ``dim`` leading principal components.

    Args:
        x: Input matrix. Treated as a 2-D tensor with one observation per row
            and one feature per column (the covariance is divided by
            ``rows - 1``); assumed to be floating point -- TODO confirm
            against callers.
        dim: Number of dimensions to keep after the reduction.

    Returns:
        Tensor with one row per observation and ``dim`` columns, ordered by
        decreasing eigenvalue.

    Raises:
        ValueError: If the feature count of ``x`` is statically known and
            ``dim`` is not smaller than it.
    """
    with tf.name_scope("PCA"):
        # Validate with plain Python: the truthiness of the object returned
        # by tf.assert_less says nothing about whether the condition holds.
        n_features = x.get_shape().as_list()[1]
        if n_features is not None and dim >= n_features:
            raise ValueError(
                "dim (%d) must be smaller than the number of features (%d)" %
                (dim, n_features))

        # Dynamic row count so an unknown batch dimension is supported.
        n_samples = tf.cast(tf.shape(x)[0], x.dtype)

        # Centre every feature (column); the covariance below is taken over
        # columns, so the per-column mean is the one to remove.
        mean = tf.reduce_mean(x, axis=0, keepdims=True)
        x_new = x - mean
        cov = tf.matmul(x_new, x_new, transpose_a=True) / (n_samples - 1)

        e, v = tf.linalg.eigh(cov, name="eigh")
        e_index_sort = tf.math.top_k(e, sorted=True, k=dim)[1]
        # eigh stores eigenvectors as the columns of v, so select columns.
        v_new = tf.gather(v, indices=e_index_sort, axis=1)
        projected = tf.matmul(x_new, v_new)
    return projected
Ejemplo n.º 8
0
def train(x_train, y_train):
    """Fit a linear model with a logistic loss and L1 proximal gradient descent.

    After every epoch the weights are read back, shifted by ``threshold`` and
    written into the variable again.

    Args:
        x_train: 2-D array with one sample per row.
        y_train: 2-D array of targets aligned with the rows of ``x_train``.
            The loss ``log(1 + exp(-y * prediction))`` presumably expects
            labels in {-1, +1} -- TODO confirm against callers.

    Returns:
        Tuple ``(curr_w, curr_b)`` with the weight array and bias value after
        the last epoch (the initial values when ``n_epochs`` is 0).
    """
    n_samples, _ = x_train.shape

    w = tf.Variable(np.random.rand(input_dim, 1).astype(dtype='float32'),
                    name="weight")
    b = tf.Variable(0.0, dtype=tf.float32, name="bias")

    x = tf.placeholder(dtype=tf.float32, name='x')
    y = tf.placeholder(dtype=tf.float32, name='y')

    predictions = tf.matmul(x, w) + b
    loss = tf.reduce_mean(
        tf.log(1 + tf.exp(tf.multiply(-1.0 * y, predictions))))

    optimizer = tf.train.ProximalGradientDescentOptimizer(
        learning_rate=learn_rate,
        l1_regularization_strength=0.1).minimize(loss)

    # Build the write-back op once. Creating tf.assign inside the epoch loop
    # would add a new node to the graph on every iteration.
    w_value = tf.placeholder(dtype=tf.float32,
                             shape=[input_dim, 1],
                             name='w_value')
    assign_w = tf.assign(w, w_value)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Guarantees the return values exist even when n_epochs is 0.
        curr_w, curr_b = sess.run([w, b])

        for _epoch in range(n_epochs):
            for idx in range(0, n_samples, batch_size):
                iE = min(n_samples, idx + batch_size)
                x_batch = x_train[idx:iE, :]
                y_batch = y_train[idx:iE, :]
                sess.run([optimizer], feed_dict={x: x_batch, y: y_batch})
            curr_w, curr_b = sess.run([w, b])

            # NOTE(review): weights >= -threshold are always reduced by
            # threshold, including those with |w| < threshold; this differs
            # from textbook soft-thresholding -- confirm it is intended.
            below = curr_w < threshold * -1
            curr_w[below] += threshold
            curr_w[~below] -= threshold
            sess.run(assign_w, feed_dict={w_value: curr_w})
    return curr_w, curr_b
Ejemplo n.º 9
0
    def __init__(self,
                 input_width=227,
                 input_height=227,
                 input_channels=3,
                 num_classes=1000,
                 learning_rate=0.01,
                 momentum=0.9,
                 keep_prob=0.5):
        """Build the full network graph: five conv layers, three dense layers,
        cross-entropy loss, momentum optimizer and accuracy metric.

        Defaults follow the values the original comments quote from the
        article: learning rate 0.01, momentum 0.9, weights drawn from a
        zero-mean Gaussian with standard deviation 0.01. The article also
        mentions a batch size of 128 and weight decay of 0.0005, which are
        not configured here.

        Args:
            input_width: Width of the input images.
            input_height: Height of the input images.
            input_channels: Number of channels of the input images.
            num_classes: Size of the label vector and of the logits.
            learning_rate: Learning rate handed to the momentum optimizer.
            momentum: Momentum handed to the momentum optimizer.
            keep_prob: Stored on the instance as ``self.keep_prob``.
        """
        self.input_width = input_width
        self.input_height = input_height
        self.input_channels = input_channels
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.keep_prob = keep_prob

        # Parameters of the Gaussian weight initialisation.
        self.random_mean = 0
        self.random_stddev = 0.01

        # Bias initialisation per the article: constant 1 for conv layers
        # 2, 4 and 5 and for the fully-connected hidden layers; constant 0
        # for the remaining layers.

        # Keyword sets shared by several layers below.
        pool_3x3_s2 = dict(filter_width=3,
                           filter_height=3,
                           stride_x=2,
                           stride_y=2,
                           padding='VALID')
        conv_3x3_s1 = dict(filter_width=3,
                           filter_height=3,
                           stride_x=1,
                           stride_y=1,
                           padding='SAME')

        # Images: [batch, height, width, channels] (227x227x3 by default).
        with tf.name_scope('input'):
            image_shape = [
                None, self.input_height, self.input_width,
                self.input_channels
            ]
            self.X = tf.placeholder(dtype=tf.float32,
                                    shape=image_shape,
                                    name='X')

        # Labels: [batch, num_classes] (1000 by default).
        with tf.name_scope('labels'):
            self.Y = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.num_classes],
                                    name='Y')

        # Scalar dropout keep probability.
        with tf.name_scope('dropout'):
            self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                    shape=(),
                                                    name='dropout_keep_prob')

        # Layer 1: 227x227x3
        #   -> conv 11x11, 96 filters, stride 4, valid -> 55x55x96 (+ ReLU)
        #   -> local response normalization            -> 55x55x96
        #   -> max-pool 3x3, stride 2, valid           -> 27x27x96
        # 96 = 48*2: in the article each GPU runs one half of the layer.
        with tf.name_scope('layer1'):
            conv1 = self.__conv(input=self.X,
                                filter_width=11,
                                filter_height=11,
                                filters_count=96,
                                stride_x=4,
                                stride_y=4,
                                padding='VALID',
                                init_biases_with_the_constant_1=False)
            norm1 = self.__local_response_normalization(input=conv1)
            pool1 = self.__max_pool(input=norm1, **pool_3x3_s2)

        # Layer 2: 27x27x96
        #   -> conv 5x5, 256 filters, stride 1, same -> 27x27x256 (+ ReLU)
        #   -> local response normalization          -> 27x27x256
        #   -> max-pool 3x3, stride 2, valid         -> 13x13x256
        # 256 = 128*2 (split across the two GPUs in the article).
        with tf.name_scope('layer2'):
            conv2 = self.__conv(input=pool1,
                                filter_width=5,
                                filter_height=5,
                                filters_count=256,
                                stride_x=1,
                                stride_y=1,
                                padding='SAME',
                                init_biases_with_the_constant_1=True)
            norm2 = self.__local_response_normalization(input=conv2)
            pool2 = self.__max_pool(input=norm2, **pool_3x3_s2)

        # Layer 3: 13x13x256
        #   -> conv 3x3, 384 filters, stride 1, same -> 13x13x384 (+ ReLU)
        # 384 = 192*2 (split across the two GPUs in the article).
        with tf.name_scope('layer3'):
            conv3 = self.__conv(input=pool2,
                                filters_count=384,
                                init_biases_with_the_constant_1=False,
                                **conv_3x3_s1)

        # Layer 4: 13x13x384
        #   -> conv 3x3, 384 filters, stride 1, same -> 13x13x384 (+ ReLU)
        # 384 = 192*2 (split across the two GPUs in the article).
        with tf.name_scope('layer4'):
            conv4 = self.__conv(input=conv3,
                                filters_count=384,
                                init_biases_with_the_constant_1=True,
                                **conv_3x3_s1)

        # Layer 5: 13x13x384
        #   -> conv 3x3, 256 filters, stride 1, same -> 13x13x256 (+ ReLU)
        #   -> max-pool 3x3, stride 2, valid         -> 6x6x256
        # 256 = 128*2 (split across the two GPUs in the article).
        with tf.name_scope('layer5'):
            conv5 = self.__conv(input=conv4,
                                filters_count=256,
                                init_biases_with_the_constant_1=True,
                                **conv_3x3_s1)
            pool5 = self.__max_pool(input=conv5, **pool_3x3_s2)

        # Layer 6: 6x6x256 = 9216 -> fully connected 4096 -> ReLU -> dropout.
        # 4096 = 2048*2 (split across the two GPUs in the article).
        with tf.name_scope('layer6'):
            pool5_dims = pool5.get_shape().as_list()
            flat_size = pool5_dims[1] * pool5_dims[2] * pool5_dims[3]
            flat_pool5 = tf.reshape(pool5, shape=[-1, flat_size])
            fc6 = self.__fully_connected(
                input=flat_pool5,
                inputs_count=flat_size,
                outputs_count=4096,
                relu=True,
                init_biases_with_the_constant_1=True)
            drop6 = self.__dropout(input=fc6)

        # Layer 7: 4096 -> fully connected 4096 -> ReLU -> dropout.
        # 4096 = 2048*2 (split across the two GPUs in the article).
        with tf.name_scope('layer7'):
            fc7 = self.__fully_connected(
                input=drop6,
                inputs_count=4096,
                outputs_count=4096,
                relu=True,
                init_biases_with_the_constant_1=True)
            drop7 = self.__dropout(input=fc7)

        # Layer 8: 4096 -> logits, one per class (1000 by default), no ReLU.
        with tf.name_scope('layer8'):
            logits = self.__fully_connected(input=drop7,
                                            inputs_count=4096,
                                            outputs_count=self.num_classes,
                                            relu=False,
                                            name='logits')

        # Per-example softmax cross-entropy, with summaries attached.
        with tf.name_scope('cross_entropy'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits, labels=self.Y, name='cross_entropy')
            self.__variable_summaries(cross_entropy)

        # Mean loss, momentum optimizer and per-variable gradient summaries.
        with tf.name_scope('training'):
            loss_operation = tf.reduce_mean(cross_entropy,
                                            name='loss_operation')
            tf.summary.scalar(name='loss', tensor=loss_operation)

            optimizer = tf.train.MomentumOptimizer(
                learning_rate=self.learning_rate, momentum=self.momentum)

            # compute_gradients + apply_gradients (rather than minimize) so
            # the gradients can be summarised below.
            grads_and_vars = optimizer.compute_gradients(loss_operation)
            self.training_operation = optimizer.apply_gradients(
                grads_and_vars, name='training_operation')

            for grad, var in grads_and_vars:
                if grad is None:
                    continue
                with tf.name_scope(var.op.name + '/gradients'):
                    self.__variable_summaries(grad)

        # Fraction of examples whose arg-max logit matches the arg-max label.
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(logits, 1),
                                          tf.argmax(self.Y, 1),
                                          name='correct_prediction')
            self.accuracy_operation = tf.reduce_mean(
                tf.cast(correct_prediction, tf.float32),
                name='accuracy_operation')
            tf.summary.scalar(name='accuracy', tensor=self.accuracy_operation)
Ejemplo n.º 10
0
def build_model():
    """Build the move/spawn prediction graph and register it in collections.

    The network has five conv + max-pool stages going down from 32x32 to 1x1,
    a dense bottleneck that also receives the embedded ``turns_left`` input,
    and five transposed-conv stages going back up, each concatenated with the
    activation of the matching downward stage. Placeholders, logits, the
    loss and the optimizer op are stored via ``tf.add_to_collection``; the
    function itself returns ``None``.
    """
    size = 8  # One channel/unit count everywhere, for easier debugging.
    max_s = [1, 2, 2, 1]  # Window and stride of every max-pool.
    learning_rate = 0.0001

    # Per-cell features: halite_available, others_ship, cargo, self_shipyard.
    frames = tf.placeholder(tf.float32, [None, 32, 32, 4], name="frames")
    turns_left = tf.placeholder(tf.float32, [None, 1], name="turnsleft")
    my_ships = tf.placeholder(tf.float32, [None, 32, 32, 1], name="myships")
    my_ships = tf.cast(my_ships, tf.float32)
    moves = tf.placeholder(tf.uint8, [None, 32, 32, 1], name="moves")
    spawn = tf.placeholder(tf.float32, [None, 1], name="spawn")

    graph_inputs = (
        ('frames', frames),
        ('turns_left', turns_left),
        ('my_ships', my_ships),
        ('moves', moves),
        ('spawn', spawn),
    )
    for key, tensor in graph_inputs:
        tf.add_to_collection(key, tensor)

    moves = tf.one_hot(moves, 6)

    # Embed turns_left and give it two singleton spatial axes so it can be
    # concatenated with the 1x1 feature map at the bottleneck.
    tl = tf.layers.dense(turns_left, size)
    tl = tf.expand_dims(tf.expand_dims(tl, 1), 1)

    # Downward path: conv (3x3, same, ReLU) then 2x2 max-pool.
    # Spatial size after each pool: 16, 8, 4, 2, 1.
    skips = []
    pooled = frames
    for _ in range(5):
        activated = tf.layers.conv2d(pooled,
                                     size,
                                     3,
                                     activation=tf.nn.relu,
                                     padding='same')
        pooled = tf.nn.max_pool(activated, max_s, max_s, padding='VALID')
        skips.append(activated)

    final_state = tf.concat([pooled, tl], -1)
    latent = tf.layers.dense(final_state, size, activation=tf.nn.relu)

    # Upward path: stride-2 transposed conv, concat with the matching
    # downward activation, then conv. Spatial size: 2, 4, 8, 16, 32.
    decoded = latent
    for skip in reversed(skips):
        upsampled = tf.layers.conv2d_transpose(decoded,
                                               size,
                                               3,
                                               2,
                                               activation=tf.nn.relu,
                                               padding='same')
        merged = tf.concat([upsampled, skip], -1)
        decoded = tf.layers.conv2d(merged,
                                   size,
                                   3,
                                   activation=tf.nn.relu,
                                   padding='same')

    # Spawn head: one logit from the bottleneck, spatial axes squeezed away.
    spawn_logits = tf.layers.dense(latent, 1, activation=None)
    spawn_logits = tf.squeeze(spawn_logits, [1, 2])

    # Move head: six logits per cell.
    moves_logits = tf.layers.conv2d(decoded,
                                    6,
                                    3,
                                    activation=None,
                                    padding='same')

    tf.add_to_collection('m_logits', moves_logits)
    tf.add_to_collection('s_logits', spawn_logits)

    losses = tf.nn.softmax_cross_entropy_with_logits_v2(labels=moves,
                                                        logits=moves_logits,
                                                        dim=-1)
    losses = tf.expand_dims(losses, -1)

    # Only cells flagged in my_ships contribute to the move loss.
    masked_loss = losses * my_ships
    ships_per_frame = tf.reduce_sum(my_ships, axis=[1, 2])
    frame_loss = tf.reduce_sum(masked_loss, axis=[1, 2])
    # The small constant avoids dividing by zero: first frames have no ship.
    average_frame_loss = frame_loss / (ships_per_frame + 0.00000001)

    spawn_losses = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=spawn,
                                                logits=spawn_logits))

    loss = tf.reduce_mean(average_frame_loss) + 0.01 * spawn_losses
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    tf.add_to_collection('loss', loss)
    tf.add_to_collection('optimizer', optimizer)

    return
Ejemplo n.º 11
0
    # One gradient-descent step (learning rate 0.01) on the cross-entropy.
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(
        cross_entropy)

    sess = tf.Session()

    # Train
    # tf.initialize_all_variables() is deprecated; this is its replacement.
    init = tf.global_variables_initializer()
    sess.run(init)

    # 1000 steps on mini-batches of 100 examples.
    for i in range(1000):
        batch_xs, batch_ys = mnist.train.next_batch(100)
        train_step.run({x: batch_xs, y_: batch_ys}, sess)

    # Test trained model: share of test examples whose arg-max prediction
    # equals the arg-max label.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    print(accuracy.eval({x: mnist.test.images, y_: mnist.test.labels}, sess))

# Evaluate the variables W and b in the session and keep the resulting
# values, so they remain available after the session is closed.
_W = W.eval(sess)
_b = b.eval(sess)

sess.close()

# Create a new, separate graph for exporting.
g_2 = tf.Graph()
with g_2.as_default():
    # Reconstruct the graph: a fresh input placeholder named "input", and
    # the evaluated value of W stored as a constant.
    x_2 = tf.placeholder("float", [None, 784], name="input")
    W_2 = tf.constant(_W, name="constant_W")
Ejemplo n.º 12
0
def main():
    """Fine-tune a GPT-2 checkpoint on a text dataset, driven by CLI arguments.

    Parses arguments with the module-level ``parser``, builds the training
    graph (and a validation graph when ``args.val_every > 0``), restores a
    checkpoint, then runs the training loop. During the loop it periodically
    saves checkpoints, writes generated samples, computes validation loss and
    logs summaries. A ``KeyboardInterrupt`` saves a final checkpoint.

    Raises:
        ValueError: if ``args.sample_length`` exceeds ``hparams.n_ctx``.
        SystemExit: if ``args.optimizer`` is not ``'adam'`` or ``'sgd'``, or
            if memory-saving gradients are requested together with gradient
            accumulation.
    """
    args = parser.parse_args()
    enc = encoder.get_encoder(CHECKPOINT_DIR, args.model_name)
    hparams = model.default_hparams()
    with open(os.path.join(CHECKPOINT_DIR, args.model_name,
                           'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if args.sample_length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    # For the 345M model with Adam, restrict training to the transformer
    # layers (see the `train_vars` filter below).
    if args.model_name == '345M' and args.optimizer == 'adam':
        args.only_train_transformer_layers = True

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.graph_options.rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.OFF
    with tf.Session(config=config) as sess:
        context = tf.placeholder(tf.int32, [args.batch_size, None])
        context_in = randomize(context, hparams, args.noise)
        output = model.model(hparams=hparams, X=context_in)
        # Next-token objective: logits at position t are scored against the
        # token at position t + 1.
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))

        if args.val_every > 0:
            val_context = tf.placeholder(tf.int32, [args.val_batch_size, None])
            val_output = model.model(hparams=hparams, X=val_context)
            val_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=val_context[:, 1:],
                    logits=val_output['logits'][:, :-1]))
            val_loss_summary = tf.summary.scalar('val_loss', val_loss)

        tf_sample = sample.sample_sequence(hparams=hparams,
                                           length=args.sample_length,
                                           context=context,
                                           batch_size=args.batch_size,
                                           temperature=1.0,
                                           top_k=args.top_k,
                                           top_p=args.top_p)

        all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
        train_vars = [v for v in all_vars if '/h' in v.name
                      ] if args.only_train_transformer_layers else all_vars

        if args.optimizer == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        elif args.optimizer == 'sgd':
            opt = tf.train.GradientDescentOptimizer(
                learning_rate=args.learning_rate)
        else:
            # exit() accepts a single argument, so the message is formatted
            # into one string before being passed.
            exit('Bad optimizer: {}'.format(args.optimizer))

        if args.accumulate_gradients > 1:
            if args.memory_saving_gradients:
                exit(
                    "Memory saving gradients are not implemented for gradient accumulation yet."
                )
            opt = AccumulatingOptimizer(opt=opt, var_list=train_vars)
            opt_reset = opt.reset()
            opt_compute = opt.compute_gradients(loss)
            opt_apply = opt.apply_gradients()
            # In this branch the value produced by `opt_apply` is what gets
            # logged and printed as the loss.
            summary_loss = tf.summary.scalar('loss', opt_apply)
        else:
            if args.memory_saving_gradients:
                opt_grads = memory_saving_gradients.gradients(loss, train_vars)
            else:
                opt_grads = tf.gradients(loss, train_vars)
            opt_grads = list(zip(opt_grads, train_vars))
            opt_apply = opt.apply_gradients(opt_grads)
            summary_loss = tf.summary.scalar('loss', loss)

        summary_lr = tf.summary.scalar('learning_rate', args.learning_rate)
        summaries = tf.summary.merge([summary_lr, summary_loss])

        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))

        saver = tf.train.Saver(var_list=all_vars,
                               max_to_keep=5,
                               keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())

        if args.restore_from == 'latest':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.run_name))
            if ckpt is None:
                # Get fresh GPT weights if new run.
                ckpt = tf.train.latest_checkpoint(
                    os.path.join(CHECKPOINT_DIR, args.model_name))
        elif args.restore_from == 'fresh':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.model_name))
        else:
            ckpt = tf.train.latest_checkpoint(args.restore_from)
        print('Loading checkpoint', ckpt)
        saver.restore(sess, ckpt)

        print('Loading dataset...')
        chunks = load_dataset(enc,
                              args.dataset,
                              args.combine,
                              encoding=args.encoding)
        data_sampler = Sampler(chunks)
        if args.val_every > 0:
            if args.val_dataset:
                val_chunks = load_dataset(enc,
                                          args.val_dataset,
                                          args.combine,
                                          encoding=args.encoding)
            else:
                # No separate validation set: validate on the training data.
                val_chunks = chunks
        print('dataset has', data_sampler.total_size, 'tokens')
        print('Training...')

        if args.val_every > 0:
            # Sample from validation set once with fixed seed to make
            # it deterministic during training as well as across runs.
            val_data_sampler = Sampler(val_chunks, seed=1)
            val_batches = [[
                val_data_sampler.sample(1024)
                for _ in range(args.val_batch_size)
            ] for _ in range(args.val_batch_count)]

        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r') as fp:
                counter = int(fp.read()) + 1

        def save():
            """Write a checkpoint for the current step and record the step."""
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
                       global_step=counter)
            with open(counter_path, 'w') as fp:
                fp.write(str(counter) + '\n')

        def generate_samples():
            """Generate `args.sample_num` samples and write them to a file."""
            print('Generating samples...')
            context_tokens = data_sampler.sample(1)
            all_text = []
            index = 0
            while index < args.sample_num:
                out = sess.run(
                    tf_sample,
                    feed_dict={context: args.batch_size * [context_tokens]})
                for i in range(min(args.sample_num - index, args.batch_size)):
                    text = enc.decode(out[i])
                    text = '======== SAMPLE {} ========\n{}\n'.format(
                        index + 1, text)
                    all_text.append(text)
                    index += 1
            # Echo only the last sample; guarded so that a non-positive
            # sample_num does not hit an unbound name.
            if all_text:
                print(all_text[-1])
            maketree(os.path.join(SAMPLE_DIR, args.run_name))
            with open(os.path.join(SAMPLE_DIR, args.run_name,
                                   'samples-{}').format(counter),
                      'w',
                      encoding=args.encoding) as fp:
                fp.write('\n'.join(all_text))

        def validation():
            """Average the loss over the fixed validation batches and log it."""
            print('Calculating validation loss...')
            losses = []
            for batch in tqdm.tqdm(val_batches):
                losses.append(
                    sess.run(val_loss, feed_dict={val_context: batch}))
            v_val_loss = np.mean(losses)
            # Feed the averaged value in place of the `val_loss` tensor so the
            # summary records the mean over all validation batches.
            v_summary = sess.run(val_loss_summary,
                                 feed_dict={val_loss: v_val_loss})
            summary_log.add_summary(v_summary, counter)
            summary_log.flush()
            print('[{counter} | {time:2.2f}] validation loss = {loss:2.2f}'.
                  format(counter=counter,
                         time=time.time() - start_time,
                         loss=v_val_loss))

        def sample_batch():
            """Return `args.batch_size` training samples of 1024 tokens each."""
            return [data_sampler.sample(1024) for _ in range(args.batch_size)]

        # Running (weighted sum, weight total) pair with decay 0.99; the
        # printed average is their ratio.
        avg_loss = (0.0, 0.0)
        start_time = time.time()

        try:
            # Training stops once the step counter reaches 1000.
            while counter < 1000:
                if counter % args.save_every == 0:
                    save()
                if counter % args.sample_every == 0:
                    generate_samples()
                if args.val_every > 0 and (counter % args.val_every == 0
                                           or counter == 1):
                    validation()

                if args.accumulate_gradients > 1:
                    sess.run(opt_reset)
                    for _ in range(args.accumulate_gradients):
                        sess.run(opt_compute,
                                 feed_dict={context: sample_batch()})
                    (v_loss, v_summary) = sess.run((opt_apply, summaries))
                else:
                    (_, v_loss, v_summary) = sess.run(
                        (opt_apply, loss, summaries),
                        feed_dict={context: sample_batch()})

                summary_log.add_summary(v_summary, counter)

                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))

                counter += 1
        except KeyboardInterrupt:
            print('interrupted')
            save()
        finally:
            # Flush and release the event file so buffered summaries are not
            # lost when the function returns.
            summary_log.close()
def gain(data_x, gain_parameters):
    '''Impute missing values in data_x

  Args:
    - data_x: original data with missing values (NaN marks a missing entry)
    - gain_parameters: GAIN network parameters:
      - batch_size: Batch size
      - hint_rate: Hint rate
      - alpha: Hyperparameter (weight of the MSE term in the generator loss)
      - iterations: Iterations

  Returns:
    - imputed_data: imputed data
  '''
    # Define mask matrix: 1 where a value is observed, 0 where it is NaN
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    # NaN entries become 0.0 (the nan_to_num default). The original passed a
    # positional 0 here, which is numpy's `copy` flag, not the fill value.
    norm_data_x = np.nan_to_num(norm_data)

    ## GAIN architecture
    # Input placeholders
    # Data vector
    tf.disable_v2_behavior()
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs

    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (Random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))

    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)

    # Combine with observed data: keep observed entries, use generated values
    # only where the mask is 0
    Hat_X = X * M + G_sample * (1 - M)

    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss
    # The 1e-8 terms keep the logarithms finite when D_prob hits 0 or 1.
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8) \
                                  + (1-M) * tf.log(1. - D_prob + 1e-8))

    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))

    # Reconstruction error on the observed entries only
    MSE_loss = \
    tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)

    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    # The context manager closes the session (and frees its resources) once
    # training and imputation are done, even if an error is raised.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Start Iterations
        for it in tqdm(range(iterations)):

            # Sample batch
            batch_idx = sample_batch_index(no, batch_size)
            X_mb = norm_data_x[batch_idx, :]
            M_mb = data_m[batch_idx, :]
            # Sample random vectors
            Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
            # Sample hint vectors
            H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
            H_mb = M_mb * H_mb_temp

            # Combine random vectors with observed vectors
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

            # One discriminator step followed by one generator step on the
            # same mini-batch.
            feed = {X: X_mb, M: M_mb, H: H_mb}
            sess.run(D_solver, feed_dict=feed)
            sess.run(G_solver, feed_dict=feed)

        ## Return imputed data
        Z_mb = uniform_sampler(0, 0.01, no, dim)
        M_mb = data_m
        X_mb = norm_data_x
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]

    # Keep the observed values; take generator output only for missing ones
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
Ejemplo n.º 14
0
def main(trainModel=True,
         buildConfusionMatrix=True,
         restore=False,
         buildClassifiedMatrix=True):
    """Build a two-conv-layer classifier, optionally train it, and report.

    Args:
        trainModel: when true, call ``train(...)`` on the built graph.
        buildConfusionMatrix: when true, compute the confusion matrix on the
            test split and pass it to ``draw_confusion_matrix``.
        restore: when true, load saved weights from ``SAVE_PATH`` after
            variable initialization.
        buildClassifiedMatrix: when true, find for every (true class,
            predicted class) pair the test image with the highest predicted
            probability and pass the result to ``draw_max_failure_pictures``.
    """
    tf.disable_v2_behavior()

    input_images = tf.placeholder(tf.float32, [None, 28, 28], name="Input")
    real = tf.placeholder(tf.float32, [None, CLASSES], name="real_classes")

    layer1 = create_conv_layer(tf.reshape(input_images, [-1, 28, 28, 1]),
                               1,
                               28, [5, 5], [2, 2],
                               name="conv_no_pool")
    layer2 = create_conv_layer(layer1,
                               28,
                               56, [5, 5], [2, 2],
                               name='conv_with_pool')
    # Flatten the second conv output into one 7 * 7 * 56 vector per image.
    conv_result = tf.reshape(layer2, [-1, 7 * 7 * 56])

    relu_layer_weight = tf.Variable(tf.truncated_normal([7 * 7 * 56, 1000],
                                                        stddev=STDDEV * 2),
                                    name='relu_layer_weight')
    rely_layer_bias = tf.Variable(tf.truncated_normal([1000],
                                                      stddev=STDDEV / 2),
                                  name='rely_layer_bias')
    relu_layer = tf.matmul(conv_result, relu_layer_weight) + rely_layer_bias
    relu_layer = tf.nn.relu(relu_layer)
    # Dropout keep probability. It defaults to DROPOUT, so train() needs no
    # extra feed, while the evaluation runs below feed 1.0 to switch dropout
    # off and get deterministic predictions.
    keep_prob = tf.placeholder_with_default(
        tf.constant(DROPOUT, dtype=tf.float32), shape=[], name="keep_prob")
    relu_layer = tf.nn.dropout(relu_layer, keep_prob)

    final_layer_weight = tf.Variable(tf.truncated_normal([1000, CLASSES],
                                                         stddev=STDDEV * 2),
                                     name='final_layer_weight')
    final_layer_bias = tf.Variable(tf.truncated_normal([CLASSES],
                                                       stddev=STDDEV / 2),
                                   name='final_layer_bias')
    final_layer = tf.matmul(relu_layer, final_layer_weight) + final_layer_bias

    predicts = tf.nn.softmax(final_layer)
    # Clip away exact 0 and 1 so neither logarithm in the loss is log(0).
    predicts_for_log = tf.clip_by_value(predicts, 1e-9, 0.999999999)

    # Per-class binary cross-entropy on the clipped softmax outputs, summed
    # over classes and averaged over the batch.
    loss = -tf.reduce_mean(
        tf.reduce_sum(real * tf.log(predicts_for_log) +
                      (1 - real) * tf.log(1 - predicts_for_log),
                      axis=1),
        axis=0)
    optimiser = tf.train.GradientDescentOptimizer(
        learning_rate=LEARNING_RATE).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(real, axis=1),
                                  tf.argmax(predicts, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    confusion_matrix = tf.confusion_matrix(labels=tf.argmax(real, axis=1),
                                           predictions=tf.argmax(predicts,
                                                                 axis=1),
                                           num_classes=CLASSES)

    saver = tf.train.Saver()

    dataset = get_fashion_dataset()

    with tf.Session() as session:

        session.run(tf.global_variables_initializer())

        if restore:
            saver.restore(session, SAVE_PATH)

        if trainModel:
            train(input_images, real, session, optimiser, loss, accuracy,
                  saver, dataset)

        # Shared feed for the evaluation runs: whole test split, no dropout.
        eval_feed = {
            input_images: dataset.test_x,
            real: dataset.test_y,
            keep_prob: 1.0
        }

        if buildConfusionMatrix:
            test_cm = session.run(confusion_matrix, feed_dict=eval_feed)
            draw_confusion_matrix(test_cm)

        if buildClassifiedMatrix:
            all_probs = session.run(predicts, feed_dict=eval_feed)
            # Entry [t][p] holds (image index, probability) of the test image
            # with true class t that got the highest probability for class p;
            # (-1, -1.0) means no image has been seen yet.
            max_failure_picture_index = [[(-1, -1.0)] * CLASSES
                                         for _ in range(CLASSES)]
            for i, (probs, one_hot) in enumerate(
                    zip(all_probs, dataset.test_y)):
                # Named `true_class` so it does not shadow the `real`
                # placeholder defined above.
                true_class = np.argmax(one_hot)
                for j in range(CLASSES):
                    if max_failure_picture_index[true_class][j][1] < probs[j]:
                        max_failure_picture_index[true_class][j] = (i,
                                                                    probs[j])
            draw_max_failure_pictures(dataset.test_x,
                                      max_failure_picture_index)
Ejemplo n.º 15
0
    def __init__(self, nHidden, seqLen):
        self.representation_score = {}
        self.y = tf.placeholder(tf.float32, shape=[None, 1])
        self.extractFeature = ExtractFeature.ExtractFeature()
        self.imageFeature = ImageFeature.ImageFeature()
        newNet = tf.reduce_mean(self.imageFeature.outputLS, axis=0)
        self.textFeature = TextFeature.TextFeature(
            nHidden, seqLen, self.extractFeature.finalState, newNet)
        self.l2_para = 1e-7
        with tf.variable_scope("training_variable"):

            self.weights = {
                "MLP1":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 256],
                                        stddev=0.08,
                                        name="MLP1_W")),
                "MLP2":
                tf.Variable(
                    tf.truncated_normal(shape=[256, 1],
                                        stddev=0.08,
                                        name="MLP2_W")),
                "ATT_attr1_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.extractFeature.embSize,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_attr1_1")),
                "ATT_attr1_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.textFeature.nHidden * 2 +
                        self.extractFeature.embSize,
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_attr1_2")),
                "ATT_attr1_3":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        2 * self.extractFeature.embSize,
                        self.extractFeature.embSize
                    ],
                                        stddev=0.08,
                                        name="ATT_attr1_3")),
                "ATT_attr2_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_attr2_1")),
                "ATT_attr2_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_attr2_2")),
                "ATT_attr2_3":
                tf.Variable(
                    tf.truncated_normal(shape=[self.extractFeature.embSize, 1],
                                        stddev=0.08,
                                        name="ATT_attr2_3")),
                "ATT_img1_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.textFeature.nHidden * 2,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden)
                    ],
                                        stddev=0.08,
                                        name="ATT_image1_1")),
                "ATT_img1_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.extractFeature.embSize,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_image1_2")),
                "ATT_img1_3":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize * 2,
                        self.imageFeature.defaultFeatureSize
                    ],
                                        stddev=0.08,
                                        name="ATT_image1_3")),
                "ATT_img2_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_image2_1")),
                "ATT_img2_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_image2_2")),
                "ATT_img2_3":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.imageFeature.defaultFeatureSize, 1],
                        stddev=0.08,
                        name="ATT_image2_3")),
                "ATT_text1_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.textFeature.nHidden * 2,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden)
                    ],
                                        stddev=0.08,
                                        name="ATT_text1_1")),
                "ATT_text1_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.textFeature.nHidden * 2 +
                        self.extractFeature.embSize,
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_text1_2")),
                "ATT_text1_3":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.textFeature.nHidden * 4,
                        self.textFeature.nHidden * 2
                    ],
                                        stddev=0.08,
                                        name="ATT_text1_3")),
                "ATT_text2_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_text2_1")),
                "ATT_text2_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_text2_2")),
                "ATT_text2_3":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.textFeature.nHidden * 2, 1],
                        stddev=0.08,
                        name="ATT_text2_3")),
                "ATT_WI1":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.imageFeature.defaultFeatureSize, 512],
                        stddev=0.08,
                        name="ATT_WI")),
                "ATT_WT1":
                tf.Variable(
                    tf.truncated_normal(shape=[2 * nHidden, 512],
                                        stddev=0.08,
                                        name="ATT_WT")),
                "ATT_WA1":
                tf.Variable(
                    tf.truncated_normal(shape=[200, 512],
                                        stddev=0.08,
                                        name="ATT_WA")),
                "ATT_WI2":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.imageFeature.defaultFeatureSize, 512],
                        stddev=0.08,
                        name="ATT_WI2")),
                "ATT_WT2":
                tf.Variable(
                    tf.truncated_normal(shape=[2 * nHidden, 512],
                                        stddev=0.08,
                                        name="ATT_WT2")),
                "ATT_WA2":
                tf.Variable(
                    tf.truncated_normal(shape=[200, 512],
                                        stddev=0.08,
                                        name="ATT_WA2")),
                "ATT_WF_1":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 1],
                                        stddev=0.08,
                                        name="ATT_WF_1")),
                "ATT_WF_2":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 1],
                                        stddev=0.08,
                                        name="ATT_WF_2")),
                "ATT_WF_3":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 1],
                                        stddev=0.08,
                                        name="ATT_WF_3")),
            }
            self.biases = {
                "MLP1":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[256],
                                dtype=tf.float32,
                                name="MLP1_b")),
                "MLP2":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[1],
                                dtype=tf.float32,
                                name="MLP2_b")),
                "ATT_attr1_1":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.extractFeature.embSize / 2)
                        ],
                        name="ATT_attr1_1")),
                "ATT_attr1_2":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[
                                    int(self.textFeature.nHidden +
                                        self.extractFeature.embSize / 2)
                                ],
                                name="ATT_attr1_2")),
                "ATT_attr1_3":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[self.extractFeature.embSize],
                                name="ATT_attr1_3")),
                "ATT_attr2_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_1")),
                "ATT_attr2_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_2")),
                "ATT_attr2_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_3")),
                "ATT_img1_1":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.textFeature.nHidden)
                        ],
                        name="ATT_image1_1")),
                "ATT_img1_2":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.extractFeature.embSize / 2)
                        ],
                        name="ATT_image1_2")),
                "ATT_img1_3":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[self.imageFeature.defaultFeatureSize],
                                name="ATT_image1_3")),
                "ATT_img2_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_1")),
                "ATT_img2_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_2")),
                "ATT_img2_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_3")),
                "ATT_text1_1":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.textFeature.nHidden)
                        ],
                        name="ATT_text1_1")),
                "ATT_text1_2":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[
                                    int(self.textFeature.nHidden +
                                        self.extractFeature.embSize / 2)
                                ],
                                name="ATT_text1_2")),
                "ATT_text1_3":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[self.textFeature.nHidden * 2],
                                name="ATT_text1_3")),
                "ATT_text2_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_1")),
                "ATT_text2_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_2")),
                "ATT_text2_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_3")),
                "ATT_WW":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WW")),
                "ATT_WI":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WI")),
                "ATT_WT":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WT")),
                "ATT_WI1":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WI1")),
                "ATT_WT1":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WT1")),
                "ATT_WA":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WA")),
                "ATT_WF_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_1")),
                "ATT_WF_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_2")),
                "ATT_WF_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_3")),
            }
        print("newnet dimension :", newNet)

        imageVec = self.Attention(newNet, self.imageFeature.outputLS,
                                  self.textFeature.RNNState,
                                  self.extractFeature.finalState, "ATT_img1",
                                  "ATT_img2", 196, True)
        textVec = self.Attention(self.textFeature.RNNState,
                                 self.textFeature.outputs, newNet,
                                 self.extractFeature.finalState, "ATT_text1",
                                 "ATT_text2", self.textFeature.seqLen, False)
        attrVec = self.Attention(self.extractFeature.finalState,
                                 self.extractFeature.inputEmb, newNet,
                                 self.textFeature.RNNState, "ATT_attr1",
                                 "ATT_attr2", 5, False)

        attHidden = tf.tanh(
            tf.matmul(imageVec, self.weights["ATT_WI1"]) +
            self.biases["ATT_WI1"])
        attHidden2 = tf.tanh(
            tf.matmul(textVec, self.weights["ATT_WT1"]) +
            self.biases["ATT_WT1"])
        attHidden3 = tf.tanh(
            tf.matmul(attrVec, self.weights["ATT_WA1"]) +
            self.biases["ATT_WW"])
        scores1 = tf.matmul(attHidden,
                            self.weights["ATT_WF_1"]) + self.biases["ATT_WF_1"]
        scores2 = tf.matmul(attHidden2,
                            self.weights["ATT_WF_2"]) + self.biases["ATT_WF_2"]
        scores3 = tf.matmul(attHidden3,
                            self.weights["ATT_WF_3"]) + self.biases["ATT_WF_3"]
        scoreLS = [scores1, scores2, scores3]
        scoreLS = tf.nn.softmax(scoreLS, dim=0)
        imageVec = tf.tanh(
            tf.matmul(imageVec, self.weights["ATT_WI2"]) +
            self.biases["ATT_WI"])
        textVec = tf.tanh(
            tf.matmul(textVec, self.weights["ATT_WT2"]) +
            self.biases["ATT_WT"])
        attrVec = tf.tanh(
            tf.matmul(attrVec, self.weights["ATT_WA2"]) +
            self.biases["ATT_WA"])
        self.concatInput = scoreLS[0] * imageVec + scoreLS[
            1] * textVec + scoreLS[2] * attrVec