Ejemplo n.º 1
0
 def _build_net(self, c_name):
     """Assemble one copy of the network and return its output tensors.

     Four groups of layers are created through ``_build_layer``:
     ``f_theta`` maps ``self.state`` to a 512-wide feature tensor,
     ``g_theta_`` maps that tensor back to ``self.n_state`` units,
     ``R_s`` puts a single unit without activation on the feature tensor,
     and ``mu_alpha`` adds one three-layer branch per action.

     Args:
         c_name: collections argument forwarded to every ``_build_layer``
             call.

     Returns:
         ``(features, s_hat, r_s, M)`` where ``M`` is a NumPy object array
         with ``self.n_action`` rows; row ``i`` is assigned the output of
         the ``i``-th ``mu_alpha`` branch.
     """
     # The 1-D state is one-hot encoded, so the input has shape 1*n_state.
     encoder_spec = (
         (self.n_state, 128, 'l1'),
         (128, 512, 'l2'),
         (512, 1024, 'l3'),
         (1024, 512, 'fai_s'),
     )
     decoder_spec = (
         (512, 1024, 'dl1'),
         (1024, 512, 'dl2'),
         (512, 128, 'dl3'),
         (128, self.n_state, 'dl4'),
     )

     with tf.variable_scope('f_theta'):
         features = self.state
         for n_in, n_out, tag in encoder_spec:
             features = _build_layer(n_in, n_out, features, c_name, tag)

     with tf.variable_scope('g_theta_'):
         s_hat = features
         for n_in, n_out, tag in decoder_spec:
             s_hat = _build_layer(n_in, n_out, s_hat, c_name, tag)

     with tf.variable_scope('R_s'):
         r_s = _build_layer(512, 1, features, c_name, 'r',
                            has_activate=False)

     with tf.variable_scope("mu_alpha"):
         # NOTE(review): the row width comes from self.fai_s_size while the
         # branch layers below hard-code 512 -- confirm they always agree.
         M = np.stack([
             np.zeros(self.fai_s_size, object)
             for _ in range(self.n_action)
         ])
         for action in range(self.n_action):
             branch = _build_layer(512, 512, features, c_name,
                                   'm%s-1' % action,
                                   'miu_alpha_l1_w', 'miu_alpha_l1_b')
             branch = _build_layer(512, 256, branch, c_name,
                                   'm%s-2' % action,
                                   'miu_alpha_l2_w', 'miu_alpha_l2_b')
             branch = _build_layer(256, 512, branch, c_name,
                                   'm%s-3' % action,
                                   'miu_alpha_l3_w', 'miu_alpha_l3_b')
             M[action] = branch
     return features, s_hat, r_s, M
Ejemplo n.º 2
0
 def build_layer(s, c_names, n_l1, n_l2, w_initializer, b_initializer):
     """Stack three fully connected ReLU layers on top of ``s``.

     The layer widths are ``self.n_features -> n_l1 -> n_l2 ->
     self.n_actions``; ``self`` is not a parameter and is resolved from the
     enclosing scope.  Layer ``k`` lives in variable scope ``l<k>`` and owns
     the variables ``w<k>`` and ``b<k>``, all registered in ``c_names``.

     Args:
         s: input tensor fed to the first matmul.
         c_names: collections passed to every ``tf.get_variable`` call.
         n_l1: width of the first layer.
         n_l2: width of the second layer.
         w_initializer: initializer for the weight matrices.
         b_initializer: initializer for the bias rows.

     Returns:
         Output tensor of the third layer (ReLU is applied there as well).
     """
     widths = [self.n_features, n_l1, n_l2, self.n_actions]
     activation = s
     for idx, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
         with tf.variable_scope('l%d' % idx):
             weight = tf.get_variable('w%d' % idx, [n_in, n_out],
                                      initializer=w_initializer,
                                      collections=c_names)
             bias = tf.get_variable('b%d' % idx, [1, n_out],
                                    initializer=b_initializer,
                                    collections=c_names)
             activation = tf.nn.relu(tf.matmul(activation, weight) + bias)
     return activation
Ejemplo n.º 3
0
    def _build_net(self):
        """Create the evaluation net, its loss/train ops and the target net.

        Sets ``self.state``, ``self.q_target``, ``self.q_eval``,
        ``self.loss``, ``self._train_op``, ``self.state_`` and
        ``self.q_next``.  Both nets share the same layer sizes and
        initializers but register their variables in different collections
        (``eval_net_params`` / ``target_net_params``).
        """

        def build_layer(s, c_names, n_l1, n_l2, w_initializer, b_initializer):
            """Three ReLU layers: n_features -> n_l1 -> n_l2 -> n_actions."""

            def dense(tag, inputs, n_in, n_out):
                # One fully connected ReLU layer in scope 'l<tag>'.
                with tf.variable_scope('l' + tag):
                    weight = tf.get_variable('w' + tag, [n_in, n_out],
                                             initializer=w_initializer,
                                             collections=c_names)
                    bias = tf.get_variable('b' + tag, [1, n_out],
                                           initializer=b_initializer,
                                           collections=c_names)
                    return tf.nn.relu(tf.matmul(inputs, weight) + bias)

            hidden1 = dense('1', s, self.n_features, n_l1)
            hidden2 = dense('2', hidden1, n_l1, n_l2)
            return dense('3', hidden2, n_l2, self.n_actions)

        # ---- evaluation net -------------------------------------------
        self.state = tf.placeholder(tf.float32, [None, self.n_features],
                                    name='state')
        # Expected network output, one value per action.
        self.q_target = tf.placeholder(tf.float32, [None, self.n_actions],
                                       name='q_target')

        with tf.variable_scope('eval_net'):
            c_names = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
            n_l1 = 64
            n_l2 = 64
            w_initializer = tf.random_normal_initializer(0.0, 0.3)
            b_initializer = tf.random_normal_initializer(0., 0.3)
            self.q_eval = build_layer(self.state, c_names, n_l1, n_l2,
                                      w_initializer, b_initializer)

        with tf.variable_scope('loss'):
            squared_error = tf.squared_difference(self.q_target, self.q_eval)
            self.loss = tf.reduce_mean(squared_error)

        with tf.variable_scope('train'):
            optimizer = tf.train.RMSPropOptimizer(self.learning_rate)
            self._train_op = optimizer.minimize(self.loss)

        # ---- target net -----------------------------------------------
        self.state_ = tf.placeholder(tf.float32, [None, self.n_features],
                                     name='state_')
        with tf.variable_scope('target_net'):
            c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
            self.q_next = build_layer(self.state_, c_names, n_l1, n_l2,
                                      w_initializer, b_initializer)
Ejemplo n.º 4
0
    def __init__(self,
                 n_state,
                 n_action,
                 learning_rate,
                 gamma,
                 replay_buffer_size=3000,
                 sess: tf.Session = None):
        """Set up hyper-parameters, placeholders and both networks.

        Args:
            n_state: width of the state placeholders.
            n_action: number of actions (one ``mu_alpha`` branch each).
            learning_rate: stored on the instance.
            gamma: stored on the instance.
            replay_buffer_size: number of rows in the replay buffer.
            sess: session to use; a new ``tf.Session`` is created when
                ``None``.

        Side effects:
            Recreates the ``./log`` directory, writes the graph there and
            runs the global variable initializer on ``self.sess``.
        """
        self.n_state = n_state
        self.n_action = n_action
        self.fai_s_size = 512
        # 1-D array with n_state entries.
        self.w = np.zeros([n_state])
        self.learning_rate = learning_rate
        self.gamma = gamma
        # Each row holds n_state * 2 + 2 values
        # (presumably state, action, reward, next state -- confirm).
        self.replay_buffer = np.zeros(
            [replay_buffer_size, self.n_state * 2 + 2])
        self.memory_size = replay_buffer_size
        self.memory_count = 0
        self.state = tf.placeholder(tf.float32, [None, self.n_state])
        self.state_hat = tf.placeholder(tf.float32, [None, self.n_state])
        self.state_ = tf.placeholder(tf.float32, [None, self.n_state])
        self.rs_p = tf.placeholder(tf.float32, [None, 1])
        if sess is None:
            self.sess = tf.Session()
        else:
            self.sess = sess
        self.eval_collection_name = [
            'eval_net_collection', tf.GraphKeys.GLOBAL_VARIABLES
        ]
        self.target_collection_name = [
            'target_net_collection', tf.GraphKeys.GLOBAL_VARIABLES
        ]

        # Start from an empty log directory.  A missing ./log (first run) is
        # fine; any other removal error is still raised.
        try:
            shutil.rmtree("./log")
        except FileNotFoundError:
            pass
        os.mkdir("./log")

        with tf.variable_scope('eval_net'):
            (self.eval_fai, self.eval_s_hat, self.eval_r_s,
             self.eval_M) = self._build_net(self.eval_collection_name)
        with tf.variable_scope('target_net'):
            # Stored under its own name so the evaluation net's feature
            # tensor in self.eval_fai is not overwritten.
            (self.target_fai, self.target_s_hat, self.target_r_s,
             self.target_M) = self._build_net(self.target_collection_name)

        # The copy ops must be created after both nets exist, otherwise the
        # collections are still empty and assign_op would be an empty list.
        with tf.variable_scope('assign_op'):
            e_params = tf.get_collection(self.eval_collection_name[0])
            t_params = tf.get_collection(self.target_collection_name[0])
            self.assign_op = [
                tf.assign(t, e) for t, e in zip(t_params, e_params)
            ]
        tf.summary.FileWriter("./log", self.sess.graph)
        self.sess.run(tf.global_variables_initializer())
Ejemplo n.º 5
0
 def build(self, guidence, newNet):
     """Build the bidirectional LSTM encoder over the embedded input.

     The initial cell/hidden states of both directions are ReLU projections
     of ``guidence`` through the ``Fw1``/``Fw2`` and ``Bw1``/``Bw2`` entries
     of ``self.weights`` and ``self.biases``.  The concatenated outputs of
     both directions are stored in ``self.outputs`` and their mean over
     axis 1 in ``self.RNNState``.

     Args:
         guidence: tensor multiplied with the ``[200, nHidden]`` init
             weights (assumes its last dimension is 200 -- confirm).
         newNet: not used by this method.
     """

     def project(key):
         # ReLU(guidence @ W[key] + b[key])
         return tf.nn.relu(
             tf.matmul(guidence, self.weights[key]) + self.biases[key])

     def make_cell():
         # LSTM cell with dropout applied to its inputs only.
         return tf.compat.v1.nn.rnn_cell.DropoutWrapper(
             tf.nn.rnn_cell.BasicLSTMCell(self.nHidden),
             input_keep_prob=self.pKeep,
             output_keep_prob=1.0)

     with tf.variable_scope("training_variable"):
         embedded = tf.nn.embedding_lookup(self.embedding, self.X)

         fw_state = tf.nn.rnn_cell.LSTMStateTuple(project("Fw1"),
                                                  project("Fw2"))
         bw_state = tf.nn.rnn_cell.LSTMStateTuple(project("Bw1"),
                                                  project("Bw2"))

         fw_cell = make_cell()
         bw_cell = make_cell()

         # The final states returned by the RNN are not needed here.
         outputs, _ = tf.nn.bidirectional_dynamic_rnn(
             cell_fw=fw_cell,
             cell_bw=bw_cell,
             inputs=embedded,
             initial_state_fw=fw_state,
             initial_state_bw=bw_state,
             dtype=tf.float32)

         merged = tf.concat(outputs, axis=2)
         self.outputs = merged
         self.RNNState = tf.reduce_mean(merged, axis=1)
Ejemplo n.º 6
0
 def __init__(self, nHidden, seqLen, guidence, newNet):
     """Create the embedding, trainable parameters and placeholders.

     Args:
         nHidden: LSTM hidden size; also sizes most weight matrices.
         seqLen: second dimension of the ``self.X`` placeholder.
         guidence: forwarded to ``self.build``.
         newNet: forwarded to ``self.build``.
     """
     self.nHidden = nHidden
     self.seqLen = seqLen
     self.embedding = tf.Variable(self.getEmbedding())

     def make_weight(rows, cols, op_name):
         # `name` labels the truncated_normal op, not the variable itself.
         return tf.Variable(
             tf.truncated_normal(shape=[rows, cols],
                                 stddev=0.08,
                                 name=op_name))

     def make_bias(op_name):
         return tf.Variable(
             tf.constant(0.01, shape=[self.nHidden], name=op_name))

     # (dict key, rows, cols, initializer op name); the order fixes the
     # order in which the variables are created.
     weight_specs = (
         ("ATT", 2 * self.nHidden, self.nHidden, "text_att"),
         ("ATTG", 200, self.nHidden, "text_att2"),
         ("ATTS", self.nHidden, 1, "text_att3"),
         ("Fw1", 200, self.nHidden, "init_fw1"),
         ("Fw2", 200, self.nHidden, "init_fw2"),
         ("Bw1", 200, self.nHidden, "init_bw1"),
         ("Bw2", 200, self.nHidden, "init_bw2"),
     )
     bias_specs = (
         ("Fw1", "init_Fw1"),
         ("Fw2", "init_Fw2"),
         ("Bw1", "init_Bw1"),
         ("Bw2", "init_Bw2"),
     )

     with tf.variable_scope("training_variable"):
         self.weights = {
             key: make_weight(rows, cols, op_name)
             for key, rows, cols, op_name in weight_specs
         }
         self.biases = {
             key: make_bias(op_name) for key, op_name in bias_specs
         }

     self.X = tf.placeholder(tf.int32, [None, self.seqLen])
     self.pKeep = tf.placeholder(tf.float32)
     self.build(guidence, newNet)
Ejemplo n.º 7
0
def model(hparams, X, past=None, scope='model', reuse=tf.AUTO_REUSE):
    """Run the transformer stack over ``X`` and return logits and key/values.

    Args:
        hparams: provides ``n_ctx``, ``n_embd``, ``n_vocab`` and ``n_layer``.
        X: token ids; ``shape_list(X)`` must yield ``(batch, sequence)``.
        past: optional cached tensor that is unstacked along axis 1 into one
            entry per layer.
        scope: variable scope name.
        reuse: reuse flag for the variable scope.

    Returns:
        Dict with ``'present'`` (the per-layer ``present`` tensors stacked on
        axis 1) and ``'logits'`` of shape ``[batch, sequence, n_vocab]``.
    """
    with tf.variable_scope(scope, reuse=reuse):
        batch, sequence = shape_list(X)

        wpe = tf.get_variable(
            'wpe', [hparams.n_ctx, hparams.n_embd],
            initializer=tf.random_normal_initializer(stddev=0.01))
        wte = tf.get_variable(
            'wte', [hparams.n_vocab, hparams.n_embd],
            initializer=tf.random_normal_initializer(stddev=0.02))

        if past is None:
            past_length = 0
        else:
            past_length = tf.shape(past)[-2]
        token_emb = tf.gather(wte, X)
        position_emb = tf.gather(wpe, positions_for(X, past_length))
        h = token_emb + position_emb

        # Transformer
        if past is not None:
            pasts = tf.unstack(past, axis=1)
        else:
            pasts = [None] * hparams.n_layer
        assert len(pasts) == hparams.n_layer

        presents = []
        for layer, layer_past in enumerate(pasts):
            h, present = block(h, 'h%d' % layer, past=layer_past,
                               hparams=hparams)
            if layer == 10:
                # Output of layer index 10 is registered in 'checkpoints'.
                tf.add_to_collection('checkpoints', h)
            presents.append(present)
        stacked_presents = tf.stack(presents, axis=1)
        h = norm(h, 'ln_f')

        # Language model loss.  Do tokens <n predict token n?
        # The output projection reuses the token embedding matrix.
        flat_h = tf.reshape(h, [batch * sequence, hparams.n_embd])
        flat_logits = tf.matmul(flat_h, wte, transpose_b=True)
        logits = tf.reshape(flat_logits, [batch, sequence, hparams.n_vocab])
        return {'present': stacked_presents, 'logits': logits}
Ejemplo n.º 8
0
def block(x, scope, *, past, hparams):
    """Apply one attention sub-layer and one MLP sub-layer, each residual.

    Both sub-layers receive a normalized copy of their input (``ln_1`` /
    ``ln_2``) and their output is added back onto the un-normalized input.

    Returns:
        ``(output, present)`` where ``present`` comes from ``attn``.
    """
    with tf.variable_scope(scope):
        # Formerly x.shape[-1].value
        width = x.shape[-1]
        attn_out, present = attn(norm(x, 'ln_1'), 'attn', width,
                                 past=past, hparams=hparams)
        after_attn = x + attn_out
        mlp_out = mlp(norm(after_attn, 'ln_2'), 'mlp', width * 4,
                      hparams=hparams)
        return after_attn + mlp_out, present
Ejemplo n.º 9
0
def conv1d(x, scope, nf, *, w_init_stdev=0.02):
    """Project the last dimension of ``x`` to ``nf`` features.

    ``x`` is flattened to two dimensions, multiplied by the ``[nx, nf]``
    view of the weight ``w`` (stored as ``[1, nx, nf]``), offset by ``b``
    and reshaped back to the leading dimensions of ``x`` plus ``nf``.

    Args:
        x: input tensor.
        scope: variable scope holding ``w`` and ``b``.
        nf: number of output features.
        w_init_stdev: standard deviation of the normal weight initializer.
    """
    with tf.variable_scope(scope):
        *leading, nx = shape_list(x)
        w = tf.get_variable(
            'w', [1, nx, nf],
            initializer=tf.random_normal_initializer(stddev=w_init_stdev))
        b = tf.get_variable('b', [nf], initializer=tf.constant_initializer(0))

        flat_x = tf.reshape(x, [-1, nx])
        flat_w = tf.reshape(w, [-1, nf])
        projected = tf.matmul(flat_x, flat_w) + b
        return tf.reshape(projected, leading + [nf])
Ejemplo n.º 10
0
def norm(x, scope, *, axis=-1, epsilon=1e-5):
    """Normalize to mean = 0, std = 1, then do a diagonal affine transform.

    Args:
        x: input tensor.
        scope: variable scope holding the gain ``g`` and offset ``b``.
        axis: axis over which mean and variance are computed.
        epsilon: added to the variance before the reciprocal square root.
    """
    with tf.variable_scope(scope):
        # Formerly x.shape[-1].value
        width = x.shape[-1]
        gain = tf.get_variable('g', [width],
                               initializer=tf.constant_initializer(1))
        offset = tf.get_variable('b', [width],
                                 initializer=tf.constant_initializer(0))

        mean = tf.reduce_mean(x, axis=axis, keepdims=True)
        variance = tf.reduce_mean(tf.square(x - mean), axis=axis,
                                  keepdims=True)
        normalized = (x - mean) * tf.rsqrt(variance + epsilon)
        return normalized * gain + offset
Ejemplo n.º 11
0
def top_p_logits(logits, p):
    """Mask logits outside the top-``p`` set with ``-1e10``.

    Logits are sorted in descending order and turned into probabilities.
    The exclusive cumulative sum gives, for every rank, the probability mass
    of all higher-ranked entries.  Entries whose preceding mass is below
    ``p`` are kept, and the smallest kept logit becomes the cut-off.

    Args:
        logits: tensor indexed as [batchsize, vocab] (the cumulative sum and
            the minimum both run over axis 1).
        p: probability-mass threshold.

    Returns:
        ``logits`` with every entry below the per-row cut-off replaced by
        ``-1e10``.
    """
    with tf.variable_scope('top_p_logits'):
        sorted_logits = tf.sort(logits, direction='DESCENDING')
        sorted_probs = tf.nn.softmax(sorted_logits)
        mass_before = tf.cumsum(sorted_probs, axis=1, exclusive=True)

        keep = mass_before < p
        # Dropped positions get 1000 so they are ignored by the minimum
        # (assumes real logits stay below 1000 -- confirm).
        sentinel = tf.ones_like(sorted_logits) * 1000
        kept_logits = tf.where(keep, sorted_logits, sentinel)  # [batchsize, vocab]
        cutoff = tf.reduce_min(kept_logits, axis=1, keepdims=True)  # [batchsize, 1]

        below_cutoff = logits < cutoff
        floor = tf.ones_like(logits, dtype=logits.dtype) * -1e10
        return tf.where(below_cutoff, floor, logits)
Ejemplo n.º 12
0
    def build_learning_model(self):
        """Create the ``DoubleDQN`` agent and initialize all variables.

        The agent is built inside the ``"Double-DQN"`` variable scope and
        shares ``self.sess``; afterwards the global variable initializer is
        run on that session.
        """
        # A single-Q ("Natural-DQN", double_q=False) agent was previously
        # built here as well; it is currently disabled.
        agent_options = dict(
            memory_size=self.memory_size,
            double_q=True,
            sess=self.sess,
            e_greedy_increment=0.001,
            output_graph=True,
        )
        with tf.variable_scope("Double-DQN"):
            self.double_dqn = DoubleDQN(self.action_space, self.n_features,
                                        **agent_options)

        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)
Ejemplo n.º 13
0
    def __init__(self,
                 n_actions,
                 n_features,
                 learning_rate=0.005,
                 reward_decay=0.9,
                 replace_decay=0.9,
                 e_greedy=0.9,
                 replace_target_iter=200,
                 memory_size=3000,
                 batch_size=32,
                 e_greedy_increment=None,
                 output_graph=False,
                 double_q=True,
                 sess: tf.Session = None):
        """Store hyper-parameters, build both networks and the copy ops.

        Args:
            n_actions: width of the network output.
            n_features: width of the state placeholders.
            learning_rate: learning rate handed to the optimizer.
            reward_decay: stored as ``self.gamma``.
            replace_decay: stored as ``self.replace_decay``.
            e_greedy: stored as ``self.epsilon_max``.
            replace_target_iter: stored on the instance.
            memory_size: number of rows in ``self.memory``.
            batch_size: stored on the instance.
            e_greedy_increment: when not ``None``, epsilon starts at 0
                instead of ``e_greedy``.
            output_graph: when true, the graph is written to ``./logs/``.
            double_q: stored on the instance.
            sess: session to use.  When ``None`` a new session is created
                and the global initializer is run on it; a supplied session
                is not initialized here.
        """
        # Network dimensions and optimizer settings (read by _build_net).
        self.n_actions = n_actions
        self.n_features = n_features
        self.learning_rate = learning_rate

        # Learning schedule.
        self.gamma = reward_decay
        self.replace_decay = replace_decay
        self.replace_target_iter = replace_target_iter
        self.epsilon_max = e_greedy
        self.e_greedy_increment = e_greedy_increment
        self.batch_size = batch_size
        self.double_q = double_q
        self.output_graph = output_graph

        # Replay memory: each row holds n_features * 2 + 2 values
        # (presumably state, action, reward, next state -- confirm).
        self.memory_size = memory_size
        self.memory_counter = 0
        self.learn_step_counter = 0
        self.memory = np.zeros((self.memory_size, self.n_features * 2 + 2))

        if self.e_greedy_increment is None:
            self.epsilon = self.epsilon_max
        else:
            self.epsilon = 0

        self._build_net()

        # One assign op per (target, eval) variable pair, paired by position
        # in the two collections.
        eval_vars = tf.get_collection('eval_net_params')
        target_vars = tf.get_collection('target_net_params')
        with tf.variable_scope("assign_op"):
            self.replace_target_op = [
                tf.assign(target_var, eval_var)
                for target_var, eval_var in zip(target_vars, eval_vars)
            ]

        if sess is not None:
            self.sess = sess
        else:
            self.sess = tf.Session()
            self.sess.run(tf.global_variables_initializer())

        if output_graph:
            tf.summary.FileWriter("./logs/", self.sess.graph)
        self.cost_his = []  # history of loss values
Ejemplo n.º 14
0
def attn(x, scope, n_state, *, past, hparams):
    """Masked multi-head self-attention with optional cached keys/values.

    Args:
        x: rank-3 tensor, [batch, sequence, features].
        scope: variable scope for the ``c_attn`` and ``c_proj`` projections.
        n_state: feature width; must be divisible by ``hparams.n_head``.
        past: ``None`` or a rank-5 tensor
            [batch, 2, heads, sequence, features] where 2 is [k, v].
        hparams: provides ``n_head``.

    Returns:
        ``(a, present)``: the projected attention output and the stacked
        keys/values of this call (without ``past``).
    """
    assert x.shape.ndims == 3  # Should be [batch, sequence, features]
    assert n_state % hparams.n_head == 0
    if past is not None:
        # Should be [batch, 2, heads, sequence, features], where 2 is [k, v]
        assert past.shape.ndims == 5

    def split_heads(t):
        # [batch, sequence, features] -> [batch, heads, sequence, features]
        return tf.transpose(split_states(t, hparams.n_head), [0, 2, 1, 3])

    def merge_heads(t):
        # Inverse of split_heads.
        return merge_states(tf.transpose(t, [0, 2, 1, 3]))

    def mask_attn_weights(scores):
        # scores: [batch, heads, dst_sequence, src_sequence]; information
        # flows from src to dst.  Masked positions are pushed to -1e10.
        _, _, nd, ns = shape_list(scores)
        mask = attention_mask(nd, ns, dtype=scores.dtype)
        mask = tf.reshape(mask, [1, 1, nd, ns])
        return scores * mask - tf.cast(1e10, scores.dtype) * (1 - mask)

    def multihead_attn(q, k, v):
        # q, k, v: [batch, heads, sequence, features]
        scores = tf.matmul(q, k, transpose_b=True)
        # Scale by 1/sqrt(per-head width); formerly v.shape[-1].value
        scores = scores * tf.rsqrt(tf.cast(v.shape[-1], scores.dtype))
        weights = softmax(mask_attn_weights(scores))
        return tf.matmul(weights, v)

    with tf.variable_scope(scope):
        qkv = conv1d(x, 'c_attn', n_state * 3)
        q, k, v = (split_heads(part) for part in tf.split(qkv, 3, axis=2))
        present = tf.stack([k, v], axis=1)
        if past is not None:
            # Prepend cached keys/values along the sequence axis.
            past_k, past_v = tf.unstack(past, axis=1)
            k = tf.concat([past_k, k], axis=-2)
            v = tf.concat([past_v, v], axis=-2)
        merged = merge_heads(multihead_attn(q, k, v))
        return conv1d(merged, 'c_proj', n_state), present
Ejemplo n.º 15
0
def _build_layer(input_dim,
                 output_dim,
                 input_data,
                 c_name,
                 layer_name,
                 w_name='w',
                 bias_name='b',
                 has_activate=True):
    """Create one fully connected layer inside scope ``layer_name``.

    Args:
        input_dim: number of rows of the weight matrix.
        output_dim: number of columns of the weight matrix and bias length.
        input_data: tensor multiplied with the weight matrix.
        c_name: collections the two variables are added to.
        layer_name: variable scope name.
        w_name: name of the weight variable.
        bias_name: name of the bias variable.
        has_activate: apply ReLU to the result when true.

    Returns:
        ``relu(input_data @ w + b)``, or the plain affine result when
        ``has_activate`` is false.
    """
    with tf.variable_scope(layer_name):
        # Weight and bias both use a truncated-normal initializer.
        weight = tf.get_variable(
            w_name, [input_dim, output_dim],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(),
            collections=c_name)
        bias = tf.get_variable(
            bias_name, [output_dim],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(),
            collections=c_name)

        affine = tf.add(tf.matmul(input_data, weight), bias)
        if not has_activate:
            return affine
        return tf.nn.relu(affine)
Ejemplo n.º 16
0
 def __init__(self):
     """Build an attention-weighted average over five embedded ids.

     Each of the five looked-up embeddings is scored by a two-layer MLP,
     the scores are softmax-normalized per example, and
     ``self.finalState`` is the resulting weighted sum of the embeddings,
     reshaped to ``[-1, embSize]``.
     """
     self.embedding = self.getEmb()
     self.embSize = self.embedding.shape[1]
     self.vocabSize = self.embedding.shape[0]
     self.x = tf.placeholder(tf.int32, [None, 5])

     half_size = int(self.embSize / 2)

     def make_weight(rows, cols):
         return tf.Variable(
             tf.truncated_normal(shape=[rows, cols], stddev=0.08))

     def make_bias(length):
         return tf.Variable(
             tf.constant(0.01, shape=[length], dtype=tf.float32))

     with tf.variable_scope("training_variable"):
         self.weights = {
             "MLP1": make_weight(self.embSize, half_size),
             "MLP2": make_weight(half_size, 1),
         }
         self.biases = {
             "MLP1": make_bias(half_size),
             "MLP2": make_bias(1),
         }

     self.inputEmb = tf.nn.embedding_lookup(self.embedding, self.x)

     # Score every embedding independently.
     flat_emb = tf.reshape(self.inputEmb, [-1, self.embSize])
     hidden = tf.matmul(flat_emb, self.weights["MLP1"]) + self.biases["MLP1"]
     scores = tf.matmul(tf.nn.relu(hidden),
                        self.weights["MLP2"]) + self.biases["MLP2"]

     # Normalize the five scores of each example and use them as weights.
     scores = tf.reshape(scores, [-1, 5])
     attention = tf.reshape(tf.nn.softmax(scores), [-1, 1, 5])
     weighted_sum = tf.matmul(attention, self.inputEmb)
     self.finalState = tf.reshape(weighted_sum, [-1, self.embSize])
Ejemplo n.º 17
0
def mlp(x, scope, n_state, *, hparams):
    """Two-layer feed-forward block: expand to ``n_state``, GELU, project back.

    Args:
        x: input tensor; its last dimension is the output width.
        scope: variable scope for the ``c_fc`` and ``c_proj`` projections.
        n_state: width of the hidden layer.
        hparams: not used by this function.
    """
    with tf.variable_scope(scope):
        # Formerly x.shape[-1].value
        width = x.shape[-1]
        hidden = conv1d(x, 'c_fc', n_state)
        return conv1d(gelu(hidden), 'c_proj', width)
Ejemplo n.º 18
0
    def __init__(self, nHidden, seqLen):
        self.representation_score = {}
        self.y = tf.placeholder(tf.float32, shape=[None, 1])
        self.extractFeature = ExtractFeature.ExtractFeature()
        self.imageFeature = ImageFeature.ImageFeature()
        newNet = tf.reduce_mean(self.imageFeature.outputLS, axis=0)
        self.textFeature = TextFeature.TextFeature(
            nHidden, seqLen, self.extractFeature.finalState, newNet)
        self.l2_para = 1e-7
        with tf.variable_scope("training_variable"):

            self.weights = {
                "MLP1":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 256],
                                        stddev=0.08,
                                        name="MLP1_W")),
                "MLP2":
                tf.Variable(
                    tf.truncated_normal(shape=[256, 1],
                                        stddev=0.08,
                                        name="MLP2_W")),
                "ATT_attr1_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.extractFeature.embSize,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_attr1_1")),
                "ATT_attr1_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.textFeature.nHidden * 2 +
                        self.extractFeature.embSize,
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_attr1_2")),
                "ATT_attr1_3":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        2 * self.extractFeature.embSize,
                        self.extractFeature.embSize
                    ],
                                        stddev=0.08,
                                        name="ATT_attr1_3")),
                "ATT_attr2_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_attr2_1")),
                "ATT_attr2_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_attr2_2")),
                "ATT_attr2_3":
                tf.Variable(
                    tf.truncated_normal(shape=[self.extractFeature.embSize, 1],
                                        stddev=0.08,
                                        name="ATT_attr2_3")),
                "ATT_img1_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.textFeature.nHidden * 2,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden)
                    ],
                                        stddev=0.08,
                                        name="ATT_image1_1")),
                "ATT_img1_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.extractFeature.embSize,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_image1_2")),
                "ATT_img1_3":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize * 2,
                        self.imageFeature.defaultFeatureSize
                    ],
                                        stddev=0.08,
                                        name="ATT_image1_3")),
                "ATT_img2_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_image2_1")),
                "ATT_img2_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_image2_2")),
                "ATT_img2_3":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.imageFeature.defaultFeatureSize, 1],
                        stddev=0.08,
                        name="ATT_image2_3")),
                "ATT_text1_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.textFeature.nHidden * 2,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden)
                    ],
                                        stddev=0.08,
                                        name="ATT_text1_1")),
                "ATT_text1_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.textFeature.nHidden * 2 +
                        self.extractFeature.embSize,
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_text1_2")),
                "ATT_text1_3":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.textFeature.nHidden * 4,
                        self.textFeature.nHidden * 2
                    ],
                                        stddev=0.08,
                                        name="ATT_text1_3")),
                "ATT_text2_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_text2_1")),
                "ATT_text2_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_text2_2")),
                "ATT_text2_3":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.textFeature.nHidden * 2, 1],
                        stddev=0.08,
                        name="ATT_text2_3")),
                "ATT_WI1":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.imageFeature.defaultFeatureSize, 512],
                        stddev=0.08,
                        name="ATT_WI")),
                "ATT_WT1":
                tf.Variable(
                    tf.truncated_normal(shape=[2 * nHidden, 512],
                                        stddev=0.08,
                                        name="ATT_WT")),
                "ATT_WA1":
                tf.Variable(
                    tf.truncated_normal(shape=[200, 512],
                                        stddev=0.08,
                                        name="ATT_WA")),
                "ATT_WI2":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.imageFeature.defaultFeatureSize, 512],
                        stddev=0.08,
                        name="ATT_WI2")),
                "ATT_WT2":
                tf.Variable(
                    tf.truncated_normal(shape=[2 * nHidden, 512],
                                        stddev=0.08,
                                        name="ATT_WT2")),
                "ATT_WA2":
                tf.Variable(
                    tf.truncated_normal(shape=[200, 512],
                                        stddev=0.08,
                                        name="ATT_WA2")),
                "ATT_WF_1":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 1],
                                        stddev=0.08,
                                        name="ATT_WF_1")),
                "ATT_WF_2":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 1],
                                        stddev=0.08,
                                        name="ATT_WF_2")),
                "ATT_WF_3":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 1],
                                        stddev=0.08,
                                        name="ATT_WF_3")),
            }
            self.biases = {
                "MLP1":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[256],
                                dtype=tf.float32,
                                name="MLP1_b")),
                "MLP2":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[1],
                                dtype=tf.float32,
                                name="MLP2_b")),
                "ATT_attr1_1":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.extractFeature.embSize / 2)
                        ],
                        name="ATT_attr1_1")),
                "ATT_attr1_2":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[
                                    int(self.textFeature.nHidden +
                                        self.extractFeature.embSize / 2)
                                ],
                                name="ATT_attr1_2")),
                "ATT_attr1_3":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[self.extractFeature.embSize],
                                name="ATT_attr1_3")),
                "ATT_attr2_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_1")),
                "ATT_attr2_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_2")),
                "ATT_attr2_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_3")),
                "ATT_img1_1":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.textFeature.nHidden)
                        ],
                        name="ATT_image1_1")),
                "ATT_img1_2":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.extractFeature.embSize / 2)
                        ],
                        name="ATT_image1_2")),
                "ATT_img1_3":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[self.imageFeature.defaultFeatureSize],
                                name="ATT_image1_3")),
                "ATT_img2_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_1")),
                "ATT_img2_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_2")),
                "ATT_img2_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_3")),
                "ATT_text1_1":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.textFeature.nHidden)
                        ],
                        name="ATT_text1_1")),
                "ATT_text1_2":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[
                                    int(self.textFeature.nHidden +
                                        self.extractFeature.embSize / 2)
                                ],
                                name="ATT_text1_2")),
                "ATT_text1_3":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[self.textFeature.nHidden * 2],
                                name="ATT_text1_3")),
                "ATT_text2_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_1")),
                "ATT_text2_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_2")),
                "ATT_text2_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_3")),
                "ATT_WW":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WW")),
                "ATT_WI":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WI")),
                "ATT_WT":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WT")),
                "ATT_WI1":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WI1")),
                "ATT_WT1":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WT1")),
                "ATT_WA":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WA")),
                "ATT_WF_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_1")),
                "ATT_WF_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_2")),
                "ATT_WF_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_3")),
            }
        print("newnet dimension :", newNet)

        imageVec = self.Attention(newNet, self.imageFeature.outputLS,
                                  self.textFeature.RNNState,
                                  self.extractFeature.finalState, "ATT_img1",
                                  "ATT_img2", 196, True)
        textVec = self.Attention(self.textFeature.RNNState,
                                 self.textFeature.outputs, newNet,
                                 self.extractFeature.finalState, "ATT_text1",
                                 "ATT_text2", self.textFeature.seqLen, False)
        attrVec = self.Attention(self.extractFeature.finalState,
                                 self.extractFeature.inputEmb, newNet,
                                 self.textFeature.RNNState, "ATT_attr1",
                                 "ATT_attr2", 5, False)

        attHidden = tf.tanh(
            tf.matmul(imageVec, self.weights["ATT_WI1"]) +
            self.biases["ATT_WI1"])
        attHidden2 = tf.tanh(
            tf.matmul(textVec, self.weights["ATT_WT1"]) +
            self.biases["ATT_WT1"])
        attHidden3 = tf.tanh(
            tf.matmul(attrVec, self.weights["ATT_WA1"]) +
            self.biases["ATT_WW"])
        scores1 = tf.matmul(attHidden,
                            self.weights["ATT_WF_1"]) + self.biases["ATT_WF_1"]
        scores2 = tf.matmul(attHidden2,
                            self.weights["ATT_WF_2"]) + self.biases["ATT_WF_2"]
        scores3 = tf.matmul(attHidden3,
                            self.weights["ATT_WF_3"]) + self.biases["ATT_WF_3"]
        scoreLS = [scores1, scores2, scores3]
        scoreLS = tf.nn.softmax(scoreLS, dim=0)
        imageVec = tf.tanh(
            tf.matmul(imageVec, self.weights["ATT_WI2"]) +
            self.biases["ATT_WI"])
        textVec = tf.tanh(
            tf.matmul(textVec, self.weights["ATT_WT2"]) +
            self.biases["ATT_WT"])
        attrVec = tf.tanh(
            tf.matmul(attrVec, self.weights["ATT_WA2"]) +
            self.biases["ATT_WA"])
        self.concatInput = scoreLS[0] * imageVec + scoreLS[
            1] * textVec + scoreLS[2] * attrVec