def _build_net(self, c_name):
    """Build one copy of the network and return its output tensors.

    The graph has four parts, each in its own variable scope:

    * ``f_theta``: four dense layers taking ``self.state`` to a 512-wide
      feature (the last layer is named ``fai_s``).
    * ``g_theta_``: four dense layers taking that feature back to
      ``self.n_state`` units.
    * ``R_s``: a single linear unit (no activation) on the feature.
    * ``mu_alpha``: one three-layer branch (512 -> 512 -> 256 -> 512) per
      action, all fed by the feature.

    Args:
        c_name: Collections handed to every variable created here.

    Returns:
        Tuple ``(feature, s_hat, r_s, M)`` where ``M`` is a NumPy object
        array with one row per action.
    """
    # The one-dimensional state is one-hot encoded, so the input has
    # dimension 1 * n_state.
    with tf.variable_scope('f_theta'):
        enc_1 = _build_layer(self.n_state, 128, self.state, c_name, 'l1')
        enc_2 = _build_layer(128, 512, enc_1, c_name, 'l2')
        enc_3 = _build_layer(512, 1024, enc_2, c_name, 'l3')
        feature = _build_layer(1024, 512, enc_3, c_name, 'fai_s')

    with tf.variable_scope('g_theta_'):
        dec_1 = _build_layer(512, 1024, feature, c_name, 'dl1')
        dec_2 = _build_layer(1024, 512, dec_1, c_name, 'dl2')
        dec_3 = _build_layer(512, 128, dec_2, c_name, 'dl3')
        s_hat = _build_layer(128, self.n_state, dec_3, c_name, 'dl4')

    with tf.variable_scope('R_s'):
        r_s = _build_layer(512, 1, feature, c_name, 'r', has_activate=False)

    with tf.variable_scope("mu_alpha"):
        # NOTE(review): M is an object array of shape
        # (n_action, fai_s_size); storing a whole layer tensor in a row
        # relies on NumPy object-array assignment - confirm it behaves as
        # intended.
        M = np.stack([
            np.zeros(self.fai_s_size, object)
            for _ in range(self.n_action)
        ])
        for action in range(self.n_action):
            branch_1 = _build_layer(512, 512, feature, c_name,
                                    'm%s-1' % action,
                                    'miu_alpha_l1_w', 'miu_alpha_l1_b')
            branch_2 = _build_layer(512, 256, branch_1, c_name,
                                    'm%s-2' % action,
                                    'miu_alpha_l2_w', 'miu_alpha_l2_b')
            M[action] = _build_layer(256, 512, branch_2, c_name,
                                     'm%s-3' % action,
                                     'miu_alpha_l3_w', 'miu_alpha_l3_b')

    return feature, s_hat, r_s, M
def build_layer(s, c_names, n_l1, n_l2, w_initializer, b_initializer):
    """Stack three ReLU dense layers on top of ``s`` and return the last one.

    Layer widths are ``self.n_features -> n_l1 -> n_l2 -> self.n_actions``.
    ``self`` is not a parameter: it is read from the enclosing scope, so
    this function only works where a ``self`` with ``n_features`` and
    ``n_actions`` is visible.

    Args:
        s: Input tensor fed to the first layer.
        c_names: Collections every created variable is added to.
        n_l1: Width of the first layer.
        n_l2: Width of the second layer.
        w_initializer: Initializer for all weight matrices.
        b_initializer: Initializer for all bias rows.

    Returns:
        Output tensor of the third layer (ReLU is applied to it as well).
    """

    def dense(scope, w_name, b_name, n_in, n_out, inputs):
        # One fully connected ReLU layer living in its own variable scope.
        with tf.variable_scope(scope):
            weight = tf.get_variable(w_name, [n_in, n_out],
                                     initializer=w_initializer,
                                     collections=c_names)
            bias = tf.get_variable(b_name, [1, n_out],
                                   initializer=b_initializer,
                                   collections=c_names)
            return tf.nn.relu(tf.matmul(inputs, weight) + bias)

    first = dense('l1', 'w1', 'b1', self.n_features, n_l1, s)
    second = dense('l2', 'w2', 'b2', n_l1, n_l2, first)
    return dense('l3', 'w3', 'b3', n_l2, self.n_actions, second)
def _build_net(self):
    """Build the evaluation network, its loss/train op, and the target network.

    Creates the placeholders ``self.state``, ``self.q_target`` and
    ``self.state_``, and sets ``self.q_eval``, ``self.loss``,
    ``self._train_op`` and ``self.q_next``. Evaluation variables are added
    to the ``'eval_net_params'`` collection and target variables to
    ``'target_net_params'`` (both also to the global variables collection).
    """

    def build_layer(s, c_names, n_l1, n_l2, w_initializer, b_initializer):
        # Three ReLU dense layers: n_features -> n_l1 -> n_l2 -> n_actions.
        layer_specs = (
            ('l1', 'w1', 'b1', self.n_features, n_l1),
            ('l2', 'w2', 'b2', n_l1, n_l2),
            ('l3', 'w3', 'b3', n_l2, self.n_actions),
        )
        out = s
        for scope, w_name, b_name, n_in, n_out in layer_specs:
            with tf.variable_scope(scope):
                weight = tf.get_variable(w_name, [n_in, n_out],
                                         initializer=w_initializer,
                                         collections=c_names)
                bias = tf.get_variable(b_name, [1, n_out],
                                       initializer=b_initializer,
                                       collections=c_names)
                out = tf.nn.relu(tf.matmul(out, weight) + bias)
        return out

    # ---- evaluation network ----
    self.state = tf.placeholder(tf.float32, [None, self.n_features],
                                name='state')
    # Expected output used as the regression target.
    self.q_target = tf.placeholder(tf.float32, [None, self.n_actions],
                                   name='q_target')
    with tf.variable_scope('eval_net'):
        eval_collections = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        hidden_1 = 64
        hidden_2 = 64
        w_init = tf.random_normal_initializer(0.0, 0.3)
        b_init = tf.random_normal_initializer(0., 0.3)
        self.q_eval = build_layer(self.state, eval_collections, hidden_1,
                                  hidden_2, w_init, b_init)

    with tf.variable_scope('loss'):
        squared_error = tf.squared_difference(self.q_target, self.q_eval)
        self.loss = tf.reduce_mean(squared_error)

    with tf.variable_scope('train'):
        optimizer = tf.train.RMSPropOptimizer(self.learning_rate)
        self._train_op = optimizer.minimize(self.loss)

    # ---- target network (same sizes and initializers) ----
    self.state_ = tf.placeholder(tf.float32, [None, self.n_features],
                                 name='state_')
    with tf.variable_scope('target_net'):
        target_collections = ['target_net_params',
                              tf.GraphKeys.GLOBAL_VARIABLES]
        self.q_next = build_layer(self.state_, target_collections, hidden_1,
                                  hidden_2, w_init, b_init)
def __init__(self, n_state, n_action, learning_rate, gamma,
             replay_buffer_size=3000, sess: tf.Session = None):
    """Create placeholders, both networks and the target-sync op.

    Args:
        n_state: Width of the state vectors fed to the networks.
        n_action: Number of actions (one ``mu_alpha`` branch each).
        learning_rate: Stored on the instance; not used in this method.
        gamma: Stored on the instance; not used in this method.
        replay_buffer_size: Number of rows in the replay buffer.
        sess: Session to use. A new ``tf.Session`` is created when None.

    Side effects:
        Deletes and recreates ``./log``, writes the graph there, and runs
        the global variable initializer in the session.
    """
    self.n_state = n_state
    self.n_action = n_action
    self.fai_s_size = 512
    # shape: (n_state,)
    self.w = np.zeros([n_state])
    self.learning_rate = learning_rate
    self.gamma = gamma
    # Each row is 2 * n_state + 2 wide - presumably (s, a, r, s_);
    # confirm against the code that stores transitions.
    self.replay_buffer = np.zeros(
        [replay_buffer_size, self.n_state * 2 + 2])
    self.memory_size = replay_buffer_size
    self.memory_count = 0

    self.state = tf.placeholder(tf.float32, [None, self.n_state])
    self.state_hat = tf.placeholder(tf.float32, [None, self.n_state])
    self.state_ = tf.placeholder(tf.float32, [None, self.n_state])
    self.rs_p = tf.placeholder(tf.float32, [None, 1])

    self.sess = tf.Session() if sess is None else sess

    self.eval_collection_name = [
        'eval_net_collection', tf.GraphKeys.GLOBAL_VARIABLES
    ]
    self.target_collection_name = [
        'target_net_collection', tf.GraphKeys.GLOBAL_VARIABLES
    ]

    # Start from an empty log directory. The existence check keeps the
    # very first run (no ./log yet) from failing in rmtree.
    if os.path.isdir("./log"):
        shutil.rmtree("./log")
    os.mkdir("./log")

    with tf.variable_scope('eval_net'):
        (self.eval_fai, self.eval_s_hat, self.eval_r_s,
         self.eval_M) = self._build_net(self.eval_collection_name)
    with tf.variable_scope('target_net'):
        # The target feature tensor gets its own attribute; it used to
        # overwrite self.eval_fai.
        (self.target_fai, self.target_s_hat, self.target_r_s,
         self.target_M) = self._build_net(self.target_collection_name)

    # The sync op must be built after both networks so the collections
    # are populated, and must query the collection names the variables
    # were actually registered under.
    with tf.variable_scope('assign_op'):
        e_params = tf.get_collection(self.eval_collection_name[0])
        t_params = tf.get_collection(self.target_collection_name[0])
        self.assign_op = [
            tf.assign(t, e) for t, e in zip(t_params, e_params)
        ]

    tf.summary.FileWriter("./log", self.sess.graph)
    self.sess.run(tf.global_variables_initializer())
def build(self, guidence, newNet):
    """Build the bidirectional LSTM over the embedded input tokens.

    The initial state of each direction is a pair of ReLU projections of
    ``guidence`` (keys ``Fw1``/``Fw2`` forward, ``Bw1``/``Bw2`` backward).
    Sets ``self.outputs`` to the forward and backward outputs concatenated
    on axis 2, and ``self.RNNState`` to their mean over axis 1.

    Args:
        guidence: Tensor multiplied with the ``Fw*``/``Bw*`` weights to
            produce the initial LSTM states.
        newNet: Not used by this method.
    """

    def projected(key):
        # ReLU(guidence @ W[key] + b[key])
        return tf.nn.relu(
            tf.matmul(guidence, self.weights[key]) + self.biases[key])

    def dropout_lstm_cell():
        # LSTM cell with dropout on its inputs only.
        return tf.compat.v1.nn.rnn_cell.DropoutWrapper(
            tf.nn.rnn_cell.BasicLSTMCell(self.nHidden),
            input_keep_prob=self.pKeep,
            output_keep_prob=1.0)

    with tf.variable_scope("training_variable"):
        inputEmb = tf.nn.embedding_lookup(self.embedding, self.X)

        initFw = tf.nn.rnn_cell.LSTMStateTuple(projected("Fw1"),
                                               projected("Fw2"))
        initBw = tf.nn.rnn_cell.LSTMStateTuple(projected("Bw1"),
                                               projected("Bw2"))

        rnnCellFw = dropout_lstm_cell()
        rnnCellBw = dropout_lstm_cell()

        outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=rnnCellFw,
            cell_bw=rnnCellBw,
            inputs=inputEmb,
            initial_state_fw=initFw,
            initial_state_bw=initBw,
            dtype=tf.float32)

        self.outputs = tf.concat(outputs, axis=2)
        self.RNNState = tf.reduce_mean(self.outputs, axis=1)
def __init__(self, nHidden, seqLen, guidence, newNet):
    """Create the embedding, trainable parameters and inputs, then build.

    Args:
        nHidden: LSTM hidden size; also sizes most weight matrices.
        seqLen: Fixed length of the token-id input ``self.X``.
        guidence: Forwarded to ``self.build``.
        newNet: Forwarded to ``self.build``.
    """
    self.nHidden = nHidden
    self.seqLen = seqLen
    self.embedding = tf.Variable(self.getEmbedding())

    def make_weight(shape, name):
        return tf.Variable(
            tf.truncated_normal(shape=shape, stddev=0.08, name=name))

    def make_bias(name):
        return tf.Variable(
            tf.constant(0.01, shape=[self.nHidden], name=name))

    # Creation order is kept stable because the variables are unnamed
    # (the names below go to the initial-value ops).
    # NOTE(review): 200 is presumably the width of `guidence` - confirm.
    weight_specs = (
        ("ATT", [2 * self.nHidden, self.nHidden], "text_att"),
        ("ATTG", [200, self.nHidden], "text_att2"),
        ("ATTS", [self.nHidden, 1], "text_att3"),
        ("Fw1", [200, self.nHidden], "init_fw1"),
        ("Fw2", [200, self.nHidden], "init_fw2"),
        ("Bw1", [200, self.nHidden], "init_bw1"),
        ("Bw2", [200, self.nHidden], "init_bw2"),
    )
    bias_specs = (
        ("Fw1", "init_Fw1"),
        ("Fw2", "init_Fw2"),
        ("Bw1", "init_Bw1"),
        ("Bw2", "init_Bw2"),
    )

    with tf.variable_scope("training_variable"):
        self.weights = {
            key: make_weight(shape, name)
            for key, shape, name in weight_specs
        }
        self.biases = {key: make_bias(name) for key, name in bias_specs}

    self.X = tf.placeholder(tf.int32, [None, self.seqLen])
    self.pKeep = tf.placeholder(tf.float32)
    self.build(guidence, newNet)
def model(hparams, X, past=None, scope='model', reuse=tf.AUTO_REUSE):
    """Build the transformer and return its logits and per-layer states.

    Args:
        hparams: Object providing ``n_ctx``, ``n_embd``, ``n_vocab`` and
            ``n_layer``.
        X: Token ids; ``shape_list(X)`` must yield ``(batch, sequence)``.
        past: Optional tensor unstacked along axis 1 into one entry per
            layer; its second-to-last dimension is used as the position
            offset.
        scope: Variable scope name.
        reuse: Reuse flag for the variable scope.

    Returns:
        Dict with ``'present'`` (per-layer ``present`` tensors stacked on
        axis 1) and ``'logits'`` (shape ``[batch, sequence, n_vocab]``).
    """
    with tf.variable_scope(scope, reuse=reuse):
        batch, sequence = shape_list(X)

        wpe = tf.get_variable(
            'wpe', [hparams.n_ctx, hparams.n_embd],
            initializer=tf.random_normal_initializer(stddev=0.01))
        wte = tf.get_variable(
            'wte', [hparams.n_vocab, hparams.n_embd],
            initializer=tf.random_normal_initializer(stddev=0.02))

        if past is None:
            past_length = 0
        else:
            past_length = tf.shape(past)[-2]
        token_emb = tf.gather(wte, X)
        position_emb = tf.gather(wpe, positions_for(X, past_length))
        h = token_emb + position_emb

        # Transformer
        if past is None:
            layer_pasts = [None] * hparams.n_layer
        else:
            layer_pasts = tf.unstack(past, axis=1)
        assert len(layer_pasts) == hparams.n_layer

        presents = []
        for layer, layer_past in enumerate(layer_pasts):
            h, present = block(h, 'h%d' % layer, past=layer_past,
                               hparams=hparams)
            if layer == 10:
                # Record this layer's activations in the 'checkpoints'
                # collection.
                tf.add_to_collection('checkpoints', h)
            presents.append(present)

        results = {'present': tf.stack(presents, axis=1)}
        h = norm(h, 'ln_f')

        # Language model loss.  Do tokens <n predict token n?
        # The output projection reuses the token embedding matrix.
        h_flat = tf.reshape(h, [batch * sequence, hparams.n_embd])
        flat_logits = tf.matmul(h_flat, wte, transpose_b=True)
        results['logits'] = tf.reshape(
            flat_logits, [batch, sequence, hparams.n_vocab])
        return results
def block(x, scope, *, past, hparams):
    """Apply one transformer layer: attention then MLP, each with a residual.

    Both sub-layers are fed the ``norm``-ed input and their result is
    added back to ``x``. The MLP hidden width is four times the last
    dimension of ``x``.

    Returns:
        Tuple ``(x, present)`` where ``present`` comes from ``attn``.
    """
    with tf.variable_scope(scope):
        width = x.shape[-1]
        attn_out, present = attn(norm(x, 'ln_1'), 'attn', width,
                                 past=past, hparams=hparams)
        x = x + attn_out
        mlp_out = mlp(norm(x, 'ln_2'), 'mlp', width * 4, hparams=hparams)
        x = x + mlp_out
        return x, present
def conv1d(x, scope, nf, *, w_init_stdev=0.02):
    """Apply a learned affine map to the last dimension of ``x``.

    ``x`` is flattened to 2-D, multiplied by the ``[nx, nf]`` view of the
    ``[1, nx, nf]`` weight, offset by the bias, and reshaped back to the
    leading dimensions of ``x`` followed by ``nf``.

    Args:
        x: Input tensor; its last dimension is the input width.
        scope: Variable scope holding ``w`` and ``b``.
        nf: Output width.
        w_init_stdev: Standard deviation of the weight initializer.
    """
    with tf.variable_scope(scope):
        *lead_dims, nx = shape_list(x)
        w = tf.get_variable(
            'w', [1, nx, nf],
            initializer=tf.random_normal_initializer(stddev=w_init_stdev))
        b = tf.get_variable('b', [nf],
                            initializer=tf.constant_initializer(0))
        x_2d = tf.reshape(x, [-1, nx])
        w_2d = tf.reshape(w, [-1, nf])
        projected = tf.matmul(x_2d, w_2d) + b
        return tf.reshape(projected, lead_dims + [nf])
def norm(x, scope, *, axis=-1, epsilon=1e-5):
    """Normalize to mean = 0, std = 1, then do a diagonal affine transform.

    Args:
        x: Input tensor.
        scope: Variable scope holding the gain ``g`` and offset ``b``.
        axis: Axis the mean and variance are taken over.
        epsilon: Added to the variance before the reciprocal square root.
    """
    with tf.variable_scope(scope):
        n_state = x.shape[-1]
        gain = tf.get_variable('g', [n_state],
                               initializer=tf.constant_initializer(1))
        offset = tf.get_variable('b', [n_state],
                                 initializer=tf.constant_initializer(0))
        mean = tf.reduce_mean(x, axis=axis, keepdims=True)
        variance = tf.reduce_mean(tf.square(x - mean), axis=axis,
                                  keepdims=True)
        normalized = (x - mean) * tf.rsqrt(variance + epsilon)
        return normalized * gain + offset
def top_p_logits(logits, p):
    """Mask logits outside the top-p set with a large negative value.

    Logits are sorted in descending order; a sorted entry is kept while
    the summed probability of the entries before it is below ``p``. The
    smallest kept logit becomes the per-row threshold, and every logit
    below it is replaced with -1e10.

    Args:
        logits: Tensor of shape [batchsize, vocab].
        p: Cumulative-probability cutoff.
    """
    with tf.variable_scope('top_p_logits'):
        sorted_logits = tf.sort(logits, direction='DESCENDING')
        sorted_probs = tf.nn.softmax(sorted_logits)
        # Exclusive sum: probability mass strictly before each entry.
        mass_before = tf.cumsum(sorted_probs, axis=1, exclusive=True)
        keep = mass_before < p
        # Dropped entries are set to 1000 so they are ignored by the min.
        dropped_fill = tf.ones_like(sorted_logits) * 1000
        kept_logits = tf.where(keep, sorted_logits,
                               dropped_fill)  # [batchsize, vocab]
        threshold = tf.reduce_min(kept_logits, axis=1,
                                  keepdims=True)  # [batchsize, 1]
        below_threshold = logits < threshold
        masked_value = tf.ones_like(logits, dtype=logits.dtype) * -1e10
        return tf.where(below_threshold, masked_value, logits)
def build_learning_model(self):
    """Create the Double-DQN agent and initialise all graph variables.

    The agent is built inside the ``"Double-DQN"`` variable scope and
    shares ``self.sess``; the global initializer is then run in that
    session.
    """
    # A plain DQN baseline (double_q=False) used to be built here under
    # the "Natural-DQN" scope; it is currently disabled.
    with tf.variable_scope("Double-DQN"):
        agent = DoubleDQN(
            self.action_space,
            self.n_features,
            memory_size=self.memory_size,
            e_greedy_increment=0.001,
            double_q=True,
            sess=self.sess,
            output_graph=True,
        )
        self.double_dqn = agent
    init_op = tf.global_variables_initializer()
    self.sess.run(init_op)
def __init__(self,
             n_actions,
             n_features,
             learning_rate=0.005,
             reward_decay=0.9,
             replace_decay=0.9,
             e_greedy=0.9,
             replace_target_iter=200,
             memory_size=3000,
             batch_size=32,
             e_greedy_increment=None,
             output_graph=False,
             double_q=True,
             sess: tf.Session = None):
    """Store hyper-parameters, build both networks and the sync op.

    Args:
        n_actions: Number of actions (network output width).
        n_features: Width of a state vector (network input width).
        learning_rate: Passed to the optimizer built in ``_build_net``.
        reward_decay: Stored as ``self.gamma``.
        replace_decay: Stored on the instance.
        e_greedy: Stored as ``self.epsilon_max``.
        replace_target_iter: Stored on the instance.
        memory_size: Number of rows in ``self.memory``.
        batch_size: Stored on the instance.
        e_greedy_increment: When not None, ``self.epsilon`` starts at 0;
            otherwise it starts at ``e_greedy``.
        output_graph: When true, write the graph to ``./logs/``.
        double_q: Stored on the instance.
        sess: Session to use. When None a new session is created and the
            global variable initializer is run in it; a supplied session
            is NOT initialised here.
    """
    self.n_actions = n_actions
    self.n_features = n_features
    self.learning_rate = learning_rate
    self.gamma = reward_decay
    self.replace_decay = replace_decay
    self.replace_target_iter = replace_target_iter
    self.epsilon_max = e_greedy
    self.memory_size = memory_size
    self.batch_size = batch_size
    self.e_greedy_increment = e_greedy_increment
    self.output_graph = output_graph
    self.double_q = double_q

    self.memory_counter = 0
    self.learn_step_counter = 0
    # Each row is 2 * n_features + 2 wide - presumably (s, a, r, s_);
    # confirm against the code that stores transitions.
    row_width = self.n_features * 2 + 2
    self.memory = np.zeros((self.memory_size, row_width))

    if self.e_greedy_increment is not None:
        self.epsilon = 0
    else:
        self.epsilon = self.epsilon_max

    self._build_net()

    eval_params = tf.get_collection('eval_net_params')
    target_params = tf.get_collection('target_net_params')
    with tf.variable_scope("assign_op"):
        # One assign per variable pair: target <- eval.
        self.replace_target_op = [
            tf.assign(target_var, eval_var)
            for target_var, eval_var in zip(target_params, eval_params)
        ]

    if sess is not None:
        self.sess = sess
    else:
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    if output_graph:
        tf.summary.FileWriter("./logs/", self.sess.graph)

    # History of loss values.
    self.cost_his = []
def attn(x, scope, n_state, *, past, hparams):
    """Masked multi-head self-attention with optional cached keys/values.

    Args:
        x: Rank-3 input, [batch, sequence, features].
        scope: Variable scope for the ``c_attn`` and ``c_proj`` layers.
        n_state: Attention width; must be divisible by ``hparams.n_head``.
        past: Optional rank-5 tensor [batch, 2, heads, sequence, features]
            (the 2 is [k, v]) that is prepended to this call's keys and
            values.
        hparams: Object providing ``n_head``.

    Returns:
        Tuple ``(a, present)``: the attention output and this call's own
        keys and values stacked on axis 1.
    """
    assert x.shape.ndims == 3  # Should be [batch, sequence, features]
    assert n_state % hparams.n_head == 0
    if past is not None:
        # Should be [batch, 2, heads, sequence, features],
        # where 2 is [k, v]
        assert past.shape.ndims == 5

    def split_heads(t):
        # From [batch, sequence, features] to
        # [batch, heads, sequence, features]
        per_head = split_states(t, hparams.n_head)
        return tf.transpose(per_head, [0, 2, 1, 3])

    def merge_heads(t):
        # Reverse of split_heads
        return merge_states(tf.transpose(t, [0, 2, 1, 3]))

    def mask_attn_weights(w):
        # w has shape [batch, heads, dst_sequence, src_sequence], where
        # information flows from src to dst.
        _, _, nd, ns = shape_list(w)
        mask = attention_mask(nd, ns, dtype=w.dtype)
        mask = tf.reshape(mask, [1, 1, nd, ns])
        # Where the mask is 0 the weight is replaced with -1e10.
        return w * mask - tf.cast(1e10, w.dtype) * (1 - mask)

    def multihead_attn(q, k, v):
        # q, k, v have shape [batch, heads, sequence, features]
        scores = tf.matmul(q, k, transpose_b=True)
        scores = scores * tf.rsqrt(tf.cast(v.shape[-1], scores.dtype))
        weights = softmax(mask_attn_weights(scores))
        return tf.matmul(weights, v)

    with tf.variable_scope(scope):
        qkv = conv1d(x, 'c_attn', n_state * 3)
        q, k, v = [split_heads(part) for part in tf.split(qkv, 3, axis=2)]
        # `present` holds only this call's k/v, taken before `past` is
        # prepended.
        present = tf.stack([k, v], axis=1)
        if past is not None:
            past_k, past_v = tf.unstack(past, axis=1)
            k = tf.concat([past_k, k], axis=-2)
            v = tf.concat([past_v, v], axis=-2)
        attended = merge_heads(multihead_attn(q, k, v))
        a = conv1d(attended, 'c_proj', n_state)
        return a, present
def _build_layer(input_dim,
                 output_dim,
                 input_data,
                 c_name,
                 layer_name,
                 w_name='w',
                 bias_name='b',
                 has_activate=True):
    """Create one fully connected layer inside its own variable scope.

    Args:
        input_dim: Number of input units.
        output_dim: Number of output units.
        input_data: Tensor multiplied by the weight matrix.
        c_name: Collections the weight and bias are added to.
        layer_name: Variable scope name for this layer.
        w_name: Name of the weight variable.
        bias_name: Name of the bias variable.
        has_activate: Apply ReLU when true; return the affine output
            otherwise.

    Returns:
        The layer's output tensor.
    """
    with tf.variable_scope(layer_name):
        weights = tf.get_variable(
            w_name, [input_dim, output_dim],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(),
            collections=c_name)
        bias = tf.get_variable(
            bias_name, [output_dim],
            dtype=tf.float32,
            initializer=tf.truncated_normal_initializer(),
            collections=c_name)
        pre_activation = tf.add(tf.matmul(input_data, weights), bias)
        if not has_activate:
            return pre_activation
        return tf.nn.relu(pre_activation)
def __init__(self):
    """Build an attention-weighted summary of five embedded input ids.

    Each of the five embeddings is scored by a two-layer MLP; the scores
    are softmax-normalised across the five positions and used to take a
    weighted sum of the embeddings, stored as ``self.finalState``.
    """
    self.embedding = self.getEmb()
    self.embSize = self.embedding.shape[1]
    self.vocabSize = self.embedding.shape[0]
    self.x = tf.placeholder(tf.int32, [None, 5])

    half_emb = int(self.embSize / 2)
    with tf.variable_scope("training_variable"):
        self.weights = {
            "MLP1": tf.Variable(
                tf.truncated_normal(shape=[self.embSize, half_emb],
                                    stddev=0.08)),
            "MLP2": tf.Variable(
                tf.truncated_normal(shape=[half_emb, 1], stddev=0.08)),
        }
        self.biases = {
            "MLP1": tf.Variable(
                tf.constant(0.01, shape=[half_emb], dtype=tf.float32)),
            "MLP2": tf.Variable(
                tf.constant(0.01, shape=[1], dtype=tf.float32)),
        }

    self.inputEmb = tf.nn.embedding_lookup(self.embedding, self.x)

    # Score every embedding independently with the MLP.
    flat_emb = tf.reshape(self.inputEmb, [-1, self.embSize])
    hidden = tf.matmul(flat_emb, self.weights["MLP1"]) + self.biases["MLP1"]
    scores = (tf.matmul(tf.nn.relu(hidden), self.weights["MLP2"]) +
              self.biases["MLP2"])

    # Softmax over the five positions, then a weighted sum of embeddings.
    scores = tf.reshape(scores, [-1, 5])
    attention = tf.reshape(tf.nn.softmax(scores), [-1, 1, 5])
    weighted = tf.matmul(attention, self.inputEmb)
    self.finalState = tf.reshape(weighted, [-1, self.embSize])
def mlp(x, scope, n_state, *, hparams):
    """Two-layer feed-forward block: expand to ``n_state``, GELU, project back.

    The output has the same last dimension as ``x``. ``hparams`` is
    accepted but not used here.
    """
    with tf.variable_scope(scope):
        out_width = x.shape[-1]
        expanded = conv1d(x, 'c_fc', n_state)
        activated = gelu(expanded)
        return conv1d(activated, 'c_proj', out_width)
def __init__(self, nHidden, seqLen):
    """Build the three feature extractors and the attention-fusion graph.

    Creates the attribute, image and text sub-models, allocates every
    fusion weight and bias, runs ``self.Attention`` once per modality and
    combines the three resulting vectors with softmax-normalised scores
    into ``self.concatInput``.

    Args:
        nHidden: Hidden size forwarded to the text model; also sizes the
            ``ATT_WT1``/``ATT_WT2`` weights.
        seqLen: Sequence length forwarded to the text model.
    """
    self.representation_score = {}
    self.y = tf.placeholder(tf.float32, shape=[None, 1])
    self.extractFeature = ExtractFeature.ExtractFeature()
    self.imageFeature = ImageFeature.ImageFeature()
    newNet = tf.reduce_mean(self.imageFeature.outputLS, axis=0)
    self.textFeature = TextFeature.TextFeature(
        nHidden, seqLen, self.extractFeature.finalState, newNet)
    self.l2_para = 1e-7

    # Sizes shared by many of the shapes below.
    img_size = self.imageFeature.defaultFeatureSize
    emb_size = self.extractFeature.embSize
    txt_hidden = self.textFeature.nHidden
    img_emb_mid = int(img_size / 2 + emb_size / 2)
    txt_emb_mid = int(txt_hidden + emb_size / 2)
    img_txt_mid = int(img_size / 2 + txt_hidden)

    def weight(shape, name):
        return tf.Variable(
            tf.truncated_normal(shape=shape, stddev=0.08, name=name))

    def bias(shape, name, dtype=None):
        return tf.Variable(
            tf.constant(0.01, shape=shape, dtype=dtype, name=name))

    # Variables are unnamed (the names go to the initial-value ops), so
    # the creation order below is kept exactly as before.
    with tf.variable_scope("training_variable"):
        self.weights = {
            "MLP1": weight([512, 256], "MLP1_W"),
            "MLP2": weight([256, 1], "MLP2_W"),
            "ATT_attr1_1": weight([img_size + emb_size, img_emb_mid],
                                  "ATT_attr1_1"),
            "ATT_attr1_2": weight([txt_hidden * 2 + emb_size, txt_emb_mid],
                                  "ATT_attr1_2"),
            "ATT_attr1_3": weight([2 * emb_size, emb_size], "ATT_attr1_3"),
            "ATT_attr2_1": weight([img_emb_mid, 1], "ATT_attr2_1"),
            "ATT_attr2_2": weight([txt_emb_mid, 1], "ATT_attr2_2"),
            "ATT_attr2_3": weight([emb_size, 1], "ATT_attr2_3"),
            "ATT_img1_1": weight([img_size + txt_hidden * 2, img_txt_mid],
                                 "ATT_image1_1"),
            "ATT_img1_2": weight([img_size + emb_size, img_emb_mid],
                                 "ATT_image1_2"),
            "ATT_img1_3": weight([img_size * 2, img_size], "ATT_image1_3"),
            "ATT_img2_1": weight([img_txt_mid, 1], "ATT_image2_1"),
            "ATT_img2_2": weight([img_emb_mid, 1], "ATT_image2_2"),
            "ATT_img2_3": weight([img_size, 1], "ATT_image2_3"),
            "ATT_text1_1": weight([img_size + txt_hidden * 2, img_txt_mid],
                                  "ATT_text1_1"),
            "ATT_text1_2": weight([txt_hidden * 2 + emb_size, txt_emb_mid],
                                  "ATT_text1_2"),
            "ATT_text1_3": weight([txt_hidden * 4, txt_hidden * 2],
                                  "ATT_text1_3"),
            "ATT_text2_1": weight([img_txt_mid, 1], "ATT_text2_1"),
            "ATT_text2_2": weight([txt_emb_mid, 1], "ATT_text2_2"),
            "ATT_text2_3": weight([txt_hidden * 2, 1], "ATT_text2_3"),
            "ATT_WI1": weight([img_size, 512], "ATT_WI"),
            "ATT_WT1": weight([2 * nHidden, 512], "ATT_WT"),
            # NOTE(review): 200 is presumably the attribute embedding
            # size (embSize) - confirm before replacing the literal.
            "ATT_WA1": weight([200, 512], "ATT_WA"),
            "ATT_WI2": weight([img_size, 512], "ATT_WI2"),
            "ATT_WT2": weight([2 * nHidden, 512], "ATT_WT2"),
            "ATT_WA2": weight([200, 512], "ATT_WA2"),
            "ATT_WF_1": weight([512, 1], "ATT_WF_1"),
            "ATT_WF_2": weight([512, 1], "ATT_WF_2"),
            "ATT_WF_3": weight([512, 1], "ATT_WF_3"),
        }
        self.biases = {
            "MLP1": bias([256], "MLP1_b", dtype=tf.float32),
            "MLP2": bias([1], "MLP2_b", dtype=tf.float32),
            "ATT_attr1_1": bias([img_emb_mid], "ATT_attr1_1"),
            "ATT_attr1_2": bias([txt_emb_mid], "ATT_attr1_2"),
            "ATT_attr1_3": bias([emb_size], "ATT_attr1_3"),
            "ATT_attr2_1": bias([1], "ATT_attr2_1"),
            "ATT_attr2_2": bias([1], "ATT_attr2_2"),
            "ATT_attr2_3": bias([1], "ATT_attr2_3"),
            "ATT_img1_1": bias([img_txt_mid], "ATT_image1_1"),
            "ATT_img1_2": bias([img_emb_mid], "ATT_image1_2"),
            "ATT_img1_3": bias([img_size], "ATT_image1_3"),
            "ATT_img2_1": bias([1], "ATT_image2_1"),
            "ATT_img2_2": bias([1], "ATT_image2_2"),
            "ATT_img2_3": bias([1], "ATT_image2_3"),
            "ATT_text1_1": bias([img_txt_mid], "ATT_text1_1"),
            "ATT_text1_2": bias([txt_emb_mid], "ATT_text1_2"),
            "ATT_text1_3": bias([txt_hidden * 2], "ATT_text1_3"),
            "ATT_text2_1": bias([1], "ATT_text2_1"),
            "ATT_text2_2": bias([1], "ATT_text2_2"),
            "ATT_text2_3": bias([1], "ATT_text2_3"),
            "ATT_WW": bias([512], "ATT_WW"),
            "ATT_WI": bias([512], "ATT_WI"),
            "ATT_WT": bias([512], "ATT_WT"),
            "ATT_WI1": bias([512], "ATT_WI1"),
            "ATT_WT1": bias([512], "ATT_WT1"),
            "ATT_WA": bias([512], "ATT_WA"),
            "ATT_WF_1": bias([1], "ATT_WF_1"),
            "ATT_WF_2": bias([1], "ATT_WF_2"),
            "ATT_WF_3": bias([1], "ATT_WF_3"),
        }

    print("newnet dimension :", newNet)

    # One attention pass per modality.
    imageVec = self.Attention(newNet, self.imageFeature.outputLS,
                              self.textFeature.RNNState,
                              self.extractFeature.finalState,
                              "ATT_img1", "ATT_img2", 196, True)
    textVec = self.Attention(self.textFeature.RNNState,
                             self.textFeature.outputs, newNet,
                             self.extractFeature.finalState,
                             "ATT_text1", "ATT_text2",
                             self.textFeature.seqLen, False)
    attrVec = self.Attention(self.extractFeature.finalState,
                             self.extractFeature.inputEmb, newNet,
                             self.textFeature.RNNState,
                             "ATT_attr1", "ATT_attr2", 5, False)

    def tanh_affine(vec, weight_key, bias_key):
        return tf.tanh(
            tf.matmul(vec, self.weights[weight_key]) +
            self.biases[bias_key])

    # Scoring branch: a 512-wide hidden layer then one scalar per
    # modality. The attribute branch uses the "ATT_WW" bias.
    hidden_layers = [
        tanh_affine(imageVec, "ATT_WI1", "ATT_WI1"),
        tanh_affine(textVec, "ATT_WT1", "ATT_WT1"),
        tanh_affine(attrVec, "ATT_WA1", "ATT_WW"),
    ]
    score_keys = ("ATT_WF_1", "ATT_WF_2", "ATT_WF_3")
    scoreLS = [
        tf.matmul(hidden, self.weights[key]) + self.biases[key]
        for hidden, key in zip(hidden_layers, score_keys)
    ]
    # Softmax across the three modalities (axis 0 of the stacked list).
    scoreLS = tf.nn.softmax(scoreLS, dim=0)

    # Value branch: project each modality to 512 units.
    imageVec = tanh_affine(imageVec, "ATT_WI2", "ATT_WI")
    textVec = tanh_affine(textVec, "ATT_WT2", "ATT_WT")
    attrVec = tanh_affine(attrVec, "ATT_WA2", "ATT_WA")

    self.concatInput = (scoreLS[0] * imageVec + scoreLS[1] * textVec +
                        scoreLS[2] * attrVec)