def __variable_summaries(self, var):
    """Register TensorBoard summaries describing the values of `var`.

    Emits scalar summaries named 'min', 'max', 'mean' and 'stddev', plus a
    'histogram' summary of the whole tensor. Nothing is returned.
    """
    average = tf.reduce_mean(var)
    # Population standard deviation: sqrt(mean((var - mean)^2)).
    squared_deviation = tf.square(var - average)
    spread = tf.sqrt(tf.reduce_mean(squared_deviation))

    smallest = tf.reduce_min(var)
    tf.summary.scalar('min', smallest)
    largest = tf.reduce_max(var)
    tf.summary.scalar('max', largest)
    tf.summary.scalar('mean', average)
    tf.summary.scalar('stddev', spread)
    tf.summary.histogram('histogram', var)
def norm(x, scope, *, axis=-1, epsilon=1e-5):
    """Standardize `x` along `axis`, then apply a learned per-channel gain and bias.

    Args:
        x: input tensor; its last dimension sizes the 'g' and 'b' variables.
        scope: variable scope under which 'g' (init 1) and 'b' (init 0) live.
        axis: axis over which mean and variance are computed.
        epsilon: added to the variance before the reciprocal square root.

    Returns:
        Tensor with the same shape as `x`.
    """
    with tf.variable_scope(scope):
        channels = x.shape[-1]
        gain = tf.get_variable('g', [channels],
                               initializer=tf.constant_initializer(1))
        bias = tf.get_variable('b', [channels],
                               initializer=tf.constant_initializer(0))

        mean = tf.reduce_mean(x, axis=axis, keepdims=True)
        variance = tf.reduce_mean(tf.square(x - mean),
                                  axis=axis,
                                  keepdims=True)
        standardized = (x - mean) * tf.rsqrt(variance + epsilon)
        return standardized * gain + bias
def learn(self):
    """Sample a batch from the replay buffer and build two loss tensors.

    `loss1` is the mean squared difference between `self.rs_p` and the last
    column of the sampled rows; `loss2` is a softmax cross-entropy between
    `self.state_hat` and one-hot labels derived from the sampled rows.
    """
    # Row indices are drawn from [0, min(memory_count, memory_size)).
    # presumably so only rows already written are sampled -- TODO confirm
    upper_bound = min(self.memory_count, self.memory_size)
    sample_index = np.random.choice(upper_bound, size=batch_size)
    train_data_sets = self.replay_buffer[sample_index, :]

    reward_targets = train_data_sets[:, -1]
    loss1 = tf.reduce_mean(
        tf.squared_difference(self.rs_p, reward_targets))

    state_labels = utils.onehot_mat(train_data_sets[:, ])
    loss2 = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=state_labels,
                                                logits=self.state_hat))
    # NOTE(review): loss1/loss2 are neither returned nor stored in the code
    # visible here -- confirm whether an optimisation step is missing.
def build(self, guidence, newNet):
    """Build a bidirectional LSTM over the embedded inputs `self.X`.

    Both directions start from states computed from `guidence` through
    ReLU-activated affine maps. The per-step outputs of both directions are
    concatenated along the feature axis and stored in `self.outputs`; their
    mean over axis 1 is stored in `self.RNNState`.

    Args:
        guidence: tensor multiplied with the "Fw*"/"Bw*" weight matrices to
            produce the initial LSTM states.
        newNet: not used by this method.
    """

    def initial_state(direction):
        # "<direction>1" parameters give the first tuple field,
        # "<direction>2" parameters the second.
        first = tf.nn.relu(
            tf.matmul(guidence, self.weights[direction + "1"]) +
            self.biases[direction + "1"])
        second = tf.nn.relu(
            tf.matmul(guidence, self.weights[direction + "2"]) +
            self.biases[direction + "2"])
        return tf.nn.rnn_cell.LSTMStateTuple(first, second)

    def dropout_lstm_cell():
        # Dropout is applied to the cell inputs only; outputs are kept.
        return tf.compat.v1.nn.rnn_cell.DropoutWrapper(
            tf.nn.rnn_cell.BasicLSTMCell(self.nHidden),
            input_keep_prob=self.pKeep,
            output_keep_prob=1.0)

    with tf.variable_scope("training_variable"):
        embedded = tf.nn.embedding_lookup(self.embedding, self.X)

        forward_state = initial_state("Fw")
        backward_state = initial_state("Bw")
        forward_cell = dropout_lstm_cell()
        backward_cell = dropout_lstm_cell()

        direction_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=forward_cell,
            cell_bw=backward_cell,
            inputs=embedded,
            initial_state_fw=forward_state,
            initial_state_bw=backward_state,
            dtype=tf.float32)

        joined = tf.concat(direction_outputs, axis=2)
        self.outputs = joined
        self.RNNState = tf.reduce_mean(joined, axis=1)
def _build_net(self):
    """Create the evaluation and target networks, the loss and the train op.

    Sets `self.state`, `self.q_target`, `self.q_eval`, `self.loss`,
    `self._train_op`, `self.state_` and `self.q_next`. Both networks share
    the same three-layer architecture but own separate variables, registered
    in the 'eval_net_params' and 'target_net_params' collections.
    """

    def dense_relu(index, inputs, n_in, n_out, collections, w_init, b_init):
        # One ReLU layer with variables 'w<index>'/'b<index>' in scope 'l<index>'.
        with tf.variable_scope('l%d' % index):
            weights = tf.get_variable('w%d' % index, [n_in, n_out],
                                      initializer=w_init,
                                      collections=collections)
            bias = tf.get_variable('b%d' % index, [1, n_out],
                                   initializer=b_init,
                                   collections=collections)
            return tf.nn.relu(tf.matmul(inputs, weights) + bias)

    def build_layer(s, c_names, n_l1, n_l2, w_initializer, b_initializer):
        # features -> n_l1 -> n_l2 -> n_actions, ReLU after every layer.
        hidden1 = dense_relu(1, s, self.n_features, n_l1, c_names,
                             w_initializer, b_initializer)
        hidden2 = dense_relu(2, hidden1, n_l1, n_l2, c_names,
                             w_initializer, b_initializer)
        return dense_relu(3, hidden2, n_l2, self.n_actions, c_names,
                          w_initializer, b_initializer)

    # ---- evaluation network ----
    self.state = tf.placeholder(tf.float32, [None, self.n_features],
                                name='state')
    # Expected network output, fed during training.
    self.q_target = tf.placeholder(tf.float32, [None, self.n_actions],
                                   name='q_target')
    with tf.variable_scope('eval_net'):
        c_names = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        n_l1 = 64
        n_l2 = 64
        w_initializer = tf.random_normal_initializer(0.0, 0.3)
        b_initializer = tf.random_normal_initializer(0., 0.3)
        self.q_eval = build_layer(self.state, c_names, n_l1, n_l2,
                                  w_initializer, b_initializer)

    with tf.variable_scope('loss'):
        squared_error = tf.squared_difference(self.q_target, self.q_eval)
        self.loss = tf.reduce_mean(squared_error)

    with tf.variable_scope('train'):
        optimizer = tf.train.RMSPropOptimizer(self.learning_rate)
        self._train_op = optimizer.minimize(self.loss)

    # ---- target network (same sizes and initializers as above) ----
    self.state_ = tf.placeholder(tf.float32, [None, self.n_features],
                                 name='state_')
    with tf.variable_scope('target_net'):
        c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        self.q_next = build_layer(self.state_, c_names, n_l1, n_l2,
                                  w_initializer, b_initializer)
def predicting(self, rate):
    """Build the two-layer prediction head with its loss and accuracy.

    Args:
        rate: passed as `pos_weight` to the weighted cross-entropy.

    Returns:
        Tuple `(loss, accuracy, predictPossibility)`: the mean weighted
        cross-entropy plus an L2 penalty, the fraction of predictions
        (thresholded at 0.5) equal to `self.y`, and the sigmoid outputs.
    """
    hidden_pre = (tf.matmul(self.concatInput, self.weights["MLP1"]) +
                  self.biases["MLP1"])
    hidden = tf.nn.relu(hidden_pre)
    logits = tf.matmul(hidden, self.weights["MLP2"]) + self.biases["MLP2"]
    predictPossibility = tf.nn.sigmoid(logits)

    hard_labels = tf.cast(predictPossibility > 0.5, tf.float32)
    matches = tf.cast(tf.equal(hard_labels, self.y), tf.float32)
    accuracy = tf.reduce_mean(matches)

    per_example = tf.nn.weighted_cross_entropy_with_logits(targets=self.y,
                                                           logits=logits,
                                                           pos_weight=rate)
    data_loss = tf.reduce_mean(per_example)

    # L2 penalty over the global variables under scope 'training_variable'.
    scoped_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    'training_variable')
    penalties = [tf.nn.l2_loss(v) for v in scoped_vars]
    regulariser = self.l2_para * tf.reduce_sum(penalties)

    loss = data_loss + regulariser
    return loss, accuracy, predictPossibility
def pca(x, dim=2):
    """Project the rows of `x` onto their top `dim` principal components.

    Args:
        x: rank-2 input matrix; rows are treated as samples and columns as
            features.
        dim: number of dimensions after the reduction. Must be smaller than
            the number of columns of `x` when that is statically known.

    Returns:
        Tensor of shape (rows, dim) holding the projected samples, ordered
        from the largest-eigenvalue component to the smallest.

    Raises:
        ValueError: if the column count is statically known and `dim` is not
            smaller than it.
    """
    with tf.name_scope("PCA"):
        n_rows, n_cols = x.get_shape().as_list()

        # Validate in Python: `assert not tf.assert_less(...)` always failed
        # in graph mode because the returned op object is truthy.
        if n_cols is not None and dim >= n_cols:
            raise ValueError(
                "dim (%d) must be smaller than the number of columns (%d)" %
                (dim, n_cols))

        # Unbiased covariance denominator; fall back to the dynamic shape
        # when the number of rows is not known at graph-construction time.
        if n_rows is not None:
            denominator = float(n_rows - 1)
        else:
            denominator = tf.cast(tf.shape(x)[0] - 1, x.dtype)

        # Centre every feature (column). The covariance below is
        # feature-by-feature, so the mean must be taken over the samples.
        feature_mean = tf.reduce_mean(x, axis=0)
        x_new = x - feature_mean

        cov = tf.matmul(x_new, x_new, transpose_a=True) / denominator
        e, v = tf.linalg.eigh(cov, name="eigh")

        # Indices of the `dim` largest eigenvalues, largest first.
        top_indices = tf.math.top_k(e, sorted=True, k=dim)[1]
        # eigh returns eigenvectors as COLUMNS of `v`, so gather on axis 1.
        components = tf.gather(v, top_indices, axis=1)

        return tf.matmul(x_new, components)
def train(x_train, y_train):
    """Fit a linear model with logistic loss and L1-proximal gradient descent.

    After every mini-batch step the weights are additionally shifted by the
    module-level `threshold` and written back to the variable. The
    hyper-parameters `learn_rate`, `n_epochs`, `batch_size` and `threshold`
    are read from module scope.

    Args:
        x_train: 2-D array of shape (n_samples, n_features).
        y_train: 2-D array with one row per sample.
            # presumably labels in {-1, +1}, given the loss form -- TODO confirm

    Returns:
        Tuple `(curr_w, curr_b)`: the weights after the last thresholding
        step (or the initial weights if no batch was processed) and the bias.
    """
    n_samples, n_features = x_train.shape

    # The weight shape must match the feature count of the fed batches, so
    # derive it from the data rather than from a separate global.
    w = tf.Variable(np.random.rand(n_features, 1).astype(dtype='float32'),
                    name="weight")
    b = tf.Variable(0.0, dtype=tf.float32, name="bias")
    x = tf.placeholder(dtype=tf.float32, name='x')
    y = tf.placeholder(dtype=tf.float32, name='y')

    predictions = tf.matmul(x, w) + b
    loss = tf.reduce_mean(
        tf.log(1 + tf.exp(tf.multiply(-1.0 * y, predictions))))
    optimizer = tf.train.ProximalGradientDescentOptimizer(
        learning_rate=learn_rate,
        l1_regularization_strength=0.1).minimize(loss)

    # Build the write-back op ONCE. Creating tf.assign inside the batch loop
    # adds a new op (and constant) to the graph on every step.
    w_update = tf.placeholder(dtype=tf.float32,
                              shape=[n_features, 1],
                              name='w_update')
    assign_w = tf.assign(w, w_update)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Defined up front so the return value exists even for empty input.
        curr_w, curr_b = sess.run([w, b])

        for _ in range(n_epochs):
            for start in range(0, n_samples, batch_size):
                stop = min(n_samples, start + batch_size)
                sess.run([optimizer],
                         feed_dict={
                             x: x_train[start:stop, :],
                             y: y_train[start:stop, :]
                         })
                curr_w, curr_b = sess.run([w, b])

                # Vectorised form of the original per-element rule.
                # NOTE(review): weights inside [-threshold, threshold] are
                # also shifted down instead of being set to zero, which
                # differs from standard soft-thresholding -- confirm intent.
                curr_w = np.where(curr_w < -threshold,
                                  curr_w + threshold,
                                  curr_w - threshold).astype(curr_w.dtype)
                sess.run([assign_w], feed_dict={w_update: curr_w})

    return curr_w, curr_b
def __init__(self,
             input_width=227,
             input_height=227,
             input_channels=3,
             num_classes=1000,
             learning_rate=0.01,
             momentum=0.9,
             keep_prob=0.5):
    """Build the whole AlexNet graph: inputs, eight layers, loss, training, accuracy.

    Hyper-parameter defaults follow the AlexNet article: learning rate 0.01,
    momentum 0.9, weights drawn from a zero-mean Gaussian with standard
    deviation 0.01. Per the article, biases of conv layers 2, 4 and 5 and of
    the fully-connected hidden layers start at 1, the remaining ones at 0.

    Exposes `self.X`, `self.Y`, `self.dropout_keep_prob`,
    `self.training_operation` and `self.accuracy_operation`.
    """
    # Hyper-parameters.
    self.input_width = input_width
    self.input_height = input_height
    self.input_channels = input_channels
    self.num_classes = num_classes
    self.learning_rate = learning_rate
    self.momentum = momentum
    self.keep_prob = keep_prob
    self.random_mean = 0
    self.random_stddev = 0.01

    def conv(tensor, kernel, filters, stride, padding, bias_one):
        # Square kernel, equal strides in x and y.
        return self.__conv(input=tensor,
                           filter_width=kernel,
                           filter_height=kernel,
                           filters_count=filters,
                           stride_x=stride,
                           stride_y=stride,
                           padding=padding,
                           init_biases_with_the_constant_1=bias_one)

    def pool(tensor):
        # 3x3 max-pool, stride 2, no padding.
        return self.__max_pool(input=tensor,
                               filter_width=3,
                               filter_height=3,
                               stride_x=2,
                               stride_y=2,
                               padding='VALID')

    # ------------------------------ inputs ------------------------------
    # Images: 227x227x3 by default.
    with tf.name_scope('input'):
        image_shape = [
            None, self.input_height, self.input_width, self.input_channels
        ]
        self.X = tf.placeholder(dtype=tf.float32, shape=image_shape, name='X')

    # Labels: one row of `num_classes` values per image.
    with tf.name_scope('labels'):
        self.Y = tf.placeholder(dtype=tf.float32,
                                shape=[None, self.num_classes],
                                name='Y')

    # Scalar keep probability for dropout.
    with tf.name_scope('dropout'):
        self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                shape=(),
                                                name='dropout_keep_prob')

    # Layer 1: 227x227x3 -> conv 11x11x96, stride 4, valid -> 55x55x96
    #          -> ReLU -> LRN -> max-pool 3x3/2 -> 27x27x96
    # (96 = 2 * 48: the article splits the filters over two GPUs.)
    with tf.name_scope('layer1'):
        conv1 = conv(self.X, 11, 96, 4, 'VALID', False)
        lrn1 = self.__local_response_normalization(input=conv1)
        pool1 = pool(lrn1)

    # Layer 2: 27x27x96 -> conv 5x5x256, stride 1, same -> 27x27x256
    #          -> ReLU -> LRN -> max-pool 3x3/2 -> 13x13x256
    with tf.name_scope('layer2'):
        conv2 = conv(pool1, 5, 256, 1, 'SAME', True)
        lrn2 = self.__local_response_normalization(input=conv2)
        pool2 = pool(lrn2)

    # Layer 3: 13x13x256 -> conv 3x3x384, stride 1, same -> ReLU -> 13x13x384
    with tf.name_scope('layer3'):
        conv3 = conv(pool2, 3, 384, 1, 'SAME', False)

    # Layer 4: 13x13x384 -> conv 3x3x384, stride 1, same -> ReLU -> 13x13x384
    with tf.name_scope('layer4'):
        conv4 = conv(conv3, 3, 384, 1, 'SAME', True)

    # Layer 5: 13x13x384 -> conv 3x3x256, stride 1, same -> ReLU
    #          -> max-pool 3x3/2 -> 6x6x256
    with tf.name_scope('layer5'):
        conv5 = conv(conv4, 3, 256, 1, 'SAME', True)
        pool5 = pool(conv5)

    # Layer 6: 6x6x256 = 9216 -> fully connected 4096 -> ReLU -> dropout
    with tf.name_scope('layer6'):
        _, pooled_h, pooled_w, pooled_c = pool5.get_shape().as_list()
        flat_size = pooled_h * pooled_w * pooled_c
        flat = tf.reshape(pool5, shape=[-1, flat_size])
        fc6 = self.__fully_connected(input=flat,
                                     inputs_count=flat_size,
                                     outputs_count=4096,
                                     relu=True,
                                     init_biases_with_the_constant_1=True)
        drop6 = self.__dropout(input=fc6)

    # Layer 7: 4096 -> fully connected 4096 -> ReLU -> dropout
    with tf.name_scope('layer7'):
        fc7 = self.__fully_connected(input=drop6,
                                     inputs_count=4096,
                                     outputs_count=4096,
                                     relu=True,
                                     init_biases_with_the_constant_1=True)
        drop7 = self.__dropout(input=fc7)

    # Layer 8: 4096 -> logits, one per class (no ReLU).
    with tf.name_scope('layer8'):
        logits = self.__fully_connected(input=drop7,
                                        inputs_count=4096,
                                        outputs_count=self.num_classes,
                                        relu=False,
                                        name='logits')

    # Per-example softmax cross-entropy.
    with tf.name_scope('cross_entropy'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits, labels=self.Y, name='cross_entropy')
        self.__variable_summaries(cross_entropy)

    # Momentum SGD on the mean cross-entropy, with gradient summaries.
    with tf.name_scope('training'):
        mean_loss = tf.reduce_mean(cross_entropy, name='loss_operation')
        tf.summary.scalar(name='loss', tensor=mean_loss)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=self.learning_rate, momentum=self.momentum)

        # compute/apply are split (instead of minimize) so every gradient
        # can be summarised below.
        grads_and_vars = optimizer.compute_gradients(mean_loss)
        self.training_operation = optimizer.apply_gradients(
            grads_and_vars, name='training_operation')

        for gradient, variable in grads_and_vars:
            if gradient is None:
                continue
            with tf.name_scope(variable.op.name + '/gradients'):
                self.__variable_summaries(gradient)

    # Fraction of examples whose arg-max logit matches the arg-max label.
    with tf.name_scope('accuracy'):
        predicted_class = tf.argmax(logits, 1)
        true_class = tf.argmax(self.Y, 1)
        is_correct = tf.equal(predicted_class,
                              true_class,
                              name='correct_prediction')
        self.accuracy_operation = tf.reduce_mean(tf.cast(
            is_correct, tf.float32),
                                                 name='accuracy_operation')
        tf.summary.scalar(name='accuracy', tensor=self.accuracy_operation)
def build_model():
    """Build the move/spawn prediction graph and register it in collections.

    The network is an encoder/decoder over 32x32 frames with skip
    connections: five conv + max-pool levels down to 1x1, a dense latent
    that also sees the number of turns left, then five transposed-conv
    levels back to 32x32. It predicts six move logits per cell and one
    spawn logit per frame.

    Placeholders are stored in the collections 'frames', 'turns_left',
    'my_ships', 'moves' and 'spawn'; outputs in 'm_logits', 's_logits',
    'loss' and 'optimizer'. Returns None.
    """
    size = 8  # Single channel count for every layer (easier debugging).
    max_s = [1, 2, 2, 1]  # Window and stride of the max pooling.
    learning_rate = 0.0001
    n_levels = 5  # 32 -> 16 -> 8 -> 4 -> 2 -> 1

    # Features: halite_available, others_ship, cargo, self_shipyard.
    frames = tf.placeholder(tf.float32, [None, 32, 32, 4], name="frames")
    turns_left = tf.placeholder(tf.float32, [None, 1], name="turnsleft")
    my_ships = tf.placeholder(tf.float32, [None, 32, 32, 1], name="myships")
    my_ships = tf.cast(my_ships, tf.float32)
    moves = tf.placeholder(tf.uint8, [None, 32, 32, 1], name="moves")
    spawn = tf.placeholder(tf.float32, [None, 1], name="spawn")

    tf.add_to_collection('frames', frames)
    tf.add_to_collection('turns_left', turns_left)
    tf.add_to_collection('my_ships', my_ships)
    tf.add_to_collection('moves', moves)
    tf.add_to_collection('spawn', spawn)

    moves = tf.one_hot(moves, 6)

    # Embed the turn counter and give it 1x1 spatial dims so it can be
    # concatenated with the 1x1 encoder output.
    tl = tf.layers.dense(turns_left, size)
    tl = tf.expand_dims(tl, 1)
    tl = tf.expand_dims(tl, 1)

    # Encoder: keep the pre-pool activations as skip connections.
    skips = []
    encoded = frames
    for _ in range(n_levels):
        activation = tf.layers.conv2d(encoded,
                                      size,
                                      3,
                                      activation=tf.nn.relu,
                                      padding='same')
        skips.append(activation)
        encoded = tf.nn.max_pool(activation, max_s, max_s, padding='VALID')

    final_state = tf.concat([encoded, tl], -1)
    latent = tf.layers.dense(final_state, size, activation=tf.nn.relu)

    # Decoder: upsample by 2, merge with the matching skip, then convolve.
    decoded = latent
    for skip in reversed(skips):
        upsampled = tf.layers.conv2d_transpose(decoded,
                                               size,
                                               3,
                                               2,
                                               activation=tf.nn.relu,
                                               padding='same')
        merged = tf.concat([upsampled, skip], -1)
        decoded = tf.layers.conv2d(merged,
                                   size,
                                   3,
                                   activation=tf.nn.relu,
                                   padding='same')

    # `latent` is [N, 1, 1, size], so the dense output is [N, 1, 1, 1].
    # Drop the two singleton spatial axes so the logits match the [N, 1]
    # `spawn` labels; sigmoid_cross_entropy_with_logits requires equal
    # shapes and rejects the un-squeezed tensor.
    spawn_logits = tf.layers.dense(latent, 1, activation=None)
    spawn_logits = tf.squeeze(spawn_logits, [1, 2])
    moves_logits = tf.layers.conv2d(decoded,
                                    6,
                                    3,
                                    activation=None,
                                    padding='same')

    tf.add_to_collection('m_logits', moves_logits)
    tf.add_to_collection('s_logits', spawn_logits)

    # Move loss, masked by `my_ships` and averaged per frame.
    losses = tf.nn.softmax_cross_entropy_with_logits_v2(labels=moves,
                                                        logits=moves_logits,
                                                        dim=-1)
    losses = tf.expand_dims(losses, -1)
    masked_loss = losses * my_ships
    ships_per_frame = tf.reduce_sum(my_ships, axis=[1, 2])
    frame_loss = tf.reduce_sum(masked_loss, axis=[1, 2])
    # First frames have no ship; the epsilon avoids a division by zero.
    average_frame_loss = frame_loss / (ships_per_frame + 0.00000001)

    spawn_losses = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=spawn, logits=spawn_logits)
    spawn_losses = tf.reduce_mean(spawn_losses)

    loss = tf.reduce_mean(average_frame_loss) + 0.01 * spawn_losses
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    tf.add_to_collection('loss', loss)
    tf.add_to_collection('optimizer', optimizer)
    return
# Script section: train the model, report test accuracy, then start rebuilding
# the graph with the learned weights frozen as constants.
# `cross_entropy`, `x`, `y`, `y_`, `W`, `b` and `mnist` are not defined in this
# section -- presumably created earlier in the file; verify.

# One gradient-descent step (step size 0.01) on the cross-entropy loss.
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(
    cross_entropy)
sess = tf.Session()
# Train
init = tf.initialize_all_variables()
sess.run(init)
# 1000 steps, each on a mini-batch of 100 training examples.
for i in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    train_step.run({x: batch_xs, y_: batch_ys}, sess)
# Test trained model
# A prediction counts as correct when the arg-max of `y` equals that of `y_`.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print(accuracy.eval({x: mnist.test.images, y_: mnist.test.labels}, sess))
# Store variable
# Pull the trained values out of the session before it is closed.
_W = W.eval(sess)
_b = b.eval(sess)
sess.close()
# Create new graph for exporting
g_2 = tf.Graph()
with g_2.as_default():
    # Reconstruct graph
    # The trained weights are embedded as a constant instead of a variable.
    x_2 = tf.placeholder("float", [None, 784], name="input")
    W_2 = tf.constant(_W, name="constant_W")
def main():
    """Fine-tune a GPT-2 checkpoint on a text dataset.

    Reads the command-line arguments from the module-level `parser`, builds
    the training (and optional validation) graph, restores a checkpoint,
    then trains until the step counter reaches 1000 or the user interrupts.
    Checkpoints, generated samples and TensorBoard summaries are written
    periodically; a final checkpoint is written when training ends.

    Raises:
        ValueError: if the requested sample length exceeds the model's
            context window.
        SystemExit: for an unknown optimizer, or when memory-saving
            gradients are combined with gradient accumulation.
    """
    args = parser.parse_args()
    enc = encoder.get_encoder(CHECKPOINT_DIR, args.model_name)
    hparams = model.default_hparams()
    with open(os.path.join(CHECKPOINT_DIR, args.model_name,
                           'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if args.sample_length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    if args.model_name == '345M':
        # args.memory_saving_gradients = True
        if args.optimizer == 'adam':
            args.only_train_transformer_layers = True

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.graph_options.rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.OFF
    with tf.Session(config=config) as sess:
        # ---- training graph: next-token prediction loss ----
        context = tf.placeholder(tf.int32, [args.batch_size, None])
        context_in = randomize(context, hparams, args.noise)
        output = model.model(hparams=hparams, X=context_in)
        # Token t+1 is the label for the logits at position t.
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))

        if args.val_every > 0:
            val_context = tf.placeholder(tf.int32,
                                         [args.val_batch_size, None])
            val_output = model.model(hparams=hparams, X=val_context)
            val_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=val_context[:, 1:],
                    logits=val_output['logits'][:, :-1]))
            val_loss_summary = tf.summary.scalar('val_loss', val_loss)

        tf_sample = sample.sample_sequence(hparams=hparams,
                                           length=args.sample_length,
                                           context=context,
                                           batch_size=args.batch_size,
                                           temperature=1.0,
                                           top_k=args.top_k,
                                           top_p=args.top_p)

        all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
        train_vars = [v for v in all_vars if '/h' in v.name
                      ] if args.only_train_transformer_layers else all_vars

        # ---- optimizer ----
        if args.optimizer == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        elif args.optimizer == 'sgd':
            opt = tf.train.GradientDescentOptimizer(
                learning_rate=args.learning_rate)
        else:
            # exit() accepts a single argument; passing two raised TypeError
            # instead of reporting the bad value.
            exit('Bad optimizer: ' + str(args.optimizer))

        if args.accumulate_gradients > 1:
            if args.memory_saving_gradients:
                exit(
                    "Memory saving gradients are not implemented for gradient accumulation yet."
                )
            opt = AccumulatingOptimizer(opt=opt, var_list=train_vars)
            opt_reset = opt.reset()
            opt_compute = opt.compute_gradients(loss)
            opt_apply = opt.apply_gradients()
            summary_loss = tf.summary.scalar('loss', opt_apply)
        else:
            if args.memory_saving_gradients:
                opt_grads = memory_saving_gradients.gradients(
                    loss, train_vars)
            else:
                opt_grads = tf.gradients(loss, train_vars)
            opt_grads = list(zip(opt_grads, train_vars))
            opt_apply = opt.apply_gradients(opt_grads)
            summary_loss = tf.summary.scalar('loss', loss)

        summary_lr = tf.summary.scalar('learning_rate', args.learning_rate)
        summaries = tf.summary.merge([summary_lr, summary_loss])

        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))

        saver = tf.train.Saver(var_list=all_vars,
                               max_to_keep=5,
                               keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())

        # ---- checkpoint selection ----
        if args.restore_from == 'latest':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.run_name))
            if ckpt is None:
                # Get fresh GPT weights if new run.
                ckpt = tf.train.latest_checkpoint(
                    os.path.join(CHECKPOINT_DIR, args.model_name))
        elif args.restore_from == 'fresh':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.model_name))
        else:
            ckpt = tf.train.latest_checkpoint(args.restore_from)
        print('Loading checkpoint', ckpt)
        saver.restore(sess, ckpt)

        # ---- data ----
        print('Loading dataset...')
        chunks = load_dataset(enc,
                              args.dataset,
                              args.combine,
                              encoding=args.encoding)
        data_sampler = Sampler(chunks)
        if args.val_every > 0:
            if args.val_dataset:
                val_chunks = load_dataset(enc,
                                          args.val_dataset,
                                          args.combine,
                                          encoding=args.encoding)
            else:
                val_chunks = chunks
        print('dataset has', data_sampler.total_size, 'tokens')
        print('Training...')

        if args.val_every > 0:
            # Sample from validation set once with fixed seed to make
            # it deterministic during training as well as across runs.
            val_data_sampler = Sampler(val_chunks, seed=1)
            val_batches = [[
                val_data_sampler.sample(1024)
                for _ in range(args.val_batch_size)
            ] for _ in range(args.val_batch_count)]

        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r') as fp:
                counter = int(fp.read()) + 1

        def save():
            # Write a checkpoint and record the current step number.
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
                       global_step=counter)
            with open(counter_path, 'w') as fp:
                fp.write(str(counter) + '\n')

        def generate_samples():
            # Generate `args.sample_num` samples and write them to a file.
            print('Generating samples...')
            context_tokens = data_sampler.sample(1)
            all_text = []
            index = 0
            while index < args.sample_num:
                out = sess.run(
                    tf_sample,
                    feed_dict={context: args.batch_size * [context_tokens]})
                for i in range(min(args.sample_num - index, args.batch_size)):
                    text = enc.decode(out[i])
                    text = '======== SAMPLE {} ========\n{}\n'.format(
                        index + 1, text)
                    all_text.append(text)
                    index += 1
                print(text)
            maketree(os.path.join(SAMPLE_DIR, args.run_name))
            with open(os.path.join(SAMPLE_DIR, args.run_name,
                                   'samples-{}').format(counter),
                      'w',
                      encoding=args.encoding) as fp:
                fp.write('\n'.join(all_text))

        def validation():
            # Average the loss over the fixed validation batches and log it.
            print('Calculating validation loss...')
            losses = []
            for batch in tqdm.tqdm(val_batches):
                losses.append(
                    sess.run(val_loss, feed_dict={val_context: batch}))
            v_val_loss = np.mean(losses)
            # Feed the averaged value into the loss tensor so the summary
            # reports the mean over all validation batches.
            v_summary = sess.run(val_loss_summary,
                                 feed_dict={val_loss: v_val_loss})
            summary_log.add_summary(v_summary, counter)
            summary_log.flush()
            print('[{counter} | {time:2.2f}] validation loss = {loss:2.2f}'.
                  format(counter=counter,
                         time=time.time() - start_time,
                         loss=v_val_loss))

        def sample_batch():
            return [data_sampler.sample(1024) for _ in range(args.batch_size)]

        # (weighted loss sum, weight sum) of an exponential moving average.
        avg_loss = (0.0, 0.0)
        start_time = time.time()

        try:
            while counter < 1000:
                if counter % args.save_every == 0:
                    save()
                if counter % args.sample_every == 0:
                    generate_samples()
                if args.val_every > 0 and (counter % args.val_every == 0
                                           or counter == 1):
                    validation()

                if args.accumulate_gradients > 1:
                    sess.run(opt_reset)
                    for _ in range(args.accumulate_gradients):
                        sess.run(opt_compute,
                                 feed_dict={context: sample_batch()})
                    (v_loss, v_summary) = sess.run((opt_apply, summaries))
                else:
                    (_, v_loss, v_summary) = sess.run(
                        (opt_apply, loss, summaries),
                        feed_dict={context: sample_batch()})

                summary_log.add_summary(v_summary, counter)

                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))

                counter += 1
        except KeyboardInterrupt:
            print('interrupted')
            save()
        else:
            # The loop is bounded, so it can end without hitting a periodic
            # save; persist the final weights instead of discarding them.
            save()
        finally:
            # Flush pending summary events and release the writer.
            summary_log.close()
def gain(data_x, gain_parameters):
    '''Impute missing values in data_x

    Args:
      - data_x: original data with missing values (NaN marks a missing entry)
      - gain_parameters: GAIN network parameters:
        - batch_size: Batch size
        - hint_rate: Hint rate
        - alpha: Hyperparameter (weight of the reconstruction loss in the
          generator objective)
        - iterations: Iterations

    Returns:
      - imputed_data: imputed data
    '''
    # Define mask matrix: 1 where a value is observed, 0 where it is missing
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    # Replace NaN with 0 (the default fill). The second positional argument
    # of nan_to_num is `copy`, not the fill value, so it is not passed here.
    norm_data_x = np.nan_to_num(norm_data)

    ## GAIN architecture
    # Input placeholders
    tf.disable_v2_behavior()
    # Data vector
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs

    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    # Generator variables
    # Data + Mask as inputs (Random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))

    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)

    # Combine with observed data
    Hat_X = X * M + G_sample * (1 - M)

    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss (1e-8 keeps the logarithms finite)
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8)
                                  + (1 - M) * tf.log(1. - D_prob + 1e-8))

    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))

    # Reconstruction error on the observed entries only
    MSE_loss = \
        tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)

    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    # The context manager closes the session (and frees its resources) even
    # if training raises; the original never closed it.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Start Iterations
        for it in tqdm(range(iterations)):

            # Sample batch
            batch_idx = sample_batch_index(no, batch_size)
            X_mb = norm_data_x[batch_idx, :]
            M_mb = data_m[batch_idx, :]
            # Sample random vectors
            Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
            # Sample hint vectors
            H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
            H_mb = M_mb * H_mb_temp

            # Combine random vectors with observed vectors
            X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

            feed = {X: X_mb, M: M_mb, H: H_mb}
            # One discriminator step, then one generator step, per iteration.
            sess.run([D_solver, D_loss_temp], feed_dict=feed)
            sess.run([G_solver, G_loss_temp, MSE_loss], feed_dict=feed)

        ## Return imputed data
        Z_mb = uniform_sampler(0, 0.01, no, dim)
        M_mb = data_m
        X_mb = norm_data_x
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]

    # Keep observed values; use generator output only where data was missing
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
def main(trainModel=True,
         buildConfusionMatrix=True,
         restore=False,
         buildClassifiedMatrix=True):
    """Build the CNN classifier graph, then optionally train and evaluate it.

    The graph is: two convolution layers -> dense ReLU layer (1000 units) with
    dropout -> dense softmax layer over ``CLASSES`` outputs.

    Args:
        trainModel: When true, call ``train`` with the graph tensors, the
            session, the saver and the dataset.
        buildConfusionMatrix: When true, compute the confusion matrix on the
            test split and hand it to ``draw_confusion_matrix``.
        restore: When true, restore variables from ``SAVE_PATH`` right after
            initialisation (and before any training or evaluation).
        buildClassifiedMatrix: When true, for every (true class, class column)
            pair find the test sample with the highest predicted probability
            and hand the result to ``draw_max_failure_pictures``.
    """
    tf.disable_v2_behavior()

    input_images = tf.placeholder(tf.float32, [None, 28, 28], name="Input")
    real = tf.placeholder(tf.float32, [None, CLASSES], name="real_classes")

    # Convolutional feature extractor. The flatten below expects a 7x7x56
    # output, so each layer presumably halves the spatial size - confirm
    # against create_conv_layer.
    layer1 = create_conv_layer(tf.reshape(input_images, [-1, 28, 28, 1]),
                               1,
                               28, [5, 5], [2, 2],
                               name="conv_no_pool")
    layer2 = create_conv_layer(layer1,
                               28,
                               56, [5, 5], [2, 2],
                               name='conv_with_pool')
    conv_result = tf.reshape(layer2, [-1, 7 * 7 * 56])

    # Hidden dense layer.
    relu_layer_weight = tf.Variable(tf.truncated_normal([7 * 7 * 56, 1000],
                                                        stddev=STDDEV * 2),
                                    name='relu_layer_weight')
    # The graph name 'rely_layer_bias' (sic) is kept unchanged so existing
    # checkpoints stay loadable; only the Python local is spelt correctly.
    relu_layer_bias = tf.Variable(tf.truncated_normal([1000],
                                                      stddev=STDDEV / 2),
                                  name='rely_layer_bias')
    relu_layer = tf.matmul(conv_result, relu_layer_weight) + relu_layer_bias
    relu_layer = tf.nn.relu(relu_layer)
    # NOTE(review): dropout is wired in unconditionally, so it is also active
    # for the evaluation runs below. DROPOUT is passed as the second positional
    # argument (keep_prob in the TF1 API) - confirm this is intended.
    relu_layer = tf.nn.dropout(relu_layer, DROPOUT)

    # Output layer.
    final_layer_weight = tf.Variable(tf.truncated_normal([1000, CLASSES],
                                                         stddev=STDDEV * 2),
                                     name='final_layer_weight')
    final_layer_bias = tf.Variable(tf.truncated_normal([CLASSES],
                                                       stddev=STDDEV / 2),
                                   name='final_layer_bias')
    final_layer = tf.matmul(relu_layer, final_layer_weight) + final_layer_bias
    predicts = tf.nn.softmax(final_layer)

    # Clip before taking logs so that log(0) can never produce NaN/inf.
    predicts_for_log = tf.clip_by_value(predicts, 1e-9, 0.999999999)
    # Element-wise binary cross-entropy over the clipped softmax output,
    # summed over classes and averaged over the batch (used instead of
    # tf.nn.softmax_cross_entropy_with_logits).
    loss = -tf.reduce_mean(
        tf.reduce_sum(real * tf.log(predicts_for_log) +
                      (1 - real) * tf.log(1 - predicts_for_log),
                      axis=1),
        axis=0)
    optimiser = tf.train.GradientDescentOptimizer(
        learning_rate=LEARNING_RATE).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(real, axis=1),
                                  tf.argmax(predicts, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    confusion_matrix = tf.confusion_matrix(labels=tf.argmax(real, axis=1),
                                           predictions=tf.argmax(predicts,
                                                                 axis=1),
                                           num_classes=CLASSES)
    saver = tf.train.Saver()

    # get_mnist_dataset() can be substituted here to run on MNIST instead.
    dataset = get_fashion_dataset()

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        if restore:
            saver.restore(session, SAVE_PATH)
        if trainModel:
            train(input_images, real, session, optimiser, loss, accuracy,
                  saver, dataset)
        if buildConfusionMatrix:
            test_cm = session.run(confusion_matrix,
                                  feed_dict={
                                      input_images: dataset.test_x,
                                      real: dataset.test_y
                                  })
            draw_confusion_matrix(test_cm)
        if buildClassifiedMatrix:
            all_probs = session.run(predicts,
                                    feed_dict={
                                        input_images: dataset.test_x,
                                        real: dataset.test_y
                                    })
            # max_failure_picture_index[t][j] holds (sample index, probability)
            # of the test sample with true class t that received the highest
            # probability for class j; (-1, -1.0) means "none seen yet".
            max_failure_picture_index = [[(-1, -1.0)] * CLASSES
                                         for _ in range(CLASSES)]
            for i, probs in enumerate(all_probs):
                # Use a distinct name: rebinding `real` here would shadow the
                # label placeholder used as a feed_dict key above.
                true_class = np.argmax(dataset.test_y[i])
                best_for_class = max_failure_picture_index[true_class]
                for j in range(CLASSES):
                    if best_for_class[j][1] < probs[j]:
                        best_for_class[j] = (i, probs[j])
            draw_max_failure_pictures(dataset.test_x,
                                      max_failure_picture_index)
def __init__(self, nHidden, seqLen):
    """Build the three feature extractors and the modality-fusion graph.

    The constructor instantiates the attribute (``ExtractFeature``), image
    (``ImageFeature``) and text (``TextFeature``) sub-networks, creates every
    weight and bias used by the attention / fusion layers, calls
    ``self.Attention`` once per modality, and finally combines the three
    resulting vectors into ``self.concatInput`` as a softmax-weighted sum.

    Args:
        nHidden: Forwarded to ``TextFeature``; ``2 * nHidden`` is also used as
            the input width of the text projection matrices.
        seqLen: Forwarded to ``TextFeature``.
    """
    self.representation_score = {}
    # Target placeholder with one value per example.
    self.y = tf.placeholder(tf.float32, shape=[None, 1])
    self.extractFeature = ExtractFeature.ExtractFeature()
    self.imageFeature = ImageFeature.ImageFeature()
    # Mean of the image feature's outputLS over its first axis; used as the
    # image-side guidance vector below and inside TextFeature.
    newNet = tf.reduce_mean(self.imageFeature.outputLS, axis=0)
    self.textFeature = TextFeature.TextFeature(
        nHidden, seqLen, self.extractFeature.finalState, newNet)
    self.l2_para = 1e-7
    # NOTE(review): the variable scope is assumed to cover only the two
    # parameter dictionaries - confirm against the original layout.
    with tf.variable_scope("training_variable"):
        # In every entry below `name=` is given to the initial-value op
        # (tf.truncated_normal / tf.constant), not to tf.Variable itself.
        # Keys of the form "<prefix>_<k>" are presumably looked up by
        # self.Attention from the prefixes passed to it - confirm.
        self.weights = {
            "MLP1": tf.Variable(
                tf.truncated_normal(shape=[512, 256],
                                    stddev=0.08,
                                    name="MLP1_W")),
            "MLP2": tf.Variable(
                tf.truncated_normal(shape=[256, 1],
                                    stddev=0.08,
                                    name="MLP2_W")),
            # --- attribute attention ---
            "ATT_attr1_1": tf.Variable(
                tf.truncated_normal(shape=[
                    self.imageFeature.defaultFeatureSize +
                    self.extractFeature.embSize,
                    int(self.imageFeature.defaultFeatureSize / 2 +
                        self.extractFeature.embSize / 2)
                ],
                                    stddev=0.08,
                                    name="ATT_attr1_1")),
            "ATT_attr1_2": tf.Variable(
                tf.truncated_normal(shape=[
                    self.textFeature.nHidden * 2 +
                    self.extractFeature.embSize,
                    int(self.textFeature.nHidden +
                        self.extractFeature.embSize / 2)
                ],
                                    stddev=0.08,
                                    name="ATT_attr1_2")),
            "ATT_attr1_3": tf.Variable(
                tf.truncated_normal(shape=[
                    2 * self.extractFeature.embSize,
                    self.extractFeature.embSize
                ],
                                    stddev=0.08,
                                    name="ATT_attr1_3")),
            "ATT_attr2_1": tf.Variable(
                tf.truncated_normal(shape=[
                    int(self.imageFeature.defaultFeatureSize / 2 +
                        self.extractFeature.embSize / 2), 1
                ],
                                    stddev=0.08,
                                    name="ATT_attr2_1")),
            "ATT_attr2_2": tf.Variable(
                tf.truncated_normal(shape=[
                    int(self.textFeature.nHidden +
                        self.extractFeature.embSize / 2), 1
                ],
                                    stddev=0.08,
                                    name="ATT_attr2_2")),
            "ATT_attr2_3": tf.Variable(
                tf.truncated_normal(shape=[self.extractFeature.embSize, 1],
                                    stddev=0.08,
                                    name="ATT_attr2_3")),
            # --- image attention (op names use "image", keys use "img") ---
            "ATT_img1_1": tf.Variable(
                tf.truncated_normal(shape=[
                    self.imageFeature.defaultFeatureSize +
                    self.textFeature.nHidden * 2,
                    int(self.imageFeature.defaultFeatureSize / 2 +
                        self.textFeature.nHidden)
                ],
                                    stddev=0.08,
                                    name="ATT_image1_1")),
            "ATT_img1_2": tf.Variable(
                tf.truncated_normal(shape=[
                    self.imageFeature.defaultFeatureSize +
                    self.extractFeature.embSize,
                    int(self.imageFeature.defaultFeatureSize / 2 +
                        self.extractFeature.embSize / 2)
                ],
                                    stddev=0.08,
                                    name="ATT_image1_2")),
            "ATT_img1_3": tf.Variable(
                tf.truncated_normal(shape=[
                    self.imageFeature.defaultFeatureSize * 2,
                    self.imageFeature.defaultFeatureSize
                ],
                                    stddev=0.08,
                                    name="ATT_image1_3")),
            "ATT_img2_1": tf.Variable(
                tf.truncated_normal(shape=[
                    int(self.imageFeature.defaultFeatureSize / 2 +
                        self.textFeature.nHidden), 1
                ],
                                    stddev=0.08,
                                    name="ATT_image2_1")),
            "ATT_img2_2": tf.Variable(
                tf.truncated_normal(shape=[
                    int(self.imageFeature.defaultFeatureSize / 2 +
                        self.extractFeature.embSize / 2), 1
                ],
                                    stddev=0.08,
                                    name="ATT_image2_2")),
            "ATT_img2_3": tf.Variable(
                tf.truncated_normal(
                    shape=[self.imageFeature.defaultFeatureSize, 1],
                    stddev=0.08,
                    name="ATT_image2_3")),
            # --- text attention ---
            "ATT_text1_1": tf.Variable(
                tf.truncated_normal(shape=[
                    self.imageFeature.defaultFeatureSize +
                    self.textFeature.nHidden * 2,
                    int(self.imageFeature.defaultFeatureSize / 2 +
                        self.textFeature.nHidden)
                ],
                                    stddev=0.08,
                                    name="ATT_text1_1")),
            "ATT_text1_2": tf.Variable(
                tf.truncated_normal(shape=[
                    self.textFeature.nHidden * 2 +
                    self.extractFeature.embSize,
                    int(self.textFeature.nHidden +
                        self.extractFeature.embSize / 2)
                ],
                                    stddev=0.08,
                                    name="ATT_text1_2")),
            "ATT_text1_3": tf.Variable(
                tf.truncated_normal(shape=[
                    self.textFeature.nHidden * 4,
                    self.textFeature.nHidden * 2
                ],
                                    stddev=0.08,
                                    name="ATT_text1_3")),
            "ATT_text2_1": tf.Variable(
                tf.truncated_normal(shape=[
                    int(self.imageFeature.defaultFeatureSize / 2 +
                        self.textFeature.nHidden), 1
                ],
                                    stddev=0.08,
                                    name="ATT_text2_1")),
            "ATT_text2_2": tf.Variable(
                tf.truncated_normal(shape=[
                    int(self.textFeature.nHidden +
                        self.extractFeature.embSize / 2), 1
                ],
                                    stddev=0.08,
                                    name="ATT_text2_2")),
            "ATT_text2_3": tf.Variable(
                tf.truncated_normal(
                    shape=[self.textFeature.nHidden * 2, 1],
                    stddev=0.08,
                    name="ATT_text2_3")),
            # --- modality fusion: projections of each vector to 512 dims ---
            # "*1" matrices feed the scoring branch, "*2" the value branch.
            "ATT_WI1": tf.Variable(
                tf.truncated_normal(
                    shape=[self.imageFeature.defaultFeatureSize, 512],
                    stddev=0.08,
                    name="ATT_WI")),
            "ATT_WT1": tf.Variable(
                tf.truncated_normal(shape=[2 * nHidden, 512],
                                    stddev=0.08,
                                    name="ATT_WT")),
            # NOTE(review): 200 is hard-coded here and in ATT_WA2;
            # presumably it equals extractFeature.embSize - confirm.
            "ATT_WA1": tf.Variable(
                tf.truncated_normal(shape=[200, 512],
                                    stddev=0.08,
                                    name="ATT_WA")),
            "ATT_WI2": tf.Variable(
                tf.truncated_normal(
                    shape=[self.imageFeature.defaultFeatureSize, 512],
                    stddev=0.08,
                    name="ATT_WI2")),
            "ATT_WT2": tf.Variable(
                tf.truncated_normal(shape=[2 * nHidden, 512],
                                    stddev=0.08,
                                    name="ATT_WT2")),
            "ATT_WA2": tf.Variable(
                tf.truncated_normal(shape=[200, 512],
                                    stddev=0.08,
                                    name="ATT_WA2")),
            # One scalar scoring vector per modality.
            "ATT_WF_1": tf.Variable(
                tf.truncated_normal(shape=[512, 1],
                                    stddev=0.08,
                                    name="ATT_WF_1")),
            "ATT_WF_2": tf.Variable(
                tf.truncated_normal(shape=[512, 1],
                                    stddev=0.08,
                                    name="ATT_WF_2")),
            "ATT_WF_3": tf.Variable(
                tf.truncated_normal(shape=[512, 1],
                                    stddev=0.08,
                                    name="ATT_WF_3")),
        }
        # All biases start at the constant 0.01.
        self.biases = {
            "MLP1": tf.Variable(
                tf.constant(0.01,
                            shape=[256],
                            dtype=tf.float32,
                            name="MLP1_b")),
            "MLP2": tf.Variable(
                tf.constant(0.01, shape=[1], dtype=tf.float32,
                            name="MLP2_b")),
            "ATT_attr1_1": tf.Variable(
                tf.constant(
                    0.01,
                    shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2)
                    ],
                    name="ATT_attr1_1")),
            "ATT_attr1_2": tf.Variable(
                tf.constant(0.01,
                            shape=[
                                int(self.textFeature.nHidden +
                                    self.extractFeature.embSize / 2)
                            ],
                            name="ATT_attr1_2")),
            "ATT_attr1_3": tf.Variable(
                tf.constant(0.01,
                            shape=[self.extractFeature.embSize],
                            name="ATT_attr1_3")),
            "ATT_attr2_1": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_attr2_1")),
            "ATT_attr2_2": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_attr2_2")),
            "ATT_attr2_3": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_attr2_3")),
            "ATT_img1_1": tf.Variable(
                tf.constant(
                    0.01,
                    shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden)
                    ],
                    name="ATT_image1_1")),
            "ATT_img1_2": tf.Variable(
                tf.constant(
                    0.01,
                    shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2)
                    ],
                    name="ATT_image1_2")),
            "ATT_img1_3": tf.Variable(
                tf.constant(0.01,
                            shape=[self.imageFeature.defaultFeatureSize],
                            name="ATT_image1_3")),
            "ATT_img2_1": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_image2_1")),
            "ATT_img2_2": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_image2_2")),
            "ATT_img2_3": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_image2_3")),
            "ATT_text1_1": tf.Variable(
                tf.constant(
                    0.01,
                    shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden)
                    ],
                    name="ATT_text1_1")),
            "ATT_text1_2": tf.Variable(
                tf.constant(0.01,
                            shape=[
                                int(self.textFeature.nHidden +
                                    self.extractFeature.embSize / 2)
                            ],
                            name="ATT_text1_2")),
            "ATT_text1_3": tf.Variable(
                tf.constant(0.01,
                            shape=[self.textFeature.nHidden * 2],
                            name="ATT_text1_3")),
            "ATT_text2_1": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_text2_1")),
            "ATT_text2_2": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_text2_2")),
            "ATT_text2_3": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_text2_3")),
            # Fusion biases. There is no "ATT_WA1" key: the attribute
            # scoring branch below uses "ATT_WW" instead.
            "ATT_WW": tf.Variable(
                tf.constant(0.01, shape=[512], name="ATT_WW")),
            "ATT_WI": tf.Variable(
                tf.constant(0.01, shape=[512], name="ATT_WI")),
            "ATT_WT": tf.Variable(
                tf.constant(0.01, shape=[512], name="ATT_WT")),
            "ATT_WI1": tf.Variable(
                tf.constant(0.01, shape=[512], name="ATT_WI1")),
            "ATT_WT1": tf.Variable(
                tf.constant(0.01, shape=[512], name="ATT_WT1")),
            "ATT_WA": tf.Variable(
                tf.constant(0.01, shape=[512], name="ATT_WA")),
            "ATT_WF_1": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_WF_1")),
            "ATT_WF_2": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_WF_2")),
            "ATT_WF_3": tf.Variable(
                tf.constant(0.01, shape=[1], name="ATT_WF_3")),
        }
    print("newnet dimension :", newNet)

    # One attention pass per modality; each call receives that modality's
    # summary vector and sequence plus the summaries of the other two.
    # NOTE(review): 196 and 5 are presumably the number of image regions and
    # of attributes per example - confirm against self.Attention.
    imageVec = self.Attention(newNet, self.imageFeature.outputLS,
                              self.textFeature.RNNState,
                              self.extractFeature.finalState, "ATT_img1",
                              "ATT_img2", 196, True)
    textVec = self.Attention(self.textFeature.RNNState,
                             self.textFeature.outputs, newNet,
                             self.extractFeature.finalState, "ATT_text1",
                             "ATT_text2", self.textFeature.seqLen, False)
    attrVec = self.Attention(self.extractFeature.finalState,
                             self.extractFeature.inputEmb, newNet,
                             self.textFeature.RNNState, "ATT_attr1",
                             "ATT_attr2", 5, False)

    # Scoring branch: tanh projection to 512 dims, then one score each.
    attHidden = tf.tanh(
        tf.matmul(imageVec, self.weights["ATT_WI1"]) +
        self.biases["ATT_WI1"])
    attHidden2 = tf.tanh(
        tf.matmul(textVec, self.weights["ATT_WT1"]) +
        self.biases["ATT_WT1"])
    # NOTE(review): uses bias "ATT_WW", unlike the "ATT_WI1"/"ATT_WT1"
    # pattern of the two lines above - confirm this is intentional.
    attHidden3 = tf.tanh(
        tf.matmul(attrVec, self.weights["ATT_WA1"]) + self.biases["ATT_WW"])
    scores1 = tf.matmul(attHidden,
                        self.weights["ATT_WF_1"]) + self.biases["ATT_WF_1"]
    scores2 = tf.matmul(attHidden2,
                        self.weights["ATT_WF_2"]) + self.biases["ATT_WF_2"]
    scores3 = tf.matmul(attHidden3,
                        self.weights["ATT_WF_3"]) + self.biases["ATT_WF_3"]
    # The Python list is converted to one tensor whose first axis indexes the
    # three modalities; the softmax normalises across that axis.
    # NOTE(review): `dim` is the deprecated spelling of `axis` in TF 1.x.
    scoreLS = [scores1, scores2, scores3]
    scoreLS = tf.nn.softmax(scoreLS, dim=0)

    # Value branch: project each modality vector to the shared 512-dim space.
    imageVec = tf.tanh(
        tf.matmul(imageVec, self.weights["ATT_WI2"]) + self.biases["ATT_WI"])
    textVec = tf.tanh(
        tf.matmul(textVec, self.weights["ATT_WT2"]) + self.biases["ATT_WT"])
    attrVec = tf.tanh(
        tf.matmul(attrVec, self.weights["ATT_WA2"]) + self.biases["ATT_WA"])
    # Fused representation: score-weighted sum of the three projections.
    self.concatInput = scoreLS[0] * imageVec + scoreLS[
        1] * textVec + scoreLS[2] * attrVec