def __init__(self, sess, t_test, t_learn_start, model_dir, variables, max_to_keep=20):
    """Set up the step counter, checkpoint saver and summary ops.

    Args:
        sess: TensorFlow session; its graph is passed to the summary writer.
        t_test: stored unchanged as ``self.t_test``.
        t_learn_start: stored unchanged as ``self.t_learn_start``.
        model_dir: sub-directory name used for ``./logs/<model_dir>``.
        variables: list of variables to checkpoint; the step counter
            variable is appended to it for the saver.
        max_to_keep: maximum number of checkpoints retained by the saver.
    """
    self.sess = sess
    self.t_test = t_test
    self.t_learn_start = t_learn_start

    self.reset()
    self.max_avg_ep_reward = 0

    # Non-trainable step counter and the op that increments it by one.
    with tf.variable_scope('t'):
        self.t_op = tf.Variable(0, trainable=False, name='t')
        self.t_add_op = self.t_op.assign_add(1)

    self.model_dir = model_dir
    self.saver = tf.train.Saver(variables + [self.t_op], max_to_keep=max_to_keep)
    self.writer = tf.train.SummaryWriter('./logs/%s' % self.model_dir, self.sess.graph)

    with tf.variable_scope('summary'):
        # (summary constructor, tags) pairs; scalars are created first,
        # then histograms.
        summary_groups = (
            (tf.scalar_summary, [
                'average/reward',
                'average/loss',
                'average/q',
                'episode/max reward',
                'episode/min reward',
                'episode/avg reward',
            ]),
            (tf.histogram_summary, ['episode/rewards']),
        )

        self.summary_placeholders = {}
        self.summary_ops = {}

        for make_summary, tags in summary_groups:
            for tag in tags:
                # Spaces are replaced because they are not used in op names.
                placeholder = tf.placeholder(
                    'float32', None, name=tag.replace(' ', '_'))
                self.summary_placeholders[tag] = placeholder
                self.summary_ops[tag] = make_summary(tag, placeholder)
def _build_net(self):
    """Build the evaluation network, its loss/train op and the target network.

    Both networks use the same two-layer architecture: a ReLU hidden layer
    followed by a linear layer with ``self.n_actions`` outputs. The
    evaluation network reads ``self.s`` and produces ``self.q_eval``; the
    target network reads ``self.s_`` and produces ``self.q_next``.
    """
    def two_layer_net(inputs, collections, hidden_units, weight_init, bias_init):
        # Hidden layer: relu(inputs . w1 + b1).
        with tf.variable_scope('l1'):
            w1 = tf.get_variable(
                'w1', [self.n_features, hidden_units],
                initializer=weight_init, collections=collections)
            b1 = tf.get_variable(
                'b1', [1, hidden_units],
                initializer=bias_init, collections=collections)
            hidden = tf.nn.relu(tf.matmul(inputs, w1) + b1)

        # Output layer: one linear unit per action.
        with tf.variable_scope('l2'):
            w2 = tf.get_variable(
                'w2', [hidden_units, self.n_actions],
                initializer=weight_init, collections=collections)
            b2 = tf.get_variable(
                'b2', [1, self.n_actions],
                initializer=bias_init, collections=collections)
            q_values = tf.matmul(hidden, w2) + b2
        return q_values

    # ------------------ build evaluate_net ------------------
    self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input
    # Target Q-values, used only for calculating the loss.
    self.q_target = tf.placeholder(
        tf.float32, [None, self.n_actions], name='Q_target')

    with tf.variable_scope('eval_net'):
        # Layer configuration shared with the target network below.
        eval_collections = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        hidden_units = 20
        weight_init = tf.random_normal_initializer(0., 0.3)
        bias_init = tf.constant_initializer(0.1)
        self.q_eval = two_layer_net(
            self.s, eval_collections, hidden_units, weight_init, bias_init)

    with tf.variable_scope('loss'):
        squared_error = tf.squared_difference(self.q_target, self.q_eval)
        self.loss = tf.reduce_mean(squared_error)

    with tf.variable_scope('train'):
        optimizer = tf.train.RMSPropOptimizer(self.lr)
        self._train_op = optimizer.minimize(self.loss)

    # ------------------ build target_net ------------------
    self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')  # input

    with tf.variable_scope('target_net'):
        target_collections = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
        self.q_next = two_layer_net(
            self.s_, target_collections, hidden_units, weight_init, bias_init)
def build_greedy_training(self, state, network_states):
    """Builds the training graph for this component along the oracle path.

    Fetches the differentiable fixed embeddings (which also yields the new
    state handle) and the linked embeddings, feeds them to the component
    network and records the produced tensors in `network_states`.

    Args:
      state: MasterState from the 'AdvanceMaster' op that advances the
        underlying master to this component.
      network_states: dictionary of component NetworkState objects.

    Returns:
      state handle: final state after advancing
      cost: regularization cost, possibly associated with embedding matrices
      correct: since no gold path is available, 0.
      total: since no gold path is available, 0.
    """
    logging.info('Building component: %s', self.spec.name)
    stride = state.current_batch_size * self.training_beam_size

    with tf.variable_scope(self.name, reuse=True):
        fetched = fetch_differentiable_fixed_embeddings(self, state, stride)
        state.handle, fixed_embeddings = fetched

    linked_embeddings = []
    for linked_spec in self.spec.linked_feature:
        linked_embeddings.append(
            fetch_linked_embedding(self, network_states, linked_spec))

    with tf.variable_scope(self.name, reuse=True):
        # The positional `True` is the training flag; the two `None`
        # arguments are left unset, as in every training build here.
        tensors = self.network.create(
            fixed_embeddings, linked_embeddings, None, None, True,
            stride=stride)

    update_network_states(self, tensors, network_states, stride)

    cost = self.add_regularizer(tf.constant(0.))
    # No gold path is available, so both counters are constant zero.
    correct = tf.constant(0)
    total = tf.constant(0)
    return state.handle, cost, correct, total
def inference_small_config(x, c):
    """Build the small three-scale residual network described by config `c`.

    The config dict `c` is mutated in place: block type, kernel size,
    stride and the per-scale filter counts/strides are written into it
    before the corresponding layers are built.

    Args:
        x: input tensor passed to the first convolution.
        c: mutable configuration dict; must contain "num_classes" (plus
            whatever the `conv`/`bn`/`stack`/`fc` helpers read).

    Returns:
        The spatially averaged features, or the output of the final `fc`
        layer when ``c["num_classes"]`` is not None.
    """
    c["bottleneck"] = False
    c["ksize"] = 3
    c["stride"] = 1

    with tf.variable_scope("scale1"):
        c["conv_filters_out"] = 16
        c["block_filters_internal"] = 16
        c["stack_stride"] = 1
        x = conv(x, c)
        x = bn(x, c)
        x = activation(x)
        x = stack(x, c)

    with tf.variable_scope("scale2"):
        c["block_filters_internal"] = 32
        c["stack_stride"] = 2
        x = stack(x, c)

    with tf.variable_scope("scale3"):
        c["block_filters_internal"] = 64
        c["stack_stride"] = 2
        x = stack(x, c)

    # post-net: average over axes 1 and 2.
    x = tf.reduce_mean(x, reduction_indices=[1, 2], name="avg_pool")

    # Identity comparison is the correct test for None.
    if c["num_classes"] is not None:
        with tf.variable_scope("fc"):
            x = fc(x, c)

    return x
def _extract_feature_ids(self, state, network_states, during_training):
    """Extracts fixed feature IDs and feeds them to the component network.

    Args:
      state: MasterState from the 'AdvanceMaster' op that advances the
        underlying master to this component.
      network_states: Dictionary of component NetworkState objects.
      during_training: Whether the graph is being constructed during
        training; selects the training or inference beam size.

    Returns:
      state handle: Final state after advancing.
    """
    logging.info('Building component: %s', self.spec.name)

    # The stride is the batch size times the beam size of the current mode.
    if during_training:
        beam_size = self.training_beam_size
    else:
        beam_size = self.inference_beam_size
    stride = state.current_batch_size * beam_size

    with tf.variable_scope(self.name, reuse=True):
        extracted = extract_fixed_feature_ids(self, state, stride)
        state.handle, ids = extracted

    with tf.variable_scope(self.name, reuse=True):
        # No linked embeddings are passed here, only the extracted IDs.
        tensors = self.network.create(
            ids, [], None, None, during_training, stride=stride)

    update_network_states(self, tensors, network_states, stride)
    return state.handle
def __call__(self, inputs, state, scope=None):
    """Run one step of the gated recurrent unit (GRU).

    Args:
        inputs: input tensor for this step.
        state: previous hidden state tensor.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        A pair ``(new_h, new_h)``: the new hidden state, returned both as
        the output and as the next state.

    Side effects:
        ``self.W`` and ``self.b`` are reset to two-element lists holding
        the gate and candidate weight matrices / biases.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
        with tf.variable_scope("Gates"):
            # Reset gate and update gate, computed by one linear map.
            # We start with bias of 1.0 to not reset and not update.
            gates = _linear([inputs, state], 2 * self._num_units, True, 1.0,
                            self.weights_init, self.trainable, self.restore,
                            self.reuse)
            reset_gate, update_gate = array_ops.split(1, 2, gates)
            reset_gate = self._inner_activation(reset_gate)
            update_gate = self._inner_activation(update_gate)

        with tf.variable_scope("Candidate"):
            candidate_input = _linear([inputs, reset_gate * state],
                                      self._num_units, True, 0.,
                                      self.weights_init, self.trainable,
                                      self.restore, self.reuse)
            candidate = self._activation(candidate_input)

        new_h = update_gate * state + (1 - update_gate) * candidate

        # Retrieve RNN Variables (gates first, then candidate).
        self.W, self.b = list(), list()
        for linear_scope in ('Gates/Linear', 'Candidate/Linear'):
            with tf.variable_scope(linear_scope, reuse=True):
                self.W.append(tf.get_variable('Matrix'))
                self.b.append(tf.get_variable('Bias'))

    return new_h, new_h
def build_graph(self, input, output):
    """Build generator, discriminator, losses and image summaries.

    Args:
        input: source image batch; rescaled here with ``x / 128.0 - 1``.
        output: target image batch; rescaled the same way.

    Side effects:
        Calls ``self.build_losses``, adds the weighted L1 term to
        ``self.g_loss``, registers summaries and finally calls
        ``self.collect_variables()``.
    """
    input = input / 128.0 - 1
    output = output / 128.0 - 1

    conv_init = tf.truncated_normal_initializer(stddev=0.02)
    with argscope([Conv2D, Conv2DTranspose], kernel_initializer=conv_init):
        with tf.variable_scope('gen'):
            fake_output = self.generator(input)
        with tf.variable_scope('discrim'):
            # The discriminator sees (input, real) and (input, generated).
            real_pred = self.discriminator(input, output)
            fake_pred = self.discriminator(input, fake_output)

    self.build_losses(real_pred, fake_pred)

    l1_error = tf.reduce_mean(tf.abs(fake_output - output), name='L1_loss')
    self.g_loss = tf.add(self.g_loss, LAMBDA * l1_error, name='total_g_loss')
    add_moving_summary(l1_error, self.g_loss)

    # tensorboard visualization: single-channel images are expanded to RGB.
    if IN_CH == 1:
        input = tf.image.grayscale_to_rgb(input)
    if OUT_CH == 1:
        output = tf.image.grayscale_to_rgb(output)
        fake_output = tf.image.grayscale_to_rgb(fake_output)

    visualize_tensors('input,output,fake',
                      [input, output, fake_output],
                      max_outputs=max(30, BATCH))

    self.collect_variables()
def inference(inputs, name):
    '''
    Define the architecture and build the graph.

    A 9 -> 12 fully connected ReLU layer followed by a linear
    12 -> NUM_CLASSES layer. `name` is appended to both variable scope
    names ('fc1_' and 'fc2_'). Returns the linear output of the second
    layer.
    '''
    # layer1: fully connected + ReLU
    with tf.variable_scope('fc1_' + name) as scope:
        fc1_weights = _variable_with_weight_decay(
            'weights',
            shape=[9, 12],
            stddev=0.04,
            wd=0.004
        )
        fc1_biases = _variable_on_cpu('biases', [12], tf.constant_initializer(0.1))
        pre_activation = tf.add(tf.matmul(inputs, fc1_weights), fc1_biases)
        local1 = tf.nn.relu(pre_activation)

    # softmax input layer (linear; no weight decay)
    with tf.variable_scope('fc2_' + name) as scope:
        fc2_weights = _variable_with_weight_decay(
            'weights',
            [12, NUM_CLASSES],
            stddev=0.04,
            wd=0.0
        )
        fc2_biases = _variable_on_cpu('biases', [NUM_CLASSES],
                                      tf.constant_initializer(0.0))
        linear = tf.nn.xw_plus_b(local1, fc2_weights, fc2_biases, name=scope.name)

    return linear
def __call__(self, inputs, state, scope=None):
    """Run one step of the long short-term memory cell (LSTM).

    Args:
        inputs: input tensor for this step.
        state: previous state; a ``(c, h)`` pair when
            ``self._state_is_tuple`` is set, otherwise a single tensor
            that is split in two along axis 1.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        ``(new_h, new_state)`` where ``new_state`` has the same layout as
        the incoming ``state``.

    Side effects:
        ``self.W`` and ``self.b`` are set to the cell's 'Matrix' and
        'Bias' variables.
    """
    with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(1, 2, state)

        # Parameters of gates are concatenated into one multiply for
        # efficiency.
        concat = _linear([inputs, h], 4 * self._num_units, True, 0.,
                         self.weights_init, self.trainable, self.restore,
                         self.reuse)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(1, 4, concat)

        forget_gate = self._inner_activation(f + self._forget_bias)
        input_gate = self._inner_activation(i)
        new_input = self._activation(j)
        new_c = c * forget_gate + input_gate * new_input

        new_h = self._activation(new_c) * self._inner_activation(o)

        if self._state_is_tuple:
            new_state = _rnn_cell.LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat(1, [new_c, new_h])

        # Retrieve RNN Variables
        with tf.variable_scope('Linear', reuse=True):
            self.W = tf.get_variable('Matrix')
            self.b = tf.get_variable('Bias')

    return new_h, new_state
def loss(self, logits, labels):
    """Adds loss ops to the computational graph.

    The loss is the summed sparse softmax cross-entropy over all nodes
    plus an L2 penalty on the 'Composition/W1' and 'Projection/U'
    variables, scaled by ``self.config.l2``.

    Args:
        logits: tensor(num_nodes, output_size)
        labels: python list, len = num_nodes
    Returns:
        loss: tensor 0-D
    """
    labels = tf.convert_to_tensor(labels, dtype=tf.int64)

    # Keyword arguments are accepted by every TensorFlow release, whereas
    # the positional form is rejected from TensorFlow 1.0 onwards.
    softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels)

    # Fetch the already-created weight matrices for regularization.
    with tf.variable_scope('Composition', reuse=True):
        W1 = tf.get_variable("W1")
    with tf.variable_scope('Projection', reuse=True):
        U = tf.get_variable("U")

    l2_loss = tf.nn.l2_loss(W1) + tf.nn.l2_loss(U)
    l2_loss *= self.config.l2

    return tf.reduce_sum(softmax_loss) + l2_loss
def add_model_vars(self):
    '''
    Create the model variables and the optimizer.

    The model contains the following parameters:
        embedding:  tensor(vocab_size, embed_size)
        W1:         tensor(2 * embed_size, embed_size)
        b1:         tensor(1, embed_size)
        U:          tensor(embed_size, output_size)
        bs:         tensor(1, output_size)

    The variables are added to the graph here and are meant to be
    *reused* while building the computation graphs for composition and
    projection for each tree. The composition parameters live in the
    variable scope "Composition"; the linear transformation preceding
    the softmax lives in "Projection".
    '''
    embed_size = self.config.embed_size
    vocab_size = len(self.vocab)
    output_size = self.config.label_size

    # The variables are created for their side effect on the graph only;
    # later code fetches them again by name with reuse=True.
    with tf.variable_scope('Composition'):
        tf.get_variable("embedding", shape=(vocab_size, embed_size))
        tf.get_variable("W1", shape=(2 * embed_size, embed_size))
        tf.get_variable("b1", shape=(1, embed_size))

    with tf.variable_scope('Projection'):
        tf.get_variable("U", shape=(embed_size, output_size))
        tf.get_variable("bs", shape=(1, output_size))

    self.optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr)

    # dummy_total is a simple sum to ensure that the variables for the
    # AdamOptimizer are created for initialization and before restoring
    # the variables later. It should never actually get executed.
    dummy_total = tf.constant(0.0)
    for variable in tf.trainable_variables():
        dummy_total += tf.reduce_sum(variable)
    self.dummy_minimizer = self.optimizer.minimize(dummy_total)
def _conv_layers(self, x):
    """Build the residual convolutional stack and the final classifier.

    Layout: a 7x7 stride-2 convolution plus max-pool in 'scale0', then
    four residual scales ('scale1' .. 'scale4'). Each scale starts with a
    stride-2 residual layer followed by ``res_blocks[scale] - 1``
    stride-1 residual layers. A global average pool and a 10-way fully
    connected layer close the network.

    Args:
        x: input tensor handed to the `Layers` builder.

    Returns:
        The output of the `Layers` builder after the fully connected layer.
    """
    net = Layers(x)

    # Per-scale number of residual layers and output channels
    # (index 0 describes the plain convolution in 'scale0').
    res_blocks = [1, 3, 4, 23, 3]
    output_channels = [64, 256, 512, 1024, 2048]

    with tf.variable_scope('scale0'):
        net.conv2d(filter_size=7, output_channels=output_channels[0],
                   stride=2, padding='SAME', b_value=None)
        net.maxpool(k=3)

    for scale in range(1, len(res_blocks)):
        with tf.variable_scope('scale%d' % scale):
            net.res_layer(filter_size=3,
                          output_channels=output_channels[scale], stride=2)
            # The repeated layers previously went through a non-existent
            # `conv_layers.conv_layers` attribute; they belong on the
            # builder itself, like the first layer of the scale.
            for _ in range(res_blocks[scale] - 1):
                net.res_layer(filter_size=3,
                              output_channels=output_channels[scale],
                              stride=1)

    net.avgpool(globe=True)

    # Fully Connected Layer
    net.fc(output_nodes=10)

    return net.get_output()
def testBlockGRUToGRUCellSingleStep(self):
    """A single step of GRUBlockCell must match the basic GRUCell."""
    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()) as sess:
        batch_size = 4
        cell_size = 5
        input_size = 6

        seed = 1994
        initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=seed)

        # Inputs
        x = tf.zeros([batch_size, input_size])
        h = tf.zeros([batch_size, cell_size])

        # Values for the inputs, fed in place of the zero tensors.
        x_value = np.random.rand(batch_size, input_size)
        h_value = np.random.rand(batch_size, cell_size)
        feeds = {x: x_value, h: h_value}

        def run_single_step(scope_name, cell_cls):
            # Build the cell in its own scope (same seeded initializer),
            # initialize all variables and evaluate one step.
            with tf.variable_scope(scope_name, initializer=initializer):
                output = cell_cls(cell_size)(x, h)
                sess.run([tf.initialize_all_variables()])
                return sess.run([output], feeds)

        # Output from the basic GRU cell implementation.
        basic_res = run_single_step("basic", tf.nn.rnn_cell.GRUCell)

        # Output from the block GRU cell implementation.
        block_res = run_single_step("block", gru_ops.GRUBlockCell)

        self.assertEqual(len(block_res), len(basic_res))
        for block, basic in zip(block_res, basic_res):
            self.assertAllClose(block, basic)
def forward_propagation(images):
    """Build the forward pass: three conv/pool stages and two dense layers.

    Args:
        images: image batch; reshaped to [-1, 1750, 1750, 3] before the
            first convolution.

    Returns:
        Softmax output of the final 4-unit layer.
    """
    # Stage 1: 3 -> 32 channels.
    with tf.variable_scope('conv1') as scope:
        conv1_kernel = weight_variable([5, 5, 3, 32])
        conv1_bias = bias_variable([32])

        image_matrix = tf.reshape(images, [-1, 1750, 1750, 3])

        conv1_out = tf.nn.sigmoid(conv2d(image_matrix, conv1_kernel) + conv1_bias)
        _activation_summary(conv1_out)
        pooled1 = max_pool_5x5(conv1_out)

    # Stage 2: 32 -> 64 channels.
    with tf.variable_scope('conv2') as scope:
        conv2_kernel = weight_variable([5, 5, 32, 64])
        conv2_bias = bias_variable([64])

        conv2_out = tf.nn.sigmoid(conv2d(pooled1, conv2_kernel) + conv2_bias)
        _activation_summary(conv2_out)
        pooled2 = max_pool_5x5(conv2_out)

    # Stage 3: 64 -> 128 channels.
    with tf.variable_scope('conv3') as scope:
        conv3_kernel = weight_variable([5, 5, 64, 128])
        conv3_bias = bias_variable([128])

        conv3_out = tf.nn.sigmoid(conv2d(pooled2, conv3_kernel) + conv3_bias)
        _activation_summary(conv3_out)
        pooled3 = max_pool_5x5(conv3_out)

    # Dense layer on the flattened 14 x 14 x 128 feature map.
    with tf.variable_scope('local3') as scope:
        flat_size = 14 * 14 * 128
        dense1_weights = weight_variable([flat_size, 256])
        dense1_bias = bias_variable([256])

        flattened = tf.reshape(pooled3, [-1, flat_size])
        dense1_out = tf.nn.sigmoid(tf.matmul(flattened, dense1_weights) + dense1_bias)
        _activation_summary(dense1_out)

    # NOTE(review): this variable is created but never applied to any
    # layer in this function; it is kept so the set of graph variables
    # stays the same.
    keep_prob = tf.Variable(1.0)

    # Output layer: 256 -> 4 with softmax.
    dense2_weights = weight_variable([256, 4])
    dense2_bias = bias_variable([4])

    y_conv = tf.nn.softmax(tf.matmul(dense1_out, dense2_weights) + dense2_bias)
    _activation_summary(y_conv)

    return y_conv
def transformer_layers_sharded(dp,
                               ps_devices,
                               inputs,
                               num_layers,
                               hparams,
                               self_attention_bias=None,
                               enc_output=None,
                               attention_type=AttentionType.GLOBAL,
                               name="transformer"):
    """Multi layer transformer, sharded by the data parallelism dp.

    Args:
        dp: data-parallelism callable, invoked as ``dp(fn, *args)``.
        ps_devices: devices forwarded to ``expert_utils.distributed_moe``.
        inputs: sharded input to the first layer.
        num_layers: number of layers to build.
        hparams: hyper-parameters; this function reads
            ``moe_hidden_sizes``, ``hidden_size``,
            ``layer_prepostprocess_dropout``, ``moe_layers_decoder``,
            ``mode``, ``moe_num_experts``, ``moe_k`` and ``moe_loss_coef``.
        self_attention_bias: bias used by the GLOCAL branch; the GLOBAL
            branch recomputes it from the current activations.
        enc_output: optional encoder output; when given, an
            encoder-decoder attention sub-layer is added to every layer.
        attention_type: one of the ``AttentionType`` values handled below.
        name: prefix of the per-layer variable scopes.

    Returns:
        ``(outputs, extra_loss)`` where ``extra_loss`` accumulates the
        mixture-of-experts losses.

    Raises:
        ValueError: if ``attention_type`` is not one of the supported
            values (previously this surfaced as an unbound-variable error
            on ``y``).
    """
    x = inputs
    extra_loss = tf.constant(0.0)
    moe_hidden_sizes = [int(s) for s in hparams.moe_hidden_sizes.split(",")]
    expert_fn = expert_utils.ffn_expert_fn(
        hparams.hidden_size, moe_hidden_sizes, hparams.hidden_size)
    x = dp(tf.nn.dropout, x, 1.0 - hparams.layer_prepostprocess_dropout)

    for layer in range(num_layers):
        with tf.variable_scope("%s_layer_%d" % (name, layer)):
            # self-attention
            # NOTE(review): in the four branches below `dp` receives the
            # already-computed result of the attention function, unlike
            # the `dp(fn, *args)` form used elsewhere in this function,
            # and the following layer_postprocess is not wrapped in `dp`.
            # Behavior is kept as-is; confirm this is intended.
            if attention_type == AttentionType.LOCAL_2D:
                y = dp(local_attention_2d(
                    common_layers.layer_preprocess(x, hparams),
                    hparams,
                    attention_type="masked_local_attention_2d"))
            elif attention_type == AttentionType.LOCAL_1D:
                y = dp(local_attention_1d(
                    common_layers.layer_preprocess(x, hparams),
                    hparams,
                    attention_type="local_mask_right",
                    q_padding="LEFT", kv_padding="LEFT"))
            elif attention_type == AttentionType.GLOCAL:
                y = dp(local_global_attention(
                    common_layers.layer_preprocess(x, hparams),
                    self_attention_bias, hparams,
                    q_padding="LEFT", kv_padding="LEFT"))
            elif attention_type == AttentionType.GLOBAL:
                self_attention_bias = dp(get_self_attention_bias(x))
                y = dp(full_self_attention(
                    common_layers.layer_preprocess(x, hparams),
                    self_attention_bias, hparams,
                    q_padding="LEFT", kv_padding="LEFT"))
            else:
                # Fail with a clear message instead of an unbound `y`.
                raise ValueError(
                    "Unsupported attention_type: %r" % (attention_type,))
            x = common_layers.layer_postprocess(x, y, hparams)

            # Optional encoder-decoder attention.
            if enc_output is not None:
                y = dp(encdec_attention_1d(
                    common_layers.layer_preprocess(x, hparams),
                    enc_output, None, hparams))
                x = dp(common_layers.layer_postprocess, x, y, hparams)

            with tf.variable_scope("ffn"):
                # Layers listed in moe_layers_decoder use a
                # mixture-of-experts block, all others a plain ffn layer.
                if str(layer) in hparams.moe_layers_decoder.split(","):
                    y, loss = expert_utils.distributed_moe(
                        dp,
                        ps_devices,
                        common_layers.layer_preprocess(x, hparams),
                        hparams.mode == tf.estimator.ModeKeys.TRAIN,
                        input_size=hparams.hidden_size,
                        expert_fn=expert_fn,
                        num_experts=hparams.moe_num_experts,
                        k=hparams.moe_k,
                        loss_coef=hparams.moe_loss_coef)
                    extra_loss += loss
                else:
                    y = dp(ffn_layer,
                           common_layers.layer_preprocess(x, hparams),
                           hparams)
                x = dp(common_layers.layer_postprocess, x, y, hparams)

    return dp(common_layers.layer_preprocess, x, hparams), extra_loss
def ce(model, config, scope, connect, threshold = 1e-5):
    """Add a cross-entropy loss layer named `scope` on top of layer `connect`.

    All tensors are stored in (and read from) the `model` dict under keys
    of the form '<layer>_<name>'.

    Args:
        model: dict of graph tensors; updated in place and returned.
        config: not used by this function.
        scope: name of this layer; prefix of every key written.
        connect: name of the layer whose '<connect>_outputs' and length
            entries feed this one.
        threshold: the softmax probabilities are clipped to
            [threshold, 1 - threshold] before the logarithm is taken.

    Returns:
        The updated `model` dict.
    """
    with tf.variable_scope(scope), tf.name_scope(scope):
        with tf.variable_scope('inputs'), tf.name_scope('inputs'):
            # Length bookkeeping is copied from the connected layer.
            in0length = model['%s_in0length' %scope] = model['%s_out0length' %connect]
            in1length = model['%s_in1length' %scope] = model['%s_out1length' %connect]
            model['%s_in2length' %scope] = model['%s_out2length' %connect]
            maxin2length = model['%s_maxin2length' %scope] = model['%s_maxout2length' %connect]

            probabilities = tf.nn.softmax(model['%s_outputs' %connect])
            inputs = model['%s_inputs' %scope] = tf.clip_by_value(
                probabilities, threshold, 1. - threshold,
                name = '%s_inputs' %scope)

            out1length = model['%s_out1length' %scope] = in1length
            model['%s_out0length' %scope] = in0length
            model['%s_out2length' %scope] = tf.placeholder(
                tf.int32, [in0length], '%s_out2length' %scope)
            model['%s_maxout2length' %scope] = maxin2length

        with tf.variable_scope('labels'), tf.name_scope('labels'):
            model['%s_labels_len' %scope] = tf.placeholder(
                tf.int32, [in0length], '%s_labels_len' %scope)
            labels_ind = model['%s_labels_ind' %scope] = tf.placeholder(
                tf.int64, [None, 2], '%s_labels_ind' %scope)
            labels_val = model['%s_labels_val' %scope] = tf.placeholder(
                tf.int32, [None], '%s_labels_val' %scope)
            # Sparse labels are densified with -1 as the fill value, then
            # one-hot encoded.
            labels_collapsed = model['%s_labels_collapsed' %scope] = tf.sparse_to_dense(
                labels_ind, [maxin2length, in0length], labels_val, -1,
                name = '%s_labels_collapsed' %scope)
            labels = model['%s_labels' %scope] = tf.one_hot(
                labels_collapsed, out1length, name = '%s_labels' %scope)

        with tf.variable_scope('loss'), tf.name_scope('loss'):
            model['%s_loss' %scope] = tf.reduce_sum(
                -tf.multiply(labels, tf.log(inputs)),
                name = '%s_loss' %scope)

        with tf.variable_scope('outputs'), tf.name_scope('outputs'):
            # NOTE(review): the key is '<scope>_output' (singular) while
            # the input side reads '<connect>_outputs' — confirm intended.
            model['%s_output' %scope] = inputs

    return model
def __call__(self, features, labels, params):
    """Creates the model graph inside the "model/<self.name>" scope.

    See the model_fn documentation in tf.contrib.learn.Estimator class
    for a more detailed explanation.

    Returns:
        Whatever ``self._build(features, labels, params)`` returns.
    """
    with tf.variable_scope("model"), tf.variable_scope(self.name):
        built = self._build(features, labels, params)
    return built
def project_bilstm_layer(self, lstm_outputs, name=None):
    """
    Hidden layer between the LSTM layer and the logits.

    The LSTM outputs are flattened to rows of width
    ``2 * self.hidden_unit``, passed through a tanh layer of width
    ``self.hidden_unit`` and then projected to ``self.num_labels`` scores.

    :param lstm_outputs: [batch_size, num_steps, emb_size]
    :param name: optional variable scope name; "project" when empty.
    :return: [batch_size, num_steps, num_tags]
    """
    lstm_width = self.hidden_unit * 2

    with tf.variable_scope(name or "project"):
        with tf.variable_scope("hidden"):
            hidden_w = tf.get_variable(
                "W", shape=[lstm_width, self.hidden_unit],
                dtype=tf.float32,
                initializer=self.initializers.xavier_initializer())
            hidden_b = tf.get_variable(
                "b", shape=[self.hidden_unit], dtype=tf.float32,
                initializer=tf.zeros_initializer())

            flat_outputs = tf.reshape(lstm_outputs, shape=[-1, lstm_width])
            hidden = tf.tanh(tf.nn.xw_plus_b(flat_outputs, hidden_w, hidden_b))

        # project to score of tags
        with tf.variable_scope("logits"):
            logits_w = tf.get_variable(
                "W", shape=[self.hidden_unit, self.num_labels],
                dtype=tf.float32,
                initializer=self.initializers.xavier_initializer())
            logits_b = tf.get_variable(
                "b", shape=[self.num_labels], dtype=tf.float32,
                initializer=tf.zeros_initializer())

            pred = tf.nn.xw_plus_b(hidden, logits_w, logits_b)

        return tf.reshape(pred, [-1, self.seq_length, self.num_labels])
def add_logits_op(self):
    """
    Adds logits to self.

    Runs a bidirectional LSTM over ``self.word_embeddings``, applies
    dropout to the concatenated outputs and projects every time step to
    ``self.ntags`` scores, stored in ``self.logits``.
    """
    lstm_width = 2 * self.hidden_size

    with tf.variable_scope("bi-lstm"):
        forward_cell = tf.contrib.rnn.LSTMCell(self.hidden_size)
        backward_cell = tf.contrib.rnn.LSTMCell(self.hidden_size)
        rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            forward_cell, backward_cell, self.word_embeddings,
            sequence_length=self.sequence_lengths, dtype=tf.float32)
        output_fw, output_bw = rnn_outputs
        output = tf.concat([output_fw, output_bw], axis=-1)
        output = tf.nn.dropout(output, self.dropout)

    with tf.variable_scope("proj"):
        proj_w = tf.get_variable("W", shape=[lstm_width, self.ntags],
                                 dtype=tf.float32)
        proj_b = tf.get_variable("b", shape=[self.ntags], dtype=tf.float32,
                                 initializer=tf.zeros_initializer())

        # The number of time steps is read dynamically so that the
        # flattened projection can be restored to three dimensions.
        ntime_steps = tf.shape(output)[1]
        flat_output = tf.reshape(output, [-1, lstm_width])
        pred = tf.matmul(flat_output, proj_w) + proj_b
        self.logits = tf.reshape(pred, [-1, ntime_steps, self.ntags])
def testInitFromCheckpointWithScopes(self):
    """Checkpoint values are assigned to variables living in other scopes."""
    init_value0 = np.asarray(
        [1.0, 3.0, 9.0], dtype=np.float32).reshape((1, 3, 1))
    init_value1 = np.asarray(
        [2.0, 4.0, 6.0, 8.0], dtype=np.float32).reshape((2, 1, 2))

    var_names_to_values = {'layer0/v0': init_value0, 'layer1/v1': init_value1}
    model_dir = os.path.join(self.get_temp_dir(), 'model')

    with self.test_session() as sess:
        model_path = self.create_checkpoint_from_values(
            var_names_to_values, model_dir)

        # The graph variables use names that differ from the checkpoint's.
        with tf.variable_scope('my_model/my_layer0'):
            var0 = tf.contrib.framework.variables.variable(
                'my_var0', shape=init_value0.shape)
        with tf.variable_scope('my_model/my_layer1'):
            var1 = tf.contrib.framework.variables.variable(
                'my_var1', shape=init_value1.shape)

        # Map checkpoint names onto the graph variables.
        vars_to_restore = {'layer0/v0': var0, 'layer1/v1': var1}
        assign_op, assign_feed = (
            tf.contrib.framework.variables.assign_from_checkpoint(
                model_path, vars_to_restore))

        # Initialize the variables.
        sess.run(tf.global_variables_initializer())

        # Perform the assignment.
        sess.run(assign_op, assign_feed)

        # Request and test the variable values:
        self.assertAllEqual(init_value0, var0.eval())
        self.assertAllEqual(init_value1, var1.eval())
def __init__(self, model_path="models", threshold=None, factor=0.709, scale_factor=1):
    '''
    Load the three detection networks (pnet, rnet, onet) into a new
    CPU-pinned graph and session.

    :param model_path: directory containing det1.npy, det2.npy and
        det3.npy; when empty, the directory of this source file is used
    :param threshold: detection threshold, one value per network;
        defaults to [0.6, 0.7, 0.7]
    :param factor: default 0.709 image pyramid -- magic number
    :param scale_factor: stored unchanged as ``self.scale_factor``
    '''
    # A fresh list per instance: a list literal in the signature would be
    # shared (and mutable) across every detector created with the default.
    self.threshold = [0.6, 0.7, 0.7] if threshold is None else threshold
    self.factor = factor
    self.scale_factor = scale_factor

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        print("Loading Face detection model")
        self.sess = tf.Session()
        if not model_path:
            model_path, _ = os.path.split(os.path.realpath(__file__))

        # (scope, input placeholder shape, network class, weights file)
        network_specs = (
            ('pnet', (None, None, None, 3), PNet, 'det1.npy'),
            ('rnet', (None, 24, 24, 3), RNet, 'det2.npy'),
            ('onet', (None, 48, 48, 3), ONet, 'det3.npy'),
        )
        for scope_name, input_shape, network_cls, weights_file in network_specs:
            with tf.variable_scope(scope_name):
                data = tf.placeholder(tf.float32, input_shape, 'input')
                network = network_cls({'data': data})
                network.load(os.path.join(model_path, weights_file), self.sess)

        # Each runner feeds an image batch and fetches the named outputs
        # of the corresponding network.
        self.pnet = lambda img: self.sess.run(
            ('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'),
            feed_dict={'pnet/input:0': img})
        self.rnet = lambda img: self.sess.run(
            ('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'),
            feed_dict={'rnet/input:0': img})
        self.onet = lambda img: self.sess.run(
            ('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'),
            feed_dict={'onet/input:0': img})
        print("Face detection model loaded")
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    """Build a conv-net followed by fully connected Q-value heads.

    Args:
        convs: iterable of (num_outputs, kernel_size, stride) triples, one
            per convolutional layer.
        hiddens: iterable of hidden layer widths used by each head.
        dueling: when true, a separate state-value head is built and
            combined with the mean-centered action scores.
        inpt: input tensor.
        num_actions: number of outputs of the action head.
        scope: variable scope name for the whole network.
        reuse: forwarded to ``tf.variable_scope``.
        layer_norm: when true, layer normalization is applied after every
            hidden fully connected layer (before the ReLU).

    Returns:
        Tensor of Q-values.
    """
    def hidden_stack(features):
        # Fully connected -> optional layer norm -> ReLU, per hidden width.
        for width in hiddens:
            features = layers.fully_connected(
                features, num_outputs=width, activation_fn=None)
            if layer_norm:
                features = layers.layer_norm(features, center=True, scale=True)
            features = tf.nn.relu(features)
        return features

    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        with tf.variable_scope("convnet"):
            for num_outputs, kernel_size, stride in convs:
                out = layers.convolution2d(out,
                                           num_outputs=num_outputs,
                                           kernel_size=kernel_size,
                                           stride=stride,
                                           activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)

        with tf.variable_scope("action_value"):
            action_out = hidden_stack(conv_out)
            action_scores = layers.fully_connected(
                action_out, num_outputs=num_actions, activation_fn=None)

        if not dueling:
            return action_scores

        with tf.variable_scope("state_value"):
            state_out = hidden_stack(conv_out)
            state_score = layers.fully_connected(
                state_out, num_outputs=1, activation_fn=None)

        # Dueling combination: state value plus mean-centered action scores.
        action_scores_mean = tf.reduce_mean(action_scores, 1)
        action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1)
        return state_score + action_scores_centered
def start_session(self):
    """
    Creates the session.

    Records the names of all weight matrices on the instance, builds the
    trainable model in scope "network" and the target model in scope
    "target", then initializes all variables in a new session.

    Returns:
        The initialized ``tf.Session``.
    """
    self.input_layer_mats = ["W_input", "b_input"]

    # One weight/bias name pair per hidden layer: W0, b0, W1, b1, ...
    # `range` (instead of the Python-2-only `xrange`) keeps this working
    # on both Python 2 and Python 3.
    self.hidden_layer_mats = []
    for i in range(self.num_hidden):
        self.hidden_layer_mats.append("W" + str(i))
        self.hidden_layer_mats.append("b" + str(i))

    self.output_layer_mats = ["W_output", "b_output"]
    self.weight_mats = (self.input_layer_mats
                        + self.hidden_layer_mats
                        + self.output_layer_mats)

    with tf.variable_scope("network"):
        self.create_model_trainable()
    with tf.variable_scope("target"):
        self.create_model_target()

    init = tf.initialize_all_variables()
    session = tf.Session()
    session.run(init)
    return session
def inference(input_tensor, train, regularizer):
    """Build the forward pass: two conv/pool stages and two dense layers.

    Args:
        input_tensor: 4-D image batch with NUM_CHANNELS channels. The
            spatial dimensions must be statically known; the batch
            dimension may be unknown.
        train: when true, dropout (keep probability 0.5) is applied to the
            first fully connected layer.
        regularizer: optional callable applied to the fully connected
            weights; its results are added to the 'losses' collection.
            Pass None to disable regularization.

    Returns:
        The un-normalized logits with NUM_LABELS columns.
    """
    # Layer 1: convolution
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable(
            "biases", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights,
                             strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    # Layer 2: pooling
    with tf.name_scope('layer2-pool1'):
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='SAME')

    # Layer 3: convolution
    with tf.variable_scope('layer3-conv2'):
        conv2_weights = tf.get_variable(
            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable(
            "biases", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights,
                             strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    # Layer 4: pooling
    with tf.name_scope('layer4-pool2'):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1], padding='SAME')

    # Flatten the feature map for the fully connected layers. When the
    # static batch size is unknown (None), -1 lets reshape infer it; a
    # known batch size is used unchanged.
    pool_shape = pool2.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    batch_dim = pool_shape[0] if pool_shape[0] is not None else -1
    reshaped = tf.reshape(pool2, [batch_dim, nodes])

    # Layer 5: fully connected
    with tf.variable_scope('layer5-fc1'):
        fc1_weights = tf.get_variable(
            "weight", [nodes, FC_SIZE],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularized.
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable(
            "bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5)

    # Layer 6: fully connected (logits)
    with tf.variable_scope('layer6-fc2'):
        fc2_weights = tf.get_variable(
            "weight", [FC_SIZE, NUM_LABELS],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected weights are regularized.
        if regularizer is not None:
            tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable(
            "bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit
def testLSTMBasicToBlockPeeping(self):
    """Fused peephole LSTM must match LSTMCell in outputs and gradients."""
    with self.test_session(use_gpu=self._use_gpu) as sess:
        batch_size = 2
        input_size = 3
        cell_size = 4
        sequence_length = 5

        # One random input tensor per time step.
        inputs = [
            tf.convert_to_tensor(
                np.random.randn(batch_size, input_size), dtype=tf.float32)
            for _ in range(sequence_length)
        ]

        initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=19890212)

        # Reference: the basic LSTM cell with peepholes.
        with tf.variable_scope("basic", initializer=initializer):
            cell = tf.nn.rnn_cell.LSTMCell(cell_size,
                                           use_peepholes=True,
                                           state_is_tuple=True)
            outputs, _ = tf.nn.rnn(cell, inputs, dtype=tf.float32)

            sess.run([tf.initialize_all_variables()])
            basic_outputs = sess.run(outputs)
            basic_grads = sess.run(tf.gradients(outputs, inputs))
            basic_wgrads = sess.run(
                tf.gradients(outputs, tf.trainable_variables()))

        # Candidate: the fused LSTM with explicitly created parameters.
        with tf.variable_scope("block", initializer=initializer):
            w = tf.get_variable("w",
                                shape=[input_size + cell_size, cell_size * 4],
                                dtype=tf.float32)
            b = tf.get_variable("b",
                                shape=[cell_size * 4],
                                dtype=tf.float32,
                                initializer=tf.zeros_initializer)

            # Peephole weights.
            wci = tf.get_variable("wci", shape=[cell_size], dtype=tf.float32)
            wcf = tf.get_variable("wcf", shape=[cell_size], dtype=tf.float32)
            wco = tf.get_variable("wco", shape=[cell_size], dtype=tf.float32)

            # Only the last returned value (the outputs) is compared.
            fused_results = fused_lstm(
                tf.convert_to_tensor(sequence_length, dtype=tf.int64),
                inputs,
                w,
                b,
                wci=wci,
                wcf=wcf,
                wco=wco,
                cell_clip=0,
                use_peephole=True)
            _, _, _, _, _, _, outputs = fused_results

            sess.run([tf.initialize_all_variables()])
            block_outputs = sess.run(outputs)
            block_grads = sess.run(tf.gradients(outputs, inputs))
            block_wgrads = sess.run(
                tf.gradients(outputs, [w, b, wci, wcf, wco]))

        self.assertAllClose(basic_outputs, block_outputs)
        self.assertAllClose(basic_grads, block_grads)
        # Weight gradients are compared with a looser tolerance.
        for basic, block in zip(basic_wgrads, block_wgrads):
            self.assertAllClose(basic, block, rtol=1e-2, atol=1e-2)
def testBasicLSTMCell(self):
    """Smoke test for BasicLSTMCell, stacked and with input_size != num_units."""
    with self.test_session() as sess:
        # Two stacked BasicLSTMCell(2) instances; the state is 8 wide.
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m = tf.zeros([1, 8])
            stacked_cell = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.BasicLSTMCell(2)] * 2)
            g, out_m = stacked_cell(x, m)
            sess.run([tf.initialize_all_variables()])

            feeds = {x.name: np.array([[1., 1.]]),
                     m.name: 0.1 * np.ones([1, 8])}
            res = sess.run([g, out_m], feeds)
            self.assertEqual(len(res), 2)

            # The numbers in results were not calculated, this is just a
            # smoke test.
            self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
            expected_mem = np.array([[0.68967271, 0.68967271,
                                      0.44848421, 0.44848421,
                                      0.39897051, 0.39897051,
                                      0.24024698, 0.24024698]])
            self.assertAllClose(res[1], expected_mem)

        # Test BasicLSTMCell with input_size != num_units.
        with tf.variable_scope("other", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 3])
            m = tf.zeros([1, 4])
            single_cell = tf.nn.rnn_cell.BasicLSTMCell(2, input_size=3)
            g, out_m = single_cell(x, m)
            sess.run([tf.initialize_all_variables()])

            feeds = {x.name: np.array([[1., 1., 1.]]),
                     m.name: 0.1 * np.ones([1, 4])}
            res = sess.run([g, out_m], feeds)
            self.assertEqual(len(res), 2)
def __init__(self, sess, n_features, n_actions, lr=0.001):
    """Build the actor network and its training op.

    Args:
        sess: TensorFlow session, stored as ``self.sess``.
        n_features: width of the single-row state placeholder.
        n_actions: number of outputs of the softmax layer.
        lr: learning rate of the Adam optimizer.
    """
    self.sess = sess

    # The state placeholder holds exactly one state (shape [1, n_features]).
    self.s = tf.placeholder(tf.float32, [1, n_features], name='state')
    self.a = tf.placeholder(tf.int32, None, name='act')
    self.td_error = tf.placeholder(tf.float32, None, "td_error")

    def dense(inputs, units, activation, name):
        # Every layer gets its own initializer objects with the same
        # settings.
        return tf.layers.dense(
            inputs=inputs,
            units=units,
            activation=activation,
            kernel_initializer=tf.random_normal_initializer(mean=0, stddev=0.1),
            bias_initializer=tf.constant_initializer(0.1),
            name=name
        )

    with tf.variable_scope('Actor'):
        hidden = dense(self.s, 20, tf.nn.relu, 'l1')
        self.acts_prob = dense(hidden, n_actions, tf.nn.softmax, 'acts_prob')

    with tf.variable_scope('exp_v'):
        # Log-probability of the chosen action, weighted by the TD error.
        log_prob = tf.log(self.acts_prob[0, self.a])
        self.exp_v = tf.reduce_mean(log_prob * self.td_error)

    with tf.variable_scope('train'):
        # Minimizing the negative is maximizing exp_v.
        optimizer = tf.train.AdamOptimizer(lr)
        self.train_op = optimizer.minimize(-self.exp_v)
def __load_model(self):
    """Build the training and test seq2seq graphs with shared variables.

    The training graph is built first under the "train_test" scope; the
    test graph re-enters the same scope with ``reuse=True`` and sets
    ``feed_previous=True``.
    """
    # Initial memory value for recurrence.
    self.prev_mem = tf.zeros((self.train_batch_size, self.memory_dim))

    # choose RNN/GRU/LSTM cell
    with tf.variable_scope("train_test", reuse=True):
        self.cell = rnn_cell.LSTMCell(self.memory_dim)

    # embedding model: both variants take the same arguments, so only the
    # builder function depends on the attention flag.
    if self.attention:
        build_seq2seq = seq2seq.embedding_attention_seq2seq
    else:
        build_seq2seq = seq2seq.embedding_rnn_seq2seq

    with tf.variable_scope("train_test"):
        self.dec_outputs, self.dec_memory = build_seq2seq(
            self.enc_inp, self.dec_inp, self.cell,
            self.vocab_size, self.vocab_size, self.seq_length)

    with tf.variable_scope("train_test", reuse=True):
        self.dec_outputs_tst, _ = build_seq2seq(
            self.enc_inp, self.dec_inp, self.cell,
            self.vocab_size, self.vocab_size, self.seq_length,
            feed_previous=True)
def cnn_model(X, y):
    """Two-layer convolutional classifier over a sequence of word ids.

    Args:
        X: Word indexes, looked up in an embedding matrix of
            [n_words, EMBEDDING_SIZE].
        y: Targets passed through to the logistic-regression head.

    Returns:
        Whatever ``skflow.models.logistic_regression`` returns for the
        pooled features and ``y``.
    """
    # Map word indexes to [batch_size, sequence_length, EMBEDDING_SIZE]
    # and append a trailing channel axis for the 2-D convolution.
    embedded = skflow.ops.categorical_variable(
        X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
    embedded = tf.expand_dims(embedded, 3)

    with tf.variable_scope('CNN_Layer1'):
        # Convolution over the input sequence followed by a ReLU.
        features = skflow.ops.conv2d(
            embedded, N_FILTERS, FILTER_SHAPE1, padding='VALID')
        features = tf.nn.relu(features)
        # Max pooling across the output of convolution + ReLU.
        pooled = tf.nn.max_pool(
            features,
            ksize=[1, POOLING_WINDOW, 1, 1],
            strides=[1, POOLING_STRIDE, 1, 1],
            padding='SAME')
        # Swap the last two axes so n_filters becomes the width.
        pooled = tf.transpose(pooled, [0, 1, 3, 2])

    with tf.variable_scope('CNN_Layer2'):
        # Second level of convolution filtering.
        features = skflow.ops.conv2d(
            pooled, N_FILTERS, FILTER_SHAPE2, padding='VALID')
        # Max across each filter to get features for classification.
        per_filter_max = tf.reduce_max(features, 1)
        summary = tf.squeeze(per_filter_max, squeeze_dims=[1])

    # Apply regular WX + B and classification.
    return skflow.models.logistic_regression(summary, y)
def testWithScopes(self):
    """assign_from_values_fn must address variables by full scoped name."""
    init_value0 = np.asarray([1.0, 3.0, 9.0]).reshape((1, 3, 1))
    init_value1 = np.asarray([2.0, 4.0, 6.0, 8.0]).reshape((2, 1, 2))

    # (variable scope, variable name, shape) for each variable under test.
    specs = [
        ('my_model/my_layer0', 'my_var0', [1, 3, 1]),
        ('my_model/my_layer1', 'my_var1', [2, 1, 2]),
    ]

    with self.test_session() as sess:
        initializer = tf.truncated_normal_initializer(stddev=.1)

        created = []
        for scope_name, var_name, shape in specs:
            with tf.variable_scope(scope_name):
                created.append(tf.contrib.framework.variable(
                    var_name, shape=shape, initializer=initializer))

        var_names_to_values = {
            'my_model/my_layer0/my_var0': init_value0,
            'my_model/my_layer1/my_var1': init_value1,
        }
        init_fn = tf.contrib.framework.assign_from_values_fn(
            var_names_to_values)

        # Initialize the variables.
        sess.run(tf.global_variables_initializer())

        # Perform the assignment.
        init_fn(sess)

        # Request and test the variable values:
        value0, value1 = sess.run(created)
        self.assertAllEqual(init_value0, value0)
        self.assertAllEqual(init_value1, value1)
def additive_attention(queries, keys, values, bias, hidden_size, concat=False,
                       keep_prob=None, dtype=None, scope=None):
    """ Additive attention mechanism. This layer is implemented using a
        one layer feed forward neural network

    :param queries: A tensor with shape [batch, heads, length_q, depth_k]
    :param keys: A tensor with shape [batch, heads, length_kv, depth_k]
    :param values: A tensor with shape [batch, heads, length_kv, depth_v]
    :param bias: A tensor added to the logits, or None to skip it
    :param hidden_size: An integer
    :param concat: A boolean value. If ``concat'' is set to True, then
        the computation of attention mechanism is following $tanh(W[q, k])$.
        When ``concat'' is set to False, the computation is following
        $tanh(Wq + Vk)$
    :param keep_prob: a scalar in [0, 1], or None to disable dropout
    :param dtype: An optional instance of tf.DType
    :param scope: An optional string, the scope of this layer

    :returns: A dict with the following keys:
        weights: The softmax of the attention logits (after dropout, if
            dropout is applied)
        outputs: ``tf.matmul(weights, values)``
    """

    with tf.variable_scope(scope, default_name="additive_attention",
                           values=[queries, keys, values, bias], dtype=dtype):
        length_q = tf.shape(queries)[2]
        length_kv = tf.shape(keys)[2]
        # Pair every query position with every key position.
        q = tf.tile(tf.expand_dims(queries, 3), [1, 1, 1, length_kv, 1])
        k = tf.tile(tf.expand_dims(keys, 2), [1, 1, length_q, 1, 1])

        if concat:
            combined = tf.tanh(
                linear(tf.concat([q, k], axis=-1), hidden_size, True, True,
                       name="qk_transform"))
        else:
            # NOTE(review): this branch transforms the un-tiled queries/keys,
            # so the tiled q/k above are discarded here and ``q + k`` relies
            # on broadcasting -- confirm this is the intended behaviour.
            q = linear(queries, hidden_size, True, True, name="q_transform")
            k = linear(keys, hidden_size, True, True, name="key_transform")
            combined = tf.tanh(q + k)

        # shape: [batch, heads, length_q, length_kv]
        logits = tf.squeeze(linear(combined, 1, True, True, name="logits"),
                            axis=-1)

        if bias is not None:
            logits += bias

        weights = tf.nn.softmax(logits, name="attention_weights")

        # Apply dropout only when a keep probability below 1 was given.
        # The previous test (``keep_prob or keep_prob < 1.0``) raised
        # TypeError on Python 3 for the default ``keep_prob=None`` and
        # also applied dropout for ``keep_prob == 1.0``.
        if keep_prob is not None and keep_prob < 1.0:
            weights = tf.nn.dropout(weights, keep_prob)

        outputs = tf.matmul(weights, values)

        return {"weights": weights, "outputs": outputs}
def ready(self):
    """Build the model graph: embeddings, encoder, attention, and losses.

    Reads the input tensors ``self.sent``, ``self.mid``, ``self.pats``,
    ``self.rel``, ``self.rels``, ``self.weight``, ``self.word_mat`` and
    ``self.is_train`` plus hyper-parameters from ``self.config``.

    Sets ``self.pat_a``, ``self.logit``, ``self.pat_loss``, ``self.max_val``,
    ``self.pred``, ``self.loss``, ``self.sim_pred``, ``self.sim_max_val``,
    ``self.gold`` and ``self.max_logit``.

    NOTE(review): the scope nesting below was reconstructed from a source
    with collapsed whitespace -- confirm against the original file.
    """
    config = self.config
    d = config.hidden

    batch_size = tf.shape(self.sent)[0]
    # Non-zero ids count as real tokens (presumably 0 is padding -- confirm);
    # the tensors are then cut down to the longest sequence in the batch.
    sent_mask = tf.cast(self.sent, tf.bool)
    sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
    sent_maxlen = tf.reduce_max(sent_len)
    sent_mask = tf.slice(sent_mask, [0, 0], [batch_size, sent_maxlen])
    sent = tf.slice(self.sent, [0, 0], [batch_size, sent_maxlen])

    # Same masking and trimming for the ``mid`` input.
    mid_mask = tf.cast(self.mid, tf.bool)
    mid_len = tf.reduce_sum(tf.cast(mid_mask, tf.int32), axis=1)
    mid_maxlen = tf.reduce_max(mid_len)
    mid_mask = tf.slice(mid_mask, [0, 0], [batch_size, mid_maxlen])
    mid = tf.slice(self.mid, [0, 0], [batch_size, mid_maxlen])

    # Patterns are masked but not trimmed.
    pat_mask = tf.cast(self.pats, tf.bool)
    pat_len = tf.reduce_sum(tf.cast(pat_mask, tf.int32), axis=1)

    with tf.variable_scope("embedding"):
        sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
        mid_emb = tf.nn.embedding_lookup(self.word_mat, mid)
        # Only the sentence embedding goes through dropout here.
        sent_emb = dropout(sent_emb, keep_prob=config.keep_prob, is_train=self.is_train, mode="embedding")
        pat_emb = tf.nn.embedding_lookup(self.word_mat, self.pats)

    with tf.variable_scope("encoder"):
        # The same RNN object encodes both sentences and patterns.
        rnn = Cudnn_RNN(num_layers=2, num_units=d // 2)
        cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
        pat, _ = rnn(pat_emb, seq_len=pat_len, concat_layers=False)

        cont_d = dropout(cont, keep_prob=config.keep_prob, is_train=self.is_train)
        pat_d = dropout(pat, keep_prob=config.keep_prob, is_train=self.is_train)

    with tf.variable_scope("attention"):
        att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
        pat_a = self.pat_a = attention(pat_d, config.att_hidden, mask=pat_mask)

    with tf.variable_scope("sim"):
        sim, pat_sim = att_match(mid_emb, pat_emb, mid_mask, pat_mask, d, keep_prob=config.keep_prob, is_train=self.is_train)

        # rels x rels^T; presumably non-zero where two patterns share a
        # relation -- confirm against the encoding of self.rels.
        neg_idxs = tf.matmul(self.rels, tf.transpose(self.rels, [1, 0]))
        # Squared hinge on (tau - similarity); entries where neg_idxs is 0
        # are pushed to -1e30 so the row-wise max ignores them.
        pat_pos = tf.square(tf.maximum(config.tau - pat_sim, 0.))
        pat_pos = tf.reduce_max(pat_pos - (1 - neg_idxs) * 1e30, axis=1)
        # Squared positive similarity; entries where neg_idxs is non-zero
        # are pushed to -1e30 before the row-wise max.
        pat_neg = tf.square(tf.maximum(pat_sim, 0.))
        pat_neg = tf.reduce_max(pat_neg - 1e30 * neg_idxs, axis=1)
        l_sim = tf.reduce_sum(self.weight * (pat_pos + pat_neg), axis=0)

    with tf.variable_scope("pred"):
        # Attention-weighted sums over the sequence axis.
        att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d, axis=1)
        pat2_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d, axis=1)

        logit = self.logit = dense(att2_d, config.num_class, use_bias=False)
        pred = tf.nn.softmax(logit)
        # Cross-entropy against self.rel on the first config.batch_size rows.
        l_a = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logit[:config.batch_size], labels=self.rel[:config.batch_size]), axis=0)

        # Rows after config.batch_size get pseudo labels: the relation of
        # the best-matching pattern. No gradient flows through the
        # similarity scores used for this.
        xsim = tf.stop_gradient(sim[config.batch_size:])
        pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
        bound = tf.reduce_max(xsim, axis=1)
        # Per-example weights: softmax over 10 * best similarity.
        weight = tf.nn.softmax(10 * bound)
        l_u = tf.reduce_sum(
            weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logit[config.batch_size:], labels=pseudo_rel), axis=0)

        # Classify the pattern representations against self.rels.
        # NOTE(review): second dense() call in the same scope; whether it
        # shares weights with the first depends on dense() -- confirm.
        logit = dense(pat2_d, config.num_class, use_bias=False)
        l_pat = self.pat_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logit, labels=self.rels), axis=0)

    # Per-row sum of pred * -log(pred).
    self.max_val = tf.reduce_sum(pred * -log(pred), axis=1)
    self.pred = tf.argmax(pred, axis=1)

    # Total loss: l_a plus the other three terms weighted by config.
    self.loss = l_a + config.alpha * l_pat + config.beta * l_sim + config.gamma * l_u
    self.sim_pred = tf.argmax(tf.gather(self.rels, tf.argmax(sim, axis=1)), axis=1)
    self.sim_max_val = tf.reduce_max(sim, axis=1)
    self.gold = tf.argmax(self.rel, axis=1)
    self.max_logit = tf.reduce_max(self.logit, axis=1)
def densenet_views(inputs,
                   num_classes=1000,
                   reduction=None,
                   growth_rate=None,
                   num_filters=None,
                   num_layers=None,
                   dropout_rate=None,
                   is_training=True,
                   reuse=None,
                   scope=None):
    """Build a DenseNet whose last dense block is split into view branches.

    After the first dense/transition pair, ``add_views_branch`` produces a
    per-view softmax. The final dense block is instantiated once per view
    (``NUM_VIEWS`` times); each branch output is scaled by its view weight
    and the branches are summed.

    Args:
        inputs: Input tensor fed to the first convolution.
        num_classes: Number of output units of the 'logits' convolution.
        reduction: Required. Transition blocks use ``1.0 - reduction`` as
            their compression factor.
        growth_rate: Required. Passed to every dense block.
        num_filters: Required. Filters of the initial convolution; updated
            by each dense/transition block.
        num_layers: Required. Sequence with the layer count of each dense
            block.
        dropout_rate: Passed to ``_conv`` via arg_scope and to
            ``add_views_branch``.
        is_training: Passed to batch norm and dropout.
        reuse: Passed to the outer variable scope.
        scope: Optional name of the outer variable scope.

    Returns:
        A ``(net, end_points)`` tuple: the flattened logits and a dict of
        collected end points.

    NOTE(review): the scope nesting below was reconstructed from a source
    with collapsed whitespace. In particular 'pre_logits' and 'logits' are
    assumed to live outside 'final_block' -- confirm against existing
    checkpoints before merging.
    """
    assert reduction is not None
    assert growth_rate is not None
    assert num_filters is not None
    assert num_layers is not None

    end_points = {}
    compression = 1.0 - reduction
    num_dense_blocks = len(num_layers)

    with tf.variable_scope(scope, 'densenetxxx', [inputs, num_classes],
                           reuse=reuse) as sc:
        end_points_collection = sc.name + '_end_points'
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training), \
             slim.arg_scope([slim.conv2d, _conv, _conv_block,
                             _dense_block, _transition_block],
                            outputs_collections=end_points_collection), \
             slim.arg_scope([_conv], dropout_rate=dropout_rate):
            net = inputs

            # initial convolution
            net = slim.conv2d(net, num_filters, 7, stride=2, scope='conv1')
            net = slim.batch_norm(net)
            net = tf.nn.relu(net)
            net = slim.max_pool2d(net, 3, stride=2, padding='SAME')

            # FIRST BLOCK ==============
            # dense blocks
            net, num_filters = _dense_block(net, num_layers[0], num_filters,
                                            growth_rate,
                                            scope='dense_block' + str(1))

            # Add transition_block
            net, num_filters = _transition_block(net, num_filters,
                                                 compression=compression,
                                                 scope='transition_block' + str(1))

            views_softmax = add_views_branch(net, dropout_rate, end_points)

            # MIDDLE BLOCKS
            for i in range(1, num_dense_blocks - 1):
                # dense blocks
                net, num_filters = _dense_block(net, num_layers[i], num_filters,
                                                growth_rate,
                                                scope='dense_block' + str(i + 1))

                # Add transition_block
                net, num_filters = _transition_block(net, num_filters,
                                                     compression=compression,
                                                     scope='transition_block' + str(i + 1))

            with tf.variable_scope('3ViewBranches'):
                views_softmax = tf.reshape(views_softmax, [-1, 1, 1, NUM_VIEWS])
                # One size-1 slice per view. The sizes were hard-coded as
                # [1, 1, 1], which only matched NUM_VIEWS == 3.
                views_softmax_split = tf.split(views_softmax,
                                               [1] * NUM_VIEWS, axis=3)
                blocks = []
                for view in range(NUM_VIEWS):
                    with tf.variable_scope('View_%d_Branch' % view):
                        block_view, _ = _dense_block(net, num_layers[-1], num_filters,
                                                     growth_rate,
                                                     scope='dense_block' + str(num_dense_blocks))
                        # Weight this branch by its view probability.
                        scaled_block_view = tf.multiply(block_view,
                                                        views_softmax_split[view],
                                                        name='scale_view_%d' % view)
                        blocks.append(scaled_block_view)
                net = tf.add_n(blocks, 'combine_views')

            # final blocks
            # The tensor list is passed as ``values``; positionally it was
            # being taken as ``default_name``.
            with tf.variable_scope('final_block', values=[inputs]):
                net = slim.batch_norm(net)
                net = tf.nn.relu(net)
                net = tf.reduce_mean(net, [1, 2], name='global_avg_pool',
                                     keep_dims=True)

            net = slim.conv2d(net, 1536, 1, activation_fn=tf.nn.relu,
                              biases_initializer=tf.zeros_initializer(),
                              scope='pre_logits')
            end_points['PreLogits'] = slim.flatten(net, 'pre_logits')

            # slim.dropout honours is_training; tf.nn.dropout kept dropping
            # units at inference time as well.
            net = slim.dropout(net, keep_prob=0.8, is_training=is_training)
            net = slim.conv2d(net, num_classes, 1,
                              biases_initializer=tf.zeros_initializer(),
                              scope='logits')
            net = slim.flatten(net)

            end_points.update(
                slim.utils.convert_collection_to_dict(end_points_collection))

            if num_classes is not None:
                end_points['predictions'] = slim.softmax(net, scope='predictions')

            return net, end_points
def __init__(self,
             encoders: List[Stateful],
             vocabulary: Vocabulary,
             data_id: str,
             name: str,
             max_output_len: int,
             dropout_keep_prob: float = 1.0,
             rnn_size: int = None,
             embedding_size: int = None,
             output_projection: OutputProjectionSpec = None,
             encoder_projection: EncoderProjection = None,
             attentions: List[BaseAttention] = None,
             embeddings_source: EmbeddedSequence = None,
             attention_on_input: bool = True,
             rnn_cell: str = "GRU",
             conditional_gru: bool = False,
             save_checkpoint: str = None,
             load_checkpoint: str = None) -> None:
    """Create a refactored version of monster decoder.

    Arguments:
        encoders: Input encoders of the decoder
        vocabulary: Target vocabulary
        data_id: Target data series
        name: Name of the decoder. Should be unique across all Neural
            Monkey objects
        max_output_len: Maximum length of an output sequence
        dropout_keep_prob: Probability of keeping a value during dropout

    Keyword arguments:
        rnn_size: Size of the decoder hidden state, if None set
            according to encoders.
        embedding_size: Size of embedding vectors for target words
        output_projection: How to generate distribution over vocabulary
            from decoder rnn_outputs
        encoder_projection: How to construct initial state from encoders
        attentions: The attention objects to use. Optional; None is
            treated as an empty list.
        embeddings_source: Embedded sequence to take embeddings from
        attention_on_input: Flag whether attention from previous decoding
            step should be combined with the input in the next step.
        rnn_cell: RNN Cell used by the decoder; must be one of
            RNN_CELL_TYPES
        conditional_gru: Flag whether to use the Conditional GRU
            architecture
        save_checkpoint: Passed to ``ModelPart.__init__``
        load_checkpoint: Passed to ``ModelPart.__init__``

    Raises:
        ValueError: If neither ``embedding_size`` nor
            ``embeddings_source`` is given, or if ``rnn_cell`` is not in
            RNN_CELL_TYPES.
    """
    ModelPart.__init__(self, name, save_checkpoint, load_checkpoint)
    check_argument_types()

    log("Initializing decoder, name: '{}'".format(name))

    self.encoders = encoders
    self.vocabulary = vocabulary
    self.data_id = data_id
    self.max_output_len = max_output_len
    self.dropout_keep_prob = dropout_keep_prob
    self.embedding_size = embedding_size
    self.rnn_size = rnn_size
    self.output_projection_spec = output_projection
    self.encoder_projection = encoder_projection
    self.attentions = attentions
    self.embeddings_source = embeddings_source
    self._conditional_gru = conditional_gru
    self._attention_on_input = attention_on_input
    self._rnn_cell_str = rnn_cell

    # Normalise the "no attention" case to an empty list.
    if self.attentions is None:
        self.attentions = []

    if self.embedding_size is None and self.embeddings_source is None:
        raise ValueError("You must specify either embedding size or the "
                         "embedded sequence from which to reuse the "
                         "embeddings (e.g. set either 'embedding_size' or "
                         " 'embeddings_source' parameter)")

    # Reused embeddings take precedence over an explicit embedding_size.
    if self.embeddings_source is not None:
        if self.embedding_size is not None:
            warn("Overriding the embedding_size parameter with the"
                 " size of the reused embeddings from the encoder.")

        self.embedding_size = (
            self.embeddings_source.embedding_matrix.get_shape()[1].value)

    # Choose a default initial-state projection when none was supplied.
    if self.encoder_projection is None:
        if not self.encoders:
            log("No encoder - language model only.")
            self.encoder_projection = empty_initial_state
        elif rnn_size is None:
            log("No rnn_size or encoder_projection: Using concatenation of"
                " encoded states")
            self.encoder_projection = concat_encoder_projection
            # The state size is the sum of the encoder output widths.
            self.rnn_size = sum(e.output.get_shape()[1].value
                                for e in encoders)
        else:
            log("Using linear projection of encoders as the initial state")
            self.encoder_projection = linear_encoder_projection(
                self.dropout_keep_prob)

    # rnn_size must be known by now: either passed in or derived above.
    assert self.rnn_size is not None

    if self._rnn_cell_str not in RNN_CELL_TYPES:
        raise ValueError("RNN cell must be a either 'GRU', 'LSTM', or "
                         "'NematusGRU'. Not {}".format(self._rnn_cell_str))

    # The spec is either absent, a (projection, size) tuple, or a bare
    # projection whose size defaults to rnn_size.
    if self.output_projection_spec is None:
        log("No output projection specified - using tanh projection")
        self.output_projection = nonlinear_output(
            self.rnn_size, tf.tanh)[0]
        self.output_projection_size = self.rnn_size
    elif isinstance(self.output_projection_spec, tuple):
        (self.output_projection,
         self.output_projection_size) = tuple(self.output_projection_spec)
    else:
        self.output_projection = self.output_projection_spec
        self.output_projection_size = self.rnn_size

    if self._attention_on_input:
        self.input_projection = self.input_plus_attention
    else:
        self.input_projection = self.embed_input_symbol

    # Enter the scope only to capture it as self.step_scope.
    with self.use_scope():
        with tf.variable_scope("attention_decoder") as self.step_scope:
            pass

    # TODO when it is possible, remove the printing of the cost var
    log("Decoder initalized. Cost var: {}".format(str(self.cost)))
    log("Runtime logits tensor: {}".format(str(self.runtime_logits)))
def body(*args) -> LoopState:
    """Run one decoding step and return the updated loop state.

    ``args`` are the fields of a ``LoopState``. The function closes over
    ``self``, ``sample`` and ``train_mode`` from the enclosing scope.

    Raises:
        ValueError: If ``self._rnn_cell_str`` is not a known cell type.
    """
    loop_state = LoopState(*args)
    step = loop_state.step

    def _attend(query, rnn_input):
        # Query every attention object; returns (contexts, loop states).
        results = [
            a.attention(query, loop_state.prev_rnn_output,
                        rnn_input, att_loop_state, loop_state.step)
            for a, att_loop_state in zip(
                self.attentions, loop_state.attention_loop_states)]
        if self.attentions:
            ctxs, states = zip(*results)
        else:
            ctxs, states = [], []
        return ctxs, states

    with tf.variable_scope(self.step_scope):
        # Compute the input to the RNN
        rnn_input = self.input_projection(*loop_state)

        # Run the RNN.
        cell = self._get_rnn_cell()
        if self._rnn_cell_str in ["GRU", "NematusGRU"]:
            cell_output, next_state = cell(
                rnn_input, loop_state.prev_rnn_output)
            contexts, att_loop_states = _attend(cell_output, rnn_input)

            if self._conditional_gru:
                # Second cell: conditions the state on the contexts.
                cell_cond = self._get_conditional_gru_cell()
                cond_input = tf.concat(contexts, -1)
                cell_output, next_state = cell_cond(
                    cond_input, next_state, scope="cond_gru_2_cell")

        elif self._rnn_cell_str == "LSTM":
            prev_state = tf.contrib.rnn.LSTMStateTuple(
                loop_state.prev_rnn_state, loop_state.prev_rnn_output)
            cell_output, state = cell(rnn_input, prev_state)
            # Only the cell part (c) of the LSTM state is carried over.
            next_state = state.c
            contexts, att_loop_states = _attend(cell_output, rnn_input)
        else:
            raise ValueError("Unknown RNN cell.")

        with tf.name_scope("rnn_output_projection"):
            embedded_input = tf.nn.embedding_lookup(
                self.embedding_matrix, loop_state.input_symbol)

            output = self.output_projection(
                cell_output, embedded_input, list(contexts),
                self.train_mode)

        logits = self._logit_function(output)

    # Later steps must reuse the variables created by the first one.
    self.step_scope.reuse_variables()

    if sample:
        next_symbols = tf.multinomial(logits, num_samples=1)
    elif train_mode:
        next_symbols = loop_state.train_inputs[step]
    else:
        next_symbols = tf.to_int32(tf.argmax(logits, axis=1))
        still_running = tf.to_int32(
            tf.logical_not(loop_state.finished))

        # Note this works only when PAD_TOKEN_INDEX is 0. Otherwise
        # this have to be rewritten
        assert PAD_TOKEN_INDEX == 0
        next_symbols = next_symbols * still_running

    just_ended = tf.equal(next_symbols, END_TOKEN_INDEX)
    now_finished = tf.logical_or(loop_state.finished, just_ended)

    return LoopState(
        step=step + 1,
        input_symbol=next_symbols,
        train_inputs=loop_state.train_inputs,
        prev_rnn_state=next_state,
        prev_rnn_output=cell_output,
        rnn_outputs=loop_state.rnn_outputs.write(
            step + 1, cell_output),
        prev_contexts=list(contexts),
        prev_logits=logits,
        logits=loop_state.logits.write(step, logits),
        finished=now_finished,
        mask=loop_state.mask.write(step, tf.logical_not(now_finished)),
        attention_loop_states=list(att_loop_states))
def train(train_model=True):
    """
    Trains the agent with hyperparameters and other info loaded from mission_control_<game>.py file

    Builds an "Action_agent" network and a "Target_agent" network from
    ``get_agent(X_input)``. Only the action agent's variables are optimised;
    ``copy_parameters`` is called every ``mc.target_network_update`` steps.

    :param train_model: bool, True -> Trains the agent
                              False -> Loads the LATEST trained agent and plays
    :return: absolutely nothing
    """
    with tf.variable_scope("Action_agent"):
        agent = get_agent(X_input)

    with tf.variable_scope("Target_agent"):
        target_agent = get_agent(X_input)

    loss = tf.losses.mean_squared_error(labels=Y_target, predictions=agent)

    # Restrict the optimizer to the action agent's variables.
    var_list = tf.trainable_variables()
    agent_vars = [t for t in var_list if t.name.startswith("Action_agent")]

    optimizer = tf.train.RMSPropOptimizer(learning_rate=mc.learning_rate,
                                          momentum=mc.momentum,
                                          epsilon=mc.epsilon).minimize(
                                              loss, var_list=agent_vars)

    # Create the summary for tensorboard
    # TODO: Plot the rewards per episode
    tf.summary.scalar(name='loss', tensor=loss)
    tf.summary.scalar(name='max_q_value', tensor=tf.reduce_max(
        agent))  # TODO: Replace this to the op in the paper
    tf.summary.histogram(name='q_values_hist', values=agent)
    # TODO: Plot the length of each episode
    # TODO: Plot the argmax of the action taken for each play

    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        if train_model:
            print("Training agent!")
            print("Preparing required directories")

            # Initialize global variables
            sess.run(init)

            # Used to measure time taken
            t1 = time.time()

            # Kinda like the global step, but is not a "Tensor"
            step = 0

            # Path of the most recent checkpoint. Stays None when the run
            # ends before the first save; previously the final report
            # raised NameError in that case.
            saved_path = None

            # Get the initial epsilon
            prob_rand = mc.prob_random

            # TODO: Change this ASAP
            # Add epsilon to Tensorboard
            # NOTE(review): this records the initial Python value only; the
            # annealed epsilon never reaches the summary.
            tf.summary.scalar('epsilon', tensor=prob_rand)

            summary_op = tf.summary.merge_all()

            replay_memory = deque()

            if mc.load_trained_model:
                # Resume from the last run directory in sorted order.
                saved_models = os.listdir(mc.logdir)
                latest_saved_model = sorted(saved_models)[-1]
                saver.restore(
                    sess,
                    tf.train.latest_checkpoint(mc.logdir + latest_saved_model +
                                               "/saved_models/"))
                with open(
                        mc.logdir + latest_saved_model +
                        "/saved_models/checkpoint", 'r') as checkout_file:
                    # The step is read from fixed character offsets of the
                    # first line of the checkpoint index file.
                    line_1 = checkout_file.readline()
                    step = int(line_1[30:-2])
                tensorboard_dir = mc.logdir + latest_saved_model + "/Tensorboard/"
                saved_model_dir = mc.logdir + latest_saved_model + "/saved_models/"
                log_dir = mc.logdir + latest_saved_model + "/logs/"
                replay_memory = collect_rand_observations(
                    replay_memory, sess, agent)
            else:
                replay_memory = collect_rand_observations(
                    replay_memory)  # Get the initial 50k random observations

            if not mc.load_trained_model:
                tensorboard_dir, saved_model_dir, log_dir = make_directories(
                    mc.logdir)

            print("Tensorboard files stores in: {}".format(tensorboard_dir))
            print("Saved models stored in: {}".format(saved_model_dir))
            print("Log files stores in: {}".format(log_dir))

            # File writer for tensorboard
            writer = tf.summary.FileWriter(logdir=tensorboard_dir,
                                           graph=sess.graph)

            game_rewards = []

            # Save current mission control file
            with open("mission_control_breakout.py", "r") as mc_file:
                mission_control_file = mc_file.read()
                with open(log_dir + "/mission_control.txt", "w") as mc_writer:
                    mc_writer.write(mission_control_file)

            for e in range(mc.n_episodes):
                with open(log_dir + "/log.txt", "a") as log_file:
                    log_file.write(
                        "--------------------------Episode: {}/{}------------------------------\n"
                        .format(e + 1, mc.n_episodes))
                print(
                    "--------------------------Episode: {}/{}------------------------------\n"
                    .format(e + 1, mc.n_episodes))

                # Prepare first observation: the first frame is repeated
                # four times along the channel axis.
                observation = env.reset()
                observation = ops.convert_to_gray_n_resize(observation)
                observation = np.expand_dims(observation, axis=2)
                state = np.repeat(observation, 4, axis=2)
                state = np.expand_dims(state, axis=0)

                # TODO: Only for breakout
                lives_left = 5

                log_q_values = []
                episode_rewards = []

                for t in itertools.count():
                    mini_batch = random.sample(replay_memory, mc.batch_size)

                    agent_input = []
                    agent_target = []
                    for sample in mini_batch:
                        state_ = sample[0]
                        action_ = sample[1]
                        reward_ = sample[2]
                        next_state_ = sample[3]
                        done_ = sample[4]
                        life_lost = sample[5]

                        agent_input.append(state_[0])
                        target = sess.run(target_agent,
                                          feed_dict={X_input: state_})
                        if done_ or life_lost == 1:
                            # Terminal transition: no bootstrapped value.
                            target[0, action_] = reward_
                            agent_target.append(target[0])
                        else:
                            agent_output = sess.run(
                                target_agent,
                                feed_dict={X_input: next_state_})
                            target[0, action_] = reward_ + mc.gamma * (
                                np.amax(agent_output))
                            agent_target.append(target[0])

                    # Training the agent for 1 iterations. Finally!!
                    for _ in range(mc.fit_epochs):
                        sess.run(optimizer,
                                 feed_dict={
                                     X_input: agent_input,
                                     Y_target: agent_target
                                 })

                    # Copy trained parameters from the agent to the target network
                    if (step + 1) % mc.target_network_update == 0:
                        copy_parameters(sess)

                    l, summary = sess.run([loss, summary_op],
                                          feed_dict={
                                              X_input: agent_input,
                                              Y_target: agent_target
                                          })

                    writer.add_summary(summary, global_step=step)

                    print("\rStep: {} ({}), Episode: {}/{}, Loss: {}".format(
                        t, step, e + 1, mc.n_episodes, l),
                          end="")
                    sys.stdout.flush()

                    # Collect the next observation (epsilon-greedy)
                    if np.random.rand() < prob_rand:
                        action = env.action_space.sample()
                    else:
                        q_prediction = sess.run(agent,
                                                feed_dict={X_input: state})
                        action = np.argmax(q_prediction)
                        log_q_values.extend(q_prediction)

                    next_state, reward, done, info = env.step(action)

                    next_state = ops.convert_to_gray_n_resize(next_state)
                    next_state = np.expand_dims(next_state, axis=2)
                    next_state = np.expand_dims(next_state, axis=0)
                    # New frame first, followed by the first three channels
                    # of the current state.
                    next_states = np.append(next_state,
                                            state[:, :, :, :3],
                                            axis=3)

                    life_lost = 0
                    if lives_left - info['ale.lives'] > 0:
                        life_lost = 1
                        lives_left -= 1

                    # Remove old samples from replay memory if it's full
                    if len(replay_memory) > mc.observation_time:
                        replay_memory.popleft()

                    replay_memory.append(
                        (state, action, reward, next_states, done, life_lost))
                    state = next_states
                    episode_rewards.append(reward)
                    step += 1

                    if (step + 1) % 10000 == 0:
                        # Save the agent
                        saved_path = saver.save(sess,
                                                saved_model_dir + '/model',
                                                global_step=step)
                    prob_rand = anneal_epsilon(step)

                    if mc.show_ui:
                        env.render()

                    if done:
                        break

                with open(log_dir + "/log.txt", "a") as log_file:
                    log_file.write(
                        "Step: {} ({}), Play: {}/{}, Loss: {}\n".format(
                            t, step, e + 1, mc.n_episodes, l))
                    log_file.write("Reward Obtained: {}\n".format(
                        np.sum(episode_rewards)))

                    # Redraw the reward curve after every episode.
                    game_rewards.append(np.sum(episode_rewards))
                    x_val = np.arange(e + 1)
                    plt.plot(x_val, game_rewards)
                    plt.xlabel("Episode")
                    plt.ylabel("Reward Obtained")
                    plt.savefig("{}/Rewards.png".format(log_dir))
                    plt.close()

                    if log_q_values:
                        log_file.write("Average Q Value: {}\n".format(
                            np.mean(log_q_values)))
                    else:
                        log_file.write("All of the actions were random\n")

                print("\nReward Obtained: {}".format(np.sum(episode_rewards)))

                if log_q_values:
                    print("Average Q Value: {}".format(np.mean(log_q_values)))
                else:
                    print("All of the actions were random")

            print("Time taken of {} Plays on your potato: {:.4f}s".format(
                mc.n_episodes,
                time.time() - t1))
            print("Average time for each Play: {:.4f}s".format(
                (time.time() - t1) / mc.n_episodes))
            print("Tensorboard files saved in: {}".format(tensorboard_dir))
            print("Model saved in: {}".format(saved_path))
            print(
                "Model parameters stored in: {}".format(log_dir +
                                                        "mission_control.txt"))
            print("Agent get to roll!")
            with open(log_dir + "/log.txt", "a") as log_file:
                log_file.write(
                    "Time taken of {} episodes on your potato: {:.4f}s\n".
                    format(mc.n_episodes,
                           time.time() - t1))
                log_file.write(
                    "Average time for each episode: {:.4f}s\n".format(
                        (time.time() - t1) / mc.n_episodes))
        else:
            # Get the latest trained model
            saved_models = os.listdir(mc.logdir)
            latest_saved_model = sorted(saved_models)[-1]
            saver.restore(
                sess,
                tf.train.latest_checkpoint(mc.logdir + latest_saved_model +
                                           "/saved_models/"))
            print("Getting model from: {}".format(mc.logdir +
                                                  latest_saved_model +
                                                  "/saved_models/"))
            print(
                "------------------------Playing----------------------------")
            play(sess=sess,
                 agent=agent,
                 no_plays=mc.n_episodes,
                 log_dir=None,
                 show_ui=mc.show_ui,
                 show_action=mc.show_action)
def multihead_attention(queries, memories, bias, num_heads, key_size,
                        value_size, output_size, keep_prob=None, output=True,
                        dtype=None, scope=None):
    """ Multi-head scaled-dot-product attention with input/output
        transformations.

    :param queries: A tensor with shape [batch, length_q, depth_q]
    :param memories: A tensor with shape [batch, length_m, depth_m], or
        None for self attention (keys and values then come from queries)
    :param bias: A tensor (see attention_bias)
    :param num_heads: An integer dividing key_size and value_size
    :param key_size: An integer
    :param value_size: An integer
    :param output_size: An integer
    :param keep_prob: A floating point number in (0, 1]
    :param output: Whether to use output transformation
    :param dtype: An optional instance of tf.DType
    :param scope: An optional string

    :returns: A dict with the following keys:
        weights: The "weights" entry returned by multiplicative_attention
        outputs: The combined heads, passed through the output
            transformation when ``output`` is true
    :raises ValueError: If key_size or value_size is not divisible by
        num_heads
    """

    if key_size % num_heads != 0:
        raise ValueError("Key size (%d) must be divisible by the number of "
                         "attention heads (%d)." % (key_size, num_heads))

    if value_size % num_heads != 0:
        raise ValueError("Value size (%d) must be divisible by the number of "
                         "attention heads (%d)." % (value_size, num_heads))

    with tf.variable_scope(scope, default_name="multihead_attention",
                           values=[queries, memories], dtype=dtype):
        if memories is None:
            # self attention: one fused projection split into q, k and v
            fused = linear(queries, key_size * 2 + value_size, True, True,
                           scope="qkv_transform")
            q, k, v = tf.split(fused, [key_size, key_size, value_size],
                               axis=-1)
        else:
            # q comes from the queries, k and v from the memories
            q = linear(queries, key_size, True, True, scope="q_transform")
            fused = linear(memories, key_size + value_size, True,
                           scope="kv_transform")
            k, v = tf.split(fused, [key_size, value_size], axis=-1)

        # split heads
        q, k, v = (split_heads(t, num_heads) for t in (q, k, v))

        # scale query by 1 / sqrt(per-head key depth)
        per_head_depth = key_size // num_heads
        q *= per_head_depth ** -0.5

        # attention
        attended = multiplicative_attention(q, k, v, bias, keep_prob)

        # combine heads
        merged = combine_heads(attended["outputs"])

        if not output:
            return {"weights": attended["weights"], "outputs": merged}

        projected = linear(merged, output_size, True, True,
                           scope="output_transform")
        return {"weights": attended["weights"], "outputs": projected}
def _dual_pointer_decoder(self, decoder_input, decoder_init_state,
                          decoder_hidden, pointing_memory):
    '''
    Dual pointer-network decoder plus the training-op layer.

    Two LSTM cells (object and subject) are stepped over the same decoder
    inputs. At each step both outputs attend over ``pointing_memory`` to
    produce pointing logits, and the attention outputs are classified into
    relation logits.

    :param decoder_input: decoder input, unstacked along axis 1 into steps
    :param decoder_init_state: initial decoder state for both cells
        (the final encoder state, per the original author's note)
    :param decoder_hidden: hidden size of the decoder cells
    :param pointing_memory: target the decoder points into (used as
        attention key and value)
    :return: None; sets predictions, losses, ``train_op`` and ``saver``
        on ``self``

    NOTE(review): the scope nesting below was reconstructed from a source
    with collapsed whitespace -- confirm against the original file.
    '''
    with tf.variable_scope("decoder_v3"):
        with tf.variable_scope("object_cell_define"):
            obj_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                decoder_hidden, dropout_keep_prob=self.keep_pob)
            obj_state = decoder_init_state

        with tf.variable_scope("subject_cell_define"):
            subj_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                decoder_hidden, dropout_keep_prob=self.keep_pob)
            subj_state = decoder_init_state

        with tf.variable_scope("decoder_input_layer"):
            step_inputs = tf.unstack(decoder_input, axis=1)

        with tf.variable_scope("decoding_triple", reuse=tf.AUTO_REUSE):
            # Dual pointing section
            obj_steps = []
            rel_steps = []
            subj_steps = []
            rev_rel_steps = []

            for step_idx in range(self.max_entities):
                step_input = step_inputs[step_idx]

                obj_out, obj_next = obj_cell(step_input, obj_state)
                subj_out, subj_next = subj_cell(step_input, subj_state)

                # Add a length-1 axis so each output can act as a query.
                obj_out = tf.expand_dims(obj_out, axis=1)
                subj_out = tf.expand_dims(subj_out, axis=1)

                # Pointing is done with multi-head attention
                rel_ctx, obj_point = self._multi_head_attention(
                    key=pointing_memory,
                    query=obj_out,
                    value=pointing_memory,
                    attention_name="object_pointing")
                rev_ctx, subj_point = self._multi_head_attention(
                    key=pointing_memory,
                    query=subj_out,
                    value=pointing_memory,
                    attention_name="subject_pointing")

                obj_point = tf.squeeze(obj_point, axis=1)
                subj_point = tf.squeeze(subj_point, axis=1)
                rel_ctx = tf.squeeze(rel_ctx, axis=1)
                rev_ctx = tf.squeeze(rev_ctx, axis=1)

                rel_logit = tf.layers.dense(
                    rel_ctx,
                    units=self.relation_vocab_size,
                    activation=tf.nn.leaky_relu,
                    name="relation_label")
                rev_rel_logit = tf.layers.dense(
                    rev_ctx,
                    units=self.relation_vocab_size,
                    activation=tf.nn.leaky_relu,
                    name="rev_relation_label")

                obj_steps.append(obj_point)
                rel_steps.append(rel_logit)
                subj_steps.append(subj_point)
                rev_rel_steps.append(rev_rel_logit)

                # Carry both cell states into the next step.
                obj_state = obj_next
                subj_state = subj_next

            object_logits = tf.stack(obj_steps, axis=1)
            relation_logits = tf.stack(rel_steps, axis=1)
            subject_logits = tf.stack(subj_steps, axis=1)
            rev_relation_logits = tf.stack(rev_rel_steps, axis=1)

            self.object_predicts = tf.argmax(object_logits, axis=-1)
            self.relation_predicts = tf.argmax(relation_logits, axis=-1)
            self.subject_predicts = tf.argmax(subject_logits, axis=-1)
            self.rev_relation_predicts = tf.argmax(
                rev_relation_logits, axis=-1)

        with tf.variable_scope("training_layer"):
            # Training-op section
            xent = tf.losses.sparse_softmax_cross_entropy
            self.object_loss = xent(
                logits=object_logits,
                labels=self.object_target,
                weights=self.relation_weight)
            self.re_loss = xent(
                logits=relation_logits,
                labels=self.relation_target,
                weights=self.relation_weight)
            self.subject_loss = xent(
                logits=subject_logits,
                labels=self.subject_target,
                weights=self.rev_relation_weight)
            self.rev_re_loss = xent(
                logits=rev_relation_logits,
                labels=self.rev_relation_target,
                weights=self.rev_relation_weight)

            self.object_loss = tf.reduce_mean(self.object_loss)
            self.re_loss = tf.reduce_mean(self.re_loss)
            self.subject_loss = tf.reduce_mean(self.subject_loss)
            self.rev_re_loss = tf.reduce_mean(self.rev_re_loss)

            # Pointing losses weigh 0.4 each, relation losses 0.1 each.
            self.loss = (0.4 * self.object_loss) + (0.4 * self.subject_loss) + (
                0.1 * self.re_loss) + (0.1 * self.rev_re_loss)

            # Adam optimizer with EMA over the trainable parameters
            adam = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
            self._gradients = adam.compute_gradients(self.loss)
            apply_step = adam.apply_gradients(
                self._gradients, global_step=self.global_step)

            averager = tf.train.ExponentialMovingAverage(decay=0.9999)
            # The EMA update is created under a dependency on the
            # gradient step.
            with tf.control_dependencies([apply_step]):
                ema_step = averager.apply(
                    tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
            self.train_op = tf.group(ema_step)

            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)
def _transformer_layer(self, inputs, decoder_inputs, drop_rate, is_training,
                       scope='Transformer_body', reuse=tf.AUTO_REUSE):
    """Build the encoder/decoder transformer body and return vocabulary logits.

    Args:
        inputs: Source token ids fed to the encoder embedding.
        decoder_inputs: Target token ids fed to the decoder embedding.
        drop_rate: Dropout rate applied after each embedding and passed to
            every encoder/decoder block.
        is_training: Forwarded as the dropout/block training switch.
        scope: Name of the enclosing variable scope.
        reuse: Reuse mode of the enclosing variable scope.

    Returns:
        The output of a dense projection of the final decoder activations
        onto ``len(self.target_int2vocab)`` units.
    """
    source_vocab_size = len(self.input_int2vocab)
    target_vocab_size = len(self.target_int2vocab)
    block_numbers = range(1, self._num_blocks + 1)

    with tf.variable_scope(name_or_scope=scope, reuse=reuse):
        with tf.name_scope('ENCODER'):
            # Token embedding plus positional encoding, then dropout.
            encoded = embedding(
                ids=inputs,
                vocab_size=source_vocab_size,
                embed_dim=self._num_units,
                zeropad=True,
                pos=True,
                scope='enc_embedding',
                reuse=False)
            encoded = tf.layers.dropout(
                inputs=encoded, rate=drop_rate, training=is_training)

            # Stack of encoder blocks, numbered from 1.
            for block_no in block_numbers:
                encoded = encoding_sublayer(
                    input_embedding=encoded,
                    num_units=self._num_units,
                    num_heads=self._num_heads,
                    drop_rate=drop_rate,
                    is_training=is_training,
                    scope='enc_block_{}'.format(block_no),
                    reuse=False)

        with tf.name_scope('DECODER'):
            decoded = embedding(
                ids=decoder_inputs,
                vocab_size=target_vocab_size,
                embed_dim=self._num_units,
                zeropad=True,
                pos=True,
                scope='dec_embedding',
                reuse=False)
            decoded = tf.layers.dropout(
                inputs=decoded, rate=drop_rate, training=is_training)

            # Stack of decoder blocks; each one also receives the final
            # encoder output.
            for block_no in block_numbers:
                decoded = decoding_sublayer(
                    output_embedding=decoded,
                    input_embedding=encoded,
                    num_units=self._num_units,
                    num_heads=self._num_heads,
                    drop_rate=drop_rate,
                    is_training=is_training,
                    scope='dec_block_{}'.format(block_no),
                    reuse=False)

        # Final linear projection onto the target vocabulary.
        with tf.name_scope('FINAL_DENSE'):
            logits = tf.layers.dense(inputs=decoded, units=target_vocab_size)

    return logits
image.set_shape([None, None, None, 3]) for y in y_true: y.set_shape([None, None, None, None, None]) ################## # Model definition ################## # yolo_model = yolov3(args.class_num, args.anchors, args.use_label_smooth, args.use_focal_loss, args.batch_norm_decay, args.weight_decay) # with tf.variable_scope('yolov3'): # pred_feature_maps = yolo_model.forward(image, is_training=is_training) # loss = yolo_model.compute_loss(pred_feature_maps, y_true) # y_pred = yolo_model.predict(pred_feature_maps) yolo_model = sliming_yolov3(args.class_num, args.anchors, args.use_label_smooth, args.use_focal_loss, args.batch_norm_decay, args.weight_decay) ############################## first prune ################################################################################# with tf.variable_scope('yolov3'): pred_feature_maps = yolo_model.forward_include_res_with_prune_factor(image, prune_factor=0.8, is_training=is_training) ############################################################################################################################ # ############################## second prune ################################################################################# # with tf.variable_scope('yolov3'): # pred_feature_maps = yolo_model.forward_include_res_with_prune_factor(image, prune_factor=0.8, is_training=is_training, prune_cnt=2) # ############################################################################################################################ # ############################## third prune ################################################################################# # with tf.variable_scope('yolov3'): # pred_feature_maps = yolo_model.forward_include_res_with_prune_factor(image, prune_factor=0.8, is_training=is_training, prune_cnt=3) # ############################################################################################################################ ############################## fourth prune 
################################################################################# # with tf.variable_scope('yolov3'):
def osvos(inputs, scope='osvos'):
    """Build the OSVOS network graph.

    Args:
        inputs: Input image tensor (described by the original author as a
            TensorFlow placeholder holding the input image).
        scope: Variable-scope name for the network.

    Returns:
        net: Output tensor of the final 1x1 'upscore-fuse' convolution.
        end_points: Dictionary converted from the end-points collection that
            the layers of this network write to.
    """
    im_size = tf.shape(inputs)

    # (backbone stage, scope of the 16-channel side convolution)
    side_conv_scopes = (
        (2, 'conv2_2_16'),
        (3, 'conv3_3_16'),
        (4, 'conv4_3_16'),
        (5, 'conv5_3_16'),
    )
    # (backbone stage, transposed-conv kernel size, transposed-conv stride)
    upsampling = ((2, 4, 2), (3, 8, 4), (4, 16, 8), (5, 32, 16))

    with tf.variable_scope(scope, 'osvos', [inputs]) as sc:
        end_points_collection = sc.name + '_end_points'

        # Collect outputs of all intermediate layers.
        with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                            padding='SAME',
                            outputs_collections=end_points_collection):
            # Backbone: five conv stages separated by 2x2 max pooling.
            net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')
            net_2 = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            net = slim.max_pool2d(net_2, [2, 2], scope='pool2')
            net_3 = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            net = slim.max_pool2d(net_3, [2, 2], scope='pool3')
            net_4 = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            net = slim.max_pool2d(net_4, [2, 2], scope='pool4')
            net_5 = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            stage_outputs = {2: net_2, 3: net_3, 4: net_4, 5: net_5}

            with slim.arg_scope([slim.conv2d], activation_fn=None):
                # Side outputs of the network (16 channels per stage).
                side = {}
                for stage, conv_scope in side_conv_scopes:
                    side[stage] = slim.conv2d(stage_outputs[stage], 16, [3, 3],
                                              scope=conv_scope)

                # Single-channel scores used to supervise each side output.
                side_score = {}
                for stage, _ in side_conv_scopes:
                    side_score[stage] = slim.conv2d(
                        side[stage], 1, [1, 1], scope='score-dsn_%d' % stage)

                with slim.arg_scope([slim.convolution2d_transpose],
                                    activation_fn=None,
                                    biases_initializer=None,
                                    padding='VALID',
                                    outputs_collections=end_points_collection,
                                    trainable=False):
                    # Supervised side outputs: upsample, crop to the input
                    # size, and register under an explicit end-point name.
                    for stage, kernel, stride in upsampling:
                        upsampled = slim.convolution2d_transpose(
                            side_score[stage], 1, kernel, stride,
                            scope='score-dsn_%d-up' % stage)
                        upsampled = crop_features(upsampled, im_size)
                        utils.collect_named_outputs(
                            end_points_collection,
                            'osvos/score-dsn_%d-cr' % stage, upsampled)

                    # Main output: upsample the 16-channel side features.
                    fuse_inputs = []
                    for stage, kernel, stride in upsampling:
                        upsampled = slim.convolution2d_transpose(
                            side[stage], 16, kernel, stride,
                            scope='score-multi%d-up' % stage)
                        upsampled = crop_features(upsampled, im_size)
                        utils.collect_named_outputs(
                            end_points_collection,
                            'osvos/side-multi%d-cr' % stage, upsampled)
                        fuse_inputs.append(upsampled)

                # Fuse all upsampled side features along the channel axis.
                concat_side = tf.concat(fuse_inputs, axis=3)
                net = slim.conv2d(concat_side, 1, [1, 1], scope='upscore-fuse')

        end_points = slim.utils.convert_collection_to_dict(end_points_collection)
        return net, end_points
def _multi_head_attention(self, key, query, value, attention_name,
                          num_heads=8, head_size=32, intermediate_size=512,
                          return_type="concat"):
    """Multi-head attention.

    :param key: key tensor
    :param query: query tensor
    :param value: value tensor; for self attention key, query and value are
        all the same tensor
    :param attention_name: variable-scope name
    :param num_heads: number of heads
    :param head_size: size of each head, i.e. the dimension after splitting
    :param intermediate_size: units of the final dense layer (only used when
        ``return_type == "dense"``)
    :param return_type: how the attention output is combined with ``query``:
        "concat" concatenates them on the last axis, "dense" concatenates and
        applies a dense layer, "residual" adds them, anything else returns
        the attention output alone
    :return: tuple ``(output, alignment)`` where ``alignment`` is the sum over
        heads of the scaled (pre-softmax) query/key scores
    """
    projected_units = num_heads * head_size

    def _project_heads(projected):
        # Split the last axis into one slice per head and give every slice
        # its own (auto-named) dense layer.
        return [
            tf.layers.dense(piece, head_size, activation=tf.nn.leaky_relu)
            for piece in tf.split(projected, num_heads, axis=-1)
        ]

    with tf.variable_scope(name_or_scope=attention_name):
        query_proj = tf.layers.dense(query, units=projected_units,
                                     activation=tf.nn.leaky_relu, name="query")
        key_proj = tf.layers.dense(key, units=projected_units,
                                   activation=tf.nn.leaky_relu, name="key")
        value_proj = tf.layers.dense(value, units=projected_units,
                                     activation=tf.nn.leaky_relu, name="value")

        # Per-head layers are created in query, key, value order; the
        # auto-generated layer names depend on that order.
        query_heads = _project_heads(query_proj)
        key_heads = _project_heads(key_proj)
        value_heads = _project_heads(value_proj)

        # Stack the heads along axis 0 so a single matmul covers all heads.
        stacked_query = tf.concat(query_heads, axis=0)
        stacked_key = tf.concat(key_heads, axis=0)
        stacked_value = tf.concat(value_heads, axis=0)

        scores = tf.matmul(stacked_query, stacked_key, transpose_b=True)
        scaled_scores = scores / (head_size**0.5)
        attention_weights = tf.nn.softmax(scaled_scores, -1)
        attended = tf.matmul(attention_weights, stacked_value)

        # query_step * key_step
        multi_head_align = tf.add_n(tf.split(scaled_scores, num_heads, axis=0))
        # Undo the axis-0 stacking and lay the heads side by side on axis 2.
        multi_head_output = tf.concat(tf.split(attended, num_heads, axis=0),
                                      axis=2)

        if return_type in ("concat", "dense"):
            residual_output = tf.concat([multi_head_output, query], axis=-1)
            if return_type == "dense":
                residual_output = tf.layers.dense(residual_output,
                                                  intermediate_size,
                                                  activation=tf.nn.leaky_relu,
                                                  name="mh_output")
        elif return_type == "residual":
            residual_output = multi_head_output + query
        else:
            residual_output = multi_head_output

        return residual_output, multi_head_align
def xception(inputs,
             blocks,
             num_classes=None,
             is_training=True,
             global_pool=True,
             keep_prob=0.5,
             output_stride=None,
             reuse=None,
             scope=None):
    """Generator for Xception models.

    This function generates a family of Xception models. See the xception_*()
    methods for specific model instantiations, obtained by selecting different
    block instantiations that produce Xception of various depths.

    Args:
        inputs: A tensor of size [batch, height_in, width_in, channels]. Must
            be floating point. If a pretrained checkpoint is used, pixel
            values should be the same as during training.
        blocks: A list of length equal to the number of Xception blocks. Each
            element is an Xception Block object describing the units in the
            block.
        num_classes: Number of predicted classes for classification tasks. If
            0 or None, the features before the logit layer are returned.
        is_training: Whether batch_norm layers are in training mode.
        global_pool: If True, global average pooling is performed before
            computing the logits. Set to True for image classification, False
            for dense prediction.
        keep_prob: Keep probability used in the pre-logits dropout layer.
        output_stride: If None, the output is computed at the nominal network
            stride. Otherwise it specifies the requested ratio of input to
            output spatial resolution and must be a multiple of 2.
        reuse: Whether or not the network and its variables should be reused.
            To be able to reuse, 'scope' must be given.
        scope: Optional variable_scope.

    Returns:
        net: A rank-4 tensor of size [batch, height_out, width_out,
            channels_out]. If global_pool is False, height_out and width_out
            are reduced by a factor of output_stride compared to the
            respective height_in and width_in, else both equal one. If
            num_classes is 0 or None, net is the output of the last Xception
            block, potentially after global average pooling. If num_classes
            is a non-zero integer, net contains the pre-softmax activations.
        end_points: A dictionary from components of the network to the
            corresponding activation.

    Raises:
        ValueError: If the target output_stride is not valid.
    """
    with tf.variable_scope(scope, 'xception', [inputs], reuse=reuse) as sc:
        end_points_collection = sc.original_name_scope + 'end_points'
        with slim.arg_scope([slim.conv2d,
                             slim.separable_conv2d,
                             xception_module,
                             stack_blocks_dense],
                            outputs_collections=end_points_collection):
            with slim.arg_scope([slim.batch_norm], is_training=is_training):
                net = inputs
                if output_stride is not None:
                    if output_stride % 2 != 0:
                        raise ValueError(
                            'The output_stride needs to be a multiple of 2.')
                    # The root block below already downsamples by 2, so the
                    # remaining blocks only have to reach half the target.
                    # Floor division keeps the stride an integer on Python 3
                    # (true division would turn it into a float).
                    output_stride //= 2

                # Root block function operated on inputs.
                net = resnet_utils.conv2d_same(net, 32, 3, stride=2,
                                               scope='entry_flow/conv1_1')
                net = resnet_utils.conv2d_same(net, 64, 3, stride=1,
                                               scope='entry_flow/conv1_2')

                # Extract features for entry_flow, middle_flow, and exit_flow.
                net = stack_blocks_dense(net, blocks, output_stride)

                # Convert end_points_collection into a dictionary of
                # end_points.
                end_points = slim.utils.convert_collection_to_dict(
                    end_points_collection, clear_collection=True)

                if global_pool:
                    # Global average pooling.
                    net = tf.reduce_mean(net, [1, 2], name='global_pool',
                                         keepdims=True)
                    end_points['global_pool'] = net
                if num_classes:
                    net = slim.dropout(net, keep_prob=keep_prob,
                                       is_training=is_training,
                                       scope='prelogits_dropout')
                    net = slim.conv2d(net, num_classes, [1, 1],
                                      activation_fn=None,
                                      normalizer_fn=None, scope='logits')
                    end_points[sc.name + '/logits'] = net
                    end_points['predictions'] = slim.softmax(
                        net, scope='predictions')
                return net, end_points
def bottleneck_unit(x, out_chan1, out_chan2, down_stride=False, up_stride=False, name=None):
    """Residual bottleneck unit: a 1x1 -> 3x3 -> 1x1 branch added to a shortcut.

    Args:
        x: 4D input tensor; the channel count is read from axis 3.
        out_chan1: Channels of the two inner convolutions (branch2a/branch2b).
        out_chan2: Channels of the unit output (branch2c and the shortcut).
        down_stride: If True, the first convolution of each branch uses
            stride 2.
        up_stride: If True, the first convolution of each branch is a
            transposed convolution with stride 2.
        name: Suffix used to build the scope and variable names.

    Returns:
        ReLU of the sum of the shortcut branch and the bottleneck branch.
    """

    def conv_transpose(tensor, out_chans, shape, strides, name=None):
        """Transposed conv of `tensor`, scaling height/width by `strides`."""
        static_shape = tensor.get_shape().as_list()
        in_channel = static_shape[-1]
        kernel = weight_variable([shape, shape, out_chans, in_channel], name=name)
        # Prefer statically known dimensions; fall back to the runtime shape
        # for unknown ones (e.g. a variable batch size).
        runtime_shape = tf.shape(tensor)
        batch, height, width = [
            static_shape[i] if static_shape[i] is not None else runtime_shape[i]
            for i in range(3)
        ]
        out_shape = tf.stack([batch, height * strides, width * strides, out_chans])
        result = tf.nn.conv2d_transpose(tensor, kernel,
                                        output_shape=out_shape,
                                        strides=[1, strides, strides, 1],
                                        padding='SAME',
                                        name='conv_transpose')
        # Keep the static shape (at least the channel count) available so
        # later layers can read it with get_shape().
        result.set_shape([
            static_shape[0],
            None if static_shape[1] is None else static_shape[1] * strides,
            None if static_shape[2] is None else static_shape[2] * strides,
            out_chans,
        ])
        return result

    def conv(tensor, out_chans, shape, strides, name=None):
        """Square `shape` x `shape` convolution of `tensor`, SAME padding."""
        in_channel = tensor.get_shape().as_list()[-1]
        kernel = weight_variable([shape, shape, in_channel, out_chans], name=name)
        return tf.nn.conv2d(tensor, kernel,
                            strides=[1, strides, strides, 1],
                            padding='SAME',
                            name='conv')

    def bn(tensor, name=None):
        """
        :param tensor: 4D tensor input
        :param name: name of the operation
        :return: local response normalized tensor - not using batch normalization :(
        """
        return tf.nn.lrn(tensor, depth_radius=5, bias=2, alpha=1e-4, beta=0.75, name=name)

    in_chans = x.get_shape().as_list()[3]

    first_stride = 2 if (down_stride or up_stride) else 1

    with tf.variable_scope('res%s' % name):
        if in_chans == out_chan2:
            # NOTE(review): the identity shortcut is also taken when a stride
            # is requested, in which case its spatial size would not match
            # branch2 - confirm callers never combine the two.
            b1 = x
        else:
            with tf.variable_scope('branch1'):
                if up_stride:
                    b1 = conv_transpose(x, out_chans=out_chan2, shape=1, strides=first_stride,
                                        name='res%s_branch1' % name)
                else:
                    b1 = conv(x, out_chans=out_chan2, shape=1, strides=first_stride,
                              name='res%s_branch1' % name)
                b1 = bn(b1, 'bn%s_branch1' % name)

        with tf.variable_scope('branch2a'):
            if up_stride:
                b2 = conv_transpose(x, out_chans=out_chan1, shape=1, strides=first_stride,
                                    name='res%s_branch2a' % name)
            else:
                b2 = conv(x, out_chans=out_chan1, shape=1, strides=first_stride,
                          name='res%s_branch2a' % name)
            b2 = bn(b2, 'bn%s_branch2a' % name)
            b2 = tf.nn.relu(b2, name='relu')

        with tf.variable_scope('branch2b'):
            b2 = conv(b2, out_chans=out_chan1, shape=3, strides=1, name='res%s_branch2b' % name)
            b2 = bn(b2, 'bn%s_branch2b' % name)
            b2 = tf.nn.relu(b2, name='relu')

        with tf.variable_scope('branch2c'):
            b2 = conv(b2, out_chans=out_chan2, shape=1, strides=1, name='res%s_branch2c' % name)
            b2 = bn(b2, 'bn%s_branch2c' % name)

        x = b1 + b2
        return tf.nn.relu(x, name='relu')
def xception_module(inputs,
                    depth_list,
                    skip_connection_type,
                    stride,
                    unit_rate_list=None,
                    rate=1,
                    activation_fn_in_separable_conv=False,
                    regularize_depthwise=False,
                    outputs_collections=None,
                    scope=None):
    """An Xception module.

    The output of one Xception module is equal to the sum of `residual` and
    `shortcut`, where `residual` is the feature computed by three separable
    convolutions. The `shortcut` is the feature computed by 1x1 convolution
    with or without striding. In some cases, the `shortcut` path could be a
    simple identity function or none (i.e., no shortcut).

    Note that the max pooling operations of the Xception module are replaced
    with another separable convolution with striding, since atrous rate is
    not properly supported in the TensorFlow max pooling implementation.

    Args:
        inputs: A tensor of size [batch, height, width, channels].
        depth_list: A list of three integers specifying the depth values of
            one Xception module.
        skip_connection_type: Skip connection type for the residual path.
            Only supports 'conv', 'sum', or 'none'.
        stride: The block unit's stride. Determines the amount of
            downsampling of the unit's output compared to its input.
        unit_rate_list: A list of three integers, determining the unit rate
            for each separable convolution in the module. None means a unit
            rate of 1 for all three.
        rate: An integer, rate for atrous convolution.
        activation_fn_in_separable_conv: Includes activation function in the
            separable convolution or not.
        regularize_depthwise: Whether or not to apply L2-norm regularization
            on the depthwise convolution weights.
        outputs_collections: Collection to add the Xception unit output.
        scope: Optional variable_scope.

    Returns:
        The Xception module's output.

    Raises:
        ValueError: If depth_list or unit_rate_list does not contain three
            elements, or for an unsupported skip connection type.
    """
    if len(depth_list) != 3:
        raise ValueError('Expect three elements in depth_list.')
    if unit_rate_list is None:
        # The default used to be indexed directly and crashed; a unit rate of
        # 1 leaves `rate` unchanged for every separable convolution.
        unit_rate_list = [1, 1, 1]
    elif len(unit_rate_list) != 3:
        raise ValueError('Expect three elements in unit_rate_list.')

    with tf.variable_scope(scope, 'xception_module', [inputs]) as sc:
        residual = inputs

        def _separable_conv(features, depth, kernel_size, depth_multiplier,
                            regularize_depthwise, rate, stride, scope):
            if activation_fn_in_separable_conv:
                activation_fn = tf.nn.relu
            else:
                # No activation inside the separable conv: apply ReLU to the
                # input instead.
                activation_fn = None
                features = tf.nn.relu(features)
            return separable_conv2d_same(features,
                                         depth,
                                         kernel_size,
                                         depth_multiplier=depth_multiplier,
                                         stride=stride,
                                         rate=rate,
                                         activation_fn=activation_fn,
                                         regularize_depthwise=regularize_depthwise,
                                         scope=scope)

        for i in range(3):
            residual = _separable_conv(residual,
                                       depth_list[i],
                                       kernel_size=3,
                                       depth_multiplier=1,
                                       regularize_depthwise=regularize_depthwise,
                                       rate=rate * unit_rate_list[i],
                                       # Only the last separable conv strides.
                                       stride=stride if i == 2 else 1,
                                       scope='separable_conv' + str(i + 1))

        if skip_connection_type == 'conv':
            shortcut = slim.conv2d(inputs,
                                   depth_list[-1],
                                   [1, 1],
                                   stride=stride,
                                   activation_fn=None,
                                   scope='shortcut')
            outputs = residual + shortcut
        elif skip_connection_type == 'sum':
            outputs = residual + inputs
        elif skip_connection_type == 'none':
            outputs = residual
        else:
            raise ValueError('Unsupported skip connection type.')

        return slim.utils.collect_named_outputs(outputs_collections,
                                                sc.name,
                                                outputs)
def create_model(
    bert_config,
    is_training,
    input_ids,
    input_mask,
    segment_ids,
    labels,
    num_labels,
    use_one_hot_embeddings,
):
    """Creates a multi-label classification model on top of BERT.

    The pooled BERT output is projected onto ``num_labels`` logits; each
    label gets an independent sigmoid probability and the loss is the mean
    sigmoid cross-entropy over all examples and labels.

    Returns:
        Tuple ``(loss, per_example_loss, logits, probabilities)``.
    """
    bert = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings,
    )

    # Classification uses the pooled, whole-segment representation. For
    # token-level output, bert.get_sequence_output() would be used instead.
    pooled = bert.get_pooled_output()
    hidden_size = pooled.shape[-1].value

    weights_init = tf.truncated_normal_initializer(stddev=0.02)
    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size], initializer=weights_init
    )
    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer()
    )

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout
            pooled = tf.nn.dropout(pooled, keep_prob=0.9)

        projected = tf.matmul(pooled, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(projected, output_bias)

        # Multi-label case: independent sigmoid per label (a softmax over the
        # labels would be the multi-class alternative).
        probabilities = tf.nn.sigmoid(logits)
        float_labels = tf.cast(labels, tf.float32)
        tf.logging.info(
            "num_labels:{};logits:{};labels:{}".format(
                num_labels, logits, float_labels
            )
        )

        per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=float_labels, logits=logits
        )
        loss = tf.reduce_mean(per_example_loss)

    return (loss, per_example_loss, logits, probabilities)
def build_decoder(self):
    """Build the attention decoder for both training and greedy inference.

    Sets ``self.output_layer``, ``self.init_state``, ``self.training_logits``,
    ``self.inference_logits``, ``self.c_kl_batch_train`` and
    ``self.c_kl_batch_inf``. Reads the encoder outputs, embeddings, sequence
    lengths, latent vector and hyper-parameters from ``self``.
    """
    with tf.variable_scope("decode"):
        # NOTE(review): `dec_cell` is rebound on every iteration and only the
        # cell from the last iteration is passed to the attention wrapper
        # below, so the layers do not appear to be stacked - confirm intent.
        for layer in range(self.num_layers):
            with tf.variable_scope('decoder_{}'.format(layer + 1)):
                dec_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                    2 * self.lstm_hidden_units)
                dec_cell = tf.contrib.rnn.DropoutWrapper(
                    dec_cell, input_keep_prob=self.keep_prob)

        # Projection of decoder outputs onto the decoder vocabulary.
        self.output_layer = Dense(self.decoder_vocab_size)

        # Luong attention over the encoder outputs, limited to the true
        # source lengths.
        attn_mech = attention_wrapper.LuongAttention(
            2 * self.lstm_hidden_units,
            self.enc_outputs,
            memory_sequence_length=self.source_sentence_length)

        # NOTE(review): `attention_wrapper` looks like a project-modified
        # wrapper (it takes temperature / hmean arguments positionally) -
        # verify the argument order against its definition.
        attn_cell = attention_wrapper.AttentionWrapper(
            dec_cell, attn_mech, self.attention_temperature,
            self.use_hmean, self.lstm_hidden_units)

        self.init_state = attn_cell.zero_state(self.batch_size, tf.float32)

        with tf.name_scope("training_decoder"):
            # Training feeds the embedded target inputs to the decoder.
            training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=self.dec_embed_input,
                sequence_length=self.target_sentence_length,
                time_major=False)

            training_decoder = basic_decoder.BasicDecoder(
                attn_cell,
                training_helper,
                initial_state=self.init_state,
                latent_vector=self.z_vector,
                output_layer=self.output_layer)

            # This dynamic_decode returns four values; the fourth is stored
            # as the per-batch KL term for training.
            self.training_logits, _state, _len, self.c_kl_batch_train = decoder.dynamic_decode(
                training_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.decoder_num_tokens)

            self.training_logits = tf.identity(
                self.training_logits.rnn_output, 'logits')

        with tf.name_scope("inference_decoder"):
            start_token = self.decoder_word_index['GO']
            end_token = self.decoder_word_index['EOS']

            # One start token per batch element.
            start_tokens = tf.tile(tf.constant([start_token], dtype=tf.int32),
                                   [self.batch_size],
                                   name='start_tokens')

            # Inference decodes greedily from the decoder embeddings.
            inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                self.decoder_embeddings, start_tokens, end_token)

            inference_decoder = basic_decoder.BasicDecoder(
                attn_cell,
                inference_helper,
                initial_state=self.init_state,
                latent_vector=self.z_vector,
                output_layer=self.output_layer)

            self.inference_logits, _state, _len, self.c_kl_batch_inf = decoder.dynamic_decode(
                inference_decoder,
                output_time_major=False,
                impute_finished=True,
                maximum_iterations=self.decoder_num_tokens)

            # Despite the attribute name, this holds the sampled token ids.
            self.inference_logits = tf.identity(
                self.inference_logits.sample_id, name='predictions')

        self.c_kl_batch_train = tf.div(
            self.c_kl_batch_train,
            tf.cast(self.target_sentence_length,
                    dtype=tf.float32
                    ))  # Divide by respective target seq lengths
def stack_blocks_dense(net,
                       blocks,
                       output_stride=None,
                       outputs_collections=None):
    """Stacks Xception blocks and controls output feature density.

    First, this function creates scopes for the Xception in the form of
    'block_name/unit_1', 'block_name/unit_2', etc.

    Second, this function allows the user to explicitly control the output
    stride, which is the ratio of the input to output spatial resolution.
    This is useful for dense prediction tasks such as semantic segmentation
    or object detection.

    Control of the output feature density is implemented by atrous
    convolution.

    Args:
        net: A tensor of size [batch, height, width, channels].
        blocks: A list of length equal to the number of Xception blocks. Each
            element is an Xception Block object describing the units in the
            block.
        output_stride: If None, then the output will be computed at the
            nominal network stride. If output_stride is not None, it
            specifies the requested ratio of input to output spatial
            resolution, which needs to be equal to the product of unit
            strides from the start up to some level of Xception. For example,
            if the Xception employs units with strides 1, 2, 1, 3, 4, 1, then
            valid values for the output_stride are 1, 2, 6, 24 or None (which
            is equivalent to output_stride=24).
        outputs_collections: Collection to add the Xception block outputs.

    Returns:
        net: Output tensor with stride equal to the specified output_stride.

    Raises:
        ValueError: If the target output_stride is not valid.
    """
    # The current_stride variable keeps track of the effective stride of the
    # activations. This allows us to invoke atrous convolution whenever
    # applying the next residual unit would result in the activations having
    # stride larger than the target output_stride.
    current_stride = 1

    # The atrous convolution rate parameter.
    rate = 1

    for block in blocks:
        with tf.variable_scope(block.scope, 'block', [net]) as sc:
            for i, unit in enumerate(block.args):
                if output_stride is not None and current_stride > output_stride:
                    raise ValueError(
                        'The target output_stride cannot be reached.')
                with tf.variable_scope('unit_%d' % (i + 1), values=[net]):
                    # If we have reached the target output_stride, then we
                    # need to employ atrous convolution with stride=1 and
                    # multiply the atrous rate by the current unit's stride
                    # for use in subsequent layers.
                    if (output_stride is not None
                            and current_stride == output_stride):
                        net = block.unit_fn(net, rate=rate,
                                            **dict(unit, stride=1))
                        rate *= unit.get('stride', 1)
                    else:
                        net = block.unit_fn(net, rate=1, **unit)
                        current_stride *= unit.get('stride', 1)

            # Collect activations at the block's end before performing
            # subsampling.
            net = slim.utils.collect_named_outputs(
                outputs_collections, sc.name, net)

    if output_stride is not None and current_stride != output_stride:
        raise ValueError('The target output_stride cannot be reached.')

    return net
def distribute(images, labels, num_classes, total_num_examples, devices, is_train=True):
    """Build a data-parallel AlexNet training graph across several devices.

    The last entry of `devices` is handed to `ModelBuilder` and is where the
    averaged gradients are applied; every other entry hosts one model replica
    that processes an equal slice of the batch. When only one device is
    given (or `devices` is None), that device also hosts the single replica.

    Args:
        images: Batch of input images, split evenly across the replicas.
        labels: Batch of labels, split the same way as `images`.
        num_classes: Number of classes, forwarded to `alexnet_inference`.
        total_num_examples: Forwarded to the optimizer configuration as the
            total step count of the learning-rate schedule.
            NOTE(review): the name suggests an example count while the
            schedule expects a step count - confirm against the caller.
        devices: List of device specs; None means a single default device.
        is_train: Unused here; kept for interface compatibility.

    Returns:
        Tuple `(net, logits, total_loss, train_op, global_step)`, where the
        first three come from the last replica built.
    """
    if devices is None:
        devices = [None]

    def configure_optimizer(global_step, total_num_steps):
        """Return a configured optimizer"""
        def exp_decay(start, tgtFactor, num_stairs):
            decay_step = total_num_steps / (num_stairs - 1)
            # Float literals keep this a true division on Python 2 as well;
            # integer division would collapse the rate to 0 ** 0 == 1.
            decay_rate = (1.0 / tgtFactor) ** (1.0 / (num_stairs - 1))
            return tf.train.exponential_decay(start, global_step, decay_step, decay_rate,
                                              staircase=True)

        def lparam(learning_rate, momentum):
            return {'learning_rate': learning_rate, 'momentum': momentum}

        return HybridMomentumOptimizer({
            'weights': lparam(exp_decay(0.001, 250, 4), 0.9),
            'biases': lparam(exp_decay(0.002, 10, 2), 0.9),
        })

    # All devices but the last are workers. With a single device there would
    # be no worker at all, so that device doubles as the worker.
    worker_devices = devices[:-1] or devices[-1:]
    num_workers = len(worker_devices)

    # 1. Create the global step through the builder bound to the last device.
    builder = ModelBuilder(devices[-1])
    global_step = builder.ensure_global_step()

    # 2. Configure the optimizer.
    opt = configure_optimizer(global_step, total_num_examples)

    # 3. Split the input tensors into one sub-tensor per replica.
    images_per_worker = tf.split(images, num_workers)
    labels_per_worker = tf.split(labels, num_workers)

    # One gradient list per replica.
    worker_gradients = []

    with tf.variable_scope('AlexNet') as var_scope:
        # 4. Build one replica per worker and compute its gradients.
        # The empty name scope resets op naming to the graph root.
        with tf.name_scope(''):
            for index, device in enumerate(worker_devices):
                with tf.device(device):
                    with tf.name_scope("Workers_{}".format(index)):
                        net, logits, total_loss = alexnet_inference(
                            builder, images_per_worker[index],
                            labels_per_worker[index], num_classes)
                        worker_gradients.append(opt.compute_gradients(total_loss))
                    # Share the variables created above with the next replica.
                    var_scope.reuse_variables()

    # 5. On the parameter device, average and apply the gradients.
    with tf.device(builder.variable_device()):
        averaged_gradients = builder.average_gradients(worker_gradients)
        apply_op = opt.apply_gradients(averaged_gradients, global_step=global_step)
        train_op = tf.group(apply_op, name='AllTrainOps')

    # 6. Return the required values.
    return net, logits, total_loss, train_op, global_step
import os
import collections
import numpy as np
import tensorflow as tf
from tensorflow.contrib.rnn.python.ops.rnn_cell import Conv2DLSTMCell

# Defined for a single timestep
# Both leading dimensions are left as None in the placeholder shape below.
batch_size = None
max_time = None
# Per-timestep input shape passed to the cell and appended to the placeholder
# shape. NOTE(review): presumably [height, width, channels] - confirm.
input_shape = [128, 128, 25]

with tf.variable_scope('rnn', reuse=tf.AUTO_REUSE) as vs:
    # Define a cell
    # TODO: add support for padding='valid'
    cell = Conv2DLSTMCell(input_shape=input_shape,
                          output_channels=2,
                          kernel_shape=[7, 7],
                          use_bias=True,
                          name='conv_2d_lstm_cell_1')

    # Define input placeholder
    # Resulting shape: [batch_size, max_time, 128, 128, 25].
    x_input = tf.placeholder(dtype=tf.float32,
                             shape=[
                                 batch_size,
                                 max_time,
                             ] + input_shape,
                             name='rnn_input')

    # Define rnn layer
def __init__(self, pretrained_embeddings, flags):
    """
    Set up the QA system: sizes, encoder/decoder, placeholders, graph and
    training op.

    :param pretrained_embeddings: embeddings stored on the instance for the
        embedding setup step
    :param flags: configuration object providing the sizes, dropout,
        learning rate and gradient-clipping settings read below
    """
    self.pretrained_embeddings = pretrained_embeddings
    self.flags = flags
    cfg = self.flags

    self.h_size = cfg.state_size
    self.p_size = cfg.output_size
    self.q_size = cfg.question_size
    self.embed_size = cfg.embedding_size
    self.dropout = cfg.dropout

    # Encoder and decoder both receive 1 - dropout.
    self.encoder = Encoder(hidden_size=self.h_size,
                           dropout=(1.0 - cfg.dropout))
    self.decoder = Decoder(hidden_size=self.h_size,
                           output_size=self.p_size,
                           dropout=(1.0 - cfg.dropout))

    # ==== set up placeholder tokens ========
    self.context_placeholder = tf.placeholder(
        tf.int32, shape=(None, self.p_size), name='context_placeholder')
    self.question_placeholder = tf.placeholder(
        tf.int32, shape=(None, self.q_size), name='question_placeholder')
    self.answer_span_placeholder = tf.placeholder(
        tf.int32, shape=(None, 2), name='answer_span_placeholder')
    self.mask_q_placeholder = tf.placeholder(
        tf.int32, shape=(None,), name='mask_q_placeholder')
    self.mask_ctx_placeholder = tf.placeholder(
        tf.int32, shape=(None,), name='mask_ctx_placeholder')
    self.dropout_placeholder = tf.placeholder(
        tf.float32, shape=(), name='dropout_placeholder')

    # ==== assemble pieces ====
    qa_initializer = tf.uniform_unit_scaling_initializer(1.0)
    with tf.variable_scope("qa", initializer=qa_initializer):
        self.setup_embeddings()
        self.setup_system()
        self.setup_loss()

    # ==== set up training/updating procedure ====
    self.global_step = tf.Variable(0, trainable=False)
    self.starter_learning_rate = cfg.learning_rate
    # Constant learning rate (no decay schedule is applied).
    self.learning_rate = self.starter_learning_rate

    self.optimizer = get_optimizer("adam")

    if not cfg.grad_clip:
        # No gradient clipping: self.optimizer stays the optimizer factory.
        self.train_op = self.optimizer(self.learning_rate).minimize(
            self.loss, global_step=self.global_step)
    else:
        # Gradient clipping: clip every available gradient by its own norm.
        self.optimizer = self.optimizer(self.learning_rate)
        max_norm = cfg.max_gradient_norm
        clipped = [
            (grad, var) if grad is None
            else (tf.clip_by_norm(grad, max_norm), var)
            for grad, var in self.optimizer.compute_gradients(self.loss)
        ]
        self.train_op = self.optimizer.apply_gradients(
            clipped, global_step=self.global_step)

    self.saver = tf.train.Saver()
def build_autoencoder(self, input_tensor, name, reuse=False):
    """
    Autoencoder part of the generator, responsible for capturing the
    contextual information of the image.

    :param input_tensor: tensor fed to the first convolution
    :param name: variable scope under which all layers are created
    :param reuse: forwarded to ``tf.variable_scope``
    :return: dict with keys 'skip_1', 'skip_2' and 'skip_3' holding the
        three 3-channel outputs (only 'skip_3' is passed through tanh)
    """

    def conv_act(x, channels, conv_name, relu_name, kernel=3, stride=1):
        # Bias-free SAME convolution followed by the leaky ReLU helper.
        conv = self.conv2d(inputdata=x, out_channel=channels,
                           kernel_size=kernel, padding='SAME', stride=stride,
                           use_bias=False, name=conv_name)
        return self.lrelu(inputdata=conv, name=relu_name)

    def dilated_act(x, rate, conv_name, relu_name):
        # 3x3 dilated convolution with 256 outputs, then leaky ReLU.
        conv = self.dilation_conv(input_tensor=x, k_size=3, out_dims=256,
                                  rate=rate, padding='SAME', use_bias=False,
                                  name=conv_name)
        return self.lrelu(inputdata=conv, name=relu_name)

    def upsample_act(x, channels, deconv_name, pool_name, relu_name):
        # Stride-2 deconvolution, stride-1 average pooling, leaky ReLU.
        deconv = self.deconv2d(inputdata=x, out_channel=channels,
                               kernel_size=4, stride=2, padding='SAME',
                               use_bias=False, name=deconv_name)
        pooled = self.avgpooling(inputdata=deconv, kernel_size=2, stride=1,
                                 padding='SAME', name=pool_name)
        return self.lrelu(inputdata=pooled, name=relu_name)

    def rgb_head(x, head_name):
        # 3-channel projection used for each skip output.
        return self.conv2d(inputdata=x, out_channel=3, kernel_size=3,
                           padding='SAME', stride=1, use_bias=False,
                           name=head_name)

    with tf.variable_scope(name, reuse=reuse):
        # Encoder: two stride-2 convolutions reduce the resolution.
        enc_1 = conv_act(input_tensor, 64, 'conv_1', 'relu_1', kernel=5)
        enc_2 = conv_act(enc_1, 128, 'conv_2', 'relu_2', stride=2)
        enc_3 = conv_act(enc_2, 128, 'conv_3', 'relu_3')
        enc_4 = conv_act(enc_3, 128, 'conv_4', 'relu_4', stride=2)
        enc_5 = conv_act(enc_4, 256, 'conv_5', 'relu_5')
        features = conv_act(enc_5, 256, 'conv_6', 'relu_6')

        # Dilated stack: dia_conv_1..4 pair with relu_7..10.
        for index, rate in enumerate((2, 4, 8, 16), start=1):
            features = dilated_act(features, rate,
                                   'dia_conv_%d' % index,
                                   'relu_%d' % (index + 6))

        features = conv_act(features, 256, 'conv_7', 'relu_11')
        bottleneck = conv_act(features, 256, 'conv_8', 'relu_12')

        # Decoder: each upsampled map is added to an encoder activation.
        up_1 = upsample_act(bottleneck, 128,
                            'deconv_1', 'avg_pool_1', 'relu_13')
        dec_1 = conv_act(tf.add(up_1, enc_3), 128, 'conv_9', 'relu_14')
        up_2 = upsample_act(dec_1, 64, 'deconv_2', 'avg_pool_2', 'relu_15')
        dec_2 = conv_act(tf.add(up_2, enc_1), 32, 'conv_10', 'relu_16')

        # NOTE: 'skip_ouput_1' (sic) is the existing layer name; it is
        # kept as-is because it is part of the variable names.
        skip_output_1 = rgb_head(bottleneck, 'skip_ouput_1')
        skip_output_2 = rgb_head(dec_1, 'skip_output_2')
        skip_output_3 = rgb_head(dec_2, 'skip_output_3')
        # Conventional GAN output layers use a tanh activation.
        skip_output_3 = tf.nn.tanh(skip_output_3, name='skip_output_3_tanh')

        return {
            'skip_1': skip_output_1,
            'skip_2': skip_output_2,
            'skip_3': skip_output_3,
        }
def _build_pnasnet_base(images, normal_cell, num_classes, hparams,
                        is_training, final_endpoint=None):
    """Constructs a PNASNet image model.

    Builds the stem, then `hparams.num_cells` cells, an optional auxiliary
    head and the final pooling/classification layers. Every intermediate
    result is recorded in the returned `end_points` dict; construction stops
    early as soon as the endpoint named by `final_endpoint` has been recorded.

    Returns:
      A `(tensor, end_points)` tuple. Without an early exit the tensor is the
      logits.
    """
    end_points = {}

    def add_and_check_endpoint(endpoint_name, net):
        """Records `net` and reports whether building should stop here."""
        end_points[endpoint_name] = net
        if not final_endpoint:
            return final_endpoint
        return endpoint_name == final_endpoint

    # Find where to place the reduction cells or stride normal cells.
    reduction_indices = nasnet_utils.calc_reduction_layers(
        hparams.num_cells, hparams.num_reduction_layers)

    # pylint: disable=protected-access
    net, cell_outputs = nasnet._imagenet_stem(images, hparams, normal_cell)
    # pylint: enable=protected-access
    if add_and_check_endpoint('Stem', net):
        return net, end_points

    # The auxiliary head (if any) hangs off the cell that precedes the
    # second reduction layer.
    if len(reduction_indices) >= 2:
        aux_head_cell_idxes = [reduction_indices[1] - 1]
    else:
        aux_head_cell_idxes = []

    if hparams.use_bounded_activation:
        activation_fn = tf.nn.relu6
    else:
        activation_fn = tf.nn.relu

    # Cell numbering passed to `normal_cell` is offset by the stem cells.
    num_stem_cells = 2
    filter_scaling = 1.0
    for cell_num in range(hparams.num_cells):
        is_reduction = cell_num in reduction_indices
        if is_reduction:
            stride = 2
            filter_scaling *= hparams.filter_scaling_rate
        else:
            stride = 1
        if hparams.skip_reduction_layer_input or not is_reduction:
            prev_layer = cell_outputs[-2]
        net = normal_cell(
            net,
            scope='cell_{}'.format(cell_num),
            filter_scaling=filter_scaling,
            stride=stride,
            prev_layer=prev_layer,
            cell_num=cell_num + num_stem_cells)
        if add_and_check_endpoint('Cell_{}'.format(cell_num), net):
            return net, end_points
        cell_outputs.append(net)

        wants_aux_head = (hparams.use_aux_head and
                          cell_num in aux_head_cell_idxes and
                          num_classes and is_training)
        if wants_aux_head:
            aux_net = activation_fn(net)
            # pylint: disable=protected-access
            nasnet._build_aux_head(aux_net, end_points, num_classes, hparams,
                                   scope='aux_{}'.format(cell_num))
            # pylint: enable=protected-access

    # Final softmax layer
    with tf.variable_scope('final_layer'):
        net = nasnet_utils.global_avg_pool(activation_fn(net))
        if add_and_check_endpoint('global_pool', net) or not num_classes:
            return net, end_points
        net = slim.dropout(net, hparams.dense_dropout_keep_prob,
                           scope='dropout')
        logits = slim.fully_connected(net, num_classes)

        # NOTE(review): the two early exits below return `net` (the dropout
        # output), not the endpoint that was requested -- existing behaviour,
        # kept unchanged; confirm whether callers rely on it.
        if add_and_check_endpoint('Logits', logits):
            return net, end_points

        predictions = tf.nn.softmax(logits, name='predictions')
        if add_and_check_endpoint('Predictions', predictions):
            return net, end_points
    return logits, end_points
def test(self):
    """
    End-to-end check of the ops returned by ``create_train_ops``.

    Two scopes ('update_scope' and 'apply_scope') each hold variables w1 and
    w2. The test verifies that running the update ops only accumulates
    gradients into the 'grad_buf' variables, that the apply ops change only
    the variables of 'apply_scope', and that the zero ops reset the buffers.

    The module-level globals ``sess`` and ``grad_bufs`` are (re)assigned here.
    """
    global grad_bufs
    global sess

    initial_values = {
        'w1': np.array([10.0, 20.0]).astype(np.float32),
        'w2': np.array([5.0, 10.0]).astype(np.float32),
    }
    scope_names = ['update_scope', 'apply_scope']

    tf.reset_default_graph()
    sess = tf.Session()

    batch_ph = tf.placeholder(tf.float32, [None, 2])
    scope_vars = {}
    scope_losses = {}
    for scope_name in scope_names:
        with tf.variable_scope(scope_name):
            w1 = tf.Variable(initial_values['w1'], name='w1')
            w2 = tf.Variable(initial_values['w2'], name='w2')
            # NB reduce_sum is necessary to ensure that the gradients
            # accumulated for multiple examples in a batch are the same as
            # if the examples were presented in individual batches
            scope_losses[scope_name] = tf.reduce_sum(
                w1 + batch_ph * w2, axis=-1)
            scope_vars[scope_name] = {'w1': w1, 'w2': w2}

    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1)

    # Check that no extra trainable variables have been introduced:
    # two variables, two scopes, for a total of 4 trainable variables.
    assert (len(tf.trainable_variables()) == 4)
    update_ops, apply_ops, zero_ops = create_train_ops(
        scope_losses['update_scope'], optimizer,
        'update_scope', 'apply_scope')
    assert (len(tf.trainable_variables()) == 4)

    sess.run(tf.global_variables_initializer())
    grad_bufs = {}
    for variable in tf.global_variables():
        if 'grad_buf' in variable.name:
            grad_bufs[variable.name] = variable

    # Check that the gradient buffers start out zero.
    assert_grad_bufs_zero()

    # so the first loss term looks like w1 + 1 * w2
    # and the second term looks like w1 + 2 * w2
    sess.run(update_ops, feed_dict={batch_ph: [[1, 1], [2, 2]]})

    # Confirm that no changes have taken place to the trainable variables
    # yet in either scope.
    for scope_name in scope_names:
        for var_name, var in scope_vars[scope_name].items():
            np.testing.assert_equal(sess.run(var), initial_values[var_name])

    # Confirm that the gradient buffers look reasonable.
    # First loss term contribution: derivative wrt each element of both
    # vectors is 1. Second loss term contribution: derivative wrt w1 is 1,
    # derivative wrt w2 is 2.
    for buf_name, buf in grad_bufs.items():
        actual = sess.run(buf)
        if 'w1' in buf_name:
            expected = np.sum([[1., 1.], [1., 1.]], axis=0)
        elif 'w2' in buf_name:
            expected = np.sum([[1., 1.], [2., 2.]], axis=0)
        np.testing.assert_equal(actual, expected)

    # loss will be e.g. w1 + [3, 4] * w2
    sess.run(update_ops, feed_dict={batch_ph: [[3, 4], [5, 6]]})

    # Confirm that the gradient buffers still look reasonable: they now
    # hold the contributions of all four examples seen so far.
    for buf_name, buf in grad_bufs.items():
        actual = sess.run(buf)
        if 'w1' in buf_name:
            expected = np.sum(
                [[1., 1.], [1., 1.], [1., 1.], [1., 1.]], axis=0)
        elif 'w2' in buf_name:
            expected = np.sum(
                [[1., 1.], [2., 2.], [3., 4.], [5., 6.]], axis=0)
        np.testing.assert_equal(actual, expected)

    sess.run(apply_ops)

    # Confirm that no changes have been made to the variables in
    # update_scope.
    for var_name, var in scope_vars['update_scope'].items():
        actual = sess.run(var)
        if 'w1' in var_name:
            expected = initial_values['w1']
        elif 'w2' in var_name:
            expected = initial_values['w2']
        np.testing.assert_equal(actual, expected)

    # Confirm that changes _have_ been made to the variables in apply_scope.
    for var_name, var in scope_vars['apply_scope'].items():
        actual = sess.run(var)
        if 'w1' in var_name:
            # w1 started off as [10, 20]; gradient wrt w1 was 1 on each
            # step, and we went for 4 steps with step size of 1
            expected = [10 - 4 * 1., 20 - 4 * 1.]
        elif 'w2' in var_name:
            # w2 started off as [5, 10];
            # gradients were [1, 1], [2, 2], [3, 4], and [5, 6]
            expected = [5. - (1. + 2. + 3. + 5.),
                        10. - (1. + 2. + 4. + 6.)]
        np.testing.assert_equal(actual, expected)

    sess.run(zero_ops)

    # Check that gradient buffers have been zeroed.
    assert_grad_bufs_zero()
def features_matching(inputs_p1, inputs_p2):
    """Extract features from both inputs and match them.

    Each input goes through `features` under its own scope name
    ('feature_layers_1' / 'feature_layers_2'); the two dense results are then
    passed to `matching`. Everything is built inside the "matching" scope.

    Returns:
        (output, [layers_1, layers_2, denses]) where `output` and `denses`
        come from `matching` and `layers_*` come from `features`.
    """
    with tf.variable_scope("matching"):
        # Original annotation on both branches: 4*4*256.
        first_layers, first_dense = features(inputs_p1, 'feature_layers_1')
        second_layers, second_dense = features(inputs_p2, 'feature_layers_2')
        match_output, match_denses = matching(first_dense, second_dense)
        intermediates = [first_layers, second_layers, match_denses]
        return match_output, intermediates
def conv_layer(self, bottom, name):
    """Apply a stride-1 SAME convolution, bias add and ReLU to `bottom`.

    The filter and bias are read from ``self.data_dict[name]`` at indices 0
    and 1 and used directly; no new variables are created here.
    """
    with tf.variable_scope(name):
        # CNN's filter is constant, NOT Variable that can be trained
        filters = self.data_dict[name][0]
        convolved = tf.nn.conv2d(bottom, filters, [1, 1, 1, 1],
                                 padding='SAME')
        biases = self.data_dict[name][1]
        biased = tf.nn.bias_add(convolved, biases)
        return tf.nn.relu(biased)
def __init__(self, epsilon=1e-5, momentum = 0.9, name="batch_norm"):
    """Store the batch-norm settings on the instance.

    The assignments happen while the variable scope `name` is entered;
    no TensorFlow ops are created here.
    """
    with tf.variable_scope(name):
        self.epsilon, self.momentum, self.name = epsilon, momentum, name
# build_model: assembles the video/caption matching graph and its margin loss.
#
# Parameters (shapes are the ones stated by the inline comments below):
#   video          -- [batch_size, length, kernel, kernel, channel]
#   video_mask     -- [batch_size, length]
#   caption        -- [batch_size, length]
#   caption_mask   -- [batch_size, length]
#   train_flag     -- stored as self.train_flag and handed to slim.batch_norm
#                     as its 'is_training' parameter
#   reuse_variable -- forwarded to build_video_embedding and
#                     build_caption_encoder
#
# What the code does, in order:
#   1. Derives per-example lengths by summing each mask along axis 1, then
#      appends progressively shortened sequence masks to
#      self.video_mask_list (reversed along the last axis) and
#      self.caption_mask_list.
#   2. Creates the trainable word-embedding variable from self.word_embed.
#   3. Squeezes/reshapes the video to
#      [batch_size, video_steps, channel_size] and multiplies it by the
#      video mask.
#   4. Builds two dropout-wrapped MultiRNNCell stacks each for video and
#      caption and passes them to build_video_embedding /
#      build_caption_encoder.
#   5. For every index i in the batch, tiles video_emb_state[i] across the
#      batch, fuses it with the caption states via self.fusion and scores the
#      result with the fc1 -> fc2 -> fc3 -> scorefn stack (variables are
#      reused for i > 0).
#   6. Stacks the scores into a matrix and computes
#      mean(max(0, 10 + scores - diag(scores) as a column)) over
#      batch_size * batch_size entries.
#
# Nothing is returned; results are stored on self as logit, scores,
# mean_loss and concept_loss (a constant 0).
#
# NOTE(review): indentation was lost in this copy of the code, so the extent
# of the `for mi in range(2)` loop and of the "multimodal" variable scope
# cannot be read off here -- confirm against the upstream file before
# reformatting this block.
def build_model(self, video, video_mask, caption, caption_mask, train_flag, reuse_variable=False): self.video = video # [batch_size, length, kernel, kernel, channel] self.video_mask = video_mask # [batch_size, length] video_mask_leng = tf.cast(tf.reduce_sum(self.video_mask, 1), tf.int32) self.caption = caption # [batch_size, length] self.caption_mask = caption_mask # [batch_size, length] caption_mask_leng = tf.cast(tf.reduce_sum(self.caption_mask, 1), tf.int32) #Make Mask list self.video_mask_list = [] self.caption_mask_list = [] max_len = self.config.caption_length for mi in range(2): video_mask_leng = tf.maximum(1, video_mask_leng - 2) caption_mask_leng = tf.maximum(1, caption_mask_leng - 2) max_len -= 2 self.video_mask_list.append( tf.reverse( tf.sequence_mask(video_mask_leng, max_len, tf.float32), [-1])) self.caption_mask_list.append( tf.sequence_mask(caption_mask_leng, max_len, tf.float32)) max_len = int((max_len - 1) / 2) video_mask_leng = tf.cast((video_mask_leng - 1) / 2, tf.int32) video_mask_leng = tf.maximum(1, video_mask_leng) caption_mask_leng = tf.cast((caption_mask_leng - 1) / 2, tf.int32) caption_mask_leng = tf.maximum(1, caption_mask_leng) self.video_mask_list.append( tf.reverse(tf.sequence_mask(video_mask_leng, max_len, tf.float32), [-1])) self.caption_mask_list.append( tf.sequence_mask(caption_mask_leng, max_len, tf.float32)) self.train_flag = train_flag #Batch normalization self.bn_fn = slim.batch_norm self.bn_params = {'is_training': self.train_flag} self.word_embed_t = tf.Variable(self.word_embed, dtype=tf.float32, name="word_embed", trainable=True) #video drop self.squeezed_feat = tf.squeeze(self.video) self.embedded_feat = tf.reshape( self.squeezed_feat, [self.batch_size, self.video_steps, self.channel_size]) # [batch_size, length, channel_size] self.embedded_feat = self.embedded_feat * tf.expand_dims(video_mask, 2) self.video_cell_d = lambda: rnn_cell.DropoutWrapper( self.video_cell(), input_keep_prob=self.dropout_keep_prob, 
output_keep_prob=self.dropout_keep_prob) self.caption_cell_d = lambda: rnn_cell.DropoutWrapper( self.caption_cell(), input_keep_prob=self.dropout_keep_prob, output_keep_prob=self.dropout_keep_prob) video_cell1 = rnn_cell.MultiRNNCell( [self.video_cell_d() for _ in range(self.config.num_layers)], state_is_tuple=True) video_cell2 = rnn_cell.MultiRNNCell( [self.video_cell_d() for _ in range(self.config.num_layers)], state_is_tuple=True) video_cell = [video_cell1, video_cell2] caption_cell1 = rnn_cell.MultiRNNCell( [self.caption_cell_d() for _ in range(self.config.num_layers)], state_is_tuple=True) caption_cell2 = rnn_cell.MultiRNNCell( [self.caption_cell_d() for _ in range(self.config.num_layers)], state_is_tuple=True) caption_cell = [caption_cell1, caption_cell2] video_emb_state = self.build_video_embedding(video_cell, self.embedded_feat, self.video_mask, reuse_variable) rnn_emb_state = self.build_caption_encoder(caption_cell, reuse_variable) with tf.variable_scope("multimodal", initializer=self.initializer) as scope: margin_list = [] logit_list = [] for i in range(self.batch_size): if i > 0: scope.reuse_variables() fuse = self.fusion(tf.tile( tf.expand_dims(video_emb_state[i, :, :], 0), [self.batch_size, 1, 1]), rnn_emb_state, i, reuse=(i > 0)) with slim.arg_scope( [slim.fully_connected], weights_regularizer=slim.l2_regularizer(0.0005), normalizer_fn=self.bn_fn, normalizer_params=self.bn_params): logit = slim.fully_connected( fuse, 256, activation_fn=tf.nn.leaky_relu, scope='fc1', reuse=(i > 0)) logit = slim.fully_connected( logit, 256, activation_fn=tf.nn.leaky_relu, scope='fc2', reuse=(i > 0)) logit = slim.fully_connected( logit, 128, activation_fn=tf.nn.leaky_relu, scope='fc3', reuse=(i > 0)) logit = slim.fully_connected(logit, 1, activation_fn=None, scope='scorefn', reuse=(i > 0)) score = logit logit_list.append(score) margin_list.append(score) margin_mat = tf.squeeze(tf.stack(margin_list)) logit_mat = tf.squeeze(tf.stack(logit_list)) self.logit = logit_mat 
diag_elem = tf.diag_part(margin_mat) loss_mat = tf.maximum( 0.0, 10. + margin_mat - tf.reshape(diag_elem, [-1, 1])) margin_loss = tf.reduce_sum(loss_mat) / (self.batch_size * self.batch_size) self.scores = margin_mat self.mean_loss = margin_loss self.concept_loss = tf.constant(0)
# decode_infer: builds one inference-time decoding step and returns the
# log-probabilities for the next token together with the updated state.
#
# Parameters:
#   inputs -- dict; this block reads inputs['target'] (annotated below as
#             [b * beam, q']) and inputs['target_length'].
#   state  -- dict; this block reads state['encoder'] and state['decoder'].
#             NOTE(review): the first inline comment refers to state['enc'] /
#             state['dec'], which are not the keys the code uses.
#
# What the code does, in order (all inside self.graph.as_default()):
#   1. Passes the target ids through tf_trunct with the output-vocabulary
#      size and the id of the unknown token (per the inline comment, ids
#      beyond the vocabulary become the unknown id), then restores the
#      static shape.
#   2. Builds an all-zero segment-id tensor and a float sequence mask from
#      target_length.
#   3. Embeds the targets with embedding_lookup and embedding_postprocessor
#      under the 'bert' / 'embeddings' scopes (reuse=True on 'bert').
#   4. Masks the embeddings, pads one step at the front of the time axis and
#      drops the last step, then keeps only the final position of both the
#      decoder input and the causal attention bias.
#      NOTE(review): the inline comment says "Shift left"; the pad/slice
#      moves the content one step later along the time axis -- confirm the
#      intended wording.
#   5. Calls transformer_decoder_three with the cached state['decoder'] and
#      projects the last decoder output with self.decoder_weights
#      (transpose_b=True), followed by a softmax.
#   6. Combines vocabulary probabilities and attention weights through
#      calculate_final_logits and takes tf.log of the result.
#
# Returns:
#   (log_prob, {'encoder': state['encoder'], 'decoder': decoder_state});
#   log_prob is annotated below as [b * beam, v + v'].
#
# NOTE(review): indentation was lost in this copy of the code, so whether the
# 'decode' and 'copy' variable scopes are nested inside 'bert' / 'decode'
# cannot be read off here -- confirm against the upstream file before
# reformatting this block.
def decode_infer(self, inputs, state): # state['enc']: [b * beam, l_s, e] , state['dec']: [b * beam, q', e] # q' = previous decode output length # during infer, following graph are constructed using beam search with self.graph.as_default(): config = self.bert_config target_sequence = inputs['target'] # [b * beam, q'] vocab_size = len(self.hps.vocab_out) # trunct word idx, change those greater than vocab_size to unkId shape = target_sequence.shape unkid = self.hps.vocab_out[self.hps.unk] # target_sequence = tf_trunct(target_sequence, vocab_size, self.hps.unkId) target_sequence = tf_trunct(target_sequence, vocab_size, unkid) target_sequence.set_shape(shape) target_length = inputs['target_length'] target_seg_ids = tf.zeros_like(target_sequence, dtype=tf.int32, name='target_seg_ids_infer') tgt_mask = tf.sequence_mask(target_length, maxlen=tf.shape(target_sequence)[1], dtype=tf.float32) # [b, q'] # with tf.variable_scope('bert', reuse=True): out_dict_size = len(self.hps.vocab_out) with tf.variable_scope('bert', reuse=True): with tf.variable_scope('embeddings'), tf.device('/cpu:0'): # Perform embedding lookup on the target word ids. (tgt_embed, _) = embedding_lookup( input_ids=target_sequence, vocab_size=out_dict_size, # out vocab size embedding_size=config.hidden_size, initializer_range=config.initializer_range, word_embedding_name='word_embeddings', use_one_hot_embeddings=False) # Add positional embeddings and token type embeddings, then layer # normalize and perform dropout. 
tgt_embed = embedding_postprocessor( input_tensor=tgt_embed, use_token_type=True, token_type_ids=target_seg_ids, token_type_vocab_size=config.type_vocab_size, token_type_embedding_name='token_type_embeddings', use_position_embeddings=True, position_embedding_name='position_embeddings', initializer_range=config.initializer_range, max_position_embeddings=config.max_position_embeddings, dropout_prob=config.hidden_dropout_prob) with tf.variable_scope('decode', reuse=True): # [b, q', e] masked_tgt_embed = tgt_embed * tf.expand_dims(tgt_mask, -1) dec_attn_bias = attention_bias(tf.shape(masked_tgt_embed)[1], "causal") decoder_input = tf.pad(masked_tgt_embed, [[0, 0], [1, 0], [0, 0]])[:, :-1, :] # Shift left infer_decoder_input = decoder_input[:, -1:, :] infer_dec_attn_bias = dec_attn_bias[:, :, -1:, :] ret = transformer_decoder_three(infer_decoder_input, self.enc_output, self.topic_memory, infer_dec_attn_bias, self.enc_attn_bias, self.hps, state=state['decoder']) all_att_weights, decoder_output, decoder_state = ret decoder_output = decoder_output[:, -1, :] # [b * beam, e] vocab_logits = tf.matmul(decoder_output, self.decoder_weights, False, True) # [b * beam, v] vocab_probs = tf.nn.softmax(vocab_logits) vocab_size = out_dict_size # out vocabsize # we have tiled source_id_oo before feed, so last argument is set to 1 with tf.variable_scope('copy'): logits = calculate_final_logits(decoder_output, all_att_weights, vocab_probs, self.input_ids_oo, self.max_out_oovs, self.input_mask, vocab_size, tgt_seq_len=1) log_prob = tf.log(logits) # [b * beam, v + v'] return log_prob, {'encoder': state['encoder'], 'decoder': decoder_state}
def image_to_embedding(self, images, is_training=True):
    """Create a graph, transforming images into embedding vectors.

    Calls ``self.model_func`` on `images` inside the 'net' variable scope and
    returns whatever it returns.
    """
    # NOTE(review): variable reuse is tied to `is_training`, so the training
    # graph reuses variables that must already exist under 'net' -- confirm
    # that the non-training graph is always built first.
    net_scope = tf.variable_scope('net', reuse=is_training)
    with net_scope:
        embeddings = self.model_func(images, is_training=is_training)
    return embeddings