def model(inpt, num_actions, scope, reuse=False):
    """This model takes as input an observation and returns values of all actions."""
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        out = layers.fully_connected(out, num_outputs=64, activation_fn=tf.nn.tanh)
        out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return out
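# A minimal usage sketch (assumed, not from the original source): wiring the simple
# MLP q-function above to a hypothetical 4-dimensional observation placeholder.
# The placeholder name and the sizes are illustrative assumptions only.
obs_ph = tf.placeholder(tf.float32, [None, 4], name="observation")
q_values = model(obs_ph, num_actions=2, scope="q_func")  # shape [batch, 2]
greedy_action = tf.argmax(q_values, axis=1)              # greedy action per observation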
def model(img_in, num_actions, scope, noisy=False, reuse=False, concat_softmax=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = img_in
        with tf.variable_scope("convnet"):
            # original architecture
            out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
            out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
        out = layers.flatten(out)
        with tf.variable_scope("action_value"):
            if noisy:
                # Apply noisy network on fully connected layers
                # ref: https://arxiv.org/abs/1706.10295
                out = noisy_dense(out, name='noisy_fc1', size=512, activation_fn=tf.nn.relu)
                out = noisy_dense(out, name='noisy_fc2', size=num_actions)
            else:
                out = layers.fully_connected(out, num_outputs=512, activation_fn=tf.nn.relu)
                out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
            # V: Softmax - inspired by deep-rl-attack #
            if concat_softmax:
                out = tf.nn.softmax(out)
        return out
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False): with tf.variable_scope(scope, reuse=reuse): out = inpt with tf.variable_scope("convnet"): for num_outputs, kernel_size, stride in convs: out = layers.convolution2d(out, num_outputs=num_outputs, kernel_size=kernel_size, stride=stride, activation_fn=tf.nn.relu) conv_out = layers.flatten(out) with tf.variable_scope("action_value"): action_out = conv_out for hidden in hiddens: action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None) if layer_norm: action_out = layers.layer_norm(action_out, center=True, scale=True) action_out = tf.nn.relu(action_out) action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None) if dueling: with tf.variable_scope("state_value"): state_out = conv_out for hidden in hiddens: state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None) if layer_norm: state_out = layers.layer_norm(state_out, center=True, scale=True) state_out = tf.nn.relu(state_out) state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None) action_scores_mean = tf.reduce_mean(action_scores, 1) action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1) q_out = state_score + action_scores_centered else: q_out = action_scores return q_out
def __call__(self, x, reuse=False):
    with tf.variable_scope(self.name) as scope:
        if reuse:
            scope.reuse_variables()
        shared = tcl.fully_connected(x, 128, activation_fn=tf.nn.relu,
                                     weights_initializer=tf.random_normal_initializer(0, 0.02))
        q = tcl.fully_connected(shared, 10, activation_fn=None,
                                weights_initializer=tf.random_normal_initializer(0, 0.02))  # 10 classes
        return q
def dueling_model(img_in, num_actions, scope, reuse=False, layer_norm=False): """As described in https://arxiv.org/abs/1511.06581""" with tf.variable_scope(scope, reuse=reuse): out = img_in with tf.variable_scope("convnet"): # original architecture out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu) out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu) out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu) conv_out = layers.flatten(out) with tf.variable_scope("state_value"): state_hidden = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None) if layer_norm: state_hidden = layer_norm_fn(state_hidden, relu=True) else: state_hidden = tf.nn.relu(state_hidden) state_score = layers.fully_connected(state_hidden, num_outputs=1, activation_fn=None) with tf.variable_scope("action_value"): actions_hidden = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None) if layer_norm: actions_hidden = layer_norm_fn(actions_hidden, relu=True) else: actions_hidden = tf.nn.relu(actions_hidden) action_scores = layers.fully_connected(actions_hidden, num_outputs=num_actions, activation_fn=None) action_scores_mean = tf.reduce_mean(action_scores, 1) action_scores = action_scores - tf.expand_dims(action_scores_mean, 1) return state_score + action_scores
def __init__(self, input_size=4, hidden_size=2, gamma=0.95, action_size=2, alpha=0.1): self.input_size = input_size self.hidden_size = hidden_size self.gamma = gamma self.action_size = action_size self.alpha = alpha # save the hyper parameters self.params = self.__dict__.copy() # placeholders self.input_pl = tf.placeholder(tf.float32, [None, input_size]) self.action_pl = tf.placeholder(tf.int32, [None]) self.reward_pl = tf.placeholder(tf.float32, [None]) # a two-layer fully connected network hidden_layer = layers.fully_connected(self.input_pl, hidden_size, biases_initializer=None, activation_fn=tf.nn.relu) self.output = layers.fully_connected(hidden_layer, action_size, biases_initializer=None, activation_fn=tf.nn.softmax) # responsible output one_hot = tf.one_hot(self.action_pl, action_size) responsible_output = tf.reduce_sum(self.output * one_hot, axis=1) self.loss = -tf.reduce_mean(tf.log(responsible_output) * self.reward_pl) # training variables variables = tf.trainable_variables() self.variable_pls = [] for i, var in enumerate(variables): self.variable_pls.append(tf.placeholder(tf.float32)) self.gradients = tf.gradients(self.loss, variables) solver = tf.train.AdamOptimizer(learning_rate=alpha) self.update = solver.apply_gradients(zip(self.variable_pls, variables))
def bert_self_attention(config, hidden_states, attention_mask): with tf.variable_scope("BertSelfAttention"): mixed_query_layer = layers.fully_connected(hidden_states, config.hidden_size, scope="FCquery", activation_fn=None) mixed_key_layer = layers.fully_connected(hidden_states, config.hidden_size, scope="FCkey", activation_fn=None) mixed_value_layer = layers.fully_connected(hidden_states, config.hidden_size, scope="FCvalue", activation_fn=None) query_layer = transpose_for_scores(config, mixed_query_layer) key_layer = transpose_for_scores(config, mixed_key_layer) value_layer = transpose_for_scores(config, mixed_value_layer) # Take the dot product between "query" and "key" to get the raw attention scores. attention_scores = tf.matmul(query_layer, tf.transpose(key_layer, [0, 1, 3, 2])) # TODO(jonathan): the output of matmul is different than pyTorch's expected broadcasting # behavior... investigate attention_scores = attention_scores / np.sqrt(config.attention_head_size) # Apply the attention mask is (precomputed for all layers in BertModel forward() function) attention_scores = attention_scores + attention_mask # Normalize the attention scores to probabilities. attention_probs = tf.nn.softmax(attention_scores, axis=-1) # This is actually dropping out entire tokens to attend to, which might # seem a bit unusual, but is taken from the original Transformer paper. attention_probs = tf.nn.dropout(attention_probs, keep_prob=1.0 - config.attention_probs_dropout_prob) context_layer = tf.matmul(attention_probs, value_layer) context_layer = tf.transpose(context_layer, (0, 2, 1, 3)) new_context_layer_shape = [tf.shape(context_layer)[i] for i in range(2)] + [config.all_head_size] context_layer = tf.reshape(context_layer, new_context_layer_shape) return context_layer
def model_fn(x, target, mode, params):
    """Model function for Estimator."""
    y_ = tf.cast(target, tf.float32)
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # first convolutional layer
    h_conv1 = layers.convolution2d(x_image, 32, [5, 5])
    h_pool1 = layers.max_pool2d(h_conv1, [2, 2])

    # second convolutional layer
    h_conv2 = layers.convolution2d(h_pool1, 64, [5, 5])
    h_pool2 = layers.max_pool2d(h_conv2, [2, 2])

    # densely connected layer
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = layers.fully_connected(h_pool2_flat, 1024)
    h_fc1_drop = layers.dropout(
        h_fc1, keep_prob=params["dropout"], is_training=(mode == ModeKeys.TRAIN))

    # readout layer
    y_conv = layers.fully_connected(h_fc1_drop, 10, activation_fn=None)

    # softmax_cross_entropy_with_logits requires keyword arguments in TF 1.x
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
    train_op = tf.contrib.layers.optimize_loss(
        loss=cross_entropy,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=params["learning_rate"],
        optimizer="Adam")
    predictions = tf.argmax(y_conv, 1)
    return predictions, cross_entropy, train_op
def _build_q_network(registry, inputs, num_actions, config): dueling = config["dueling"] hiddens = config["hiddens"] frontend = ModelCatalog.get_model(registry, inputs, 1, config["model"]) frontend_out = frontend.last_layer with tf.variable_scope("action_value"): action_out = frontend_out for hidden in hiddens: action_out = layers.fully_connected( action_out, num_outputs=hidden, activation_fn=tf.nn.relu) action_scores = layers.fully_connected( action_out, num_outputs=num_actions, activation_fn=None) if dueling: with tf.variable_scope("state_value"): state_out = frontend_out for hidden in hiddens: state_out = layers.fully_connected( state_out, num_outputs=hidden, activation_fn=tf.nn.relu) state_score = layers.fully_connected( state_out, num_outputs=1, activation_fn=None) action_scores_mean = tf.reduce_mean(action_scores, 1) action_scores_centered = action_scores - tf.expand_dims( action_scores_mean, 1) return state_score + action_scores_centered else: return action_scores
def add_final_training_ops(self, embeddings, all_labels_count, hidden_layer_size=BOTTLENECK_TENSOR_SIZE / 4, dropout_keep_prob=None): """Adds a new softmax and fully-connected layer for training. The set up for the softmax and fully-connected layers is based on: https://tensorflow.org/versions/master/tutorials/mnist/beginners/index.html This function can be customized to add arbitrary layers for application-specific requirements. Args: embeddings: The embedding (bottleneck) tensor. all_labels_count: The number of all labels including the default label. hidden_layer_size: The size of the hidden_layer. Roughtly, 1/4 of the bottleneck tensor size. dropout_keep_prob: the percentage of activation values that are retained. Returns: softmax: The softmax or tensor. It stores the final scores. logits: The logits tensor. """ with tf.name_scope('input'): with tf.name_scope('Wx_plus_b'): hidden = layers.fully_connected(embeddings, hidden_layer_size) # We need a dropout when the size of the dataset is rather small. if dropout_keep_prob: hidden = tf.nn.dropout(hidden, dropout_keep_prob) logits = layers.fully_connected( hidden, all_labels_count, activation_fn=None) softmax = tf.nn.softmax(logits, name='softmax') return softmax, logits
def q_func_builder(input_placeholder, num_actions, scope, reuse=False): with tf.variable_scope(scope, reuse=reuse): latent = network(input_placeholder) if isinstance(latent, tuple): if latent[1] is not None: raise NotImplementedError("DQN is not compatible with recurrent policies yet") latent = latent[0] latent = layers.flatten(latent) with tf.variable_scope("action_value"): action_out = latent for hidden in hiddens: action_out = layers.fully_connected(action_out, num_outputs=hidden, activation_fn=None) if layer_norm: action_out = layers.layer_norm(action_out, center=True, scale=True) action_out = tf.nn.relu(action_out) action_scores = layers.fully_connected(action_out, num_outputs=num_actions, activation_fn=None) if dueling: with tf.variable_scope("state_value"): state_out = latent for hidden in hiddens: state_out = layers.fully_connected(state_out, num_outputs=hidden, activation_fn=None) if layer_norm: state_out = layers.layer_norm(state_out, center=True, scale=True) state_out = tf.nn.relu(state_out) state_score = layers.fully_connected(state_out, num_outputs=1, activation_fn=None) action_scores_mean = tf.reduce_mean(action_scores, 1) action_scores_centered = action_scores - tf.expand_dims(action_scores_mean, 1) q_out = state_score + action_scores_centered else: q_out = action_scores return q_out
def multilayer_perceptron(x):
    W_fc1 = tf.Variable(tf.random_normal([784, 256], mean=0, stddev=1))
    b_fc1 = tf.Variable(tf.zeros([256]))  # biases must be float32 to match the matmul output
    fc1 = tf.nn.xw_plus_b(x, W_fc1, b_fc1)
    fc2 = layers.fully_connected(fc1, 256, activation_fn=tf.nn.relu, scope='fc2')
    out = layers.fully_connected(fc2, 10, activation_fn=None, scope='out')
    return out
def dnn_logits_fn(): """Builds the logits from the input layer.""" previous_layer = input_layer for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(previous_layer,)) as hidden_layer_scope: net = layers.fully_connected( previous_layer, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) previous_layer = net with variable_scope.variable_scope( "logits", values=(previous_layer,)) as logits_scope: dnn_logits = layers.fully_connected( previous_layer, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=logits_scope) _add_hidden_layer_summary(dnn_logits, logits_scope.name) return dnn_logits
def create(cls, embeddings, labels, **kwargs): model = cls() model.embeddings = embeddings model._record_state(**kwargs) model.lengths_key = kwargs.get('lengths_key') model.labels = labels nc = len(labels) # This only exists to make exporting easier model.pdrop_value = kwargs.get('dropout', 0.5) model.dropin_value = kwargs.get('dropin', {}) model.sess = kwargs.get('sess', tf.Session()) model.lengths = kwargs.get('lengths', tf.placeholder(tf.int32, [None], name="lengths")) model.y = kwargs.get('y', tf.placeholder(tf.int32, [None, None], name="y")) model.pdrop_in = kwargs.get('dropin', 0.0) model.labels = labels model.crf = bool(kwargs.get('crf', False)) model.crf_mask = bool(kwargs.get('crf_mask', False)) model.span_type = kwargs.get('span_type') model.proj = bool(kwargs.get('proj', False)) model.feed_input = bool(kwargs.get('feed_input', False)) model.activation_type = kwargs.get('activation', 'tanh') model.constraint = kwargs.get('constraint') # Wrap the constraint in a non-trainable variable so that it is saved # into the checkpoint. This means we won't need to recreate the actual # values of it when we reload the model if model.constraint is not None: constraint = [] for i, c in enumerate(model.constraint): constraint.append(tf.get_variable("constraint_{}".format(i), initializer=c, trainable=False)) model.constraint = constraint embedseq = model.embed(**kwargs) seed = np.random.randint(10e8) enc_out = model.encode(embedseq, **kwargs) with tf.variable_scope("output") as model.out_scope: if model.feed_input is True: enc_out = tf.concat(axis=2, values=[enc_out, embedseq]) # Converts seq to tensor, back to (B,T,W) T = tf.shape(enc_out)[1] H = enc_out.get_shape()[2] # Flatten from [B x T x H] - > [BT x H] enc_out_bt_x_h = tf.reshape(enc_out, [-1, H]) init = xavier_initializer(True, seed) with tf.contrib.slim.arg_scope([fully_connected], weights_initializer=init): if model.proj is True: hidden = tf.layers.dropout(fully_connected(enc_out_bt_x_h, H, activation_fn=tf_activation(model.activation_type)), model.pdrop_value, training=TRAIN_FLAG()) preds = fully_connected(hidden, nc, activation_fn=None, weights_initializer=init) else: preds = fully_connected(enc_out_bt_x_h, nc, activation_fn=None, weights_initializer=init) model.probs = tf.reshape(preds, [-1, T, nc], name="probs") return model
def __call__(self, z):
    with tf.variable_scope(self.name) as scope:
        g = tcl.fully_connected(z, 4 * 4 * 512, activation_fn=lrelu, normalizer_fn=tcl.batch_norm)
        g = tcl.fully_connected(g, 64, activation_fn=lrelu, normalizer_fn=tcl.batch_norm)
        g = tcl.fully_connected(g, 64, activation_fn=lrelu, normalizer_fn=tcl.batch_norm)
        g = tcl.fully_connected(g, 64 * 64 * 3, activation_fn=tf.nn.tanh, normalizer_fn=tcl.batch_norm)
        g = tf.reshape(g, tf.stack([tf.shape(z)[0], 64, 64, 3]))
        return g
def auto_encoder(x_1, x_2, x_mask_1, x_mask_2, y, dropout, opt): x_1_emb, W_emb = embedding(x_1, opt) # batch L emb x_2_emb = tf.nn.embedding_lookup(W_emb, x_2) x_1_emb = tf.nn.dropout(x_1_emb, dropout) # batch L emb x_2_emb = tf.nn.dropout(x_2_emb, dropout) # batch L emb biasInit = tf.constant_initializer(0.001, dtype=tf.float32) x_1_emb = layers.fully_connected(tf.squeeze(x_1_emb), num_outputs=opt.embed_size, biases_initializer=biasInit, activation_fn=tf.nn.relu, scope='trans', reuse=None) # batch L emb x_2_emb = layers.fully_connected(tf.squeeze(x_2_emb), num_outputs=opt.embed_size, biases_initializer=biasInit, activation_fn=tf.nn.relu, scope='trans', reuse=True) x_1_emb = tf.expand_dims(x_1_emb, 3) # batch L emb 1 x_2_emb = tf.expand_dims(x_2_emb, 3) if opt.encoder == 'aver': H_enc_1 = aver_emb_encoder(x_1_emb, x_mask_1) H_enc_2 = aver_emb_encoder(x_2_emb, x_mask_2) elif opt.encoder == 'max': H_enc_1 = max_emb_encoder(x_1_emb, x_mask_1, opt) H_enc_2 = max_emb_encoder(x_2_emb, x_mask_2, opt) elif opt.encoder == 'concat': H_enc_1 = concat_emb_encoder(x_1_emb, x_mask_1, opt) H_enc_2 = concat_emb_encoder(x_2_emb, x_mask_2, opt) # discriminative loss term if opt.combine_enc == 'mult': H_enc = tf.multiply(H_enc_1, H_enc_2) # batch * n_gan if opt.combine_enc == 'concat': H_enc = tf.concat([H_enc_1, H_enc_2], 1) if opt.combine_enc == 'sub': H_enc = tf.subtract(H_enc_1, H_enc_2) if opt.combine_enc == 'mix': H_1 = tf.multiply(H_enc_1, H_enc_2) H_2 = tf.concat([H_enc_1, H_enc_2], 1) H_3 = tf.subtract(H_enc_1, H_enc_2) H_enc = tf.concat([H_1, H_2, H_3], 1) # calculate the accuracy logits = discriminator_2layer(H_enc, opt, dropout, prefix='classify_', num_outputs=opt.category, is_reuse=None) prob = tf.nn.softmax(logits) correct_prediction = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)) train_op = layers.optimize_loss( loss, framework.get_global_step(), optimizer='Adam', # variables=d_vars, learning_rate=opt.lr) return accuracy, loss, train_op, W_emb
def dueling_model(img_in, num_actions, scope, noisy=False, reuse=False, concat_softmax=False): """As described in https://arxiv.org/abs/1511.06581""" with tf.variable_scope(scope, reuse=reuse): out = img_in with tf.variable_scope("convnet"): # original architecture out = layers.convolution2d(out, num_outputs=32, kernel_size=8, stride=4, activation_fn=tf.nn.relu) out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu) out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu) out = layers.flatten(out) with tf.variable_scope("state_value"): if noisy: # Apply noisy network on fully connected layers # ref: https://arxiv.org/abs/1706.10295 state_hidden = noisy_dense(out, name='noisy_fc1', size=512, activation_fn=tf.nn.relu) state_score = noisy_dense(state_hidden, name='noisy_fc2', size=1) else: state_hidden = layers.fully_connected( out, num_outputs=512, activation_fn=tf.nn.relu ) state_score = layers.fully_connected(state_hidden, num_outputs=1, activation_fn=None) with tf.variable_scope("action_value"): if noisy: # Apply noisy network on fully connected layers # ref: https://arxiv.org/abs/1706.10295 actions_hidden = noisy_dense(out, name='noisy_fc1', size=512, activation_fn=tf.nn.relu) action_scores = noisy_dense(actions_hidden, name='noisy_fc2', size=num_actions) else: actions_hidden = layers.fully_connected( out, num_outputs=512, activation_fn=tf.nn.relu ) action_scores = layers.fully_connected( actions_hidden, num_outputs=num_actions, activation_fn=None ) action_scores_mean = tf.reduce_mean(action_scores, 1) action_scores = action_scores - tf.expand_dims( action_scores_mean, 1 ) return state_score + action_scores
def inference_graph(self, data):
    with ops.device(self.device_assigner.get_device(self.layer_num)):
        # Compute activations for the neural network.
        nn_activations = layers.fully_connected(data, self.params.layer_size)
        for _ in range(1, self.params.num_layers):
            # pylint: disable=W0106
            nn_activations = layers.fully_connected(nn_activations, self.params.layer_size)
        return nn_activations
def model():
    print("building model ...")
    with tf.variable_scope('train'):
        print("building model ...")
        X_pl = tf.placeholder(tf.float32, [None, num_features])
        X_expand = tf.expand_dims(X_pl, axis=2)
        print("X_pl", X_pl.get_shape())
        t_pl = tf.placeholder(tf.int32, [None])
        print("t_pl", t_pl.get_shape())
        is_training_pl = tf.placeholder(tf.bool)
        cell_fw = tf.nn.rnn_cell.GRUCell(205)
        cell_bw = tf.nn.rnn_cell.GRUCell(205)
        seq_len = tf.reduce_sum(tf.ones(tf.shape(X_pl), dtype=tf.int32), axis=1)
        _, enc_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw, cell_bw=cell_bw,
                                                        inputs=X_expand, sequence_length=seq_len,
                                                        dtype=tf.float32)
        # tf.concat takes the values first and the axis second in TF 1.x
        enc_states = tf.concat(enc_states, 1)
        enc_states_drop = dropout(enc_states, is_training=is_training_pl)
        l1 = fully_connected(enc_states_drop, 200, activation_fn=None)
        l1 = batch_norm(l1, is_training=is_training_pl)
        l1_relu = relu(l1)
        l1_dropout = dropout(l1_relu, is_training=is_training_pl)
        l2 = fully_connected(l1_dropout, 200, activation_fn=None)
        l2 = batch_norm(l2, is_training=is_training_pl)
        l2_relu = relu(l2)
        l_out = fully_connected(l2_relu, num_outputs=num_classes, activation_fn=None)
        l_out_softmax = tf.nn.softmax(l_out)
        tf.contrib.layers.summarize_variables()

    with tf.variable_scope('metrics'):
        # the cross-entropy op requires keyword arguments in TF 1.x
        loss = sparse_softmax_cross_entropy_with_logits(logits=l_out, labels=t_pl)
        print("loss", loss.get_shape())
        loss = tf.reduce_mean(loss)
        print("loss", loss.get_shape())
        tf.summary.scalar('train/loss', loss)
        argmax = tf.to_int32(tf.argmax(l_out, 1))
        print("argmax", argmax.get_shape())
        correct = tf.to_float(tf.equal(argmax, t_pl))
        print("correct,", correct.get_shape())
        accuracy = tf.reduce_mean(correct)
        print("accuracy", accuracy.get_shape())

    with tf.variable_scope('optimizer'):
        print("building optimizer ...")
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss)
        gradients, variables = zip(*grads_and_vars)
        clipped_gradients, global_norm = tf.clip_by_global_norm(gradients, clip_norm)
        clipped_grads_and_vars = zip(clipped_gradients, variables)
        tf.summary.scalar('train/global_gradient_norm', global_norm)
        train_op = optimizer.apply_gradients(clipped_grads_and_vars, global_step=global_step)

    return X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy, train_op, global_step
def multilayer_perceptron(x):
    fc1 = layers.fully_connected(x, 256, activation_fn=tf.nn.relu, scope='fc1')
    # tf.histogram_summary was removed in TF 1.0; tf.summary.histogram is the current op
    tf.summary.histogram('fc1', fc1)
    tf.summary.histogram('fc1/sparsity', tf.nn.zero_fraction(fc1))
    fc2 = layers.fully_connected(fc1, 256, activation_fn=tf.nn.relu, scope='fc2')
    tf.summary.histogram('fc2', fc2)
    tf.summary.histogram('fc2/sparsity', tf.nn.zero_fraction(fc2))
    out = layers.fully_connected(fc2, 10, activation_fn=None, scope='out')
    return out
def multilayer_perceptron(x):
    fc1 = layers.fully_connected(x, 256, activation_fn=tf.nn.relu, scope='fc1')
    fc2 = layers.fully_connected(fc1, 256, activation_fn=tf.nn.relu, scope='fc2')
    out = layers.fully_connected(fc2, 10, activation_fn=None, scope='out')
    assert_op = tf.Assert(tf.reduce_all(out > 0), [out], name='assert_out_positive')
    # out = tf.with_dependencies([assert_op], out)
    with tf.control_dependencies([assert_op]):
        out = tf.identity(out, name='out')
    return out
def _mlp(hiddens, inpt, num_actions, scope, reuse=False, layer_norm=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = inpt
        for hidden in hiddens:
            out = layers.fully_connected(out, num_outputs=hidden, activation_fn=None)
            if layer_norm:
                out = layers.layer_norm(out, center=True, scale=True)
            out = tf.nn.relu(out)
        q_out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return q_out
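# A minimal sketch (assumed names, not from the original source): the scope/reuse
# arguments let the same weights score both the current and the next observation,
# as a DQN-style target computation would need.
obs_ph = tf.placeholder(tf.float32, [None, 4], name="obs")
next_obs_ph = tf.placeholder(tf.float32, [None, 4], name="next_obs")
q_t = _mlp([64, 64], obs_ph, num_actions=2, scope="q_func")                     # creates the variables
q_tp1 = _mlp([64, 64], next_obs_ph, num_actions=2, scope="q_func", reuse=True)  # reuses the same variables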
def build_q_network(self, hiddens):
    out = self._inputs
    for hidden in hiddens:
        out = layers.fully_connected(inputs=out, num_outputs=hidden, activation_fn=tf.tanh,
                                     weights_regularizer=layers.l2_regularizer(scale=0.1))
        out = tf.nn.dropout(out, self.keep_prob)
    self.Q_t = layers.fully_connected(out, self.num_actions, activation_fn=None)
    self.Q_action = tf.argmax(self.Q_t, axis=1)
def __call__(self, x, reuse=True):
    with tf.variable_scope(self.name) as vs:
        if reuse:
            vs.reuse_variables()
        # tf has no top-level flatten; use the contrib.layers version
        d = tcl.fully_connected(tcl.flatten(x), 64, activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm)
        d = tcl.fully_connected(d, 64, activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm)
        d = tcl.fully_connected(d, 64, activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm)
        logit = tcl.fully_connected(d, 1, activation_fn=None)
        return logit
def reconstruct_mutual_info(z_c_categoricals, z_c_continuous, categorical_lambda, continuous_lambda, discriminator_hidden, is_training, reuse=None, name="mutual_info"): with tf.variable_scope(name, reuse=reuse): out = layers.fully_connected( discriminator_hidden, num_outputs=128, activation_fn=leaky_rectify, normalizer_fn=layers.batch_norm, normalizer_params={"is_training":is_training} ) num_categorical = sum([true_categorical.get_shape()[1].value for true_categorical in z_c_categoricals]) num_continuous = z_c_continuous.get_shape()[1].value out = layers.fully_connected( out, num_outputs=num_categorical + num_continuous, activation_fn=tf.identity ) # distribution logic offset = 0 loss_q_categorical = None for z_c_categorical in z_c_categoricals: cardinality = z_c_categorical.get_shape()[1].value prob_categorical = tf.nn.softmax(out[:, offset:offset + cardinality]) loss_q_categorical_new = - tf.reduce_sum(tf.log(prob_categorical + TINY) * z_c_categorical, reduction_indices=1 ) if loss_q_categorical is None: loss_q_categorical = loss_q_categorical_new else: loss_q_categorical = loss_q_categorical + loss_q_categorical_new offset += cardinality q_mean = out[:, num_categorical:num_categorical + num_continuous] q_sd = tf.ones_like(q_mean) epsilon = (z_c_continuous - q_mean) / (q_sd + TINY) loss_q_continuous = tf.reduce_sum( 0.5 * np.log(2 * np.pi) + tf.log(q_sd + TINY) + 0.5 * tf.square(epsilon), reduction_indices=1, ) loss_mutual_info = continuous_lambda * loss_q_continuous + categorical_lambda * loss_q_categorical return ( tf.reduce_mean(loss_mutual_info), tf.reduce_mean(loss_q_categorical), tf.reduce_mean(loss_q_continuous) )
def define_sequence_model(self): seed=12345 np.random.seed(12345) layer_list=[] with self.graph.as_default() as g: utt_length=tf.placeholder(tf.int32,shape=(None)) g.add_to_collection(name="utt_length",value=utt_length) with tf.name_scope("input"): input_layer=tf.placeholder(dtype=tf.float32,shape=(None,None,self.n_in),name="input_layer") if self.dropout_rate!=0.0: print "Using dropout to avoid overfitting and the dropout rate is",self.dropout_rate is_training_drop=tf.placeholder(dtype=tf.bool,shape=(),name="is_training_drop") input_layer_drop=dropout(input_layer,self.dropout_rate,is_training=is_training_drop) layer_list.append(input_layer_drop) g.add_to_collection(name="is_training_drop",value=is_training_drop) else: layer_list.append(input_layer) g.add_to_collection("input_layer",layer_list[0]) with tf.name_scope("hidden_layer"): basic_cell=[] if "tanh" in self.hidden_layer_type: is_training_batch=tf.placeholder(dtype=tf.bool,shape=(),name="is_training_batch") bn_params={"is_training":is_training_batch,"decay":0.99,"updates_collections":None} g.add_to_collection("is_training_batch",is_training_batch) for i in xrange(len(self.hidden_layer_type)): if self.dropout_rate!=0.0: if self.hidden_layer_type[i]=="tanh": new_layer=fully_connected(layer_list[-1],self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,normalizer_params=bn_params) new_layer_drop=dropout(new_layer,self.dropout_rate,is_training=is_training_drop) layer_list.append(new_layer_drop) if self.hidden_layer_type[i]=="lstm": basic_cell.append(MyDropoutWrapper(BasicLSTMCell(num_units=self.hidden_layer_size[i]),self.dropout_rate,self.dropout_rate,is_training=is_training_drop)) if self.hidden_layer_type[i]=="gru": basic_cell.append(MyDropoutWrapper(GRUCell(num_units=self.hidden_layer_size[i]),self.dropout_rate,self.dropout_rate,is_training=is_training_drop)) else: if self.hidden_layer_type[i]=="tanh": new_layer=fully_connected(layer_list[-1],self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,normalizer_params=bn_params) layer_list.append(new_layer) if self.hidden_layer_type[i]=="lstm": basic_cell.append(LayerNormBasicLSTMCell(num_units=self.hidden_layer_size[i])) if self.hidden_layer_type[i]=="gru": basic_cell.append(LayerNormGRUCell(num_units=self.hidden_layer_size[i])) multi_cell=MultiRNNCell(basic_cell) rnn_outputs,rnn_states=tf.nn.dynamic_rnn(multi_cell,layer_list[-1],dtype=tf.float32,sequence_length=utt_length) layer_list.append(rnn_outputs) with tf.name_scope("output_layer"): if self.output_type=="linear" : output_layer=tf.layers.dense(rnn_outputs,self.n_out) # stacked_rnn_outputs=tf.reshape(rnn_outputs,[-1,self.n_out]) # stacked_outputs=tf.layers.dense(stacked_rnn_outputs,self.n_out) # output_layer=tf.reshape(stacked_outputs,[-1,utt_length,self.n_out]) g.add_to_collection(name="output_layer",value=output_layer) with tf.name_scope("training_op"): if self.optimizer=="adam": self.training_op=tf.train.AdamOptimizer()
def create_a3c_graph(input_shape, n_action, model, opt, beta=None, name='a3c'):
    """
    Implements Actor Critic Model (A3C)

    Returns a dictionary of Tensorflow graph operations to be used with a
    tf.Session instance.

    Args:
        n_action: A `int`. Number of actions agent can do.
        model: The Tensorflow model
        opt: A `tf.train.Optimizer`.
        beta: A `float`. Regularization term for the entropy of the policy model.
              If beta is `None` no regularization will be added.
    """
    actions = tf.placeholder(tf.int32, shape=(None))
    returns = tf.placeholder(tf.float32, shape=(None))
    policy_in = tf.placeholder(tf.float32, shape=input_shape)
    value_in = tf.placeholder(tf.float32, shape=input_shape)

    tf.add_to_collection("actions", actions)
    tf.add_to_collection("returns", returns)
    tf.add_to_collection("policy_in", policy_in)
    tf.add_to_collection("value_in", value_in)

    with tf.variable_scope('actor'):
        pnn = model(policy_in)
        probs = tf.nn.softmax(layers.fully_connected(pnn, n_action))

    with tf.variable_scope('critic'):
        v_out = model(value_in)
        value = layers.fully_connected(v_out, 1)

    tf.add_to_collection("policy_out", probs)
    tf.add_to_collection("value_out", value)

    actor_vars = get_vars_from_scope('actor')
    critic_vars = get_vars_from_scope('critic')

    N = tf.shape(probs)[0]  # batch size (the original referenced an undefined `states` tensor)
    p_vals = slice_2d(probs, tf.range(0, N), actions)
    surr_loss = tf.log(p_vals + 1e-8)

    policy_loss = -surr_loss * (returns - value)
    if beta:
        policy_loss += beta * (-tf.reduce_sum(probs * tf.log(probs + 1e-8), 1))
    policy_loss = tf.reduce_mean(policy_loss, name="policy_loss")
    value_loss = tf.reduce_mean(tf.square(returns - value), name="value_loss")

    policy_train_op = opt.minimize(policy_loss, var_list=actor_vars)
    value_train_op = opt.minimize(value_loss, var_list=critic_vars)

    tf.add_to_collection("policy_loss", policy_loss)
    tf.add_to_collection("value_loss", value_loss)
    tf.add_to_collection("policy_train_op", policy_train_op)
    tf.add_to_collection("value_train_op", value_train_op)
def atari_model(ram_in, num_actions, scope, reuse=False):
    with tf.variable_scope(scope, reuse=reuse):
        out = ram_in
        # out = tf.concat(1, (ram_in[:, 4:5], ram_in[:, 8:9], ram_in[:, 11:13], ram_in[:, 21:22],
        #                     ram_in[:, 50:51], ram_in[:, 60:61], ram_in[:, 64:65]))
        with tf.variable_scope("action_value"):
            out = layers.fully_connected(out, num_outputs=256, activation_fn=tf.nn.relu)
            out = layers.fully_connected(out, num_outputs=128, activation_fn=tf.nn.relu)
            out = layers.fully_connected(out, num_outputs=64, activation_fn=tf.nn.relu)
            out = layers.fully_connected(out, num_outputs=num_actions, activation_fn=None)
        return out
def multilayer_perceptron(x):
    fc1 = layers.fully_connected(x, 256, activation_fn=tf.nn.relu, scope='fc1')
    fc2 = layers.fully_connected(fc1, 256, activation_fn=tf.nn.relu, scope='fc2')

    def _debug_print_func(fc1_val, fc2_val):
        print('FC1 : {}, FC2 : {}'.format(fc1_val.shape, fc2_val.shape))
        return False

    debug_print_op = tf.py_func(_debug_print_func, [fc1, fc2], [tf.bool])
    with tf.control_dependencies(debug_print_op):
        out = layers.fully_connected(fc2, 10, activation_fn=None, scope='out')
    return out
def create_network(self, scope):
    with tf.variable_scope(scope, reuse=False):
        state_input = tf.placeholder('float', [None, 84, 84, 4])
        out = layers.convolution2d(state_input, num_outputs=32, kernel_size=8, stride=1, activation_fn=tf.nn.relu)
        out = layers.convolution2d(out, num_outputs=64, kernel_size=4, stride=2, activation_fn=tf.nn.relu)
        out = layers.convolution2d(out, num_outputs=64, kernel_size=3, stride=1, activation_fn=tf.nn.relu)
        conv_out = layers.flatten(out)
        value_out = layers.fully_connected(conv_out, num_outputs=256, activation_fn=tf.nn.relu)
        q_value = layers.fully_connected(value_out, num_outputs=self.action_dim, activation_fn=None)
        return state_input, q_value
def classification_head(net, num_classes):
    net = flatten(net)
    net = fully_connected(net, 4096)
    net = fully_connected(net, 4096)
    net = fully_connected(net, num_classes, activation_fn=None)
    return net
def model_function(features, targets, mode): # input layer # Reshape features to 4-D tensor (55000x28x28x1) # MNIST images are 28x28 pixels # batch size corresponds to number of images: -1 represents ' compute the # images automatically (55000)' # +1 represents the # channels. Here #channels =1 since grey image. For color image, #channels=3 input_layer = tf.reshape(features, [-1, 28, 28, 1]) # Computes 32 features using a 5x5 filter # Padding is added to preserve width # Input Tensor Shape: [batch_size,28,28,1] # Output Tensor Shape: [batch_size,28,28,32] conv1 = layers.conv2d( inputs=input_layer, num_outputs=32, kernel_size=[5, 5], stride=1, padding= "SAME", # do so much padding such that the feature map is same size as input activation_fn=tf.nn.relu) # Pooling layer 1 # Pooling layer ith a 2x2 filter and stride 2 # Input shape: [batch_size,28,28,32] # Output shape: [batch_size,14,14,32] pool1 = layers.max_pool2d(inputs=conv1, kernel_size=[2, 2], stride=2) # Convolution layer 2 # Input: 14 x 14 x 32 (32 channels here) # Output: 14 x 14 x 64 (32 features/patches fed to each perceptron; discovering 64 features) conv2 = layers.conv2d( inputs=pool1, num_outputs=64, kernel_size=[5, 5], stride=1, padding= "SAME", # do so much padding such that the feature map is same size as input activation_fn=tf.nn.relu) # Pooling layer 2 # Input: 14 x14 x 64 # Output: 7 x 7 x 64 pool2 = layers.max_pool2d(inputs=conv2, kernel_size=[2, 2], stride=2) # Flatten the pool2 to feed to the 1st layer of fully connected layers # Input size: [batch_size,7,7,64] # Output size: [batch_size, 7x7x64] pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64]) # Connected layers with 100, 20 neurons # Input shape: [batch_size, 7x7x64] # Output shape: [batch_size, 10] fclayers = layers.stack( pool2_flat, layers.fully_connected, [100, 20], activation_fn=tf.nn.relu, weights_regularizer=layers.l1_l2_regularizer(1.0, 2.0), weights_initializer=layers.xavier_initializer(uniform=True, seed=100)) outputs = layers.fully_connected( inputs=fclayers, num_outputs=10, # 10 perceptrons in output layer for 10 numbers (0 to 9) activation_fn=None ) # Use "None" as activation function specified in "softmax_cross_entropy" loss # Calculate loss using cross-entropy error; also use the 'softmax' activation function loss = losses.softmax_cross_entropy(outputs, targets) optimizer = layers.optimize_loss( loss=loss, global_step=tf.contrib.framework.get_global_step(), learning_rate=0.1, optimizer="SGD") # Class of output (i.e., predicted number) corresponds to the perceptron returning the highest fractional value # Returning both fractional values and corresponding labels probs = tf.nn.softmax(outputs) return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer
def build_infer_graph(x, batch_size, vocab_size=VOCAB_SIZE, embedding_size=32, rnn_size=128, num_layers=2, p_keep=1.0): """ builds inference graph """ infer_args = { "batch_size": batch_size, "vocab_size": vocab_size, "embedding_size": embedding_size, "rnn_size": rnn_size, "num_layers": num_layers, "p_keep": p_keep } logger.debug("building inference graph: %s.", infer_args) # other placeholders p_keep = tf.placeholder_with_default(p_keep, [], "p_keep") batch_size = tf.placeholder_with_default(batch_size, [], "batch_size") # embedding layer embed_seq = layers.embed_sequence(x, vocab_size, embedding_size) # shape: [batch_size, seq_len, embedding_size] embed_seq = tf.nn.dropout(embed_seq, keep_prob=p_keep) # shape: [batch_size, seq_len, embedding_size] # RNN layers cells = [rnn.LSTMCell(rnn_size) for _ in range(num_layers)] cells = [ rnn.DropoutWrapper(cell, output_keep_prob=p_keep) for cell in cells ] cells = rnn.MultiRNNCell(cells) input_state = cells.zero_state(batch_size, tf.float32) # shape: [num_layers, 2, batch_size, rnn_size] rnn_out, output_state = tf.nn.dynamic_rnn(cells, embed_seq, initial_state=input_state) # rnn_out shape: [batch_size, seq_len, rnn_size] # output_state shape: [num_layers, 2, batch_size, rnn_size] with tf.name_scope("lstm"): tf.summary.histogram("outputs", rnn_out) for c_state, h_state in output_state: tf.summary.histogram("c_state", c_state) tf.summary.histogram("h_state", h_state) # fully connected layer logits = layers.fully_connected(rnn_out, vocab_size, activation_fn=None) # shape: [batch_size, seq_len, vocab_size] # predictions with tf.name_scope("softmax"): probs = tf.nn.softmax(logits) # shape: [batch_size, seq_len, vocab_size] with tf.name_scope("sequence"): tf.summary.histogram("embeddings", embed_seq) tf.summary.histogram("logits", logits) model = { "logits": logits, "probs": probs, "input_state": input_state, "output_state": output_state, "p_keep": p_keep, "batch_size": batch_size, "infer_args": infer_args } return model
bn_params = {
    'is_training': train_mode,
    'decay': 0.9,
    'updates_collections': None
}

# We can build short code using 'arg_scope' to avoid duplicate code
# same function with different arguments
with arg_scope([fully_connected],
               activation_fn=tf.nn.relu,
               weights_initializer=xavier_init,
               biases_initializer=None,
               normalizer_fn=batch_norm,
               normalizer_params=bn_params):
    hidden_layer1 = fully_connected(X, hidden_output_size, scope="h1")
    h1_drop = dropout(hidden_layer1, keep_prob, is_training=train_mode)
    hidden_layer2 = fully_connected(h1_drop, hidden_output_size, scope="h2")
    h2_drop = dropout(hidden_layer2, keep_prob, is_training=train_mode)
    hidden_layer3 = fully_connected(h2_drop, hidden_output_size, scope="h3")
    h3_drop = dropout(hidden_layer3, keep_prob, is_training=train_mode)
    hidden_layer4 = fully_connected(h3_drop, hidden_output_size, scope="h4")
    h4_drop = dropout(hidden_layer4, keep_prob, is_training=train_mode)
    hypothesis = fully_connected(h4_drop, final_output_size, activation_fn=None, scope="hypothesis")

# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
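# A minimal run sketch (assumed, not from the original source). X, Y, keep_prob and
# train_mode are placeholders assumed to exist elsewhere in the script, and
# batch_xs/batch_ys stand in for one training mini-batch. Batch norm and dropout
# need train_mode=True during training and False at evaluation time.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, c = sess.run([optimizer, cost],
                    feed_dict={X: batch_xs, Y: batch_ys,
                               keep_prob: 0.7, train_mode: True})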
def ModelFn():
    inputs = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
    reg = layers.l2_regularizer(0.001)
    layers.fully_connected(inputs, 30, weights_regularizer=reg)
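# A small illustration (assumed, not from the original source): the weights_regularizer
# above registers its penalty in the REGULARIZATION_LOSSES collection, which is how it
# can later be folded into a training loss.
ModelFn()
reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
total_reg = tf.add_n(reg_losses)  # scalar L2 penalty contributed by the layer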
def generator(z, reuse=False): """ generator Network to produce samples. params: z: Input noise [batch size, latent dimension] returns: x_hat: Artificial image [batch size, 64, 64, 3] """ batch_norm = layers.batch_norm outputs = [] h = z with tf.variable_scope("generator", reuse=reuse) as scope: h = layers.fully_connected(inputs=h, num_outputs=4 * 4 * 1024, activation_fn=tf.nn.relu, normalizer_fn=batch_norm) h = tf.reshape(h, [-1, 4, 4, 1024]) # [4,4,1024] h = layers.conv2d_transpose(inputs=h, num_outputs=512, kernel_size=4, stride=2, activation_fn=tf.nn.relu, normalizer_fn=batch_norm) # [8,8,512] h = layers.conv2d_transpose(inputs=h, num_outputs=256, kernel_size=4, stride=2, activation_fn=tf.nn.relu, normalizer_fn=batch_norm) # [16,16,256] h = layers.conv2d_transpose(inputs=h, num_outputs=128, kernel_size=4, stride=2, activation_fn=tf.nn.relu, normalizer_fn=batch_norm) # This is an extra conv layer like the WGAN folks. h = layers.conv2d(inputs=h, num_outputs=128, kernel_size=4, stride=1, activation_fn=tf.nn.relu, normalizer_fn=batch_norm) # [32,32,128] x_hat = layers.conv2d_transpose(inputs=h, num_outputs=3, kernel_size=4, stride=2, activation_fn=tf.nn.sigmoid, biases_initializer=None) # [64,64,3] return x_hat
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None): """Deep Neural Net and Linear combined model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `Head` instance. * linear_feature_columns: An iterable containing all the feature columns used by the Linear model. * linear_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the Linear model. Defaults to the Ftrl optimizer. * joint_linear_weights: If True a single (possibly partitioned) variable will be used to store the linear model weights. It's faster, but requires all columns are sparse and have the 'sum' combiner. * dnn_feature_columns: An iterable containing all the feature columns used by the DNN model. * dnn_optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training the DNN model. Defaults to the Adagrad optimizer. * dnn_hidden_units: List of hidden units per DNN layer. * dnn_activation_fn: Activation function applied to each DNN layer. If `None`, will use `tf.nn.relu`. * dnn_dropout: When not `None`, the probability we will drop out a given DNN coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to multiply with learning rate for the embedding variables. * input_layer_min_slice_size: Optional. The min slice size of input layer partitions. If not provided, will use the default of 64M. config: `RunConfig` object to configure the runtime settings. Returns: `ModelFnOps` Raises: ValueError: If both `linear_feature_columns` and `dnn_features_columns` are empty at the same time. """ head = params["head"] linear_feature_columns = params.get("linear_feature_columns") linear_optimizer = params.get("linear_optimizer") or "Ftrl" joint_linear_weights = params.get("joint_linear_weights") dnn_feature_columns = params.get("dnn_feature_columns") dnn_optimizer = params.get("dnn_optimizer") or "Adagrad" dnn_hidden_units = params.get("dnn_hidden_units") dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu dnn_dropout = params.get("dnn_dropout") gradient_clip_norm = params.get("gradient_clip_norm") input_layer_min_slice_size = ( params.get("input_layer_min_slice_size") or 64 << 20) num_ps_replicas = config.num_ps_replicas if config else 0 embedding_lr_multipliers = params.get("embedding_lr_multipliers", {}) fix_global_step_increment_bug = params.get( "fix_global_step_increment_bug", True) if not linear_feature_columns and not dnn_feature_columns: raise ValueError( "Either linear_feature_columns or dnn_feature_columns must be defined.") features = _get_feature_dict(features) # Build DNN Logits. 
dnn_parent_scope = "dnn" if not dnn_feature_columns: dnn_logits = None else: if not dnn_hidden_units: raise ValueError( "dnn_hidden_units must be defined when dnn_feature_columns is " "specified.") dnn_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas)) with variable_scope.variable_scope( dnn_parent_scope, values=tuple(six.itervalues(features)), partitioner=dnn_partitioner): input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=input_layer_min_slice_size)) with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as dnn_input_scope: net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=dnn_feature_columns, weight_collections=[dnn_parent_scope], scope=dnn_input_scope) for layer_id, num_hidden_units in enumerate(dnn_hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(net,)) as dnn_hidden_layer_scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=dnn_activation_fn, variables_collections=[dnn_parent_scope], scope=dnn_hidden_layer_scope) if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout( net, keep_prob=(1.0 - dnn_dropout)) # TODO(b/31209633): Consider adding summary before dropout. _add_hidden_layer_summary(net, dnn_hidden_layer_scope.name) with variable_scope.variable_scope( "logits", values=(net,)) as dnn_logits_scope: dnn_logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[dnn_parent_scope], scope=dnn_logits_scope) _add_hidden_layer_summary(dnn_logits, dnn_logits_scope.name) # Build Linear logits. linear_parent_scope = "linear" if not linear_feature_columns: linear_logits = None else: linear_partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=64 << 20) with variable_scope.variable_scope( linear_parent_scope, values=tuple(six.itervalues(features)), partitioner=linear_partitioner) as scope: if joint_linear_weights: linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) else: linear_logits, _, _ = layers.weighted_sum_from_feature_columns( columns_to_tensors=features, feature_columns=linear_feature_columns, num_outputs=head.logits_dimension, weight_collections=[linear_parent_scope], scope=scope) # Combine logits and build full model. if dnn_logits is not None and linear_logits is not None: logits = dnn_logits + linear_logits elif dnn_logits is not None: logits = dnn_logits else: logits = linear_logits def _make_training_op(training_loss): """Training op for the DNN linear combined model.""" train_ops = [] global_step = training_util.get_global_step() if dnn_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=global_step, learning_rate=_DNN_LEARNING_RATE, optimizer=_get_optimizer(dnn_optimizer), gradient_multipliers=_extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, dnn_parent_scope, dnn_input_scope.name), clip_gradients=gradient_clip_norm, variables=ops.get_collection(dnn_parent_scope), name=dnn_parent_scope, # Empty summaries, because head already logs "loss" summary. 
summaries=[], increment_global_step=not fix_global_step_increment_bug)) if linear_logits is not None: train_ops.append( optimizers.optimize_loss( loss=training_loss, global_step=global_step, learning_rate=_linear_learning_rate(len(linear_feature_columns)), optimizer=_get_optimizer(linear_optimizer), clip_gradients=gradient_clip_norm, variables=ops.get_collection(linear_parent_scope), name=linear_parent_scope, # Empty summaries, because head already logs "loss" summary. summaries=[], increment_global_step=not fix_global_step_increment_bug)) train_op = control_flow_ops.group(*train_ops) if fix_global_step_increment_bug: with ops.control_dependencies([train_op]): with ops.colocate_with(global_step): return state_ops.assign_add(global_step, 1).op return train_op return head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_make_training_op, logits=logits)
import tensorflow as tf from tensorflow.python.ops import array_ops from tensorflow.python.framework import tensor_shape #from tensorflow.python.ops import rnn_cell from tensorflow.contrib import rnn as rnn_cell #from tensorflow.python.ops.rnn_cell import RNNCell, GRUCell, MultiRNNCell, BasicRNNCell from tensorflow.contrib.rnn import RNNCell, GRUCell, MultiRNNCell, BasicRNNCell from tensorflow.python.ops import variable_scope as vs from tensorflow.python.util import nest from tensorflow.python.ops.math_ops import sigmoid, tanh from tensorflow.contrib.layers import fully_connected DEFAULT_FUNC = lambda x, n: fully_connected(x, n, activation_fn=None) DEFAULT_RANK = 2 # WARREN HACK: WHY NOT JUST PUT THE PROTECTED FUNCTION HERE def _state_size_with_prefix(state_size, prefix=None): """Helper function that enables int or TensorShape shape specification. This function takes a size specification, which can be an integer or a TensorShape, and converts it into a list of integers. One may specify any additional dimensions that precede the final state size specification. Args: state_size: TensorShape or int that specifies the size of a tensor. prefix: optional additional list of dimensions to prepend. Returns: result_state_size: list of dimensions the resulting tensor size. """ result_state_size = tensor_shape.as_shape(state_size).as_list()
def dimension_reduction(inputs, num_units=256, scope="dimension_reduction", reuse=None):
    with tf.variable_scope(scope, reuse=reuse):
        out = layers.fully_connected(inputs, num_outputs=num_units * 4, activation_fn=tf.nn.relu)
        out = layers.fully_connected(out, num_outputs=num_units, activation_fn=tf.nn.relu)
        return out
num_hidden = 2
num_outputs = 30
learning_rate = 0.01

"""### Placeholder

Creating a placeholder for the data called X
"""
X = tf.placeholder(tf.float32, shape=[None, 30])

"""### Layers

Creating the hidden layer and the output layers using the
[fully_connected](https://www.tensorflow.org/api_docs/python/tf/contrib/layers/fully_connected) function.
"""
hidden1 = fully_connected(X, num_hidden, activation_fn=None)
output = fully_connected(hidden1, num_outputs, activation_fn=None)

"""### Loss Function

Creating a Mean Squared Error loss function.
"""
loss = tf.reduce_mean(tf.square(output - X))

"""### Optimizer

Creating an AdamOptimizer designed to minimize the previous loss function.
"""
optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)

"""### Init
def apply(self, is_train, context_embed, answer, context_mask=None): init_fn = get_keras_initialization(self.init) bool_mask = tf.sequence_mask(context_mask, tf.shape(context_embed)[1]) with tf.variable_scope("predict"): m1, m2 = self.mapper.apply(is_train, context_embed, context_mask) if self.pre_process is not None: with tf.variable_scope("pre-process1"): m1 = self.pre_process.apply(is_train, m1, context_mask) with tf.variable_scope("pre-process2"): m2 = self.pre_process.apply(is_train, m2, context_mask) span_vector_lst = [] mask_lst = [] with tf.variable_scope("merge"): span_vector_lst.append(self.merge.apply(is_train, m1, m2)) mask_lst.append(bool_mask) for i in range(1, self.bound): with tf.variable_scope("merge", reuse=True): span_vector_lst.append( self.merge.apply(is_train, m1[:, :-i], m2[:, i:])) mask_lst.append(bool_mask[:, i:]) mask = tf.concat(mask_lst, axis=1) span_vectors = tf.concat( span_vector_lst, axis=1) # all logits -> flattened per-span predictions if self.post_process is not None: with tf.variable_scope("post-process"): span_vectors = self.post_process.apply(is_train, span_vectors) with tf.variable_scope("compute_logits"): logits = fully_connected(span_vectors, 1, activation_fn=None, weights_initializer=init_fn) logits = tf.squeeze(logits, squeeze_dims=[2]) logits = logits + VERY_NEGATIVE_NUMBER * ( 1 - tf.cast(tf.concat(mask, axis=1), tf.float32)) l = tf.shape(context_embed)[1] if len(answer) == 1: answer = answer[0] if answer.dtype == tf.int32: if self.f1_weight == 0: answer_ix = to_packed_coordinates(answer, l, self.bound) loss = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=logits, labels=answer_ix)) else: f1_mask = packed_span_f1_mask(answer, l, self.bound) if self.f1_weight < 1: f1_mask *= self.f1_weight f1_mask += (1 - self.f1_weight) * tf.one_hot( to_packed_coordinates(answer, l, self.bound), l) # TODO can we stay in log space? (actually its tricky since f1_mask can have zeros...) probs = tf.nn.softmax(logits) loss = -tf.reduce_mean( tf.log(tf.reduce_sum(probs * f1_mask, axis=1))) else: log_norm = tf.reduce_logsumexp(logits, axis=1) if self.aggregate == "sum": log_score = tf.reduce_logsumexp( logits + VERY_NEGATIVE_NUMBER * (1 - tf.cast(answer, tf.float32)), axis=1) elif self.aggregate == "max": log_score = tf.reduce_max( logits + VERY_NEGATIVE_NUMBER * (1 - tf.cast(answer, tf.float32)), axis=1) else: raise NotImplementedError() loss = tf.reduce_mean(-(log_score - log_norm)) else: raise NotImplementedError() tf.add_to_collection(tf.GraphKeys.LOSSES, loss) return PackedSpanPrediction(logits, l, self.bound)
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected  # needed for the output layer below

n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10
learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
# one-dimensional output (class labels)
y = tf.placeholder(tf.int32, [None])
# use the simplest BasicRNNCell
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
# unroll it with dynamic_rnn
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)
# raw outputs (logits)
logits = fully_connected(states, n_outputs, activation_fn=None)
# cross entropy against the true labels
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy)
# use AdamOptimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
# compute accuracy: a prediction counts as correct only if it matches y
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")
# reshape the data to a suitable input shape
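# A minimal continuation sketch (assumed, not part of the original snippet): reshape the
# flat MNIST images to [batch, n_steps, n_inputs] and run a short training loop.
X_test = mnist.test.images.reshape((-1, n_steps, n_inputs))
y_test = mnist.test.labels

n_epochs = 10
batch_size = 150
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for _ in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            X_batch = X_batch.reshape((-1, n_steps, n_inputs))
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: X_test, y: y_test})
        print(epoch, "Test accuracy:", acc_test)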
def logits_Discriminator(self, d_input, reuse=False): in_dims = d_input.get_shape().as_list() hi = d_input if len(in_dims) == 2: hi = tf.expand_dims(d_input, -1) elif len(in_dims) < 2 or len(in_dims) > 3: raise ValueError('Discriminator input must be 2-D or 3-D') with tf.variable_scope('Discriminator'): with tf.variable_scope(self.name) as scope: if reuse: scope.reuse_variables() def disc_block(block_idx, input_, kwidth, nfmaps, bnorm, activation, pooling=2): with tf.variable_scope('d_block_{}'.format(block_idx)): bias_init = None if self.bias_D_conv: bias_init = tf.constant_initializer(0.) downconv_init = \ tf.truncated_normal_initializer(stddev=0.02) # downconvolution hi_a = downconv(input_, nfmaps, kwidth=kwidth, pool=pooling, init=downconv_init, bias_init=bias_init) # VBN # activation if activation == 'leakyrelu': hi = leakyrelu(hi_a) elif activation == 'relu': hi = tf.nn.relu(hi_a) else: raise ValueError('Unrecognized activation {}' 'in D'.format(activation)) return hi # [removed] apply input noisy layer to real and fake samples for block_idx, fmaps in enumerate(self.d_num_fmaps): hi = disc_block(block_idx, hi, 31, self.d_num_fmaps[block_idx], True, 'leakyrelu') if not reuse: print('Discriminator deconved shape: ', hi.get_shape()) #hi_f = flatten(hi) d_logit_out = conv1d( hi, kwidth=1, num_kernels=1, init=tf.truncated_normal_initializer(stddev=0.02), name='logits_conv') d_logit_out = tf.squeeze(d_logit_out) d_logit_out = tf.expand_dims(d_logit_out, 1) d_logit_out = fully_connected(d_logit_out, 1, activation_fn=None) if not reuse: print('Discriminator output shape: ', d_logit_out.get_shape()) print('*****************************') return d_logit_out
def discriminator(wave_in, reuse=True): hi = wave_in # hi = tf.expand_dims(wave_in, -1) # batch_size = int(wave_in.get_shape()[0]) # set up the disc_block function with tf.variable_scope('d_model') as scope: if reuse: scope.reuse_variables() def disc_block(block_idx, input_, kwidth, nfmaps, bnorm, activation, name, pooling=2): with tf.variable_scope('d_block_{}'.format(block_idx)): if not reuse: print('D block {} input shape: {}' ''.format(block_idx, input_.get_shape()), end=' *** ') bias_init = None if bias_D_conv: if not reuse: print('biasing D conv', end=' *** ') bias_init = tf.constant_initializer(0.) downconv_init = tf.truncated_normal_initializer(stddev=0.02) ########################################## hi_a = downconv(input_, nfmaps, kwidth=kwidth, pool=pooling, init=downconv_init, bias_init=bias_init, name=name) ########################################## if not reuse: print('downconved shape: {} ' ''.format(hi_a.get_shape()), end=' *** ') # if bnorm: # if not reuse: # print('Applying VBN', end=' *** ') # hi_a = vbn(hi_a, 'd_vbn_{}'.format(block_idx)) if activation == 'leakyrelu': if not reuse: print('Applying Lrelu', end=' *** ') hi = leakyrelu(hi_a) elif activation == 'relu': if not reuse: print('Applying Relu', end=' *** ') hi = tf.nn.relu(hi_a) else: raise ValueError('Unrecognized activation {} ' 'in D'.format(activation)) return hi #%% # beg_size = canvas_size # apply input noisy layer to real and fake samples hi = gaussian_noise_layer(hi, disc_noise_std) if not reuse: print('*** Discriminator summary ***') for block_idx, fmaps in enumerate(d_num_fmaps): hi = disc_block(block_idx, hi, 31, d_num_fmaps[block_idx], False, 'leakyrelu', name='db_{}_{}'.format(block_idx,fmaps)) if not reuse: print() if not reuse: print('discriminator deconved shape: ', hi.get_shape()) hi_f = flatten(hi) #keeps batch size, flatten everything else #hi_f = tf.nn.dropout(hi_f, self.keep_prob_var) d_logit_out = conv1d(hi, kwidth=1, num_kernels=1, init=tf.truncated_normal_initializer(stddev=0.02), name='logits_conv') d_logit_out = tf.squeeze(d_logit_out) #removes dimensions of 1 # all logits connected to 1 single neuron for binary classification d_logit_out = fully_connected(d_logit_out, 1, activation_fn=None) if not reuse: print('discriminator output shape: ', d_logit_out.get_shape()) print('*****************************') return d_logit_out
def __init__(self):
    self.session = tf.Session()

    # input/output placeholders
    self.inputs_articles = tf.placeholder(tf.float32, (None, 200, INPUT_SIZE), name='input_articles')
    self.inputs_headlines = tf.placeholder(tf.float32, (None, 30, INPUT_SIZE), name='inputs_headlines')
    self.outputs = tf.placeholder(tf.float32, (None, OUTPUT_SIZE), name='outputs')  # TODO change to two dimensions

    # LSTM cells, TODO make these bidirectional!
    with tf.variable_scope('scope1') as scope1:
        # Create cell
        self.cell_articles = tf.contrib.rnn.BasicLSTMCell(RNN_HIDDEN, state_is_tuple=True)
        self.cell_articles = tf.contrib.rnn.DropoutWrapper(
            self.cell_articles, input_keep_prob=0.7, output_keep_prob=0.2)

        # Initialize batch size, initial states
        batch_size_articles = tf.shape(self.inputs_articles)[0]
        initial_state_articles = self.cell_articles.zero_state(batch_size_articles, tf.float32)

        # Hidden states, outputs
        self.rnn_outputs_articles, self.rnn_states_articles = tf.nn.dynamic_rnn(
            self.cell_articles, self.inputs_articles,
            initial_state=initial_state_articles, time_major=False)

    with tf.variable_scope('scope1') as scope1:
        scope1.reuse_variables()
        # Create cell (shares weights with the article cell via scope reuse)
        self.cell_headlines = tf.contrib.rnn.BasicLSTMCell(RNN_HIDDEN, state_is_tuple=True, reuse=True)
        self.cell_headlines = tf.contrib.rnn.DropoutWrapper(
            self.cell_headlines, input_keep_prob=0.7, output_keep_prob=0.2)

        # Initialize batch size, initial states
        batch_size_headlines = tf.shape(self.inputs_headlines)[0]
        initial_state_headlines = self.rnn_states_articles

        # Hidden states, outputs
        self.rnn_outputs_headlines, self.rnn_states_headlines = tf.nn.dynamic_rnn(
            self.cell_headlines, self.inputs_headlines,
            initial_state=initial_state_headlines, time_major=False)

    # make prediction by taking LAST rnn_outputs_articles and rnn_outputs_headlines
    # TODO: Take padding out from output
    self.rnn_outputs = tf.concat([
        self.rnn_outputs_articles[:, -1, :],
        self.rnn_outputs_headlines[:, -1, :]
    ], 1)
    #self.rnn_outputs = tf.concat([self.rnn_outputs_articles, self.rnn_outputs_headlines], 1)

    # Unscaled logits: softmax_cross_entropy_with_logits applies the softmax itself,
    # so no sigmoid/softmax activation is applied here.
    predicted_outputs = layers.fully_connected(self.rnn_outputs,
                                               num_outputs=OUTPUT_SIZE,
                                               activation_fn=None)
    #final_projection = lambda x: layers.fully_connected(x, num_outputs=OUTPUT_SIZE, activation_fn=tf.nn.sigmoid)
    #self.softmaxes = tf.nn.softmax(predicted_outputs)
    #self.pred_stance = tf.argmax(self.softmaxes, 1)

    # cross entropy between the logits and the expected output (a one-hot vector)
    #self.error = -(self.outputs * tf.log(self.softmaxes + TINY) + (1.0 - self.outputs) * tf.log(1.0 - self.softmaxes + TINY))
    #self.error = tf.reduce_mean(self.error)
    # reformat self.outputs (labels)
    #outputs_reformat = tf.argmax(self.outputs, axis = 1)
    self.error = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=self.outputs,
                                                logits=predicted_outputs))

    self.train_fn = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE,
                                           name='train_fn').minimize(self.error)
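# The two dynamic_rnn calls above share one set of LSTM weights by re-entering
# 'scope1' with reuse, and the headline RNN is seeded with the article RNN's final
# state. A stripped-down sketch of that sharing pattern (TF 1.x; sizes illustrative).
import tensorflow as tf

seq_a = tf.placeholder(tf.float32, [None, 200, 50])   # e.g. article steps
seq_b = tf.placeholder(tf.float32, [None, 30, 50])    # e.g. headline steps

with tf.variable_scope('shared_rnn') as scope:
    cell_a = tf.contrib.rnn.BasicLSTMCell(64, state_is_tuple=True)
    out_a, state_a = tf.nn.dynamic_rnn(cell_a, seq_a, dtype=tf.float32)

    scope.reuse_variables()
    cell_b = tf.contrib.rnn.BasicLSTMCell(64, state_is_tuple=True, reuse=True)
    # Same kernel/bias variables; the first RNN's final state seeds the second.
    out_b, state_b = tf.nn.dynamic_rnn(cell_b, seq_b, initial_state=state_a)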
''' multi_layer_cell = tf.contrib.rnn.MultiRNNCell([ tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons) for _ in range(n_layers) ]) multi_layer_cell_reversed = tf.contrib.rnn.MultiRNNCell([ tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons) for _ in range(n_layers) ]) rnn_outputs, states = tf.nn.bidirectional_dynamic_rnn( multi_layer_cell, multi_layer_cell_reversed, X, dtype=tf.float32) rnn_outputs_fw, rnn_outputs_bw = rnn_outputs stacked_rnn_outputs_fw = tf.reshape(rnn_outputs_fw, [-1, n_neurons]) stacked_rnn_outputs_bw = tf.reshape(rnn_outputs_bw, [-1, n_neurons]) stacked_rnn_outputs = tf.add(stacked_rnn_outputs_fw, stacked_rnn_outputs_bw) stacked_outputs = fully_connected(stacked_rnn_outputs, n_outputs, activation_fn=None) # stacked_outputs = fully_connected(stacked_rnn_outputs,n_outputs) #This will force the output to be positive numbers outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs]) learning_rate = 0.001 #loss = tf.nn.l2_loss(tf.reduce_mean(tf.square(outputs-y))+ sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))) loss = tf.add_n([tf.reduce_mean(tf.square(outputs - y))] + tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) training_op = optimizer.minimize(loss) init = tf.global_variables_initializer() n_iterations = 10000
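# The commented-out loss above folds the collected L2 penalties into the objective
# with tf.add_n. A compact, self-contained illustration of that mechanism
# (TF 1.x contrib layers; layer sizes are arbitrary).
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected, l2_regularizer

x = tf.placeholder(tf.float32, [None, 20])
y = tf.placeholder(tf.float32, [None, 1])

# Layers built with a weights_regularizer add their penalty to the
# REGULARIZATION_LOSSES collection automatically.
h = fully_connected(x, 64, weights_regularizer=l2_regularizer(1e-3))
pred = fully_connected(h, 1, activation_fn=None,
                       weights_regularizer=l2_regularizer(1e-3))

mse = tf.reduce_mean(tf.square(pred - y))
loss = tf.add_n([mse] + tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))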
def alexnet_v2(inputs, is_training=True, emb_size=4096, dropout_keep_prob=0.5, scope='alexnet_v2'): inputs = tf.cast(inputs, tf.float32) if new_shape is not None: shape = new_shape inputs = tf.image.resize_images( inputs, tf.constant(new_shape[:2]), method=tf.image.ResizeMethod.BILINEAR) else: shape = img_shape if is_training and augmentation_function is not None: inputs = augmentation_function(inputs, shape) if image_summary: tf.summary.image('Inputs', inputs, max_outputs=3) net = inputs mean = tf.reduce_mean(net, [1, 2], True) std = tf.reduce_mean(tf.square(net - mean), [1, 2], True) net = (net - mean) / (std + 1e-5) inputs = net with variable_scope.variable_scope(scope, 'alexnet_v2', [inputs]) as sc: end_points_collection = sc.original_name_scope + '_end_points' # Collect outputs for conv2d, fully_connected and max_pool2d. with arg_scope([ layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d ], outputs_collections=[end_points_collection]): net = layers.conv2d(inputs, 64, [11, 11], 4, padding='VALID', scope='conv1') net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1') net = layers.conv2d(net, 192, [5, 5], scope='conv2') net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2') net = layers.conv2d(net, 384, [3, 3], scope='conv3') net = layers.conv2d(net, 384, [3, 3], scope='conv4') net = layers.conv2d(net, 256, [3, 3], scope='conv5') net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5') net = slim.flatten(net, scope='flatten') # Use conv2d instead of fully_connected layers. with arg_scope( [slim.fully_connected], weights_initializer=trunc_normal(0.005), biases_initializer=init_ops.constant_initializer(0.1)): net = layers.fully_connected(net, 4096, scope='fc6') net = layers_lib.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout6') net = layers.fully_connected(net, emb_size, scope='fc7') return net
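# The whitening step above normalizes each image by its per-image mean and mean
# squared deviation. For reference, a conventional per-image standardization that
# divides by the standard deviation can be written with tf.nn.moments
# (a sketch for comparison, not part of alexnet_v2).
import tensorflow as tf

def standardize_per_image(images, eps=1e-5):
    """Zero-mean, unit-variance normalization over the spatial dimensions."""
    mean, var = tf.nn.moments(images, axes=[1, 2], keep_dims=True)
    return (images - mean) / tf.sqrt(var + eps)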
def inference(images, size,labels, training_nn, training_class, _reuse): # # batch_norm_decay = 0.9 batch_norm_epsilon = 1e-5 batch_norm_scale = True batch_norm_params = { 'is_training': training_nn, 'decay': batch_norm_decay, 'epsilon': batch_norm_epsilon, 'scale': batch_norm_scale, 'updates_collections': None, # } with arg_scope( [layers.conv2d], kernel_size = 3, weights_initializer = tf.random_normal_initializer(stddev=0.02), biases_initializer = tf.constant_initializer(0.0), activation_fn=tf.nn.elu, normalizer_fn=layers.batch_norm, normalizer_params=batch_norm_params, trainable = training_nn, reuse=_reuse, padding='SAME', stride=1): conv0 = layers.conv2d(images,num_outputs = 64, scope='SecondAMIN/conv0') with tf.name_scope('convBlock-1') as scope: conv1 = layers.conv2d(conv0,num_outputs = 128, scope='SecondAMIN/conv1') bconv1 = layers.conv2d(conv1,num_outputs = 196, scope='SecondAMIN/bconv1') conv2 = layers.conv2d(bconv1, num_outputs = 128, scope='SecondAMIN/conv2') pool1 = layers.max_pool2d(conv2, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool1') _activation_summary(conv1) _activation_summary(bconv1) _activation_summary(conv2) with tf.name_scope('convBlock-2') as scope: conv3 = layers.conv2d(pool1, num_outputs = 128, scope='SecondAMIN/conv3') bconv2 = layers.conv2d(conv3, num_outputs = 196, scope='SecondAMIN/bconv2') conv4 = layers.conv2d(bconv2, num_outputs = 128, scope='SecondAMIN/conv4') pool2 = layers.max_pool2d(conv4, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool2') _activation_summary(conv3) _activation_summary(bconv2) _activation_summary(conv4) with tf.name_scope('convBlock-3') as scope: conv5 = layers.conv2d(pool2, num_outputs = 128, scope='SecondAMIN/conv5') bconv3 = layers.conv2d(conv5, num_outputs = 196, scope='SecondAMIN/bconv3') conv6 = layers.conv2d(bconv3, num_outputs = 128, scope='SecondAMIN/conv6') pool3 = layers.avg_pool2d(conv6, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool3') _activation_summary(conv5) _activation_summary(bconv3) _activation_summary(conv6) map1 = tf.image.resize_images(pool1,[32,32]) map2 = tf.image.resize_images(pool2,[32,32]) map3 = tf.image.resize_images(pool3,[32,32]) summap = tf.concat([map1, map2, map3],3) # with tf.name_scope('Depth-Map-Block') as scope: conv7 = layers.conv2d(summap, num_outputs = 128, scope='SecondAMIN/conv7') dp1 = tf.layers.dropout(conv7,rate = 0.2, training = training_nn, name = 'SecondAMIN/dropout1') conv8 = layers.conv2d(dp1, num_outputs = 64, scope='SecondAMIN/conv8') _activation_summary(conv7) _activation_summary(conv8) with arg_scope( [layers.conv2d], kernel_size = 3, weights_initializer = tf.random_normal_initializer(stddev=0.02), biases_initializer = tf.constant_initializer(0.0), activation_fn= None, normalizer_fn= None, padding='SAME', trainable = training_nn, reuse=_reuse, stride=1): # conv11 = layers.conv2d(conv8, num_outputs = 1, scope='SecondAMIN/conv11') _activation_summary(conv11) tf.summary.image('depthMap_Second', conv11, max_outputs=FLAGS.batch_size) with arg_scope( [layers.conv2d], kernel_size = 3, weights_initializer = tf.random_normal_initializer(stddev=0.02), biases_initializer = tf.constant_initializer(0.0), activation_fn=tf.nn.elu, normalizer_fn=layers.batch_norm, normalizer_params=batch_norm_params, trainable = training_nn, reuse=_reuse, padding='SAME', stride=1): conv0_fir = layers.conv2d(images,num_outputs = 24, scope='FirstAMIN/conv0') # pool1_fir = layers.max_pool2d(conv0_fir, kernel_size=[3, 3], stride=[2, 2], 
padding='SAME', scope='FirstAMIN/pool1') with tf.name_scope('convBlock-1_fir') as scope: conv1_fir = layers.conv2d(pool1_fir,num_outputs = 20, scope='FirstAMIN/conv1')# bconv1_fir = layers.conv2d(conv1_fir,num_outputs = 25, scope='FirstAMIN/bconv1')# conv2_fir = layers.conv2d(bconv1_fir, num_outputs = 20, scope='FirstAMIN/conv2')# with tf.name_scope('convBlock-2_fir') as scope: pool2_fir = layers.max_pool2d(conv2_fir, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='FirstAMIN/pool2') conv3_fir = layers.conv2d(pool2_fir, num_outputs = 20, scope='FirstAMIN/conv3') bconv2_fir = layers.conv2d(conv3_fir, num_outputs = 25, scope='FirstAMIN/bconv2') conv4_fir = layers.conv2d(bconv2_fir, num_outputs = 20, scope='FirstAMIN/conv4') with tf.name_scope('convBlock-3_fir') as scope: pool3_fir = layers.avg_pool2d(conv4_fir, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='FirstAMIN/pool3') conv5_fir = layers.conv2d(pool3_fir, num_outputs = 20, scope='FirstAMIN/conv5') bconv3_fir = layers.conv2d(conv5_fir, num_outputs = 25, scope='FirstAMIN/bconv3') conv6_fir = layers.conv2d(bconv3_fir, num_outputs = 20, scope='FirstAMIN/conv6') map1_fir = tf.image.resize_images(conv2_fir,[32,32]) map2_fir = tf.image.resize_images(conv4_fir,[32,32]) map3_fir = conv6_fir summap_fir = tf.concat([map1_fir, map2_fir, map3_fir],3) # with tf.name_scope('Depth-Map-Block_fir') as scope: conv7_fir = layers.conv2d(summap_fir, num_outputs = 28, scope='FirstAMIN/conv7') dp1_fir = tf.layers.dropout(conv7_fir,rate = 0, training = training_nn, name = 'FirstAMIN/dropout2') conv8_fir = layers.conv2d(dp1_fir, num_outputs =16 , scope='FirstAMIN/conv8') with arg_scope( [layers.conv2d], kernel_size = 3, weights_initializer = tf.random_normal_initializer(stddev=0.02), biases_initializer = None, # activation_fn= None, normalizer_fn= None, padding='SAME', reuse=_reuse, stride=1): # conv11_fir = layers.conv2d(conv8_fir, num_outputs = 1, scope='FirstAMIN/conv11') tf.summary.image('ZeroOneMap', tf.cast(256*conv11_fir,tf.uint8), max_outputs=FLAGS.batch_size) with arg_scope( [layers.conv2d], kernel_size = 3, weights_initializer = tf.random_normal_initializer(stddev=0.02), biases_initializer = tf.constant_initializer(0.0), activation_fn=tf.nn.elu, normalizer_fn=layers.batch_norm, normalizer_params=batch_norm_params, trainable = training_nn, padding='SAME', reuse=_reuse, stride=1): # with tf.name_scope('Score-Map-Block09') as scope: summap_fir = tf.image.resize_images(summap_fir,[256,256]) conv9_fir = layers.conv2d(summap_fir, num_outputs = 28, scope='FirstAMIN/conv9') conv10_fir = layers.conv2d(conv9_fir, num_outputs = 24, scope='FirstAMIN/conv10') # conv12_fir = layers.conv2d(conv10_fir, num_outputs = 20, scope='FirstAMIN/conv12') conv13_fir = layers.conv2d(conv12_fir, num_outputs = 20, scope='FirstAMIN/conv13') # conv14_fir = layers.conv2d(conv13_fir, num_outputs = 20, scope='FirstAMIN/conv14') conv15_fir = layers.conv2d(conv14_fir, num_outputs = 16, scope='FirstAMIN/conv15') # conv16_fir = layers.conv2d(conv15_fir, num_outputs = 16, scope='FirstAMIN/conv16') with arg_scope( [layers.conv2d], kernel_size = 3, weights_initializer = tf.random_normal_initializer(stddev=0.002), biases_initializer = None, #tf.constant_initializer(0.0), activation_fn= None, normalizer_fn= None, padding='SAME', reuse=_reuse, stride=1): conv17 = layers.conv2d(conv16_fir, num_outputs = 6, scope='FirstAMIN/conv17') thirdPart_comp_1 = tf.complex(conv17, tf.zeros_like(conv17)) thirdPart_comp_1=tf.transpose(thirdPart_comp_1, perm=[0,3,1,2]) 
thirdPart_fft_1=tf.abs(tf.fft2d(thirdPart_comp_1, name='summap_fft_real_1')) thirdPart_fft_1=tf.transpose(thirdPart_fft_1, perm=[0,2,3,1]) thirdPart_fft_1=tf.log1p(thirdPart_fft_1[:,32:256-32,32:256-32,:]) # Live_est1= images-conv17/45 Live_est_mask = tf.cast(tf.greater(Live_est1,0),tf.float32) Live_est=Live_est1*Live_est_mask # ################################################################################################################################# with arg_scope( [layers.conv2d], kernel_size = 3, weights_initializer = tf.random_normal_initializer(stddev=0.02), biases_initializer = tf.constant_initializer(0.0), activation_fn=tf.nn.elu, normalizer_fn=layers.batch_norm, normalizer_params=batch_norm_params, trainable = training_nn, padding='SAME', reuse=_reuse, stride=1): # Score Map Branch with tf.name_scope('Score-Map-Block1_dis') as scope: conv9_dis = layers.conv2d(Live_est, num_outputs = 24, scope='ThirdAMIN/conv9') conv10_dis = layers.conv2d(conv9_dis, num_outputs = 20, scope='ThirdAMIN/conv10') pool1_dis = layers.max_pool2d(conv10_dis, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool1') conv12_dis = layers.conv2d(pool1_dis, num_outputs = 20, scope='ThirdAMIN/conv12') conv13_dis = layers.conv2d(conv12_dis, num_outputs = 16, scope='ThirdAMIN/conv13') pool2_dis = layers.max_pool2d(conv13_dis, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool2') conv14_dis = layers.conv2d(pool2_dis, num_outputs = 12, scope='ThirdAMIN/conv14') conv15_dis = layers.conv2d(conv14_dis, num_outputs = 6, scope='ThirdAMIN/conv15') pool3_dis = layers.max_pool2d(conv15_dis, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool3') conv16_dis = layers.conv2d(pool3_dis, num_outputs = 1, scope='ThirdAMIN/conv16') conv20_dis=tf.reshape(conv16_dis, [6,32*32]) sc333_dis = layers.fully_connected(conv20_dis, num_outputs = 100, reuse=_reuse, scope='ThirdAMIN/bconv15_sc333_dis') dp1_dis = tf.layers.dropout(sc333_dis,rate = 0.2, training = training_nn, name = 'dropout3') sc = layers.fully_connected(dp1_dis, num_outputs = 2, reuse=_reuse, weights_initializer = tf.random_normal_initializer(stddev=0.02), biases_initializer = None, #tf.constant_initializer(0.0), activation_fn= None, normalizer_fn= None,scope='ThirdAMIN/bconv10_sc') conv9_dis2 = layers.conv2d(images, num_outputs = 24, reuse= True, scope='ThirdAMIN/conv9') conv10_dis2 = layers.conv2d(conv9_dis2, num_outputs = 20, reuse= True, scope='ThirdAMIN/conv10') pool1_dis2 = layers.max_pool2d(conv10_dis2, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool1') conv12_dis2 = layers.conv2d(pool1_dis2, num_outputs = 20,reuse= True, scope='ThirdAMIN/conv12') conv13_dis2 = layers.conv2d(conv12_dis2, num_outputs = 16, reuse= True, scope='ThirdAMIN/conv13') pool2_dis2 = layers.max_pool2d(conv13_dis2, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool2') conv14_dis2 = layers.conv2d(pool2_dis2, num_outputs = 12, reuse= True, scope='ThirdAMIN/conv14') conv15_dis2 = layers.conv2d(conv14_dis2, num_outputs = 6, reuse= True, scope='ThirdAMIN/conv15') pool3_dis2 = layers.max_pool2d(conv15_dis2, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool3') conv16_dis2 = layers.conv2d(pool3_dis2, num_outputs = 1, reuse= True, scope='ThirdAMIN/conv16') conv20_dis2=tf.reshape(conv16_dis2, [6,32*32]) sc333_dis2 = layers.fully_connected(conv20_dis2, reuse= True, num_outputs = 100,scope='ThirdAMIN/bconv15_sc333_dis') dp1_dis2 = tf.layers.dropout(sc333_dis2,rate = 0.2, training = training_nn, name = 
'dropout4') sc2 = layers.fully_connected(dp1_dis2, num_outputs = 2, reuse= True, weights_initializer = tf.random_normal_initializer(stddev=0.02), biases_initializer = None, #tf.constant_initializer(0.0), activation_fn= None, normalizer_fn= None,scope='ThirdAMIN/bconv10_sc') ################################################################################################################################## batch_norm_decay = 0.9 batch_norm_epsilon = 1e-5 batch_norm_scale = True batch_norm_params = { 'is_training': False, 'decay': batch_norm_decay, 'epsilon': batch_norm_epsilon, 'scale': batch_norm_scale, 'updates_collections': None, # 'trainable':False, #'reuse':True } with arg_scope( [layers.conv2d], kernel_size = 3, weights_initializer = tf.random_normal_initializer(stddev=0.02), biases_initializer = tf.constant_initializer(0.0), activation_fn=tf.nn.elu, normalizer_fn=layers.batch_norm, normalizer_params=batch_norm_params, trainable = False, padding='SAME', reuse=True, stride=1): ################################################################################################################################# conv0_new = layers.conv2d(Live_est,num_outputs = 64, scope='SecondAMIN/conv0') with tf.name_scope('convBlock-1_new') as scope: conv1_new = layers.conv2d(conv0_new,num_outputs = 128, scope='SecondAMIN/conv1') bconv1_new = layers.conv2d(conv1_new,num_outputs = 196, scope='SecondAMIN/bconv1') conv2_new = layers.conv2d(bconv1_new, num_outputs = 128, scope='SecondAMIN/conv2') pool1_new = layers.max_pool2d(conv2_new, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool1') with tf.name_scope('convBlock-2_new') as scope: conv3_new = layers.conv2d(pool1_new, num_outputs = 128, scope='SecondAMIN/conv3') bconv2_new = layers.conv2d(conv3_new, num_outputs = 196, scope='SecondAMIN/bconv2') conv4_new = layers.conv2d(bconv2_new, num_outputs = 128, scope='SecondAMIN/conv4') pool2_new = layers.max_pool2d(conv4_new, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool2') with tf.name_scope('convBlock-3_new') as scope: conv5_new = layers.conv2d(pool2_new, num_outputs = 128, scope='SecondAMIN/conv5') bconv3_new = layers.conv2d(conv5_new, num_outputs = 196, scope='SecondAMIN/bconv3') conv6_new = layers.conv2d(bconv3_new, num_outputs = 128, scope='SecondAMIN/conv6') pool3_new = layers.avg_pool2d(conv6_new, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool3') map1_new = tf.image.resize_images(pool1_new,[32,32]) map2_new = tf.image.resize_images(pool2_new,[32,32]) map3_new = tf.image.resize_images(pool3_new,[32,32]) summap_new = tf.concat([map1_new, map2_new, map3_new],3) # Depth Map Branch with tf.name_scope('Depth-Map-Block_new') as scope: conv7_new = layers.conv2d(summap_new, num_outputs = 128, scope='SecondAMIN/conv7') dp1_new = tf.layers.dropout(conv7_new,rate = 0.2, training = training_nn, name = 'SecondAMIN/dropout1') conv8_new = layers.conv2d(dp1_new, num_outputs = 64, scope='SecondAMIN/conv8') with arg_scope( [layers.conv2d], kernel_size = 3, weights_initializer = tf.random_normal_initializer(stddev=0.02), biases_initializer = tf.constant_initializer(0.0), activation_fn= None, normalizer_fn= None, padding='SAME', trainable = False, reuse=True, stride=1): # Depth Map Branch conv11_new = layers.conv2d(conv8_new, num_outputs = 1, scope='SecondAMIN/conv11') label_Amin1=size LabelsWholeImage=tf.cast(np.ones([6,32,32,1]), tf.float32) LabelsWholeImage2=LabelsWholeImage*tf.reshape(tf.cast(1-label_Amin1,tf.float32),[6,1,1,1]) 
LabelsWholeImage=labels*tf.reshape(tf.cast(label_Amin1,tf.float32),[6,1,1,1]) Z_GT2=np.zeros([6,3,3,1]) Z_GT2[:,1,1,:]=1 GT2=tf.cast(Z_GT2, tf.float32) tf.summary.image('GT2', LabelsWholeImage[:,:,:,0:1], max_outputs=FLAGS.batch_size) tf.summary.image('SC', tf.cast(256*conv11[:,:,:,0:1],tf.uint8), max_outputs=FLAGS.batch_size) tf.summary.image('Live_SC', tf.cast(256*conv11_new[:,:,:,0:1],tf.uint8), max_outputs=FLAGS.batch_size) tf.summary.image('Live', tf.cast(256*Live_est[:,:,:,3:6],tf.uint8), max_outputs=FLAGS.batch_size) tf.summary.image('inputImage', tf.cast(256*images[:,:,:,3:6],tf.uint8), max_outputs=FLAGS.batch_size) tf.summary.image('GT3_Artifact', LabelsWholeImage2[:,:,:,0:1], max_outputs=FLAGS.batch_size) tf.summary.image('Artifact', conv17[:,:,:,3:6], max_outputs=FLAGS.batch_size) return Live_est, conv17, conv11, GT2,conv17,images,thirdPart_fft_1,LabelsWholeImage, conv11_new,conv11_new , LabelsWholeImage2, sc, sc2, conv11_fir
    if activation == "relu":
        return tf.nn.relu(z)
    else:
        return z

with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, "hidden1", activation="relu")
    hidden2 = neuron_layer(hidden1, n_hidden2, "hidden2", activation="relu")
    # logits: the pre-softmax scores
    logits = neuron_layer(hidden2, n_outputs, "outputs")
'''

with tf.name_scope("dnn"):
    # fully_connected picks a sensible initialization for w and b and uses ReLU by default
    hidden1 = fully_connected(X, n_hidden1, scope="hidden1")
    hidden2 = fully_connected(hidden1, n_hidden2, scope="hidden2")
    logits = fully_connected(hidden2, n_outputs, scope="outputs", activation_fn=None)

with tf.name_scope("loss"):
    # Define the cross-entropy loss and average it over the samples.
    # This op applies softmax and the cross-entropy in one efficient step.
    # Unlike softmax_cross_entropy_with_logits, which expects one-hot labels,
    # the sparse variant used here takes class ids 0-9.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

learning_rate = 0.01

with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)
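# As the loss comments above note, sparse_softmax_cross_entropy_with_logits takes
# integer class ids while softmax_cross_entropy_with_logits expects one-hot targets;
# both apply the softmax internally, so they are fed raw logits. A minimal sketch
# of the two forms (TF 1.x).
import tensorflow as tf

logits = tf.placeholder(tf.float32, [None, 10])
y_idx = tf.placeholder(tf.int64, [None])        # class ids 0..9
y_hot = tf.one_hot(y_idx, depth=10)             # one-hot encoding of the same labels

loss_sparse = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_idx, logits=logits))
loss_dense = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_hot, logits=logits))
# Both reductions yield the same value for the same underlying labels.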
n_hidden1 = 300
n_hidden2 = 150
n_hidden3 = 300
n_outputs = n_inputs
learning_rate = 0.001  # learning rate for the gradient optimizer
l2_reg = 0.01

# Build the network: an MLP-based autoencoder
x = tf.placeholder(tf.float32, shape=(None, n_inputs), name="input_tensor")

with tf.contrib.framework.arg_scope(
        [fully_connected],
        activation_fn=tf.nn.elu,
        weights_initializer=tf.contrib.layers.variance_scaling_initializer(),
        weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):
    hidden1 = fully_connected(x, n_hidden1)
    hidden2 = fully_connected(hidden1, n_hidden2)
    hidden3 = fully_connected(hidden2, n_hidden3)
    outputs = fully_connected(hidden3, n_outputs, activation_fn=None)

reconstruction_loss = tf.reduce_mean(tf.square(outputs - x))  # MSE loss
reg_losses = tf.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
loss = tf.add_n([reconstruction_loss] + reg_losses)

optimizer = tf.train.AdamOptimizer(learning_rate)
train_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

# Next, build the training session loop
n_epochs = 5
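# The autoencoder above stops after defining init and n_epochs. A minimal training
# loop continuing that graph, assuming a NumPy matrix X_train of shape
# [num_samples, n_inputs] (X_train and batch_size are assumptions, not part of the
# snippet above).
import numpy as np

batch_size = 150
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for start in range(0, len(X_train), batch_size):
            batch = X_train[start:start + batch_size]
            sess.run(train_op, feed_dict={x: batch})
        mse = sess.run(reconstruction_loss, feed_dict={x: X_train})
        print("epoch", epoch, "reconstruction MSE", mse)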
def main(unused_argv): mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True) if FLAGS.download_only: sys.exit(0) # Sanity check on the number of workers and the worker index # if FLAGS.task_index >= FLAGS.num_workers: raise ValueError("Worker index %d exceeds number of workers %d " % (FLAGS.task_index, FLAGS.num_workers)) # Sanity check on the number of parameter servers # if FLAGS.num_parameter_servers <= 0: raise ValueError("Invalid num_parameter_servers value: %d" % FLAGS.num_parameter_servers) ps_hosts = re.findall(r'[\w\.:]+', FLAGS.ps_hosts) worker_hosts = re.findall(r'[\w\.:]+', FLAGS.worker_hosts) server = tf.train.Server({"ps":ps_hosts,"worker":worker_hosts}, job_name=FLAGS.job_name, task_index=FLAGS.task_index) print("GRPC URL: %s" % server.target) print("Task index = %d" % FLAGS.task_index) print("Number of workers = %d" % FLAGS.num_workers) if FLAGS.job_name == "ps": server.join() else: is_chief = (FLAGS.task_index == 0) if FLAGS.sync_replicas: if FLAGS.replicas_to_aggregate is None: replicas_to_aggregate = FLAGS.num_workers else: replicas_to_aggregate = FLAGS.replicas_to_aggregate # Construct device setter object # device_setter = get_device_setter(FLAGS.num_parameter_servers, FLAGS.num_workers) # The device setter will automatically place Variables ops on separate # # parameter servers (ps). The non-Variable ops will be placed on the workers. # with tf.device(device_setter): global_step = tf.Variable(0, name="global_step", trainable=False) with tf.name_scope('input'): # input # x = tf.placeholder(tf.float32, shape=[None, 784], name="x-input") x_image = tf.reshape(x, [-1,28,28,1]) # label, 10 output classes # y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y-input") prob = tf.placeholder(tf.float32, name='keep_prob') stack1_conv1 = layers.convolution2d(x_image, 64, [3,3], weights_regularizer=layers.l2_regularizer(0.1), biases_regularizer=layers.l2_regularizer(0.1), scope='stack1_Conv1') stack1_conv2 = layers.convolution2d(stack1_conv1, 64, [3,3], weights_regularizer=layers.l2_regularizer(0.1), biases_regularizer=layers.l2_regularizer(0.1), scope='stack1_Conv2') stack1_pool = layers.max_pool2d(stack1_conv2, [2,2], padding='SAME', scope='stack1_Pool') stack3_pool_flat = layers.flatten(stack1_pool, scope='stack3_pool_flat') fcl1 = layers.fully_connected(stack3_pool_flat, 512, weights_regularizer=layers.l2_regularizer(0.1), biases_regularizer=layers.l2_regularizer(0.1), scope='FCL1') fcl1_d = layers.dropout(fcl1, keep_prob=prob, scope='dropout1') fcl2 = layers.fully_connected(fcl1_d, 128, weights_regularizer=layers.l2_regularizer(0.1), biases_regularizer=layers.l2_regularizer(0.1), scope='FCL2') fcl2_d = layers.dropout(fcl2, keep_prob=prob, scope='dropout2') y, cross_entropy = skflow.models.logistic_regression(fcl2_d, y_, init_stddev=0.01) tf.scalar_summary('cross_entropy', cross_entropy) with tf.name_scope('train'): start_l_rate = 0.001 decay_step = 1000 decay_rate = 0.5 learning_rate = tf.train.exponential_decay(start_l_rate, global_step, decay_step, decay_rate, staircase=False) grad_op = tf.train.RMSPropOptimizer(learning_rate=learning_rate) '''rep_op = tf.train.SyncReplicasOptimizer(grad_op, replicas_to_aggregate=len(workers), replica_id=FLAGS.task_index, total_num_replicas=len(workers)) # also belong to the same class as other optimizers''' train_op = tf.contrib.layers.optimize_loss(loss=cross_entropy, global_step=global_step, learning_rate=0.001, optimizer=grad_op, clip_gradients=1) tf.scalar_summary('learning_rate', learning_rate) '''if FLAGS.sync_replicas 
and is_chief: # Initial token and chief queue runners required by the sync_replicas mode # chief_queue_runner = opt.get_chief_queue_runner() init_tokens_op = opt.get_init_tokens_op()''' with tf.name_scope('accuracy'): correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) tf.scalar_summary('accuracy', accuracy) merged = tf.merge_all_summaries() init_op = tf.initialize_all_variables() sv = tf.train.Supervisor(is_chief=is_chief, init_op=init_op, recovery_wait_secs=1, global_step=global_step) sess_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False, device_filters=["/job:ps", "/job:worker/task:%d" % FLAGS.task_index]) # The chief worker (task_index==0) session will prepare the session, # # while the remaining workers will wait for the preparation to complete. # if is_chief: print("Worker %d: Initializing session..." % FLAGS.task_index) else: print("Worker %d: Waiting for session to be initialized..." % FLAGS.task_index) sess = sv.prepare_or_wait_for_session(server.target, config=sess_config) if tf.gfile.Exists('./summary/train'): tf.gfile.DeleteRecursively('./summary/train') tf.gfile.MakeDirs('./summary/train') train_writer = tf.train.SummaryWriter('./summary/train', sess.graph) print("Worker %d: Session initialization complete." % FLAGS.task_index) '''if FLAGS.sync_replicas and is_chief: # Chief worker will start the chief queue runner and call the init op # print("Starting chief queue runner and running init_tokens_op") sv.start_queue_runners(sess, [chief_queue_runner]) sess.run(init_tokens_op)''' ## Perform training ## time_begin = time.time() print("Training begins @ %s" % time.ctime(time_begin)) local_step = 1 while True: # Training feed # batch_xs, batch_ys = mnist.train.next_batch(FLAGS.batch_size) train_feed = {x: batch_xs, y_: batch_ys, prob: 0.8} run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() _, step, loss, summary = sess.run([train_op, global_step, cross_entropy, merged], feed_dict=train_feed, options=run_options, run_metadata=run_metadata) now = time.time() if(local_step % 2 == 0): print("%s: Worker %d: training step %d done (global step: %d), loss: %.6f" % (time.ctime(now), FLAGS.task_index, local_step, step+1, loss)) train_writer.add_run_metadata(run_metadata, 'step'+str(step+1)) train_writer.add_summary(summary, step+1) if step+1 >= FLAGS.train_steps: break local_step += 1 time_end = time.time() print("Training ends @ %s" % time.ctime(time_end)) training_time = time_end - time_begin print("Training elapsed time: %f s" % training_time) # memory issue occured, split testing data into batch # acc_acu = 0. for i in xrange(int(10000/1000)): test_x, test_y = mnist.test.next_batch(1000) acc_batch = sess.run(accuracy, feed_dict={x: test_x, y_: test_y, prob: 1.0}) print(acc_batch) acc_acu += acc_batch acc = acc_acu/10.0 print ("test accuracy %g" % acc) sv.stop()
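# The block above follows the usual between-graph replication recipe: parameter-server
# tasks call server.join(), and each worker builds its own copy of the graph under a
# device setter so that variables land on the ps job. A minimal skeleton of that
# pattern using the standard tf.train.replica_device_setter (the snippet above uses
# its own get_device_setter helper; host names here are illustrative).
import tensorflow as tf

cluster = tf.train.ClusterSpec({"ps": ["ps0:2222"],
                                "worker": ["worker0:2222", "worker1:2222"]})
server = tf.train.Server(cluster, job_name="worker", task_index=0)

with tf.device(tf.train.replica_device_setter(
        worker_device="/job:worker/task:0", cluster=cluster)):
    global_step = tf.Variable(0, name="global_step", trainable=False)
    # ... model graph goes here; Variable ops are placed on the ps tasks.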
with tf.name_scope('dnn'):
    hidden1 = my_fully_connected(X, 300, 'hidden1', activation='relu')
    hidden2 = my_fully_connected(hidden1, 100, 'hidden2', activation='relu')
    logits = my_fully_connected(hidden2, 10, 'outputs')
"""

## USING TF.CONTRIB.LAYERS FULLY CONNECTED LAYER
from tensorflow.contrib.layers import fully_connected

##########################
### CONSTRUCTION PHASE ###
##########################

with tf.name_scope('dnn'):
    hidden1 = fully_connected(X, 300, scope='hidden1')
    hidden2 = fully_connected(hidden1, 100, scope='hidden2')
    logits = fully_connected(hidden2, 10, scope='outputs', activation_fn=None)

with tf.name_scope('loss'):
    # softmax_cross_entropy_with_logits requires keyword arguments in TF 1.x
    xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

lr = 1e-3
with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, tf.argmax(y, axis=1), 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
def __init__(self, config: BaselineConfig, is_training, features, init_embedding=None): """Constructor for BertModel. Args: config: `BertConfig` instance. is_training: bool. rue for training model, false for eval model. Controls whether dropout will be applied. input_ids: int64 Tensor of shape [batch_size, seq_length, feat_size]. label_ids: (optional) int64 Tensor of shape [batch_size, seq_length]. seq_length: (optional) int64 Tensor of shape [batch_size]. init_embedding: (optional) Raises: ValueError: The config is invalid or one of the input tensor shapes is invalid. """ super(BaselineModel).__init__() input_ids = features["input_ids"] seq_length = features["seq_length"] label_ids = features["label_ids"] self.input_ids = input_ids self.label_ids = label_ids self.seq_length = seq_length self.is_training = is_training input_shape = model_utils.get_shape_list(input_ids, expected_rank=3) self.batch_size = input_shape[0] self.max_length = input_shape[1] self.window_size = input_shape[2] if not is_training: config.embedding_dropout_prob = 0.0 config.hidden_dropout_prob = 0.0 if init_embedding is None: self.embedding = tf.get_variable( shape=[config.vocab_size, config.embedding_size], dtype=tf.float32, name='embedding', initializer=tf.truncated_normal_initializer(stddev=0.02)) else: self.embedding = tf.Variable(init_embedding, dtype=tf.float32, name='embedding') with tf.variable_scope('embedding'): x = tf.nn.embedding_lookup(self.embedding, self.input_ids) feat_size = self.window_size x = tf.reshape( x, [self.batch_size, -1, feat_size * config.embedding_size]) x = model_utils.dropout(x, config.embedding_dropout_prob) def lstm_cell(dim): cell = tf.nn.rnn_cell.LSTMCell(dim, name='basic_lstm_cell') cell = rnn.DropoutWrapper(cell, output_keep_prob=1.0 - config.hidden_dropout_prob) cell = tf.nn.rnn_cell.MultiRNNCell([cell] * config.num_hidden_layers) return cell with tf.variable_scope('rnn'): (forward_output, backword_output), _ = tf.nn.bidirectional_dynamic_rnn( cell_fw=lstm_cell(config.hidden_size), cell_bw=lstm_cell(config.hidden_size), inputs=x, sequence_length=self.seq_length, dtype=tf.float32) output = tf.concat([forward_output, backword_output], axis=2) with tf.variable_scope('output'): scores = layers.fully_connected(inputs=output, num_outputs=config.num_classes, activation_fn=None) transition_param = tf.get_variable( "transitions", [config.num_classes, config.num_classes]) self.prediction, _ = crf.crf_decode(scores, transition_param, self.seq_length) with tf.variable_scope('loss'): # crf self.log_likelihood, _ = crf.crf_log_likelihood( scores, self.label_ids, self.seq_length, transition_param) self.loss = tf.reduce_mean(-self.log_likelihood)
def __init__(self, args): """ configure model parameters :param args: model configuration parameters :param mode: train or val """ self._batch_size = args.batch_size self._feature_size = args.feature_size self._mode = args.mode # CNN parameters self._in_channel = 1 self._in_width = args.patch_size # context w self._in_height = args.patch_size # context h self._target_size_cnn = args.target_size_cnn self._filter_size = args.filter_size self._dropout = args.dropout # RNN parameters self._discrete_steps = len(args.discrete_timestamps) self._target_size = self._discrete_steps * args.feature_size # output size of fc layer after rnn self._rnn_size = args.rnn_size self._seq_length = args.seq_length self._n_layers = args.n_layers self._keep_prob = args.keep_prob self._lr = args.learning_rate # optimizer global step self._global_step = tf.Variable(0, trainable=False) # sequence length argument self._sequence_length = tf.convert_to_tensor( [self._seq_length for _ in range(self._batch_size)]) # past trajectory self._input_data = tf.placeholder( tf.float32, shape=[self._batch_size, self._seq_length, self._feature_size], name='input_data') # context path to feed into CNN self._context_input = tf.placeholder( tf.float32, shape=[ self._batch_size * self._seq_length, self._in_width, self._in_height, self._in_channel ], name='context_data') # ground true trajectory self._target_data = tf.placeholder(tf.float32, shape=[ self._batch_size, self._seq_length, self._discrete_steps, self._feature_size ], name='target_data') # transformed delta ground truth self._target_data_delta = tf.placeholder(tf.float32, shape=[ self._batch_size, self._seq_length, self._discrete_steps, self._feature_size ], name='target_data_delta') def conv2d(input, num_output, kernel_size, stride): # define a cnn unit: x--> conv -> relu -> max_pooling # take 4D input tensor x = layers.conv2d(inputs=input, num_outputs=num_output, kernel_size=kernel_size, stride=stride, activation_fn=tf.nn.relu) x = tf.contrib.layers.max_pool2d(x, kernel_size=[2, 2], stride=1) return x # ---- CNN ---- # Convolution to map context data to vector # (N * seq_length, patch_size, patch_size, 1) -> 4 conv -> 3fc # output = (N * seq_length, target_size_cnn) with tf.variable_scope("conv_net"): # input, num_channel, kernel_size, stride x = conv2d(self._context_input, 4, 11, 7) x = conv2d(x, 8, 7, 5) x = conv2d(x, 16, 5, 3) x = conv2d(x, 8, 3, 1) x = tf.reshape(x, [self._batch_size * self._seq_length, -1]) x = layers.fully_connected(x, 128) x = layers.fully_connected(x, 64) x = layers.fully_connected(x, self._target_size_cnn) self._initial_output = x # Visualize cnn outputs distribution in Tensorboard # with tf.name_scope('initial_output'): # mean = tf.reduce_mean(self._initial_output) # tf.summary.scalar('mean', mean) # # stddev = tf.sqrt(tf.reduce_mean(tf.square(self._initial_output - mean))) # tf.summary.scalar('stddev', stddev) # tf.summary.histogram('histogram', self._initial_output) # ---- RNN ---- # build inputs for RNN with tf.variable_scope("build_complete_input"): # tansform input_data to delta delta_x = VRU2Model.input_data_delta_tf(self._input_data) self._tf_input_data = delta_x # reshape cnn outputs back to (N, seq_length, target_size_cnn) x = tf.reshape(x, shape=[ self._batch_size, self._seq_length, self._target_size_cnn ]) # concat cnn outputs with delta input, shape (batch_size, seq_length, feature_size + target_size_cnn) complete_input = VRU2Model.concat_tensor(delta_x, x) self._complete_input = complete_input def get_a_cell(): # initialize a recurrent 
unit single_cell = rnn.GRUCell(num_units=self._rnn_size) # wrap a dropout layer if applicable if self._mode == 'train' and self._keep_prob < 1.0: single_cell = rnn.DropoutWrapper( cell=single_cell, output_keep_prob=self._keep_prob) return single_cell with tf.variable_scope("rnn"): cell = rnn.MultiRNNCell( [get_a_cell() for _ in range(self._n_layers)]) # initial cell state _initial_state = cell.zero_state(batch_size=self._batch_size, dtype=tf.float32) # dynamic rnn, output shape (batch_size, seq_length, rnn_size) rnn_output, self._final_state = tf.nn.dynamic_rnn( cell=cell, inputs=complete_input, sequence_length=self._sequence_length, initial_state=_initial_state, dtype=None, parallel_iterations=None, swap_memory=False, time_major=False, scope=None) with tf.variable_scope('fc_'): # reshape to = (batch_size * seq_length, rnn_size) rnn_output = tf.reshape(rnn_output, shape=[-1, self._rnn_size]) # shape = (batch_size * seq_length, target_size) outputs = layers.fully_connected(rnn_output, self._target_size) # reshape to = (batch_size, seq_length, target_size) outputs = tf.reshape( outputs, shape=[self._batch_size, self._seq_length, self._target_size]) self._outputs = outputs with tf.variable_scope('loss'): # transform target data to delta, shape=(batch_size, seq_length, discrete_steps, feature_size) self._target_data_delta = VRU2Model.target_data_delta_tf( self._input_data, self._target_data) # shape = (batch_size, seq_length, discrete_steps, feature_size) outputs = tf.reshape(outputs, shape=[ -1, self._seq_length, self._discrete_steps, self._feature_size ]) self._predicted_outputs = outputs # calculate (x-x')^2, (y-y')^2 sqaure_loss = tf.square( tf.subtract(outputs, self._target_data_delta)) # calculate sqrt ((x-x')^2 + (y-y')^2) loss = tf.sqrt(tf.reduce_sum( sqaure_loss, 2)) # shape = (batch_size, discrete_steps) loss = tf.reduce_mean(loss) self._loss = loss # add summary operations tf.summary.scalar('loss', self._loss) self._summary_op = tf.summary.merge_all() # transform predicted delta to absolute with tf.variable_scope('predicted_final'): self._tf_predicted_outputs = VRU2Model.restore_delta_to_absolute( self._input_data, outputs) if self._mode == 'infer': return # used in training phase to update parameters with tf.variable_scope('optimizer'): self._train_op = tf.train.AdamOptimizer( learning_rate=self._lr).minimize(loss, global_step=self._global_step)
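# The loss above is the mean Euclidean distance between predicted and ground-truth
# displacement vectors: sqrt((x-x')^2 + (y-y')^2) per predicted step, averaged over
# the batch. A minimal sketch of that reduction over the coordinate axis, as the
# inline comments describe (shapes are illustrative).
import tensorflow as tf

pred = tf.placeholder(tf.float32, [None, 8, 5, 2])      # (batch, seq, steps, xy)
target = tf.placeholder(tf.float32, [None, 8, 5, 2])

dist = tf.sqrt(tf.reduce_sum(tf.square(pred - target), axis=3))  # per-step distance
loss = tf.reduce_mean(dist)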
def discriminator_lstm(self, inputs, lengths, reuse=False): """Build DNN model. On first pass will make vars.""" lstm_cell_size = 256 num_projection = 40 lstm_num_layer = 2 # If test of cv , BN should use global mean / stddev is_training = False if self.cross_validation else True with tf.variable_scope('d_model') as scope: if reuse: scope.reuse_variables() if self.batch_norm: normalizer_fn = batch_norm normalizer_params = { "is_training": is_training, "scale": True, "renorm": True } else: normalizer_fn = None normalizer_params = None if self.l2_scale > 0.0 and is_training: weights_regularizer = l2_regularizer(self.l2_scale) else: weights_regularizer = None keep_prob = 1.0 sys.stdout.flush() # Apply input noisy layer if not reuse: print("*** Discriminator summary ***") print("D inputs shape: {}".format(inputs.get_shape())) inputs = gaussian_noise_layer(inputs, self.disc_noise_std) # h = fully_connected(inputs, num_projection, # activation_fn=leakyrelu, # normalizer_fn=normalizer_fn, # normalizer_params=normalizer_params, # weights_initializer=xavier_initializer(), # weights_regularizer=weights_regularizer, # biases_initializer=tf.zeros_initializer()) def lstm_cell(): return tf.contrib.rnn.LSTMCell(lstm_cell_size, use_peepholes=True, initializer=xavier_initializer(), num_proj=num_projection, forget_bias=1.0, state_is_tuple=True, activation=tf.tanh, reuse=reuse) attn_cell = lstm_cell if is_training and self.keep_prob < 1.0: def attn_cell(): return tf.contrib.rnn.DropoutWrapper( lstm_cell(), output_keep_prob=self.keep_prob) cell = tf.contrib.rnn.MultiRNNCell( [attn_cell() for _ in range(lstm_num_layer)], state_is_tuple=True) initial_states = cell.zero_state(self.batch_size, tf.float32) outputs, states = tf.nn.dynamic_rnn(cell, inputs, sequence_length=lengths, initial_state=initial_states, dtype=tf.float32, time_major=False) if not reuse: print("D hidden layer number is {}".format(lstm_num_layer)) print("D cell size is {}".format(lstm_cell_size)) print("D projection num is {}".format(num_projection)) sys.stdout.flush() # Output layer y = fully_connected(outputs, 1, activation_fn=None, weights_initializer=xavier_initializer(), weights_regularizer=weights_regularizer, biases_initializer=tf.zeros_initializer()) # y = tf.clip_by_value(y, -0.5, 1.5) if not reuse: print("d output shape: {}".format(y.get_shape())) print("****************************************") sys.stdout.flush() return y
def infer(self, reuse): cnn = self.cnn activation_fn = tf.nn.relu is_training = True input_dim = cnn.input_dim left_context = cnn.left_context right_context = cnn.right_context splice_dim = left_context + 1 + right_context in_dims = self.inputs.get_shape().as_list() if len(in_dims) == 2: # shape format [batch, width] dims = self.inputs.get_shape().as_list() assert dims[0] == cnn.batch_size inputs = tf.reshape(self.inputs, [dims[0], splice_dim, input_dim]) inputs = tf.expand_dims(inputs, -1) elif len(in_dims) == 3: # shape format [batch, length, width] dims = self.inputs.get_shape().as_list() assert dims[0] == 1 inputs = tf.squeeze(self.inputs, [0]) inputs = tf.reshape(inputs, [-1, splice_dim, input_dim]) inputs = tf.expand_dims(inputs, -1) # If test of cv , BN should use global mean / stddev if cnn.cross_validation: is_training = False with tf.variable_scope('g_model') as scope: if reuse: scope.reuse_variables() if cnn.batch_norm: normalizer_fn = batch_norm normalizer_params = { "is_training": is_training, "scale": True, "renorm": True } else: normalizer_fn = None normalizer_params = None if cnn.l2_scale > 0.0 and is_training: weights_regularizer = l2_regularizer(cnn.l2_scale) else: weights_regularizer = None keep_prob = 1.0 if not reuse: print("*** Generator summary ***") print("G inputs shape: {}".format(inputs.get_shape())) # conv1 # inputs format [batch, in_height, in_width, in_channels] # filters format [filter_height, filter_width, in_channels, out_channels] filter_num = [32, 64] filter_width = [11, 11] assert len(filters_num) == len(filters_num) for i in range(len(filters_num)): inputs = tf.contrib.layers.conv2d( inputs, filters_num[i], [splice_dim, filters_width[i]], activation_fn=activation_fn, normalizer_fn=normalizer_fn, normalizer_params=normalizer_params, weights_initializer=xavier_initializer(), weights_regularizer=weights_regularizer, biases_initializer=tf.zeros_initializer()) if not reuse: print("Conv{} layer output shape: {}".format( i + 1, inputs.get_shape()), end=" *** ") self.nnet_info(normalizer_fn, rced.keep_prob, weights_regularizer) # kernel = tf.get_variable('weights_1', [11, 11, 1, 32], # initializer=tf.truncated_normal_initializer(stddev=0.05), # regularizer=weights_regularizer) # conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME') # biases = tf.get_variable('biases_1', [32], # initializer=tf.constant_initializer(0.1)) # pre_activation = tf.nn.bias_add(conv, biases) # if cnn.batch_norm: # pre_activation = batch_norm(pre_activation, scale=True, # is_training=is_training, # renorm=True) # h = tf.nn.relu(pre_activation) # # pool1 # # pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], # # strides=[1, 2, 2, 1], # # padding='SAME') # if not reuse: # print("Conv1 layer output shape: {}".format(h.get_shape()), # end=" *** ") # self.nnet_info(normalizer_fn, cnn.keep_prob, weights_regularizer) # # conv2 # kernel = tf.get_variable('weights_2', [11, 11, 32, 64], # initializer=tf.truncated_normal_initializer(stddev=0.05), # regularizer=weights_regularizer) # conv = tf.nn.conv2d(h, kernel, [1, 1, 1, 1], padding='SAME') # biases = tf.get_variable('biases_2', [64], # initializer=tf.constant_initializer(0.1)) # pre_activation = tf.nn.bias_add(conv, biases) # if cnn.batch_norm: # pre_activation = batch_norm(pre_activation, scale=True, # is_training=is_training, # renorm=True) # h = tf.nn.relu(pre_activation) # # pool2 # # pool2 = tf.nn.max_pool(conv2, ksize=[1, 3, 3, 1], # # strides=[1, 2, 2, 1], # # padding='SAME') # if not reuse: # print("Conv2 layer output shape: 
{}".format(h.get_shape()), # end=" *** ") # self.nnet_info(normalizer_fn, cnn.keep_prob, weights_regularizer) # local3 # Move everything into depth so we can perform a single matrix multiply. # reshape = tf.reshape(h, [cnn.batch_size, -1]) reshape = tf.reshape( inputs, [-1, splice_dim * input_dim * filters_num[-1]]) h = fully_connected(reshape, 512, activation_fn=activation_fn, normalizer_fn=normalizer_fn, normalizer_params=normalizer_params, weights_initializer=xavier_initializer(), weights_regularizer=weights_regularizer, biases_initializer=tf.zeros_initializer()) if not reuse: print("Local3 layer output shape: {}".format(h.get_shape()), end=" *** ") self.nnet_info(normalizer_fn, cnn.keep_prob, weights_regularizer) # local4 h = fully_connected( h, 512, activation_fn=activation_fn, normalizer_fn=normalizer_fn, normalizer_params=normalizer_params, weights_initializer=xavier_initializer(), weights_regularizer=weights_regularizer, biases_initializer=tf.constant_initializer(0.1)) if not reuse: print("Local4 layer output shape: {}".format(h.get_shape()), end=" *** ") self.nnet_info(normalizer_fn, cnn.keep_prob, weights_regularizer) # Linear output y = fully_connected( h, cnn.output_dim, activation_fn=None, weights_initializer=xavier_initializer(), weights_regularizer=weights_regularizer, biases_initializer=tf.constant_initializer(0.1)) if not reuse: print("G output shape: {}".format(y.get_shape())) sys.stdout.flush() return y
def get_symble(input_image, **kargs): w_decay = kargs.get('w_decay', 1e-5) net_name = kargs.get('net_name', 'mobilenet') w_r = tfc.l2_regularizer(w_decay) width_mult = kargs.get('width_mult', 1.0) train_fg = kargs.get('train_fg', True) class_num = kargs.get('class_num', 81) assert net_name.lower() in [ 'mobilenet', 'mobilenetv2' ], "Please sel netname: mobilenet or mobilenetv2" cn = [ int(x * width_mult) for x in [32, 16, 24, 32, 64, 96, 160, 320, 1280] ] #cn = [int(x*width_mult) for x in [32,16,24,32,64,96,160,320,256]] with tf.variable_scope(net_name): b0 = Conv_block(input_image,3,filter_num=cn[0],conv_stride=2,relu_type='relu6', \ name='cb1',w_regular=w_r,**kargs) b1 = Inverted_residual_seq(b0, 1, cn[0], cn[1], 1, 1, **kargs) #1 b2 = Inverted_residual_seq(b1, 6, cn[1], cn[2], 2, 1, seq_name='res2', w_regular=w_r, **kargs) #2 b3 = Inverted_residual_seq(b2, 6, cn[2], cn[3], 2, 1, seq_name='res3', w_regular=w_r, **kargs) #3 b4 = Inverted_residual_seq(b3, 6, cn[3], cn[4], 2, 1, seq_name='res4', w_regular=w_r, **kargs) #4 b5 = Inverted_residual_seq(b4, 6, cn[4], cn[5], 2, 1, seq_name='res5', w_regular=w_r, **kargs) #3 b6 = Inverted_residual_seq(b5, 6, cn[5], cn[6], 2, 1, seq_name='res6', w_regular=w_r, **kargs) #3 b7 = Inverted_residual_seq(b6, 6, cn[6], cn[7], 1, 1, seq_name='res7', w_regular=w_r, **kargs) #1 b8 = Conv_block(b7,1,filter_num=cn[8],conv_stride=1,relu_type='relu6', \ name='cb2',**kargs) pool = GlobalAveragePooling2D(b8, name='pool') #fc = tfc.fully_connected(pool,class_num,activation_fn=tf.nn.relu6,trainable=train_fg,\ # weights_regularizer=w_r,scope='fc') fc = tfc.fully_connected(pool,class_num,activation_fn=tf.sigmoid,trainable=train_fg,\ weights_regularizer=w_r,scope='fc') #dp = tfc.dropout(fc,keep_prob=0.5,is_training=train_fg,scope='drop_out') return fc
def __init__(self, max_seq_len, max_sent_len, num_classes, vocab_size, embedding_size, max_grad_norm, dropout_keep_proba, learning_rate): ## Parameters self.learning_rate = learning_rate self.vocab_size = vocab_size self.num_classes = num_classes self.max_seq_len = max_seq_len self.embedding_size = embedding_size self.word_encoder_num_hidden = max_seq_len self.word_output_size = max_seq_len self.sentence_encoder_num_hidden = max_sent_len self.sentence_output_size = max_sent_len self.max_grad_norm = max_grad_norm self.dropout_keep_proba = dropout_keep_proba # tf graph input self.input_x = tf.placeholder(shape=[None, None, None], dtype=tf.int32, name="input_x") self.input_y = tf.placeholder(shape=[None, self.num_classes], dtype=tf.int32, name="input_y") self.word_lengths = tf.placeholder(shape=[None, None], dtype=tf.int32, name="word_lengths") self.sentence_lengths = tf.placeholder(shape=[ None, ], dtype=tf.int32, name="sentence_lengths") self.train = tf.placeholder(dtype=tf.bool, name="train") # input_x dims (self.document_size, self.sentence_size, self.word_size) = tf.unstack(tf.shape(self.input_x)) with tf.device("/gpu:0"), tf.name_scope("embedding_layer"): w = tf.Variable(tf.random_uniform( [self.vocab_size, self.embedding_size], -1.0, 1.0), dtype=tf.float32, name="w") self.input_x_embedded = tf.nn.embedding_lookup(w, self.input_x) # reshape input_x after embedding self.input_x_embedded = tf.reshape(self.input_x_embedded, [ self.document_size * self.sentence_size, self.word_size, self.embedding_size ]) self.input_x_embedded_lengths = tf.reshape( self.word_lengths, [self.document_size * self.sentence_size]) with tf.variable_scope("word_level"): self.word_encoder_outputs = self.bidirectional_RNN( num_hidden=self.word_encoder_num_hidden, inputs=self.input_x_embedded) word_level_output = self.attention( inputs=self.word_encoder_outputs, output_size=self.word_output_size) with tf.variable_scope("dropout"): word_level_output = layers.dropout( word_level_output, keep_prob=self.dropout_keep_proba, train=self.train) # reshape word_level output self.sentence_encoder_inputs = tf.reshape( word_level_output, [self.document_size, self.sentence_size, self.word_output_size]) with tf.variable_scope("sentence_level"): self.sentence_encoder_outputs = self.bidirectional_RNN( num_hidden=self.sentence_encoder_num_hidden, inputs=self.sentence_encoder_inputs) sentence_level_output = self.attention( inputs=self.sentence_encoder_outputs, output_size=self.sentence_output_size) with tf.variable_scope("dropout"): sentence_level_output = layers.dropout( sentence_level_output, keep_prob=self.dropout_keep_proba, train=self.train) # Final model prediction with tf.variable_scope("classifier_output"): self.logits = layers.fully_connected(sentence_level_output, self.num_classes, activation_fn=None) self.predictions = tf.argmax(self.logits, axis=1, name="predictions") # Calculate mean cross-entropy loss with tf.variable_scope("loss"): losses = tf.nn.softmax_cross_entropy_with_logits( labels=self.input_y, logits=self.logits) self.loss = tf.reduce_mean(losses) # Accuracy with tf.variable_scope("accuracy"): correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, axis=1)) self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
def apply(self, is_train, context_embed, answer, context_mask=None): init_fn = get_keras_initialization(self.init) m1, m2 = self.predictor.apply(is_train, context_embed, context_mask) self.m1 = m1 self.m2 = m2 if m1.shape.as_list()[-1] != 1: with tf.variable_scope("start_pred"): start_logits = fully_connected(m1, 1, activation_fn=None, weights_initializer=init_fn) else: start_logits = m1 start_logits = tf.squeeze(start_logits, squeeze_dims=[2]) if m1.shape.as_list()[-1] != 1: with tf.variable_scope("end_pred"): end_logits = fully_connected(m2, 1, activation_fn=None, weights_initializer=init_fn) else: end_logits = m2 end_logits = tf.squeeze(end_logits, squeeze_dims=[2]) masked_start_logits = exp_mask(start_logits, context_mask) masked_end_logits = exp_mask(end_logits, context_mask) start_atten = tf.einsum("ajk,aj->ak", m1, tf.nn.softmax(masked_start_logits)) end_atten = tf.einsum("ajk,aj->ak", m2, tf.nn.softmax(masked_end_logits)) with tf.variable_scope("encode_context"): enc = self.encoder.apply(is_train, context_embed, context_mask) if len(enc.shape) == 3: _, encodings, fe = enc.shape.as_list() enc = tf.reshape(enc, (-1, encodings * fe)) with tf.variable_scope("confidence"): conf = [start_atten, end_atten, enc] none_logit = self.confidence_predictor.apply( is_train, tf.concat(conf, axis=1)) with tf.variable_scope("confidence_logits"): none_logit = fully_connected(none_logit, 1, activation_fn=None, weights_initializer=init_fn) none_logit = tf.squeeze(none_logit, axis=1) batch_dim = tf.shape(start_logits)[0] # (batch, (l * l)) logits for each (start, end) pair all_logits = tf.reshape( tf.expand_dims(masked_start_logits, 1) + tf.expand_dims(masked_end_logits, 2), (batch_dim, -1)) # (batch, (l * l) + 1) logits including the none option all_logits = tf.concat( [all_logits, tf.expand_dims(none_logit, 1)], axis=1) log_norms = tf.reduce_logsumexp(all_logits, axis=1) # Now build a "correctness" mask in the same format correct_mask = tf.logical_and(tf.expand_dims(answer[0], 1), tf.expand_dims(answer[1], 2)) correct_mask = tf.reshape(correct_mask, (batch_dim, -1)) correct_mask = tf.concat([ correct_mask, tf.logical_not(tf.reduce_any(answer[0], axis=1, keep_dims=True)) ], axis=1) # Note we are happily allowing the model to place weights on "backwards" spans, and also giving # it points for predicting spans that start and end at different answer spans. It would be easy to # fix by masking out some of the `all_logit` matrix and specify a more accuracy correct_mask, but I # in general left it this way to be consistent with the independent bound models that do the same. # Some early tests found properly masking things to not make much difference (or even to hurt), but it # still could be an avenue for improvement log_correct = tf.reduce_logsumexp( all_logits + VERY_NEGATIVE_NUMBER * (1 - tf.cast(correct_mask, tf.float32)), axis=1) per_sample_loss = -(log_correct - log_norms) tf.add_to_collection("PER_SAMPLE_LOSSES", per_sample_loss) loss = tf.reduce_mean(-(log_correct - log_norms)) probs = tf.nn.softmax(all_logits) tf.add_to_collection(tf.GraphKeys.LOSSES, loss) return ConfidencePrediction(probs[:, :-1], masked_start_logits, masked_end_logits, probs[:, -1], none_logit, context_mask)
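# The confidence head above scores every candidate (start, end) pair by broadcasting
# the per-token start and end logits against each other, then appends a single
# "no answer" logit before normalizing. A shape-level sketch of that construction
# (the sequence length 30 is illustrative).
import tensorflow as tf

start_logits = tf.placeholder(tf.float32, [None, 30])   # per-token start scores
end_logits = tf.placeholder(tf.float32, [None, 30])     # per-token end scores
none_logit = tf.placeholder(tf.float32, [None])         # "no answer" score

# (batch, 30, 30) grid where entry [b, i, j] = end[b, i] + start[b, j]
pair_logits = tf.expand_dims(start_logits, 1) + tf.expand_dims(end_logits, 2)
all_logits = tf.concat([tf.reshape(pair_logits, [-1, 30 * 30]),
                        tf.expand_dims(none_logit, 1)], axis=1)
span_probs = tf.nn.softmax(all_logits)   # last column = P(no answer)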
def _dnn_model_fn(features, labels, mode, params, config=None): """Deep Neural Net model_fn. Args: features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`). labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype `int32` or `int64` in the range `[0, n_classes)`. mode: Defines whether this is training, evaluation or prediction. See `ModeKeys`. params: A dict of hyperparameters. The following hyperparameters are expected: * head: A `_Head` instance. * hidden_units: List of hidden units per layer. * feature_columns: An iterable containing all the feature columns used by the model. * optimizer: string, `Optimizer` object, or callable that defines the optimizer to use for training. If `None`, will use the Adagrad optimizer with a default learning rate of 0.05. * activation_fn: Activation function applied to each layer. If `None`, will use `tf.nn.relu`. Note that a string containing the unqualified name of the op may also be provided, e.g., "relu", "tanh", or "sigmoid". * dropout: When not `None`, the probability we will drop out a given coordinate. * gradient_clip_norm: A float > 0. If provided, gradients are clipped to their global norm with this clipping ratio. * embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to multiply with learning rate for the embedding variables. * input_layer_min_slice_size: Optional. The min slice size of input layer partitions. If not provided, will use the default of 64M. config: `RunConfig` object to configure the runtime settings. Returns: predictions: A dict of `Tensor` objects. loss: A scalar containing the loss of the step. train_op: The op for training. """ head = params["head"] hidden_units = params["hidden_units"] feature_columns = params["feature_columns"] optimizer = params.get("optimizer") or "Adagrad" activation_fn = _get_activation_fn(params.get("activation_fn")) dropout = params.get("dropout") gradient_clip_norm = params.get("gradient_clip_norm") input_layer_min_slice_size = ( params.get("input_layer_min_slice_size") or 64 << 20) num_ps_replicas = config.num_ps_replicas if config else 0 embedding_lr_multipliers = params.get("embedding_lr_multipliers", {}) features = _get_feature_dict(features) parent_scope = "dnn" partitioner = partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas) with variable_scope.variable_scope( parent_scope, values=tuple(six.itervalues(features)), partitioner=partitioner): input_layer_partitioner = ( partitioned_variables.min_max_variable_partitioner( max_partitions=num_ps_replicas, min_slice_size=input_layer_min_slice_size)) with variable_scope.variable_scope( "input_from_feature_columns", values=tuple(six.itervalues(features)), partitioner=input_layer_partitioner) as input_layer_scope: if all( isinstance(fc, feature_column._FeatureColumn) # pylint: disable=protected-access for fc in feature_columns ): net = layers.input_from_feature_columns( columns_to_tensors=features, feature_columns=feature_columns, weight_collections=[parent_scope], scope=input_layer_scope) else: net = fc_core.input_layer( features=features, feature_columns=feature_columns, weight_collections=[parent_scope]) for layer_id, num_hidden_units in enumerate(hidden_units): with variable_scope.variable_scope( "hiddenlayer_%d" % layer_id, values=(net,)) as hidden_layer_scope: net = layers.fully_connected( net, num_hidden_units, activation_fn=activation_fn, variables_collections=[parent_scope], scope=hidden_layer_scope) if dropout is 
not None and mode == model_fn.ModeKeys.TRAIN: net = layers.dropout(net, keep_prob=(1.0 - dropout)) _add_hidden_layer_summary(net, hidden_layer_scope.name) with variable_scope.variable_scope( "logits", values=(net,)) as logits_scope: logits = layers.fully_connected( net, head.logits_dimension, activation_fn=None, variables_collections=[parent_scope], scope=logits_scope) _add_hidden_layer_summary(logits, logits_scope.name) def _train_op_fn(loss): """Returns the op to optimize the loss.""" return optimizers.optimize_loss( loss=loss, global_step=training_util.get_global_step(), learning_rate=_LEARNING_RATE, optimizer=_get_optimizer(optimizer), gradient_multipliers=( dnn_linear_combined._extract_embedding_lr_multipliers( # pylint: disable=protected-access embedding_lr_multipliers, parent_scope, input_layer_scope.name)), clip_gradients=gradient_clip_norm, name=parent_scope, # Empty summaries to prevent optimizers from logging training_loss. summaries=[]) return head.create_model_fn_ops( features=features, mode=mode, labels=labels, train_op_fn=_train_op_fn, logits=logits)