Example #1
0
def model(inpt, num_actions, scope, reuse=False):
    """Build a one-hidden-layer MLP mapping an observation to per-action values.

    Args:
        inpt: observation tensor fed to the first fully connected layer.
        num_actions: width of the final, activation-free layer.
        scope: variable scope under which both layers are created.
        reuse: forwarded to ``tf.variable_scope``.

    Returns:
        The output tensor of the final fully connected layer.
    """
    with tf.variable_scope(scope, reuse=reuse):
        # 64 tanh units, then a linear read-out with one value per action.
        hidden = layers.fully_connected(inpt, num_outputs=64, activation_fn=tf.nn.tanh)
        return layers.fully_connected(hidden, num_outputs=num_actions, activation_fn=None)
Example #2
0
def model(img_in, num_actions, scope, noisy=False, reuse=False,
          concat_softmax=False):
    """Build a convolutional Q-network: three conv layers and a two-layer head.

    Args:
        img_in: input tensor fed to the first convolution.
        num_actions: number of outputs of the final layer.
        scope: variable scope wrapping the whole network.
        noisy: when true, the head is built from ``noisy_dense`` layers
            (ref: https://arxiv.org/abs/1706.10295) instead of
            ``layers.fully_connected``.
        reuse: forwarded to ``tf.variable_scope``.
        concat_softmax: when true, a softmax is applied to the head output
            (inspired by deep-rl-attack).

    Returns:
        The head output, passed through a softmax if ``concat_softmax`` is set.
    """
    # (num_outputs, kernel_size, stride) of the original architecture.
    conv_specs = ((32, 8, 4), (64, 4, 2), (64, 3, 1))

    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope("convnet"):
            features = img_in
            for filters, kernel, step in conv_specs:
                features = layers.convolution2d(features,
                                                num_outputs=filters,
                                                kernel_size=kernel,
                                                stride=step,
                                                activation_fn=tf.nn.relu)
        flat = layers.flatten(features)

        with tf.variable_scope("action_value"):
            if not noisy:
                hidden = layers.fully_connected(flat, num_outputs=512,
                                                activation_fn=tf.nn.relu)
                scores = layers.fully_connected(hidden,
                                                num_outputs=num_actions,
                                                activation_fn=None)
            else:
                hidden = noisy_dense(flat, name='noisy_fc1', size=512,
                                     activation_fn=tf.nn.relu)
                scores = noisy_dense(hidden, name='noisy_fc2',
                                     size=num_actions)
            if concat_softmax:
                scores = tf.nn.softmax(scores)
        return scores
Example #3
0
def _cnn_to_mlp(convs, hiddens, dueling, inpt, num_actions, scope, reuse=False, layer_norm=False):
    """Build a conv stack followed by an MLP head, optionally with a dueling split.

    Args:
        convs: iterable of ``(num_outputs, kernel_size, stride)`` triples, one
            per convolution layer.
        hiddens: iterable of hidden-layer widths used by each head.
        dueling: when true, a separate "state_value" head is built and combined
            with the mean-centred action scores.
        inpt: input tensor fed to the first convolution.
        num_actions: number of outputs of the action head.
        scope: variable scope wrapping the whole network.
        reuse: forwarded to ``tf.variable_scope``.
        layer_norm: when true, layer normalization is applied before each
            hidden ReLU.

    Returns:
        The action scores, or ``state_score + centred action scores`` when
        ``dueling`` is set.
    """
    def hidden_stack(features):
        # Linear layer -> optional layer norm -> ReLU, once per hidden width.
        for width in hiddens:
            features = layers.fully_connected(features, num_outputs=width, activation_fn=None)
            if layer_norm:
                features = layers.layer_norm(features, center=True, scale=True)
            features = tf.nn.relu(features)
        return features

    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope("convnet"):
            feature_map = inpt
            for num_outputs, kernel_size, stride in convs:
                feature_map = layers.convolution2d(feature_map,
                                                   num_outputs=num_outputs,
                                                   kernel_size=kernel_size,
                                                   stride=stride,
                                                   activation_fn=tf.nn.relu)
        conv_out = layers.flatten(feature_map)

        with tf.variable_scope("action_value"):
            action_hidden = hidden_stack(conv_out)
            action_scores = layers.fully_connected(action_hidden, num_outputs=num_actions, activation_fn=None)

        if not dueling:
            return action_scores

        with tf.variable_scope("state_value"):
            state_hidden = hidden_stack(conv_out)
            state_score = layers.fully_connected(state_hidden, num_outputs=1, activation_fn=None)

        # Subtract the per-row mean of the action scores before adding the state value.
        mean_scores = tf.reduce_mean(action_scores, 1)
        centered_scores = action_scores - tf.expand_dims(mean_scores, 1)
        return state_score + centered_scores
Example #4
0
File: nets.py Project: 1202kbs/GAN
	def __call__(self, x, reuse=False):
		"""Apply a two-layer fully connected head to ``x`` and return 10 raw outputs.

		Args:
			x: input tensor of the first fully connected layer.
			reuse: when true, variables of the ``self.name`` scope are reused.

		Returns:
			The activation-free output of the second layer (10 classes).
		"""
		def make_init():
			# Fresh normal initializer, called with (0, 0.02), for each layer.
			return tf.random_normal_initializer(0, 0.02)

		with tf.variable_scope(self.name) as vs:
			if reuse:
				vs.reuse_variables()
			hidden = tcl.fully_connected(
				x, 128,
				activation_fn=tf.nn.relu,
				weights_initializer=make_init())
			logits = tcl.fully_connected(
				hidden, 10,
				activation_fn=None,
				weights_initializer=make_init())
		return logits
Example #5
0
def dueling_model(img_in, num_actions, scope, reuse=False, layer_norm=False):
    """Build a dueling Q-network as described in https://arxiv.org/abs/1511.06581.

    Args:
        img_in: input tensor fed to the first convolution.
        num_actions: number of outputs of the action head.
        scope: variable scope wrapping the whole network.
        reuse: forwarded to ``tf.variable_scope``.
        layer_norm: when true, hidden activations go through
            ``layer_norm_fn(..., relu=True)`` instead of a plain ReLU.

    Returns:
        ``state_score`` plus the mean-centred action scores.
    """
    def activate(pre_activation):
        # Either layer norm with built-in ReLU, or a plain ReLU.
        if layer_norm:
            return layer_norm_fn(pre_activation, relu=True)
        return tf.nn.relu(pre_activation)

    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope("convnet"):
            # original architecture: (num_outputs, kernel_size, stride)
            features = img_in
            for filters, kernel, step in ((32, 8, 4), (64, 4, 2), (64, 3, 1)):
                features = layers.convolution2d(features, num_outputs=filters, kernel_size=kernel,
                                                stride=step, activation_fn=tf.nn.relu)
        conv_out = layers.flatten(features)

        with tf.variable_scope("state_value"):
            state_pre = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
            state_score = layers.fully_connected(activate(state_pre), num_outputs=1, activation_fn=None)

        with tf.variable_scope("action_value"):
            action_pre = layers.fully_connected(conv_out, num_outputs=512, activation_fn=None)
            raw_scores = layers.fully_connected(activate(action_pre), num_outputs=num_actions, activation_fn=None)
            mean_scores = tf.reduce_mean(raw_scores, 1)
            centered_scores = raw_scores - tf.expand_dims(mean_scores, 1)

        return state_score + centered_scores
Example #6
0
 def __init__(self, input_size=4, hidden_size=2, gamma=0.95,
              action_size=2, alpha=0.1):
     """Build the network, its loss, and the gradient/update ops.

     Args:
         input_size: width of the input placeholder.
         hidden_size: number of units in the hidden layer.
         gamma: stored on the instance; not used inside this constructor.
         action_size: number of outputs of the softmax layer.
         alpha: learning rate handed to the Adam optimizer.
     """
     self.input_size, self.hidden_size = input_size, hidden_size
     self.gamma = gamma
     self.action_size = action_size
     self.alpha = alpha
     # Snapshot of the hyper parameters; taken before any other attribute is set.
     self.params = self.__dict__.copy()

     # Placeholders for observations, chosen actions and rewards.
     self.input_pl = tf.placeholder(tf.float32, [None, input_size])
     self.action_pl = tf.placeholder(tf.int32, [None])
     self.reward_pl = tf.placeholder(tf.float32, [None])

     # Two fully connected layers without biases: ReLU hidden, softmax output.
     hidden = layers.fully_connected(
         self.input_pl, hidden_size,
         biases_initializer=None, activation_fn=tf.nn.relu)
     self.output = layers.fully_connected(
         hidden, action_size,
         biases_initializer=None, activation_fn=tf.nn.softmax)

     # Pick, per row, the output that corresponds to the action taken.
     action_mask = tf.one_hot(self.action_pl, action_size)
     chosen_output = tf.reduce_sum(self.output * action_mask, axis=1)
     self.loss = -tf.reduce_mean(tf.log(chosen_output) * self.reward_pl)

     # One untyped-shape placeholder per trainable variable; these are paired
     # with the variables in apply_gradients below, i.e. they take the place
     # of the gradients in the update op.
     train_vars = tf.trainable_variables()
     self.variable_pls = [tf.placeholder(tf.float32) for _ in train_vars]
     self.gradients = tf.gradients(self.loss, train_vars)
     optimizer = tf.train.AdamOptimizer(learning_rate=alpha)
     self.update = optimizer.apply_gradients(zip(self.variable_pls, train_vars))
Example #7
0
def bert_self_attention(config, hidden_states, attention_mask):
    """Compute scaled dot-product self-attention over ``hidden_states``.

    Args:
        config: object providing ``hidden_size``, ``attention_head_size``,
            ``all_head_size`` and ``attention_probs_dropout_prob``.
        hidden_states: tensor projected into queries, keys and values.
        attention_mask: tensor added to the scaled attention scores before
            the softmax (precomputed by the caller).

    Returns:
        The context tensor, reshaped so that its last dimension is
        ``config.all_head_size``.
    """
    def project(fc_scope):
        # Linear projection of the hidden states, one scope per role.
        return layers.fully_connected(hidden_states, config.hidden_size,
                                      scope=fc_scope, activation_fn=None)

    with tf.variable_scope("BertSelfAttention"):
        mixed_q = project("FCquery")
        mixed_k = project("FCkey")
        mixed_v = project("FCvalue")
        queries = transpose_for_scores(config, mixed_q)
        keys = transpose_for_scores(config, mixed_k)
        values = transpose_for_scores(config, mixed_v)

        # Raw attention scores: dot product between "query" and "key".
        raw_scores = tf.matmul(queries, tf.transpose(keys, [0, 1, 3, 2]))
        # TODO(jonathan): the output of matmul is different than pyTorch's expected broadcasting
        # behavior... investigate
        scaled_scores = raw_scores / np.sqrt(config.attention_head_size)
        # Apply the attention mask (precomputed for all layers by the caller).
        masked_scores = scaled_scores + attention_mask

        # Normalize the attention scores to probabilities.
        probs = tf.nn.softmax(masked_scores, axis=-1)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        keep = 1.0 - config.attention_probs_dropout_prob
        probs = tf.nn.dropout(probs, keep_prob=keep)

        context = tf.matmul(probs, values)
        context = tf.transpose(context, (0, 2, 1, 3))
        # Keep the first two (dynamic) dimensions and merge the rest.
        merged_shape = [tf.shape(context)[i] for i in range(2)] + [config.all_head_size]
        context = tf.reshape(context, merged_shape)
    return context
def model_fn(x, target, mode, params):
    """Model function for Estimator: a two-conv-layer classifier with 10 outputs.

    Args:
        x: input tensor; reshaped to ``[-1, 28, 28, 1]``.
        target: label tensor; cast to float32 and used as the cross-entropy
            labels (presumably one-hot over the 10 classes -- confirm with caller).
        mode: compared against ``ModeKeys.TRAIN`` to enable dropout.
        params: dict providing ``"dropout"`` (keep probability) and
            ``"learning_rate"``.

    Returns:
        A ``(predictions, cross_entropy, train_op)`` tuple, where predictions
        is the argmax over the logits.
    """
    y_ = tf.cast(target, tf.float32)

    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # first convolutional layer
    h_conv1 = layers.convolution2d(x_image, 32, [5, 5])
    h_pool1 = layers.max_pool2d(h_conv1, [2, 2])

    # second convolutional layer
    h_conv2 = layers.convolution2d(h_pool1, 64, [5, 5])
    h_pool2 = layers.max_pool2d(h_conv2, [2, 2])

    # densely connected layer
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = layers.fully_connected(h_pool2_flat, 1024)
    h_fc1_drop = layers.dropout(
        h_fc1, keep_prob=params["dropout"],
        is_training=(mode == ModeKeys.TRAIN))

    # readout layer
    y_conv = layers.fully_connected(h_fc1_drop, 10, activation_fn=None)

    # Keyword arguments are required here: TensorFlow >= 1.0 rejects positional
    # calls, and keywords also avoid any ambiguity about the logits/labels order.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
    train_op = tf.contrib.layers.optimize_loss(
        loss=cross_entropy,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=params["learning_rate"],
        optimizer="Adam")

    predictions = tf.argmax(y_conv, 1)
    return predictions, cross_entropy, train_op
Example #9
0
def _build_q_network(registry, inputs, num_actions, config):
    """Build action-value (and optionally dueling state-value) heads on a catalog model.

    Args:
        registry: forwarded to ``ModelCatalog.get_model``.
        inputs: input tensor forwarded to ``ModelCatalog.get_model``.
        num_actions: number of outputs of the action head.
        config: dict providing ``"dueling"``, ``"hiddens"`` and ``"model"``.

    Returns:
        The action scores, or ``state_score + centred action scores`` when
        ``config["dueling"]`` is truthy.
    """
    use_dueling = config["dueling"]
    hidden_sizes = config["hiddens"]
    frontend = ModelCatalog.get_model(registry, inputs, 1, config["model"])
    shared_features = frontend.last_layer

    def relu_stack(features):
        # One ReLU fully connected layer per configured hidden size.
        for width in hidden_sizes:
            features = layers.fully_connected(
                features, num_outputs=width, activation_fn=tf.nn.relu)
        return features

    with tf.variable_scope("action_value"):
        action_hidden = relu_stack(shared_features)
        action_scores = layers.fully_connected(
            action_hidden, num_outputs=num_actions, activation_fn=None)

    if not use_dueling:
        return action_scores

    with tf.variable_scope("state_value"):
        state_hidden = relu_stack(shared_features)
        state_score = layers.fully_connected(
            state_hidden, num_outputs=1, activation_fn=None)

    # Centre the action scores on their per-row mean before adding the state value.
    mean_scores = tf.reduce_mean(action_scores, 1)
    centered_scores = action_scores - tf.expand_dims(mean_scores, 1)
    return state_score + centered_scores
Example #10
0
  def add_final_training_ops(self,
                             embeddings,
                             all_labels_count,
                             hidden_layer_size=BOTTLENECK_TENSOR_SIZE / 4,
                             dropout_keep_prob=None):
    """Adds a new softmax and fully-connected layer for training.

     The set up for the softmax and fully-connected layers is based on:
     https://tensorflow.org/versions/master/tutorials/mnist/beginners/index.html

     This function can be customized to add arbitrary layers for
     application-specific requirements.
    Args:
      embeddings: The embedding (bottleneck) tensor.
      all_labels_count: The number of all labels including the default label.
      hidden_layer_size: The size of the hidden layer. Roughly 1/4 of the
                         bottleneck tensor size. Non-integer values are
                         truncated to an int.
      dropout_keep_prob: The fraction of activation values that are retained.
                         Dropout is skipped when this is None (or falsy).
    Returns:
      softmax: The softmax tensor. It stores the final scores.
      logits: The logits tensor.
    """
    # The default is computed with `/`, which yields a float under Python 3
    # true division; fully_connected requires an integer number of outputs.
    hidden_layer_size = int(hidden_layer_size)

    with tf.name_scope('input'):
      with tf.name_scope('Wx_plus_b'):
        hidden = layers.fully_connected(embeddings, hidden_layer_size)
        # We need a dropout when the size of the dataset is rather small.
        if dropout_keep_prob:
          hidden = tf.nn.dropout(hidden, dropout_keep_prob)
        logits = layers.fully_connected(
            hidden, all_labels_count, activation_fn=None)

    softmax = tf.nn.softmax(logits, name='softmax')
    return softmax, logits
Example #11
0
    def q_func_builder(input_placeholder, num_actions, scope, reuse=False):
        """Build the Q-function graph on top of the enclosing ``network``.

        Uses ``network``, ``hiddens``, ``layer_norm`` and ``dueling`` from the
        enclosing scope.

        Args:
            input_placeholder: input passed to ``network``.
            num_actions: number of outputs of the action head.
            scope: variable scope wrapping everything built here.
            reuse: forwarded to ``tf.variable_scope``.

        Returns:
            The action scores, or ``state_score + centred action scores`` when
            ``dueling`` is set.

        Raises:
            NotImplementedError: if ``network`` returns a tuple whose second
                element is not None (recurrent policies are not supported).
        """
        def hidden_stack(features):
            # Linear layer -> optional layer norm -> ReLU, once per hidden width.
            for width in hiddens:
                features = layers.fully_connected(features, num_outputs=width, activation_fn=None)
                if layer_norm:
                    features = layers.layer_norm(features, center=True, scale=True)
                features = tf.nn.relu(features)
            return features

        with tf.variable_scope(scope, reuse=reuse):
            net_out = network(input_placeholder)
            if isinstance(net_out, tuple):
                features, recurrent_part = net_out[0], net_out[1]
                if recurrent_part is not None:
                    raise NotImplementedError("DQN is not compatible with recurrent policies yet")
                net_out = features

            flat = layers.flatten(net_out)

            with tf.variable_scope("action_value"):
                action_hidden = hidden_stack(flat)
                action_scores = layers.fully_connected(action_hidden, num_outputs=num_actions, activation_fn=None)

            if not dueling:
                return action_scores

            with tf.variable_scope("state_value"):
                state_hidden = hidden_stack(flat)
                state_score = layers.fully_connected(state_hidden, num_outputs=1, activation_fn=None)

            # Centre the action scores on their per-row mean before adding the state value.
            mean_scores = tf.reduce_mean(action_scores, 1)
            centered_scores = action_scores - tf.expand_dims(mean_scores, 1)
            return state_score + centered_scores
def multilayer_perceptron(x):
    """Build a three-layer perceptron with a hand-written first layer.

    Args:
        x: input tensor multiplied by a ``[784, 256]`` weight matrix.

    Returns:
        The activation-free output of the final 10-unit layer.
    """
    W_fc1 = tf.Variable(tf.random_normal([784, 256], mean=0, stddev=1))
    # The bias must share the float dtype of the weights: a Python list of
    # ints would create an int32 variable that cannot be added to the
    # float32 product inside xw_plus_b.
    b_fc1 = tf.Variable(tf.zeros([256]))
    # NOTE(review): no activation is applied to fc1 -- confirm this is intended.
    fc1 = tf.nn.xw_plus_b(x, W_fc1, b_fc1)
    fc2 = layers.fully_connected(fc1, 256, activation_fn=tf.nn.relu, scope='fc2')
    out = layers.fully_connected(fc2, 10, activation_fn=None, scope='out')
    return out
 def dnn_logits_fn():
   """Build the DNN tower on ``input_layer`` and return its logits.

   One fully connected layer is created per entry of ``dnn_hidden_units``
   (followed by dropout in TRAIN mode when ``dnn_dropout`` is set), then a
   linear layer of width ``head.logits_dimension``. A hidden-layer summary
   is added for every layer, including the logits.
   """
   layer_input = input_layer
   for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
     scope_name = "hiddenlayer_%d" % layer_id
     with variable_scope.variable_scope(
         scope_name, values=(layer_input,)) as layer_scope:
       layer_output = layers.fully_connected(
           layer_input,
           num_hidden_units,
           activation_fn=dnn_activation_fn,
           variables_collections=[dnn_parent_scope],
           scope=layer_scope)
       if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
         keep_prob = 1.0 - dnn_dropout
         layer_output = layers.dropout(layer_output, keep_prob=keep_prob)
     # The summary is added after leaving the layer's variable scope.
     _add_hidden_layer_summary(layer_output, layer_scope.name)
     layer_input = layer_output

   with variable_scope.variable_scope(
       "logits", values=(layer_input,)) as logits_scope:
     logits = layers.fully_connected(
         layer_input,
         head.logits_dimension,
         activation_fn=None,
         variables_collections=[dnn_parent_scope],
         scope=logits_scope)
   _add_hidden_layer_summary(logits, logits_scope.name)
   return logits
Example #14
0
    def create(cls, embeddings, labels, **kwargs):
        """Construct the model, wire up its graph, and return it.

        Args:
            embeddings: stored on the model as ``model.embeddings``.
            labels: label collection; its length is the number of output classes.
            **kwargs: optional settings read here: ``lengths_key``, ``dropout``
                (default 0.5), ``dropin``, ``sess``, ``lengths``, ``y``, ``crf``,
                ``crf_mask``, ``span_type``, ``proj``, ``feed_input``,
                ``activation`` (default ``'tanh'``) and ``constraint``. The full
                dict is also forwarded to ``_record_state``, ``embed`` and
                ``encode``.

        Returns:
            The constructed model, with ``model.probs`` holding the per-step
            class scores reshaped to ``[-1, T, nc]``.
        """
        model = cls()
        model.embeddings = embeddings
        model._record_state(**kwargs)
        model.lengths_key = kwargs.get('lengths_key')

        model.labels = labels
        nc = len(labels)

        # This only exists to make exporting easier
        model.pdrop_value = kwargs.get('dropout', 0.5)
        model.dropin_value = kwargs.get('dropin', {})
        # Only open a new session when the caller did not supply one;
        # `kwargs.get('sess', tf.Session())` would build (and leak) a fresh
        # session on every call because the default is evaluated eagerly.
        model.sess = kwargs['sess'] if 'sess' in kwargs else tf.Session()

        # NOTE(review): these default placeholders are created even when the
        # caller passes `lengths` / `y` -- confirm whether that is relied upon.
        model.lengths = kwargs.get('lengths', tf.placeholder(tf.int32, [None], name="lengths"))
        model.y = kwargs.get('y', tf.placeholder(tf.int32, [None, None], name="y"))
        model.pdrop_in = kwargs.get('dropin', 0.0)
        model.crf = bool(kwargs.get('crf', False))
        model.crf_mask = bool(kwargs.get('crf_mask', False))
        model.span_type = kwargs.get('span_type')
        model.proj = bool(kwargs.get('proj', False))
        model.feed_input = bool(kwargs.get('feed_input', False))
        model.activation_type = kwargs.get('activation', 'tanh')
        model.constraint = kwargs.get('constraint')
        # Wrap the constraint in a non-trainable variable so that it is saved
        # into the checkpoint. This means we won't need to recreate the actual
        # values of it when we reload the model
        if model.constraint is not None:
            constraint = []
            for i, c in enumerate(model.constraint):
                constraint.append(tf.get_variable("constraint_{}".format(i), initializer=c, trainable=False))
            model.constraint = constraint

        embedseq = model.embed(**kwargs)
        # Seed for the output-layer initializer, drawn between embed and encode.
        seed = np.random.randint(10e8)
        enc_out = model.encode(embedseq, **kwargs)

        with tf.variable_scope("output") as model.out_scope:
            if model.feed_input is True:
                enc_out = tf.concat(axis=2, values=[enc_out, embedseq])

            # Converts seq to tensor, back to (B,T,W)
            T = tf.shape(enc_out)[1]
            H = enc_out.get_shape()[2]
            # Flatten from [B x T x H] - > [BT x H]
            enc_out_bt_x_h = tf.reshape(enc_out, [-1, H])
            init = xavier_initializer(True, seed)

            with tf.contrib.slim.arg_scope([fully_connected], weights_initializer=init):
                if model.proj is True:
                    # Optional projection layer with dropout before the output layer.
                    hidden = tf.layers.dropout(fully_connected(enc_out_bt_x_h, H,
                                                           activation_fn=tf_activation(model.activation_type)), model.pdrop_value, training=TRAIN_FLAG())
                    preds = fully_connected(hidden, nc, activation_fn=None, weights_initializer=init)
                else:
                    preds = fully_connected(enc_out_bt_x_h, nc, activation_fn=None, weights_initializer=init)
            model.probs = tf.reshape(preds, [-1, T, nc], name="probs")
        return model
Example #15
0
File: nets.py Project: 1202kbs/GAN
	def __call__(self, z):
		"""Map ``z`` through four fully connected layers to a 64x64x3 tensor.

		Args:
			z: input tensor; its first (dynamic) dimension is kept as the
				first dimension of the output.

		Returns:
			The tanh output of the last layer reshaped to ``[batch, 64, 64, 3]``.
		"""
		with tf.variable_scope(self.name):
			hidden = z
			# Three leaky-ReLU layers with batch norm, then the tanh output layer.
			for width in (4 * 4 * 512, 64, 64):
				hidden = tcl.fully_connected(
					hidden, width,
					activation_fn=lrelu,
					normalizer_fn=tcl.batch_norm)
			flat_image = tcl.fully_connected(
				hidden, 64*64*3,
				activation_fn=tf.nn.tanh,
				normalizer_fn=tcl.batch_norm)
			batch_size = tf.shape(z)[0]
			return tf.reshape(flat_image, tf.stack([batch_size, 64, 64, 3]))
Example #16
0
def auto_encoder(x_1, x_2, x_mask_1, x_mask_2, y, dropout, opt):
    """Build a two-input classifier over a shared embedding and its training op.

    Both inputs are embedded with the same table, passed through a shared
    'trans' fully connected layer, encoded according to ``opt.encoder``,
    combined according to ``opt.combine_enc`` and classified by
    ``discriminator_2layer``.

    Args:
        x_1: first input, passed to ``embedding``.
        x_2: second input, looked up in the embedding table returned for ``x_1``.
        x_mask_1: mask forwarded to the encoder for the first input.
        x_mask_2: mask forwarded to the encoder for the second input.
        y: labels; compared via argmax and used as cross-entropy labels.
        dropout: passed as the second argument of ``tf.nn.dropout`` and
            forwarded to ``discriminator_2layer``.
        opt: options object; reads ``embed_size``, ``encoder``
            (``'aver'``, ``'max'`` or ``'concat'``), ``combine_enc``
            (``'mult'``, ``'concat'``, ``'sub'`` or ``'mix'``), ``category``
            and ``lr``.

    Returns:
        A ``(accuracy, loss, train_op, W_emb)`` tuple.

    Raises:
        ValueError: if ``opt.encoder`` or ``opt.combine_enc`` is not one of
            the supported values.
    """
    x_1_emb, W_emb = embedding(x_1, opt)  # batch L emb
    x_2_emb = tf.nn.embedding_lookup(W_emb, x_2)

    x_1_emb = tf.nn.dropout(x_1_emb, dropout)  # batch L emb
    x_2_emb = tf.nn.dropout(x_2_emb, dropout)  # batch L emb

    # The two calls share the 'trans' layer: the second one reuses its variables.
    # NOTE(review): tf.squeeze without an axis drops every size-1 dimension,
    # including a batch of one -- confirm this is safe for the callers.
    biasInit = tf.constant_initializer(0.001, dtype=tf.float32)
    x_1_emb = layers.fully_connected(tf.squeeze(x_1_emb), num_outputs=opt.embed_size, biases_initializer=biasInit, activation_fn=tf.nn.relu, scope='trans', reuse=None)  # batch L emb
    x_2_emb = layers.fully_connected(tf.squeeze(x_2_emb), num_outputs=opt.embed_size, biases_initializer=biasInit, activation_fn=tf.nn.relu, scope='trans', reuse=True)

    x_1_emb = tf.expand_dims(x_1_emb, 3)  # batch L emb 1
    x_2_emb = tf.expand_dims(x_2_emb, 3)

    if opt.encoder == 'aver':
        H_enc_1 = aver_emb_encoder(x_1_emb, x_mask_1)
        H_enc_2 = aver_emb_encoder(x_2_emb, x_mask_2)
    elif opt.encoder == 'max':
        H_enc_1 = max_emb_encoder(x_1_emb, x_mask_1, opt)
        H_enc_2 = max_emb_encoder(x_2_emb, x_mask_2, opt)
    elif opt.encoder == 'concat':
        H_enc_1 = concat_emb_encoder(x_1_emb, x_mask_1, opt)
        H_enc_2 = concat_emb_encoder(x_2_emb, x_mask_2, opt)
    else:
        # Fail here with a clear message instead of an unbound-name error later.
        raise ValueError("unsupported opt.encoder: %r" % (opt.encoder,))

    # discriminative loss term
    if opt.combine_enc == 'mult':
        H_enc = tf.multiply(H_enc_1, H_enc_2)  # batch * n_gan
    elif opt.combine_enc == 'concat':
        H_enc = tf.concat([H_enc_1, H_enc_2], 1)
    elif opt.combine_enc == 'sub':
        H_enc = tf.subtract(H_enc_1, H_enc_2)
    elif opt.combine_enc == 'mix':
        H_1 = tf.multiply(H_enc_1, H_enc_2)
        H_2 = tf.concat([H_enc_1, H_enc_2], 1)
        H_3 = tf.subtract(H_enc_1, H_enc_2)
        H_enc = tf.concat([H_1, H_2, H_3], 1)
    else:
        raise ValueError("unsupported opt.combine_enc: %r" % (opt.combine_enc,))

    # calculate the accuracy
    logits = discriminator_2layer(H_enc, opt, dropout, prefix='classify_', num_outputs=opt.category, is_reuse=None)
    prob = tf.nn.softmax(logits)

    correct_prediction = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))

    train_op = layers.optimize_loss(
        loss,
        framework.get_global_step(),
        optimizer='Adam',
        learning_rate=opt.lr)

    return accuracy, loss, train_op, W_emb
Example #17
0
def dueling_model(img_in, num_actions, scope, noisy=False, reuse=False,
                  concat_softmax=False):
    """Build a dueling Q-network as described in https://arxiv.org/abs/1511.06581.

    Args:
        img_in: input tensor fed to the first convolution.
        num_actions: number of outputs of the action head.
        scope: variable scope wrapping the whole network.
        noisy: when true, both heads are built from ``noisy_dense`` layers
            (ref: https://arxiv.org/abs/1706.10295) instead of
            ``layers.fully_connected``.
        reuse: forwarded to ``tf.variable_scope``.
        concat_softmax: accepted but not used by this function.

    Returns:
        ``state_score`` plus the mean-centred action scores.
    """
    def build_head(features, output_size):
        # 512-unit ReLU hidden layer followed by a linear output layer.
        if noisy:
            hidden = noisy_dense(features, name='noisy_fc1', size=512,
                                 activation_fn=tf.nn.relu)
            return noisy_dense(hidden, name='noisy_fc2', size=output_size)
        hidden = layers.fully_connected(features, num_outputs=512,
                                        activation_fn=tf.nn.relu)
        return layers.fully_connected(hidden, num_outputs=output_size,
                                      activation_fn=None)

    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope("convnet"):
            # original architecture: (num_outputs, kernel_size, stride)
            features = img_in
            for filters, kernel, step in ((32, 8, 4), (64, 4, 2), (64, 3, 1)):
                features = layers.convolution2d(features, num_outputs=filters,
                                                kernel_size=kernel, stride=step,
                                                activation_fn=tf.nn.relu)
        flat = layers.flatten(features)

        with tf.variable_scope("state_value"):
            state_score = build_head(flat, 1)

        with tf.variable_scope("action_value"):
            raw_scores = build_head(flat, num_actions)
            mean_scores = tf.reduce_mean(raw_scores, 1)
            centered_scores = raw_scores - tf.expand_dims(mean_scores, 1)

        return state_score + centered_scores
Example #18
0
    def inference_graph(self, data):
        """Stack fully connected layers on ``data`` and return the last activations.

        One layer of width ``self.params.layer_size`` is always created;
        ``self.params.num_layers - 1`` further layers follow. Everything is
        placed on the device chosen by ``self.device_assigner`` for
        ``self.layer_num``.
        """
        device = self.device_assigner.get_device(self.layer_num)
        with ops.device(device):
            # The first layer consumes the raw data.
            activations = layers.fully_connected(data, self.params.layer_size)

            extra_layers = self.params.num_layers - 1
            for _ in range(extra_layers):
                # pylint: disable=W0106
                activations = layers.fully_connected(activations, self.params.layer_size)
            return activations
Example #19
0
def model():
    """Build a bidirectional-GRU classifier with its loss, metrics and optimizer.

    Reads ``num_features``, ``num_classes``, ``learning_rate`` and
    ``clip_norm`` from the enclosing module.

    Returns:
        A tuple ``(X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss,
        accuracy, train_op, global_step)``.
    """
    print("building model ...")
    with tf.variable_scope('train'):
        print("building model ...")
        X_pl = tf.placeholder(tf.float32, [None, num_features])
        # Treat every feature as one time step with a single input value.
        X_expand = tf.expand_dims(X_pl, axis=2)
        print("X_pl", X_pl.get_shape())
        t_pl = tf.placeholder(tf.int32, [None,])
        print("t_pl", t_pl.get_shape())
        is_training_pl = tf.placeholder(tf.bool)
        cell_fw = tf.nn.rnn_cell.GRUCell(205)
        cell_bw = tf.nn.rnn_cell.GRUCell(205)
        # Every row uses the full sequence length (sum of ones along axis 1).
        seq_len = tf.reduce_sum(tf.ones(tf.shape(X_pl), dtype=tf.int32), axis=1)
        _, enc_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,
            cell_bw=cell_bw, inputs=X_expand, sequence_length=seq_len,
            dtype=tf.float32)
        # NOTE(review): axis-first argument order matches pre-1.0 TensorFlow;
        # TF >= 1.0 expects tf.concat(values, axis) -- confirm the target version.
        enc_states = tf.concat(1, enc_states)
        enc_states_drop = dropout(enc_states, is_training=is_training_pl)
        l1 = fully_connected(enc_states_drop, 200, activation_fn=None)
        l1 = batch_norm(l1, is_training=is_training_pl)
        l1_relu = relu(l1)
        l1_dropout = dropout(l1_relu, is_training=is_training_pl)
        l2 = fully_connected(l1_dropout, 200, activation_fn=None)
        l2 = batch_norm(l2, is_training=is_training_pl)
        l2_relu = relu(l2)
        l_out = fully_connected(l2_relu, num_outputs=num_classes, activation_fn=None)
        l_out_softmax = tf.nn.softmax(l_out)
        tf.contrib.layers.summarize_variables()

    with tf.variable_scope('metrics'):
        # Keyword arguments work with both the old (logits, labels) signature
        # and TF >= 1.0, which rejects positional calls outright.
        loss = sparse_softmax_cross_entropy_with_logits(logits=l_out, labels=t_pl)
        print("loss", loss.get_shape())
        loss = tf.reduce_mean(loss)
        print("loss", loss.get_shape())
        tf.summary.scalar('train/loss', loss)
        argmax = tf.to_int32(tf.argmax(l_out, 1))
        print("argmax", argmax.get_shape())
        correct = tf.to_float(tf.equal(argmax, t_pl))
        print("correct,", correct.get_shape())
        accuracy = tf.reduce_mean(correct)
        print("accuracy", accuracy.get_shape())

    with tf.variable_scope('optimizer'):
        print("building optimizer ...")
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss)
        gradients, variables = zip(*grads_and_vars)
        # Clip all gradients jointly by their global norm before applying them.
        clipped_gradients, global_norm = (
            tf.clip_by_global_norm(gradients, clip_norm))
        clipped_grads_and_vars = zip(clipped_gradients, variables)

        tf.summary.scalar('train/global_gradient_norm', global_norm)

        train_op = optimizer.apply_gradients(clipped_grads_and_vars, global_step=global_step)

    return X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy, train_op, global_step
def multilayer_perceptron(x):
    """Build a three-layer perceptron and record summaries for the hidden layers.

    Args:
        x: input tensor of the first fully connected layer.

    Returns:
        The activation-free output of the final 10-unit layer.
    """
    # (layer scope / activation summary tag, sparsity summary tag)
    hidden_specs = (
        ('fc1', 'fc1/sparsity'),
        ('fc2', 'fc2/sparsity'),
    )
    activations = x
    for layer_scope, sparsity_tag in hidden_specs:
        activations = layers.fully_connected(activations, 256, activation_fn=tf.nn.relu, scope=layer_scope)
        tf.histogram_summary(layer_scope, activations)
        tf.histogram_summary(sparsity_tag, tf.nn.zero_fraction(activations))
    return layers.fully_connected(activations, 10, activation_fn=None, scope='out')
def multilayer_perceptron(x):
    """Build a three-layer perceptron whose output is guarded by an assertion.

    Args:
        x: input tensor of the first fully connected layer.

    Returns:
        An identity of the final layer's output that depends on an assert op
        checking that every output element is greater than zero.
    """
    hidden1 = layers.fully_connected(x, 256, activation_fn=tf.nn.relu, scope='fc1')
    hidden2 = layers.fully_connected(hidden1, 256, activation_fn=tf.nn.relu, scope='fc2')
    logits = layers.fully_connected(hidden2, 10, activation_fn=None, scope='out')

    all_positive = tf.reduce_all(logits > 0)
    positive_check = tf.Assert(all_positive, [logits], name='assert_out_positive')
    # Tie the assertion to the returned tensor so it runs whenever the output is evaluated.
    with tf.control_dependencies([positive_check]):
        return tf.identity(logits, name='out')
Example #22
0
def _mlp(hiddens, inpt, num_actions, scope, reuse=False, layer_norm=False):
    """Build an MLP Q-function: a stack of hidden layers and a linear output.

    Args:
        hiddens: iterable of hidden-layer widths.
        inpt: input tensor of the first layer.
        num_actions: number of outputs of the final layer.
        scope: variable scope wrapping the network.
        reuse: forwarded to ``tf.variable_scope``.
        layer_norm: when true, layer normalization is applied before each ReLU.

    Returns:
        The activation-free output of the final layer.
    """
    with tf.variable_scope(scope, reuse=reuse):
        features = inpt
        for width in hiddens:
            # Linear layer -> optional layer norm -> ReLU.
            pre_activation = layers.fully_connected(features, num_outputs=width, activation_fn=None)
            if layer_norm:
                pre_activation = layers.layer_norm(pre_activation, center=True, scale=True)
            features = tf.nn.relu(pre_activation)
        return layers.fully_connected(features, num_outputs=num_actions, activation_fn=None)
Example #23
0
    def build_q_network(self, hiddens):
        """Build the Q-network on ``self._inputs`` and store its outputs.

        Sets ``self.Q_t`` (one value per action, no activation) and
        ``self.Q_action`` (argmax of ``self.Q_t`` along dimension 1).

        Args:
            hiddens: iterable of hidden-layer widths.
        """
        features = self._inputs

        for width in hiddens:
            # tanh layer with an L2 weight regularizer (scale 0.1), then dropout.
            dense = layers.fully_connected(
                inputs=features,
                num_outputs=width,
                activation_fn=tf.tanh,
                weights_regularizer=layers.l2_regularizer(scale=0.1))
            features = tf.nn.dropout(dense, self.keep_prob)

        self.Q_t = layers.fully_connected(features, self.num_actions, activation_fn=None)
        self.Q_action = tf.argmax(self.Q_t, dimension=1)
Example #24
0
File: nets.py Project: 1202kbs/GAN
	def __call__(self, x, reuse=True):
		"""Map ``x`` through three hidden layers to a single logit.

		Args:
			x: input tensor; flattened before the first fully connected layer.
			reuse: when true, variables of the ``self.name`` scope are reused.
				NOTE(review): with the default ``reuse=True`` the first call in
				a fresh graph presumably fails because the variables do not
				exist yet -- confirm callers pass ``reuse=False`` first.

		Returns:
			The activation-free output of the final 1-unit layer.
		"""
		with tf.variable_scope(self.name) as vs:
			if reuse:
				vs.reuse_variables()
			# TensorFlow has no `tf.flatten`; the contrib layers module does.
			d = tcl.fully_connected(tcl.flatten(x), 64, activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm)
			d = tcl.fully_connected(d, 64, activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm)
			d = tcl.fully_connected(d, 64, activation_fn=tf.nn.relu, normalizer_fn=tcl.batch_norm)
			logit = tcl.fully_connected(d, 1, activation_fn=None)

		return logit
Example #25
0
def reconstruct_mutual_info(z_c_categoricals,
                            z_c_continuous,
                            categorical_lambda,
                            continuous_lambda,
                            discriminator_hidden,
                            is_training,
                            reuse=None,
                            name="mutual_info"):
    """Build the mutual-information reconstruction losses for latent codes.

    A 128-unit batch-normalized layer on top of `discriminator_hidden` feeds a
    linear layer with one output per categorical class plus one per
    continuous code. Categorical slices are scored with a softmax
    cross-entropy against the matching tensor in `z_c_categoricals`; the
    continuous slice is scored as a unit-variance Gaussian negative
    log-likelihood against `z_c_continuous`.

    Returns:
        A tuple of three batch means: the lambda-weighted total loss, the
        categorical loss and the continuous loss.
    """
    with tf.variable_scope(name, reuse=reuse):
        hidden = layers.fully_connected(
            discriminator_hidden,
            num_outputs=128,
            activation_fn=leaky_rectify,
            normalizer_fn=layers.batch_norm,
            normalizer_params={"is_training": is_training}
        )

        # Static width of each categorical code, in input order.
        cardinalities = [code.get_shape()[1].value for code in z_c_categoricals]
        num_categorical = sum(cardinalities)
        num_continuous = z_c_continuous.get_shape()[1].value

        q_params = layers.fully_connected(
            hidden,
            num_outputs=num_categorical + num_continuous,
            activation_fn=tf.identity
        )

        # Walk the categorical slices left to right, accumulating their losses.
        loss_q_categorical = None
        start = 0
        for code, cardinality in zip(z_c_categoricals, cardinalities):
            stop = start + cardinality
            class_probs = tf.nn.softmax(q_params[:, start:stop])
            code_loss = - tf.reduce_sum(
                tf.log(class_probs + TINY) * code,
                reduction_indices=1
            )
            if loss_q_categorical is None:
                loss_q_categorical = code_loss
            else:
                loss_q_categorical = loss_q_categorical + code_loss
            start = stop

        # The continuous codes occupy the columns after all categorical ones.
        q_mean = q_params[:, num_categorical:num_categorical + num_continuous]
        q_sd = tf.ones_like(q_mean)

        standardized = (z_c_continuous - q_mean) / (q_sd + TINY)
        nll_terms = (
            0.5 * np.log(2 * np.pi) + tf.log(q_sd + TINY) + 0.5 * tf.square(standardized)
        )
        loss_q_continuous = tf.reduce_sum(nll_terms, reduction_indices=1)
        loss_mutual_info = continuous_lambda * loss_q_continuous + categorical_lambda * loss_q_categorical

    mean_total = tf.reduce_mean(loss_mutual_info)
    mean_categorical = tf.reduce_mean(loss_q_categorical)
    mean_continuous = tf.reduce_mean(loss_q_continuous)
    return (mean_total, mean_categorical, mean_continuous)
Example #26
0
 def define_sequence_model(self):
     """Build the sequence model graph inside `self.graph`.

     Creates input / utterance-length placeholders, an optional dropout on the
     input, a stack of hidden layers driven by `self.hidden_layer_type`
     ("tanh" feed-forward layers with batch norm, "lstm" or "gru" recurrent
     cells), a dense output layer when `self.output_type` is "linear", and an
     Adam optimizer when `self.optimizer` is "adam". Key tensors are stored in
     graph collections ("utt_length", "input_layer", "is_training_drop",
     "is_training_batch", "output_layer") for later lookup.
     """
     seed=12345
     np.random.seed(12345)
     # Running list of layer outputs; the last entry feeds the next layer.
     layer_list=[]
     with self.graph.as_default() as g:
         utt_length=tf.placeholder(tf.int32,shape=(None))
         g.add_to_collection(name="utt_length",value=utt_length)
         with tf.name_scope("input"):
              input_layer=tf.placeholder(dtype=tf.float32,shape=(None,None,self.n_in),name="input_layer")
              if self.dropout_rate!=0.0:
                 print "Using dropout to avoid overfitting and the dropout rate is",self.dropout_rate
                 # `is_training_drop` only exists on this branch; it is used
                 # below only when dropout_rate is non-zero.
                 is_training_drop=tf.placeholder(dtype=tf.bool,shape=(),name="is_training_drop")
                 input_layer_drop=dropout(input_layer,self.dropout_rate,is_training=is_training_drop)
                 layer_list.append(input_layer_drop)
                 g.add_to_collection(name="is_training_drop",value=is_training_drop)
              else:
                 layer_list.append(input_layer)
         g.add_to_collection("input_layer",layer_list[0])
         with tf.name_scope("hidden_layer"):
            # Recurrent cells are collected here and stacked after the loop.
            basic_cell=[]
            if "tanh" in self.hidden_layer_type:
                # Batch-norm parameters are only needed by the tanh layers.
                is_training_batch=tf.placeholder(dtype=tf.bool,shape=(),name="is_training_batch")
                bn_params={"is_training":is_training_batch,"decay":0.99,"updates_collections":None}
                g.add_to_collection("is_training_batch",is_training_batch)
            for i in xrange(len(self.hidden_layer_type)):
                if self.dropout_rate!=0.0:
                    if self.hidden_layer_type[i]=="tanh":
                        new_layer=fully_connected(layer_list[-1],self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,normalizer_params=bn_params)
                        new_layer_drop=dropout(new_layer,self.dropout_rate,is_training=is_training_drop)
                        layer_list.append(new_layer_drop)
                    if self.hidden_layer_type[i]=="lstm":
                        basic_cell.append(MyDropoutWrapper(BasicLSTMCell(num_units=self.hidden_layer_size[i]),self.dropout_rate,self.dropout_rate,is_training=is_training_drop))
                    if self.hidden_layer_type[i]=="gru":
                        basic_cell.append(MyDropoutWrapper(GRUCell(num_units=self.hidden_layer_size[i]),self.dropout_rate,self.dropout_rate,is_training=is_training_drop))
                else:
                    # Without dropout the recurrent cells use layer-normalized variants.
                    if self.hidden_layer_type[i]=="tanh":
                       new_layer=fully_connected(layer_list[-1],self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,normalizer_params=bn_params)
                       layer_list.append(new_layer)
                    if self.hidden_layer_type[i]=="lstm":
                       basic_cell.append(LayerNormBasicLSTMCell(num_units=self.hidden_layer_size[i]))
                    if self.hidden_layer_type[i]=="gru":
                       basic_cell.append(LayerNormGRUCell(num_units=self.hidden_layer_size[i]))
            # All recurrent cells run as one stack on the last feed-forward
            # output, regardless of where they appeared in hidden_layer_type.
            multi_cell=MultiRNNCell(basic_cell)
            rnn_outputs,rnn_states=tf.nn.dynamic_rnn(multi_cell,layer_list[-1],dtype=tf.float32,sequence_length=utt_length)
            layer_list.append(rnn_outputs)
         with tf.name_scope("output_layer"):
              if self.output_type=="linear" :
                  output_layer=tf.layers.dense(rnn_outputs,self.n_out)
               #  stacked_rnn_outputs=tf.reshape(rnn_outputs,[-1,self.n_out])
               #  stacked_outputs=tf.layers.dense(stacked_rnn_outputs,self.n_out)
               #  output_layer=tf.reshape(stacked_outputs,[-1,utt_length,self.n_out])
              # NOTE(review): `output_layer` is only assigned when output_type
              # is "linear"; any other value reaches this line with it unbound.
              g.add_to_collection(name="output_layer",value=output_layer)
         with tf.name_scope("training_op"):
              if self.optimizer=="adam":
                  self.training_op=tf.train.AdamOptimizer()
Example #27
0
def create_a3c_graph(input_shape, n_action, model, opt, beta=None, name='a3c'):
    """
    Implements Actor Critic Model (A3C)

    Builds separate actor and critic networks and registers every
    placeholder, output, loss and train op in a Tensorflow graph collection
    so they can be fetched by name and run with a tf.Session instance. The
    collection names are "actions", "returns", "policy_in", "value_in",
    "policy_out", "value_out", "policy_loss", "value_loss",
    "policy_train_op" and "value_train_op". Nothing is returned.

    Args:
        input_shape: Shape used for both the policy and value input
        placeholders.
        n_action: A `int`. Number of actions agent can do.
        model: The Tensorflow model. Called once on each input placeholder.
        opt: A `tf.train.Optimizer`.
        beta: A `float`. Regularization term for the entropy of the policy model.
        If beta is `None` (or any falsy value) no regularization will be added.
        name: Unused by this function.
    """
    actions = tf.placeholder(tf.int32, shape=(None))
    returns = tf.placeholder(tf.float32, shape=(None))
    policy_in = tf.placeholder(tf.float32, shape=input_shape)
    value_in = tf.placeholder(tf.float32, shape=input_shape)

    tf.add_to_collection("actions", actions)
    tf.add_to_collection("returns", returns)
    tf.add_to_collection("policy_in", policy_in)
    tf.add_to_collection("value_in", value_in)

    # NOTE(review): both heads use fully_connected's default activation;
    # confirm that a non-linear activation before the softmax and on the
    # value output is intended.
    with tf.variable_scope('actor'):
        pnn = model(policy_in)
        probs = tf.nn.softmax(layers.fully_connected(pnn, n_action))
    with tf.variable_scope('critic'):
        v_out = model(value_in)
        value = layers.fully_connected(v_out, 1)

    tf.add_to_collection("policy_out", probs)
    tf.add_to_collection("value_out", value)

    actor_vars = get_vars_from_scope('actor')
    critic_vars = get_vars_from_scope('critic')

    # Number of rows to index. The original referenced an undefined name
    # (`states`); the row count of `probs` is what slice_2d actually indexes.
    N = tf.shape(probs)[0]
    p_vals = slice_2d(probs, tf.range(0, N), actions)
    surr_loss = tf.log(p_vals + 1e-8)

    # NOTE(review): `value` has a trailing dimension of 1; confirm `returns`
    # is fed with a compatible shape so this subtraction does not broadcast.
    policy_loss = -surr_loss * (returns - value)
    if beta:
        # NOTE(review): this adds beta * entropy to a loss that is minimized;
        # confirm the sign matches the intended entropy regularization.
        policy_loss += beta * (-tf.reduce_sum(probs * tf.log(probs + 1e-8), 1))
    policy_loss = tf.reduce_mean(policy_loss, name="policy_loss")
    value_loss = tf.reduce_mean(tf.square(returns - value), name="value_loss")

    # Each loss only updates the variables of its own network.
    policy_train_op = opt.minimize(policy_loss, var_list=actor_vars)
    value_train_op = opt.minimize(value_loss, var_list=critic_vars)

    tf.add_to_collection("policy_loss", policy_loss)
    tf.add_to_collection("value_loss", value_loss)
    tf.add_to_collection("policy_train_op", policy_train_op)
    tf.add_to_collection("value_train_op", value_train_op)
Example #28
0
def atari_model(ram_in, num_actions, scope, reuse=False):
    """Map `ram_in` to one linear output per action with a 256-128-64 ReLU MLP.

    All layers are created under `scope` / "action_value".
    """
    with tf.variable_scope(scope, reuse=reuse):
        with tf.variable_scope("action_value"):
            net = ram_in
            for width in (256, 128, 64):
                net = layers.fully_connected(
                    net, num_outputs=width, activation_fn=tf.nn.relu)
            q_values = layers.fully_connected(
                net, num_outputs=num_actions, activation_fn=None)

        return q_values
def multilayer_perceptron(x):
    fc1 = layers.fully_connected(x, 256, activation_fn=tf.nn.relu, scope='fc1')
    fc2 = layers.fully_connected(fc1, 256, activation_fn=tf.nn.relu, scope='fc2')

    def _debug_print_func(fc1_val, fc2_val):
        print 'FC1 : {}, FC2 : {}'.format(fc1_val.shape, fc2_val.shape)
        return False
    debug_print_op = tf.py_func(_debug_print_func, [fc1, fc2], [tf.bool])
    with tf.control_dependencies(debug_print_op):
        out = layers.fully_connected(fc2, 10, activation_fn=None, scope='out')
    return out
Example #30
0
    def create_network(self, scope):
        """Create a convolutional Q-network under `scope`.

        Builds a [None, 84, 84, 4] float placeholder, three ReLU convolutions,
        a 256-unit ReLU layer and a linear layer with `self.action_dim`
        outputs.

        Returns:
            A `(state_input, q_value)` pair: the input placeholder and the
            linear output tensor.
        """
        # (num_outputs, kernel_size, stride) for each convolution, in order.
        conv_specs = ((32, 8, 1), (64, 4, 2), (64, 3, 1))
        with tf.variable_scope(scope, reuse=False):
            state_input = tf.placeholder('float', [None, 84, 84, 4])

            features = state_input
            for filters, kernel, step in conv_specs:
                features = layers.convolution2d(
                    features, num_outputs=filters, kernel_size=kernel,
                    stride=step, activation_fn=tf.nn.relu)

            flat = layers.flatten(features)
            hidden = layers.fully_connected(flat, num_outputs=256, activation_fn=tf.nn.relu)
            q_value = layers.fully_connected(hidden, num_outputs=self.action_dim, activation_fn=None)
            return state_input, q_value
Example #31
0
def classification_head (net, num_classes):
    """Flatten `net`, apply two 4096-unit layers and a linear `num_classes` layer.

    The two hidden layers use `fully_connected`'s default activation; the
    final layer has none.
    """
    features = flatten(net)
    for _ in range(2):
        features = fully_connected(features, 4096)
    return fully_connected(features, num_classes, activation_fn=None)
Example #32
0
def model_function(features, targets, mode):
    """Build a two-conv, two-pool, three-dense classifier with its loss and train op.

    Args:
        features: input tensor; reshaped to [-1, 28, 28, 1] below.
        targets: labels passed to `losses.softmax_cross_entropy` together
            with the logits (presumably one-hot -- TODO confirm).
        mode: not used by this function.

    Returns:
        A tuple `(predictions, loss, train_op)` where `predictions` is a dict
        with the softmax probabilities under 'probs' and their argmax under
        'labels'.
    """

    # input layer
    # Reshape features to a 4-D tensor [batch_size, 28, 28, 1]
    # (comments here assume MNIST images, which are 28x28 pixels)
    # -1 lets TensorFlow infer the batch size from the number of images
    # the trailing 1 is the number of channels (1 for grey images, 3 for color)
    input_layer = tf.reshape(features, [-1, 28, 28, 1])

    # Computes 32 features using a 5x5 filter
    # Padding is added to preserve width and height
    # Input Tensor Shape: [batch_size,28,28,1]
    # Output Tensor Shape: [batch_size,28,28,32]
    conv1 = layers.conv2d(
        inputs=input_layer,
        num_outputs=32,
        kernel_size=[5, 5],
        stride=1,
        padding=
        "SAME",  # pad so that the feature map has the same size as the input
        activation_fn=tf.nn.relu)

    # Pooling layer 1
    # Pooling layer with a 2x2 filter and stride 2
    # Input shape: [batch_size,28,28,32]
    # Output shape: [batch_size,14,14,32]
    pool1 = layers.max_pool2d(inputs=conv1, kernel_size=[2, 2], stride=2)

    # Convolution layer 2
    # Input: 14 x 14 x 32 (32 channels here)
    # Output: 14 x 14 x 64 (64 features computed from the 32 input channels)
    conv2 = layers.conv2d(
        inputs=pool1,
        num_outputs=64,
        kernel_size=[5, 5],
        stride=1,
        padding=
        "SAME",  # pad so that the feature map has the same size as the input
        activation_fn=tf.nn.relu)

    # Pooling layer 2
    # Input: 14 x 14 x 64
    # Output: 7 x 7 x 64
    pool2 = layers.max_pool2d(inputs=conv2, kernel_size=[2, 2], stride=2)

    # Flatten pool2 to feed the first fully connected layer
    # Input size: [batch_size,7,7,64]
    # Output size: [batch_size, 7x7x64]
    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])

    # Two stacked fully connected layers with 100 and 20 neurons
    # Input shape: [batch_size, 7x7x64]
    # Output shape: [batch_size, 20]
    fclayers = layers.stack(
        pool2_flat,
        layers.fully_connected, [100, 20],
        activation_fn=tf.nn.relu,
        weights_regularizer=layers.l1_l2_regularizer(1.0, 2.0),
        weights_initializer=layers.xavier_initializer(uniform=True, seed=100))

    outputs = layers.fully_connected(
        inputs=fclayers,
        num_outputs=10,  # 10 output units, one per digit (0 to 9)
        activation_fn=None
    )  # No activation here: the softmax is applied inside the cross-entropy loss

    # Calculate loss using softmax cross-entropy on the raw outputs
    loss = losses.softmax_cross_entropy(outputs, targets)

    # Plain SGD with a fixed learning rate, stepping the global step.
    optimizer = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=0.1,
        optimizer="SGD")

    # The predicted class is the output unit with the highest probability
    # Return both the probabilities and the corresponding predicted labels
    probs = tf.nn.softmax(outputs)
    return {'probs': probs, 'labels': tf.argmax(probs, 1)}, loss, optimizer
def build_infer_graph(x,
                      batch_size,
                      vocab_size=VOCAB_SIZE,
                      embedding_size=32,
                      rnn_size=128,
                      num_layers=2,
                      p_keep=1.0):
    """
    builds inference graph

    Embeds `x`, runs it through a stack of `num_layers` LSTM cells with output
    dropout, and projects the RNN outputs to `vocab_size` logits.

    Args:
        x: input passed to `layers.embed_sequence` (presumably integer token
            ids of shape [batch_size, seq_len] -- TODO confirm).
        batch_size: default value for the "batch_size" placeholder used to
            build the initial RNN state.
        vocab_size: embedding vocabulary size and width of the logits.
        embedding_size: width of the embedding vectors.
        rnn_size: number of units in each LSTM cell.
        num_layers: number of stacked LSTM cells.
        p_keep: default value for the "p_keep" dropout placeholder.

    Returns:
        A dict with the keys "logits", "probs", "input_state",
        "output_state", "p_keep", "batch_size" (the last two are the
        placeholders, not the Python values) and "infer_args" (the Python
        argument values this graph was built with).
    """
    # Captured before p_keep / batch_size are rebound to placeholders below,
    # so this dict holds the plain argument values.
    infer_args = {
        "batch_size": batch_size,
        "vocab_size": vocab_size,
        "embedding_size": embedding_size,
        "rnn_size": rnn_size,
        "num_layers": num_layers,
        "p_keep": p_keep
    }
    logger.debug("building inference graph: %s.", infer_args)

    # other placeholders
    # Scalars that default to the given values but can be overridden when fed.
    p_keep = tf.placeholder_with_default(p_keep, [], "p_keep")
    batch_size = tf.placeholder_with_default(batch_size, [], "batch_size")

    # embedding layer
    embed_seq = layers.embed_sequence(x, vocab_size, embedding_size)
    # shape: [batch_size, seq_len, embedding_size]
    embed_seq = tf.nn.dropout(embed_seq, keep_prob=p_keep)
    # shape: [batch_size, seq_len, embedding_size]

    # RNN layers
    cells = [rnn.LSTMCell(rnn_size) for _ in range(num_layers)]
    cells = [
        rnn.DropoutWrapper(cell, output_keep_prob=p_keep) for cell in cells
    ]
    cells = rnn.MultiRNNCell(cells)
    input_state = cells.zero_state(batch_size, tf.float32)
    # shape: [num_layers, 2, batch_size, rnn_size]
    rnn_out, output_state = tf.nn.dynamic_rnn(cells,
                                              embed_seq,
                                              initial_state=input_state)
    # rnn_out shape: [batch_size, seq_len, rnn_size]
    # output_state shape: [num_layers, 2, batch_size, rnn_size]
    with tf.name_scope("lstm"):
        tf.summary.histogram("outputs", rnn_out)
        # One (c_state, h_state) pair per stacked cell.
        for c_state, h_state in output_state:
            tf.summary.histogram("c_state", c_state)
            tf.summary.histogram("h_state", h_state)

    # fully connected layer
    logits = layers.fully_connected(rnn_out, vocab_size, activation_fn=None)
    # shape: [batch_size, seq_len, vocab_size]

    # predictions
    with tf.name_scope("softmax"):
        probs = tf.nn.softmax(logits)
        # shape: [batch_size, seq_len, vocab_size]

    with tf.name_scope("sequence"):
        tf.summary.histogram("embeddings", embed_seq)
        tf.summary.histogram("logits", logits)

    model = {
        "logits": logits,
        "probs": probs,
        "input_state": input_state,
        "output_state": output_state,
        "p_keep": p_keep,
        "batch_size": batch_size,
        "infer_args": infer_args
    }
    return model
Example #34
0
# Keyword arguments forwarded to the batch-norm normalizer of every layer below.
bn_params = {
    'is_training': train_mode,
    'decay': 0.9,
    # NOTE(review): None presumably makes the moving-average updates run in
    # place rather than via an update collection -- confirm against the
    # batch_norm documentation.
    'updates_collections': None
}

# We can build short code using 'arg_scope' to avoid duplicate code
# same function with different arguments
with arg_scope([fully_connected],
               activation_fn=tf.nn.relu,
               weights_initializer=xavier_init,
               biases_initializer=None,
               normalizer_fn=batch_norm,
               normalizer_params=bn_params
               ):
    # Four hidden layers of `hidden_output_size` units, each followed by dropout.
    hidden_layer1 = fully_connected(X, hidden_output_size, scope="h1")
    h1_drop = dropout(hidden_layer1, keep_prob, is_training=train_mode)
    hidden_layer2 = fully_connected(h1_drop, hidden_output_size, scope="h2")
    h2_drop = dropout(hidden_layer2, keep_prob, is_training=train_mode)
    hidden_layer3 = fully_connected(h2_drop, hidden_output_size, scope="h3")
    h3_drop = dropout(hidden_layer3, keep_prob, is_training=train_mode)
    hidden_layer4 = fully_connected(h3_drop, hidden_output_size, scope="h4")
    h4_drop = dropout(hidden_layer4, keep_prob, is_training=train_mode)
    # The output layer overrides the scoped activation so it yields raw logits.
    hypothesis = fully_connected(h4_drop, final_output_size, activation_fn=None, scope="hypothesis")


# define cost/loss & optimizer
# The softmax is applied inside the loss, on the raw `hypothesis` logits.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
 def ModelFn():
     """Build one 30-unit fully connected layer with L2 weight regularization.

     The layer is applied to a constant (10, 20) float32 tensor of ones;
     nothing is returned.
     """
     ones_input = tf.constant(1.0, shape=(10, 20), dtype=tf.float32)
     l2_penalty = layers.l2_regularizer(0.001)
     layers.fully_connected(ones_input, 30, weights_regularizer=l2_penalty)
Example #36
0
def generator(z, reuse=False):
    """
        generator
        Network to produce samples.

        A batch-normalized dense projection is reshaped to a 4x4x1024 map,
        upsampled by three stride-2 transposed convolutions (512, 256 and 128
        channels), refined by one stride-1 convolution and finally mapped to
        3 channels by a sigmoid transposed convolution.

        params:
            z:  Input noise [batch size, latent dimension]
            reuse: forwarded to the "generator" variable scope.
        returns:
            x_hat: Artificial image [batch size, 64, 64, 3]
    """
    batch_norm = layers.batch_norm

    with tf.variable_scope("generator", reuse=reuse):
        projected = layers.fully_connected(inputs=z,
                                           num_outputs=4 * 4 * 1024,
                                           activation_fn=tf.nn.relu,
                                           normalizer_fn=batch_norm)
        feature_map = tf.reshape(projected, [-1, 4, 4, 1024])
        # [4,4,1024]

        # Stride-2 upsampling stages: [8,8,512] -> [16,16,256] -> 128 channels.
        for channels in (512, 256, 128):
            feature_map = layers.conv2d_transpose(inputs=feature_map,
                                                  num_outputs=channels,
                                                  kernel_size=4,
                                                  stride=2,
                                                  activation_fn=tf.nn.relu,
                                                  normalizer_fn=batch_norm)

        # This is an extra conv layer like the WGAN folks.
        feature_map = layers.conv2d(inputs=feature_map,
                                    num_outputs=128,
                                    kernel_size=4,
                                    stride=1,
                                    activation_fn=tf.nn.relu,
                                    normalizer_fn=batch_norm)
        # [32,32,128]

        # Final upsampling to the image; no batch norm and no bias here.
        return layers.conv2d_transpose(inputs=feature_map,
                                       num_outputs=3,
                                       kernel_size=4,
                                       stride=2,
                                       activation_fn=tf.nn.sigmoid,
                                       biases_initializer=None)
        # [64,64,3]
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Builds DNN logits and/or linear logits (depending on which feature columns
  are given), sums them when both exist, and hands the result to the `Head`
  together with a training-op factory that trains each sub-model with its own
  optimizer.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_min_slice_size: Optional. The min slice size of input layer
          partitions. If not provided, will use the default of 64M.
      * fix_global_step_increment_bug: Optional, defaults to True. When True,
          neither optimizer increments the global step; it is incremented
          once per training step after both sub-model train ops have run.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
      are empty at the same time, or if `dnn_feature_columns` is given
      without `dnn_hidden_units`.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  # `<<` binds tighter than `or`, so the fallback is 64 << 20 (64 * 2**20).
  input_layer_min_slice_size = (
      params.get("input_layer_min_slice_size") or 64 << 20)
  num_ps_replicas = config.num_ps_replicas if config else 0
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})
  fix_global_step_increment_bug = params.get(
      "fix_global_step_increment_bug", True)

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  # Build DNN Logits.
  # The scope name doubles as the collection that gathers the DNN variables.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if not dnn_hidden_units:
      raise ValueError(
          "dnn_hidden_units must be defined when dnn_feature_columns is "
          "specified.")
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      # The input layer gets its own partitioner with a configurable minimum
      # slice size.
      input_layer_partitioner = (
          partitioned_variables.min_max_variable_partitioner(
              max_partitions=num_ps_replicas,
              min_slice_size=input_layer_min_slice_size))
      with variable_scope.variable_scope(
          "input_from_feature_columns",
          values=tuple(six.itervalues(features)),
          partitioner=input_layer_partitioner) as dnn_input_scope:
        net = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=dnn_feature_columns,
            weight_collections=[dnn_parent_scope],
            scope=dnn_input_scope)

      # One fully connected layer per entry of dnn_hidden_units.
      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            "hiddenlayer_%d" % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = layers.fully_connected(
              net,
              num_hidden_units,
              activation_fn=dnn_activation_fn,
              variables_collections=[dnn_parent_scope],
              scope=dnn_hidden_layer_scope)
          # Dropout is only applied while training.
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = layers.dropout(
                net,
                keep_prob=(1.0 - dnn_dropout))
        # TODO(b/31209633): Consider adding summary before dropout.
        _add_hidden_layer_summary(net, dnn_hidden_layer_scope.name)

      with variable_scope.variable_scope(
          "logits",
          values=(net,)) as dnn_logits_scope:
        dnn_logits = layers.fully_connected(
            net,
            head.logits_dimension,
            activation_fn=None,
            variables_collections=[dnn_parent_scope],
            scope=dnn_logits_scope)
      _add_hidden_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  # As above, the scope name is also the collection of the linear variables.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=linear_partitioner) as scope:
      if joint_linear_weights:
        linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)
      else:
        linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)

  # Combine logits and build full model.
  # At least one of the two is non-None because of the check at the top.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    global_step = training_util.get_global_step()
    # Each sub-model is optimized separately, restricted to the variables
    # gathered in its own collection.
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=_get_optimizer(dnn_optimizer),
              gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, dnn_parent_scope,
                  dnn_input_scope.name),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=_get_optimizer(linear_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))

    train_op = control_flow_ops.group(*train_ops)
    # With the fix enabled, the optimizers above do not touch the global
    # step; it is incremented once here, after all train ops have run.
    if fix_global_step_increment_bug:
      with ops.control_dependencies([train_op]):
        with ops.colocate_with(global_step):
          return state_ops.assign_add(global_step, 1).op
    return train_op

  return head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_make_training_op,
      logits=logits)
Example #38
0
import tensorflow as tf
from tensorflow.python.ops import array_ops

from tensorflow.python.framework import tensor_shape
#from tensorflow.python.ops import rnn_cell
from tensorflow.contrib import rnn as rnn_cell
#from tensorflow.python.ops.rnn_cell import RNNCell, GRUCell, MultiRNNCell, BasicRNNCell
from tensorflow.contrib.rnn import RNNCell, GRUCell, MultiRNNCell, BasicRNNCell
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.util import nest

from tensorflow.python.ops.math_ops import sigmoid, tanh
from tensorflow.contrib.layers import fully_connected


# Default projection: a fully connected layer with `n` outputs and no activation.
DEFAULT_FUNC = lambda x, n: fully_connected(x, n, activation_fn=None)
# NOTE(review): presumably the default rank used by code later in this module
# -- its use is not visible here, confirm.
DEFAULT_RANK = 2

# WARREN HACK:  WHY NOT JUST PUT THE PROTECTED FUNCTION HERE
def _state_size_with_prefix(state_size, prefix=None):
  """Helper function that enables int or TensorShape shape specification.
  This function takes a size specification, which can be an integer or a
  TensorShape, and converts it into a list of integers. One may specify any
  additional dimensions that precede the final state size specification.
  Args:
    state_size: TensorShape or int that specifies the size of a tensor.
    prefix: optional additional list of dimensions to prepend.
  Returns:
    result_state_size: list of dimensions the resulting tensor size.
  Raises:
    TypeError: if `prefix` is given but is not a list.
  """
  result_state_size = tensor_shape.as_shape(state_size).as_list()
  # The original body stopped after the conversion above: `prefix` was never
  # applied and nothing was returned, contradicting the documented contract.
  if prefix is not None:
    if not isinstance(prefix, list):
      raise TypeError("prefix of _state_size_with_prefix should be a list.")
    result_state_size = prefix + result_state_size
  return result_state_size
Example #39
0
File: ops.py Project: mxxhcm/code
def dimension_reduction(inputs, num_units=256, scope="dimension_reduction", reuse=None):
    """Pass `inputs` through two ReLU fully connected layers.

    The first layer has `num_units * 4` outputs and the second has
    `num_units`; both are created inside the variable scope `scope`
    (optionally reusing its variables).

    Returns:
        The output of the second fully connected layer.
    """
    with tf.variable_scope(scope, reuse=reuse):
        widened = layers.fully_connected(
            inputs,
            num_outputs=num_units * 4,
            activation_fn=tf.nn.relu)
        reduced = layers.fully_connected(
            widened,
            num_outputs=num_units,
            activation_fn=tf.nn.relu)
    return reduced
# Layer widths: the hidden layer has `num_hidden` units and the output layer
# has `num_outputs` units.
num_hidden = 2
num_outputs = 30

# Step size handed to the Adam optimizer below.
learning_rate = 0.01
"""### Placeholder

Creating a placeholder for the data called X
"""

# Float input with 30 columns and a variable number of rows.
X = tf.placeholder(tf.float32, shape=[None, 30])
"""### Layers

Creating the hidden layer and the output layers using the [fully_connected](https://www.tensorflow.org/api_docs/python/tf/contrib/layers/fully_connected) function.
"""

# Both layers are linear: activation_fn=None disables the non-linearity.
hidden1 = fully_connected(X, num_hidden, activation_fn=None)
output = fully_connected(hidden1, num_outputs, activation_fn=None)
"""### Loss Function

Creating a Mean Squared Error loss function.
"""

# The target is the input itself, so this is a reconstruction error.
loss = tf.reduce_mean(tf.square(output - X))
"""### Optimizer

Creating an AdamOptimizer designed to minimize the previous loss function.
"""

optimizer = tf.train.AdamOptimizer(learning_rate)
# Op that performs one optimization step on `loss` when run.
train = optimizer.minimize(loss)
"""### Init
    def apply(self, is_train, context_embed, answer, context_mask=None):
        """Score all spans up to `self.bound` offsets and register the loss.

        Start/end representations from `self.mapper` are merged for every
        offset in `range(self.bound)`, projected to one logit per span, and
        masked. A loss is computed from `answer` and added to the
        `tf.GraphKeys.LOSSES` collection.

        Args:
            is_train: forwarded unchanged to every sub-layer's `apply`.
            context_embed: context tensor; the size of its axis 1 is used as
                the sequence length.
            answer: sequence holding exactly one tensor. An int32 tensor goes
                through `to_packed_coordinates` / `packed_span_f1_mask`; any
                other dtype is cast to float32 and used as a mask over the
                logits.
            context_mask: passed to `tf.sequence_mask` as the lengths argument.
                NOTE(review): the default of None does not look usable with
                `tf.sequence_mask` - confirm callers always supply it.

        Returns:
            `PackedSpanPrediction(logits, l, self.bound)`.

        Raises:
            NotImplementedError: if `answer` does not have exactly one element,
                or if a non-int32 answer is combined with a `self.aggregate`
                other than "sum" or "max".
        """
        init_fn = get_keras_initialization(self.init)
        bool_mask = tf.sequence_mask(context_mask, tf.shape(context_embed)[1])

        with tf.variable_scope("predict"):
            m1, m2 = self.mapper.apply(is_train, context_embed, context_mask)

        # The same pre-process layer object is applied to both outputs, each
        # under its own variable scope.
        if self.pre_process is not None:
            with tf.variable_scope("pre-process1"):
                m1 = self.pre_process.apply(is_train, m1, context_mask)
            with tf.variable_scope("pre-process2"):
                m2 = self.pre_process.apply(is_train, m2, context_mask)

        span_vector_lst = []
        mask_lst = []
        # Offset 0: position j of m1 is merged with position j of m2.
        with tf.variable_scope("merge"):
            span_vector_lst.append(self.merge.apply(is_train, m1, m2))
        mask_lst.append(bool_mask)
        # Offset i: position j of m1 is merged with position j + i of m2,
        # reusing the variables created under "merge" above.
        for i in range(1, self.bound):
            with tf.variable_scope("merge", reuse=True):
                span_vector_lst.append(
                    self.merge.apply(is_train, m1[:, :-i], m2[:, i:]))
            # The mask for this offset is taken at the later position, j + i.
            mask_lst.append(bool_mask[:, i:])

        # Spans for all offsets are laid out one after another along axis 1.
        mask = tf.concat(mask_lst, axis=1)
        span_vectors = tf.concat(
            span_vector_lst,
            axis=1)  # per-offset span vectors concatenated into one packed axis

        if self.post_process is not None:
            with tf.variable_scope("post-process"):
                span_vectors = self.post_process.apply(is_train, span_vectors)

        # Linear projection to a single score per span.
        with tf.variable_scope("compute_logits"):
            logits = fully_connected(span_vectors,
                                     1,
                                     activation_fn=None,
                                     weights_initializer=init_fn)

        # Drop the size-1 output axis, then push masked-out spans towards
        # VERY_NEGATIVE_NUMBER. `mask` is already a single concatenated tensor
        # when it is passed to tf.concat here.
        logits = tf.squeeze(logits, squeeze_dims=[2])
        logits = logits + VERY_NEGATIVE_NUMBER * (
            1 - tf.cast(tf.concat(mask, axis=1), tf.float32))

        # Size of axis 1 of the context.
        l = tf.shape(context_embed)[1]

        if len(answer) == 1:
            answer = answer[0]
            if answer.dtype == tf.int32:
                if self.f1_weight == 0:
                    # Plain cross entropy against the packed index of the span.
                    answer_ix = to_packed_coordinates(answer, l, self.bound)
                    loss = tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=logits, labels=answer_ix))
                else:
                    f1_mask = packed_span_f1_mask(answer, l, self.bound)
                    if self.f1_weight < 1:
                        # Blend the F1 mask with a one-hot target.
                        # NOTE(review): the one-hot depth is `l`, while the
                        # packed axis holds the spans of every offset; this
                        # looks like a shape mismatch when self.bound > 1 -
                        # confirm against packed_span_f1_mask.
                        f1_mask *= self.f1_weight
                        f1_mask += (1 - self.f1_weight) * tf.one_hot(
                            to_packed_coordinates(answer, l, self.bound), l)

                    # TODO can we stay in log space?  (actually its tricky since f1_mask can have zeros...)
                    probs = tf.nn.softmax(logits)
                    loss = -tf.reduce_mean(
                        tf.log(tf.reduce_sum(probs * f1_mask, axis=1)))
            else:
                # `answer` is cast to float32 and used as a mask: entries where
                # it is 0 are pushed towards VERY_NEGATIVE_NUMBER.
                log_norm = tf.reduce_logsumexp(logits, axis=1)
                if self.aggregate == "sum":
                    log_score = tf.reduce_logsumexp(
                        logits + VERY_NEGATIVE_NUMBER *
                        (1 - tf.cast(answer, tf.float32)),
                        axis=1)
                elif self.aggregate == "max":
                    log_score = tf.reduce_max(
                        logits + VERY_NEGATIVE_NUMBER *
                        (1 - tf.cast(answer, tf.float32)),
                        axis=1)
                else:
                    raise NotImplementedError()
                # Negative log of the normalized score of the selected spans.
                loss = tf.reduce_mean(-(log_score - log_norm))
        else:
            raise NotImplementedError()

        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
        return PackedSpanPrediction(logits, l, self.bound)
Example #42
0
import tensorflow as tf
# Hyper-parameters: every input is a sequence of `n_steps` rows with
# `n_inputs` values each.
n_steps = 28
n_inputs = 28
n_neurons = 150
n_outputs = 10
learning_rate = 0.001

X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
# One-dimensional output: a single integer label per example
y = tf.placeholder(tf.int32, [None])
# Use the simplest cell, BasicRNNCell
basic_cell = tf.contrib.rnn.BasicRNNCell(num_units=n_neurons)
# Unroll it with dynamic_rnn
outputs, states = tf.nn.dynamic_rnn(basic_cell, X, dtype=tf.float32)
# Raw outputs (logits), computed from the final RNN state
logits = fully_connected(states, n_outputs, activation_fn=None)
# Cross entropy against the true labels
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                          logits=logits)
loss = tf.reduce_mean(xentropy)
# Use AdamOptimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)
# Accuracy: a prediction is correct only when the top-1 class equals y
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/")
# Reshape into a suitable input shape
Example #43
0
    def logits_Discriminator(self, d_input, reuse=False):
        """Build the discriminator graph and return its output logits.

        Args:
            d_input: 2-D or 3-D tensor. A 2-D input gets a trailing axis
                added before the convolutional blocks.
            reuse: when true, reuse the variables already created under
                'Discriminator/<self.name>' and skip the shape printouts.

        Returns:
            The output of the final single-unit linear fully connected layer.

        Raises:
            ValueError: if `d_input` is neither 2-D nor 3-D, or if a block is
                requested with an activation other than 'leakyrelu' or 'relu'.
        """
        in_dims = d_input.get_shape().as_list()
        hi = d_input
        if len(in_dims) == 2:
            hi = tf.expand_dims(d_input, -1)
        elif len(in_dims) < 2 or len(in_dims) > 3:
            raise ValueError('Discriminator input must be 2-D or 3-D')

        with tf.variable_scope('Discriminator'):
            with tf.variable_scope(self.name) as scope:

                if reuse:
                    scope.reuse_variables()

                def disc_block(block_idx,
                               input_,
                               kwidth,
                               nfmaps,
                               bnorm,
                               activation,
                               pooling=2):
                    """One block: down-convolution followed by an activation.

                    `bnorm` is accepted for signature compatibility but is
                    not used (no normalization is applied here).
                    """
                    with tf.variable_scope('d_block_{}'.format(block_idx)):
                        # A bias initializer is only supplied when biases are
                        # enabled for the discriminator convolutions.
                        bias_init = None
                        if self.bias_D_conv:
                            bias_init = tf.constant_initializer(0.)
                        downconv_init = \
                            tf.truncated_normal_initializer(stddev=0.02)

                        # downconvolution
                        hi_a = downconv(input_,
                                        nfmaps,
                                        kwidth=kwidth,
                                        pool=pooling,
                                        init=downconv_init,
                                        bias_init=bias_init)

                        # activation
                        if activation == 'leakyrelu':
                            hi = leakyrelu(hi_a)
                        elif activation == 'relu':
                            hi = tf.nn.relu(hi_a)
                        else:
                            # Fixed: the two literals were concatenated
                            # without a separating space ("...{}in D").
                            raise ValueError('Unrecognized activation {} '
                                             'in D'.format(activation))
                        return hi

                # Stack one block per entry of d_num_fmaps, using the
                # enumerated value directly instead of re-indexing the list.
                for block_idx, fmaps in enumerate(self.d_num_fmaps):
                    hi = disc_block(block_idx, hi, 31, fmaps, True,
                                    'leakyrelu')
                if not reuse:
                    print('Discriminator deconved shape: ', hi.get_shape())

                # Width-1 convolution with a single kernel.
                d_logit_out = conv1d(
                    hi,
                    kwidth=1,
                    num_kernels=1,
                    init=tf.truncated_normal_initializer(stddev=0.02),
                    name='logits_conv')
                # NOTE(review): squeeze without an axis removes every size-1
                # dimension, including a batch of one - confirm the intended
                # shapes.
                d_logit_out = tf.squeeze(d_logit_out)
                d_logit_out = tf.expand_dims(d_logit_out, 1)
                # Final linear layer producing a single output unit.
                d_logit_out = fully_connected(d_logit_out,
                                              1,
                                              activation_fn=None)

                if not reuse:
                    print('Discriminator output shape: ',
                          d_logit_out.get_shape())
                    print('*****************************')
                return d_logit_out
Example #44
0
def discriminator(wave_in, reuse=True):
    """Build the discriminator graph under scope 'd_model'; return its logits.

    Relies on `bias_D_conv`, `disc_noise_std` and `d_num_fmaps`, which are
    defined outside this function.

    Args:
        wave_in: input tensor fed to the discriminator.
        reuse: when true (the default), reuse the variables that already
            exist under 'd_model' and build silently; when false, print a
            summary of the shapes while building.

    Returns:
        The output of the final single-unit linear fully connected layer.

    Raises:
        ValueError: if a block is requested with an activation other than
            'leakyrelu' or 'relu'.
    """
    hi = wave_in

    with tf.variable_scope('d_model') as scope:
        if reuse:
            scope.reuse_variables()

        def disc_block(block_idx, input_, kwidth, nfmaps, bnorm, activation, name, pooling=2):
            """One block: down-convolution followed by an activation.

            `bnorm` is accepted for signature compatibility but is not used
            (batch normalization is disabled in this version).
            """
            with tf.variable_scope('d_block_{}'.format(block_idx)):
                if not reuse:
                    print('D block {} input shape: {}'
                          ''.format(block_idx, input_.get_shape()),
                          end=' *** ')

                # A bias initializer is only supplied when biases are
                # enabled for the discriminator convolutions.
                bias_init = None
                if bias_D_conv:
                    if not reuse:
                        print('biasing D conv', end=' *** ')
                    bias_init = tf.constant_initializer(0.)

                downconv_init = tf.truncated_normal_initializer(stddev=0.02)

                hi_a = downconv(input_, nfmaps, kwidth=kwidth, pool=pooling,
                                init=downconv_init, bias_init=bias_init, name=name)

                if not reuse:
                    print('downconved shape: {} '
                          ''.format(hi_a.get_shape()), end=' *** ')

                if activation == 'leakyrelu':
                    if not reuse:
                        print('Applying Lrelu', end=' *** ')
                    hi = leakyrelu(hi_a)
                elif activation == 'relu':
                    if not reuse:
                        print('Applying Relu', end=' *** ')
                    hi = tf.nn.relu(hi_a)
                else:
                    raise ValueError('Unrecognized activation {} '
                                     'in D'.format(activation))
                return hi

        # apply input noisy layer to real and fake samples
        hi = gaussian_noise_layer(hi, disc_noise_std)

        if not reuse:
            print('*** Discriminator summary ***')

        # Stack one block per entry of d_num_fmaps, using the enumerated
        # value directly instead of re-indexing the list.
        for block_idx, fmaps in enumerate(d_num_fmaps):
            hi = disc_block(block_idx, hi, 31, fmaps, False, 'leakyrelu',
                            name='db_{}_{}'.format(block_idx, fmaps))
            if not reuse:
                print()

        if not reuse:
            print('discriminator deconved shape: ', hi.get_shape())

        # The flattened copy of `hi` that used to be computed here was never
        # read, so it has been removed.

        # Width-1 convolution with a single kernel.
        d_logit_out = conv1d(hi, kwidth=1, num_kernels=1,
                             init=tf.truncated_normal_initializer(stddev=0.02),
                             name='logits_conv')

        # Removes every dimension of size 1.
        # NOTE(review): that includes a batch of one - confirm the intended
        # shapes.
        d_logit_out = tf.squeeze(d_logit_out)

        # all logits connected to 1 single neuron for binary classification
        d_logit_out = fully_connected(d_logit_out, 1, activation_fn=None)

        if not reuse:
            print('discriminator output shape: ', d_logit_out.get_shape())
            print('*****************************')

        return d_logit_out
    def __init__(self):
        """Create the session and build the two-stage LSTM classifier graph.

        An LSTM reads the articles; a second LSTM, sharing the same variables
        under 'scope1', reads the headlines starting from the final article
        state. The last outputs of both are concatenated and projected to
        OUTPUT_SIZE logits, which feed a softmax cross-entropy loss that is
        minimized with Adam.

        Attributes set here: `session`, the three placeholders, the cells,
        the RNN outputs/states, `rnn_outputs`, `error` and `train_fn`.
        """
        self.session = tf.Session()

        # input/output placeholders
        self.inputs_articles = tf.placeholder(tf.float32,
                                              (None, 200, INPUT_SIZE),
                                              name='input_articles')
        self.inputs_headlines = tf.placeholder(tf.float32,
                                               (None, 30, INPUT_SIZE),
                                               name='inputs_headlines')
        self.outputs = tf.placeholder(
            tf.float32, (None, OUTPUT_SIZE),
            name='outputs')  # TODO change to two dimensions

        # LSTM cells, TODO make these bidrectional!
        with tf.variable_scope('scope1') as scope1:
            # Create cell
            # NOTE(review): the dropout keep probabilities are hard-coded, so
            # dropout is part of the graph whenever it is run - confirm that
            # is intended for evaluation too.
            self.cell_articles = tf.contrib.rnn.BasicLSTMCell(
                RNN_HIDDEN, state_is_tuple=True)
            self.cell_articles = tf.contrib.rnn.core_rnn_cell.DropoutWrapper(
                self.cell_articles, input_keep_prob=0.7, output_keep_prob=0.2)
            # Initialize batch size, initial states
            batch_size_articles = tf.shape(self.inputs_articles)[0]
            initial_state_articles = self.cell_articles.zero_state(
                batch_size_articles, tf.float32)
            # Hidden states, outputs
            self.rnn_outputs_articles, self.rnn_states_articles = tf.nn.dynamic_rnn(
                self.cell_articles,
                self.inputs_articles,
                initial_state=initial_state_articles,
                time_major=False)
        with tf.variable_scope('scope1') as scope1:
            # Share the LSTM variables created for the articles above.
            scope1.reuse_variables()
            # Create cell
            self.cell_headlines = tf.contrib.rnn.BasicLSTMCell(
                RNN_HIDDEN, state_is_tuple=True, reuse=True)
            self.cell_headlines = tf.contrib.rnn.core_rnn_cell.DropoutWrapper(
                self.cell_headlines, input_keep_prob=0.7, output_keep_prob=0.2)
            # The headline RNN starts from the final state of the article RNN.
            initial_state_headlines = self.rnn_states_articles
            # Hidden states, outputs
            self.rnn_outputs_headlines, self.rnn_states_headlines = tf.nn.dynamic_rnn(
                self.cell_headlines,
                self.inputs_headlines,
                initial_state=initial_state_headlines,
                time_major=False)

        # make prediction by taking LAST rnn_outputs_articles and rnn_outputs_headlines
        # TODO: Take padding out from output
        self.rnn_outputs = tf.concat([
            self.rnn_outputs_articles[:, -1, :],
            self.rnn_outputs_headlines[:, -1, :]
        ], 1)

        # softmax_cross_entropy_with_logits applies the softmax itself and
        # expects unscaled logits, so the projection must stay linear. It
        # previously applied a sigmoid, which confined every logit to (0, 1).
        predicted_outputs = layers.fully_connected(self.rnn_outputs,
                                                   num_outputs=OUTPUT_SIZE,
                                                   activation_fn=None)

        # Per-example cross entropy between the labels and softmax(logits).
        self.error = tf.nn.softmax_cross_entropy_with_logits(
            labels=self.outputs, logits=predicted_outputs)
        self.train_fn = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE,
                                               name='train_fn').minimize(
                                                   self.error)
'''
# Forward and backward stacks: each is `n_layers` BasicLSTMCells of
# `n_neurons` units. `n_neurons`, `n_layers`, `X` and `y` are defined outside
# this fragment.
multi_layer_cell = tf.contrib.rnn.MultiRNNCell([
    tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons) for _ in range(n_layers)
])
multi_layer_cell_reversed = tf.contrib.rnn.MultiRNNCell([
    tf.contrib.rnn.BasicLSTMCell(num_units=n_neurons) for _ in range(n_layers)
])

rnn_outputs, states = tf.nn.bidirectional_dynamic_rnn(
    multi_layer_cell, multi_layer_cell_reversed, X, dtype=tf.float32)
rnn_outputs_fw, rnn_outputs_bw = rnn_outputs
# Collapse each direction to rows of `n_neurons` values, then combine the two
# directions by element-wise addition.
stacked_rnn_outputs_fw = tf.reshape(rnn_outputs_fw, [-1, n_neurons])
stacked_rnn_outputs_bw = tf.reshape(rnn_outputs_bw, [-1, n_neurons])
stacked_rnn_outputs = tf.add(stacked_rnn_outputs_fw, stacked_rnn_outputs_bw)
# Linear projection (no activation) to `n_outputs` values per row.
stacked_outputs = fully_connected(stacked_rnn_outputs,
                                  n_outputs,
                                  activation_fn=None)
# Alternative kept by the author: with the default activation the outputs
# would be forced to be positive numbers.
# stacked_outputs = fully_connected(stacked_rnn_outputs,n_outputs) #This will force the output to be positive numbers
# Restore the [batch, n_steps, n_outputs] layout.
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])

learning_rate = 0.001
# Earlier loss variant kept by the author:
#loss = tf.nn.l2_loss(tf.reduce_mean(tf.square(outputs-y))+ sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)))
# Mean squared error plus every registered regularization loss.
loss = tf.add_n([tf.reduce_mean(tf.square(outputs - y))] +
                tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
training_op = optimizer.minimize(loss)

init = tf.global_variables_initializer()

n_iterations = 10000
Example #47
0
    def alexnet_v2(inputs,
                   is_training=True,
                   emb_size=4096,
                   dropout_keep_prob=0.5,
                   scope='alexnet_v2'):
        """AlexNet-style network that maps images to an embedding.

        Relies on `new_shape`, `img_shape`, `augmentation_function` and
        `image_summary` from the enclosing scope.

        Args:
            inputs: image tensor; it is cast to float32 first.
            is_training: enables the augmentation function (when one is set)
                and is forwarded to the dropout layer.
            emb_size: number of outputs of the last fully connected layer.
            dropout_keep_prob: keep probability of the dropout after 'fc6'.
            scope: variable scope for the network.

        Returns:
            The output of the 'fc7' fully connected layer.
        """
        images = tf.cast(inputs, tf.float32)

        # Resize only when a target shape is configured.
        if new_shape is None:
            shape = img_shape
        else:
            shape = new_shape
            images = tf.image.resize_images(
                images,
                tf.constant(new_shape[:2]),
                method=tf.image.ResizeMethod.BILINEAR)

        if is_training and augmentation_function is not None:
            images = augmentation_function(images, shape)
        if image_summary:
            tf.summary.image('Inputs', images, max_outputs=3)

        # Normalize over axes 1 and 2.
        # NOTE(review): the divisor is the mean squared deviation (a
        # variance), not its square root - confirm this is intended.
        center = tf.reduce_mean(images, [1, 2], True)
        spread = tf.reduce_mean(tf.square(images - center), [1, 2], True)
        images = (images - center) / (spread + 1e-5)

        with variable_scope.variable_scope(scope, 'alexnet_v2',
                                           [images]) as sc:
            end_points_collection = sc.original_name_scope + '_end_points'

            # Collect outputs for conv2d, fully_connected and max_pool2d.
            collected_ops = [
                layers.conv2d,
                layers_lib.fully_connected,
                layers_lib.max_pool2d,
            ]
            with arg_scope(collected_ops,
                           outputs_collections=[end_points_collection]):
                # Convolutional feature extractor.
                features = layers.conv2d(images,
                                         64, [11, 11],
                                         4,
                                         padding='VALID',
                                         scope='conv1')
                features = layers_lib.max_pool2d(
                    features, [3, 3], 2, scope='pool1')
                features = layers.conv2d(
                    features, 192, [5, 5], scope='conv2')
                features = layers_lib.max_pool2d(
                    features, [3, 3], 2, scope='pool2')
                features = layers.conv2d(
                    features, 384, [3, 3], scope='conv3')
                features = layers.conv2d(
                    features, 384, [3, 3], scope='conv4')
                features = layers.conv2d(
                    features, 256, [3, 3], scope='conv5')
                features = layers_lib.max_pool2d(
                    features, [3, 3], 2, scope='pool5')

                features = slim.flatten(features, scope='flatten')

                # Fully connected head: fc6 -> dropout -> fc7.
                # NOTE(review): the initializer defaults are registered for
                # slim.fully_connected while the calls go through
                # layers.fully_connected - confirm both names refer to the
                # same function.
                fc_defaults = arg_scope(
                    [slim.fully_connected],
                    weights_initializer=trunc_normal(0.005),
                    biases_initializer=init_ops.constant_initializer(0.1))
                with fc_defaults:
                    features = layers.fully_connected(
                        features, 4096, scope='fc6')
                    features = layers_lib.dropout(features,
                                                  dropout_keep_prob,
                                                  is_training=is_training,
                                                  scope='dropout6')
                    features = layers.fully_connected(
                        features, emb_size, scope='fc7')

        return features
def inference(images, size,labels, training_nn, training_class, _reuse):
  #
  #
  batch_norm_decay = 0.9
  batch_norm_epsilon = 1e-5
  batch_norm_scale = True
  batch_norm_params = {
    'is_training': training_nn,
    'decay': batch_norm_decay,
    'epsilon': batch_norm_epsilon,
    'scale': batch_norm_scale,
    'updates_collections': None, #
  } 
  with arg_scope( [layers.conv2d],
             kernel_size = 3,
             weights_initializer = tf.random_normal_initializer(stddev=0.02),
             biases_initializer  = tf.constant_initializer(0.0),
             activation_fn=tf.nn.elu, 
             normalizer_fn=layers.batch_norm,
             normalizer_params=batch_norm_params,
             trainable = training_nn,
             reuse=_reuse,
             padding='SAME',
             stride=1):   


    conv0 = layers.conv2d(images,num_outputs = 64, scope='SecondAMIN/conv0')
    with tf.name_scope('convBlock-1') as scope:
          conv1  = layers.conv2d(conv0,num_outputs = 128, scope='SecondAMIN/conv1')
          bconv1 = layers.conv2d(conv1,num_outputs = 196, scope='SecondAMIN/bconv1')
          conv2  = layers.conv2d(bconv1, num_outputs = 128, scope='SecondAMIN/conv2')
          pool1  = layers.max_pool2d(conv2, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool1')
          _activation_summary(conv1)
          _activation_summary(bconv1)
          _activation_summary(conv2)

    with tf.name_scope('convBlock-2') as scope:
          conv3  = layers.conv2d(pool1, num_outputs = 128, scope='SecondAMIN/conv3')
          bconv2 = layers.conv2d(conv3, num_outputs = 196, scope='SecondAMIN/bconv2')
          conv4  = layers.conv2d(bconv2, num_outputs = 128, scope='SecondAMIN/conv4')
          pool2  = layers.max_pool2d(conv4, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool2')
          _activation_summary(conv3)
          _activation_summary(bconv2)
          _activation_summary(conv4)

    with tf.name_scope('convBlock-3') as scope:
          conv5  = layers.conv2d(pool2, num_outputs = 128, scope='SecondAMIN/conv5')
          bconv3 = layers.conv2d(conv5, num_outputs = 196, scope='SecondAMIN/bconv3')
          conv6  = layers.conv2d(bconv3, num_outputs = 128, scope='SecondAMIN/conv6')
          pool3  = layers.avg_pool2d(conv6, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool3')
          _activation_summary(conv5)
          _activation_summary(bconv3)
          _activation_summary(conv6)

    map1 = tf.image.resize_images(pool1,[32,32])
    map2 = tf.image.resize_images(pool2,[32,32])
    map3 = tf.image.resize_images(pool3,[32,32])
      
    summap = tf.concat([map1, map2, map3],3)
          
    # 
    with tf.name_scope('Depth-Map-Block') as scope:
      conv7 = layers.conv2d(summap, num_outputs = 128, scope='SecondAMIN/conv7')
      dp1 = tf.layers.dropout(conv7,rate = 0.2, training = training_nn, name = 'SecondAMIN/dropout1')
      conv8 = layers.conv2d(dp1, num_outputs = 64, scope='SecondAMIN/conv8')
      _activation_summary(conv7)
      _activation_summary(conv8)
  

  with arg_scope( [layers.conv2d],
             kernel_size = 3,
             weights_initializer = tf.random_normal_initializer(stddev=0.02),
             biases_initializer  = tf.constant_initializer(0.0),
             activation_fn= None, 
             normalizer_fn= None,
             padding='SAME',
                     trainable = training_nn,
             reuse=_reuse,
             stride=1):   
    # 
    conv11 = layers.conv2d(conv8, num_outputs = 1, scope='SecondAMIN/conv11')
    _activation_summary(conv11)
    tf.summary.image('depthMap_Second', conv11, max_outputs=FLAGS.batch_size)  





  
    
  with arg_scope( [layers.conv2d],
             kernel_size = 3,
             weights_initializer = tf.random_normal_initializer(stddev=0.02),
             biases_initializer  = tf.constant_initializer(0.0),
             activation_fn=tf.nn.elu, 
             normalizer_fn=layers.batch_norm,
             normalizer_params=batch_norm_params,
             trainable = training_nn,
             reuse=_reuse,
             padding='SAME',
             stride=1):   
 


 
    conv0_fir = layers.conv2d(images,num_outputs = 24, scope='FirstAMIN/conv0') #
    pool1_fir  = layers.max_pool2d(conv0_fir, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='FirstAMIN/pool1')
    with tf.name_scope('convBlock-1_fir') as scope:
          conv1_fir  = layers.conv2d(pool1_fir,num_outputs = 20, scope='FirstAMIN/conv1')#
          bconv1_fir = layers.conv2d(conv1_fir,num_outputs = 25, scope='FirstAMIN/bconv1')#
          conv2_fir  = layers.conv2d(bconv1_fir, num_outputs = 20, scope='FirstAMIN/conv2')#
      

    with tf.name_scope('convBlock-2_fir') as scope:
            pool2_fir  = layers.max_pool2d(conv2_fir, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='FirstAMIN/pool2')
            conv3_fir  = layers.conv2d(pool2_fir, num_outputs = 20, scope='FirstAMIN/conv3')
            bconv2_fir = layers.conv2d(conv3_fir, num_outputs = 25, scope='FirstAMIN/bconv2')
            conv4_fir  = layers.conv2d(bconv2_fir, num_outputs = 20, scope='FirstAMIN/conv4')
      

    with tf.name_scope('convBlock-3_fir') as scope:
            pool3_fir  = layers.avg_pool2d(conv4_fir, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='FirstAMIN/pool3')
            conv5_fir  = layers.conv2d(pool3_fir, num_outputs = 20, scope='FirstAMIN/conv5')
            bconv3_fir = layers.conv2d(conv5_fir, num_outputs = 25, scope='FirstAMIN/bconv3')
            conv6_fir  = layers.conv2d(bconv3_fir, num_outputs = 20, scope='FirstAMIN/conv6')


    map1_fir = tf.image.resize_images(conv2_fir,[32,32])
    map2_fir = tf.image.resize_images(conv4_fir,[32,32])
    map3_fir = conv6_fir
    
    summap_fir = tf.concat([map1_fir, map2_fir, map3_fir],3)


    #
    with tf.name_scope('Depth-Map-Block_fir') as scope:
      conv7_fir = layers.conv2d(summap_fir, num_outputs = 28, scope='FirstAMIN/conv7')
      dp1_fir = tf.layers.dropout(conv7_fir,rate = 0, training = training_nn, name = 'FirstAMIN/dropout2')
      conv8_fir = layers.conv2d(dp1_fir, num_outputs =16 , scope='FirstAMIN/conv8')
     


  with arg_scope( [layers.conv2d],
             kernel_size = 3,
             weights_initializer = tf.random_normal_initializer(stddev=0.02),
             biases_initializer  = None, #

             activation_fn= None, 
             normalizer_fn= None,
             padding='SAME',
             reuse=_reuse,
             stride=1):   
    # 
    conv11_fir = layers.conv2d(conv8_fir, num_outputs = 1, scope='FirstAMIN/conv11')
    tf.summary.image('ZeroOneMap', tf.cast(256*conv11_fir,tf.uint8), max_outputs=FLAGS.batch_size)  
  
    
  with arg_scope( [layers.conv2d],
             kernel_size = 3,
             weights_initializer = tf.random_normal_initializer(stddev=0.02),
             biases_initializer  = tf.constant_initializer(0.0),
             activation_fn=tf.nn.elu, 
             normalizer_fn=layers.batch_norm,
             normalizer_params=batch_norm_params,
             trainable = training_nn,
             padding='SAME',
             reuse=_reuse,
             stride=1):   


    #
    with tf.name_scope('Score-Map-Block09') as scope:
      summap_fir = tf.image.resize_images(summap_fir,[256,256])
      conv9_fir = layers.conv2d(summap_fir, num_outputs = 28, scope='FirstAMIN/conv9')
      conv10_fir = layers.conv2d(conv9_fir, num_outputs = 24, scope='FirstAMIN/conv10')
      #

      conv12_fir = layers.conv2d(conv10_fir, num_outputs = 20, scope='FirstAMIN/conv12')
      conv13_fir = layers.conv2d(conv12_fir, num_outputs = 20, scope='FirstAMIN/conv13')
      #
      conv14_fir = layers.conv2d(conv13_fir, num_outputs = 20, scope='FirstAMIN/conv14')
      conv15_fir = layers.conv2d(conv14_fir, num_outputs = 16, scope='FirstAMIN/conv15')
      #
      conv16_fir = layers.conv2d(conv15_fir, num_outputs = 16, scope='FirstAMIN/conv16')



  with arg_scope( [layers.conv2d],
             kernel_size = 3,
             weights_initializer = tf.random_normal_initializer(stddev=0.002),
             biases_initializer  = None, #tf.constant_initializer(0.0),

             activation_fn= None, 
             normalizer_fn= None,
             padding='SAME',
             reuse=_reuse,
             stride=1): 
      conv17 = layers.conv2d(conv16_fir, num_outputs = 6, scope='FirstAMIN/conv17')
      
      thirdPart_comp_1 = tf.complex(conv17, tf.zeros_like(conv17))
      thirdPart_comp_1=tf.transpose(thirdPart_comp_1, perm=[0,3,1,2])

      thirdPart_fft_1=tf.abs(tf.fft2d(thirdPart_comp_1, name='summap_fft_real_1'))
      thirdPart_fft_1=tf.transpose(thirdPart_fft_1, perm=[0,2,3,1])
      thirdPart_fft_1=tf.log1p(thirdPart_fft_1[:,32:256-32,32:256-32,:])




      #
      Live_est1= images-conv17/45  
      Live_est_mask = tf.cast(tf.greater(Live_est1,0),tf.float32)                             
      Live_est=Live_est1*Live_est_mask
      #



#################################################################################################################################

  with arg_scope( [layers.conv2d],
             kernel_size = 3,
             weights_initializer = tf.random_normal_initializer(stddev=0.02),
             biases_initializer  = tf.constant_initializer(0.0),
             activation_fn=tf.nn.elu, 
             normalizer_fn=layers.batch_norm,
             normalizer_params=batch_norm_params,
             trainable = training_nn,
             padding='SAME',
             reuse=_reuse,
             stride=1):   
 

    # Score Map Branch
    with tf.name_scope('Score-Map-Block1_dis') as scope:
     
      conv9_dis = layers.conv2d(Live_est, num_outputs = 24, scope='ThirdAMIN/conv9')
      conv10_dis = layers.conv2d(conv9_dis, num_outputs = 20, scope='ThirdAMIN/conv10')
      pool1_dis  = layers.max_pool2d(conv10_dis, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool1')

      conv12_dis = layers.conv2d(pool1_dis, num_outputs = 20, scope='ThirdAMIN/conv12')
      conv13_dis = layers.conv2d(conv12_dis, num_outputs = 16, scope='ThirdAMIN/conv13')
      pool2_dis  = layers.max_pool2d(conv13_dis, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool2')

      conv14_dis = layers.conv2d(pool2_dis, num_outputs = 12, scope='ThirdAMIN/conv14')
      conv15_dis = layers.conv2d(conv14_dis, num_outputs = 6, scope='ThirdAMIN/conv15')
      pool3_dis  = layers.max_pool2d(conv15_dis, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool3')

      conv16_dis = layers.conv2d(pool3_dis, num_outputs = 1, scope='ThirdAMIN/conv16')


      conv20_dis=tf.reshape(conv16_dis, [6,32*32])
      sc333_dis  = layers.fully_connected(conv20_dis, num_outputs = 100, reuse=_reuse, scope='ThirdAMIN/bconv15_sc333_dis')

      dp1_dis = tf.layers.dropout(sc333_dis,rate = 0.2, training = training_nn, name = 'dropout3')
      
      sc  = layers.fully_connected(dp1_dis, num_outputs = 2, reuse=_reuse,
             weights_initializer = tf.random_normal_initializer(stddev=0.02),
             biases_initializer  = None, #tf.constant_initializer(0.0),

             activation_fn= None, 
             normalizer_fn= None,scope='ThirdAMIN/bconv10_sc')


      conv9_dis2 = layers.conv2d(images, num_outputs = 24, reuse= True, scope='ThirdAMIN/conv9')
      conv10_dis2 = layers.conv2d(conv9_dis2, num_outputs = 20,  reuse= True, scope='ThirdAMIN/conv10')
      pool1_dis2  = layers.max_pool2d(conv10_dis2, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool1')

      conv12_dis2 = layers.conv2d(pool1_dis2, num_outputs = 20,reuse= True, scope='ThirdAMIN/conv12')
      conv13_dis2 = layers.conv2d(conv12_dis2, num_outputs = 16, reuse= True,    scope='ThirdAMIN/conv13')
      pool2_dis2  = layers.max_pool2d(conv13_dis2, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool2')

      conv14_dis2 = layers.conv2d(pool2_dis2, num_outputs = 12,  reuse= True, scope='ThirdAMIN/conv14')
      conv15_dis2 = layers.conv2d(conv14_dis2, num_outputs = 6,  reuse= True, scope='ThirdAMIN/conv15')
      pool3_dis2  = layers.max_pool2d(conv15_dis2, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='ThirdPool3')

      conv16_dis2 = layers.conv2d(pool3_dis2, num_outputs = 1,  reuse= True, scope='ThirdAMIN/conv16')


      conv20_dis2=tf.reshape(conv16_dis2, [6,32*32])
      sc333_dis2  = layers.fully_connected(conv20_dis2,  reuse= True, num_outputs = 100,scope='ThirdAMIN/bconv15_sc333_dis')

      dp1_dis2 = tf.layers.dropout(sc333_dis2,rate = 0.2, training = training_nn, name = 'dropout4')
      
      sc2  = layers.fully_connected(dp1_dis2, num_outputs = 2,  reuse= True, 
             weights_initializer = tf.random_normal_initializer(stddev=0.02),
             biases_initializer  = None, #tf.constant_initializer(0.0),

             activation_fn= None, 
             normalizer_fn= None,scope='ThirdAMIN/bconv10_sc')
##################################################################################################################################

  batch_norm_decay = 0.9
  batch_norm_epsilon = 1e-5
  batch_norm_scale = True
  batch_norm_params = { 
    'is_training': False,
    'decay': batch_norm_decay,
    'epsilon': batch_norm_epsilon,
    'scale': batch_norm_scale,
    'updates_collections': None, #
    'trainable':False,
    #'reuse':True
  } 
  with arg_scope( [layers.conv2d],
             kernel_size = 3,
             weights_initializer = tf.random_normal_initializer(stddev=0.02),
             biases_initializer  = tf.constant_initializer(0.0),
             activation_fn=tf.nn.elu, 
             normalizer_fn=layers.batch_norm,
             normalizer_params=batch_norm_params,
             trainable = False,
             padding='SAME',
             reuse=True,
             stride=1): 
 #################################################################################################################################

    conv0_new = layers.conv2d(Live_est,num_outputs = 64, scope='SecondAMIN/conv0')
    with tf.name_scope('convBlock-1_new') as scope:
          conv1_new  = layers.conv2d(conv0_new,num_outputs = 128, scope='SecondAMIN/conv1')
          bconv1_new = layers.conv2d(conv1_new,num_outputs = 196, scope='SecondAMIN/bconv1')
          conv2_new  = layers.conv2d(bconv1_new, num_outputs = 128, scope='SecondAMIN/conv2')
          pool1_new  = layers.max_pool2d(conv2_new, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool1')

    with tf.name_scope('convBlock-2_new') as scope:
          conv3_new  = layers.conv2d(pool1_new, num_outputs = 128, scope='SecondAMIN/conv3')
          bconv2_new = layers.conv2d(conv3_new, num_outputs = 196, scope='SecondAMIN/bconv2')
          conv4_new  = layers.conv2d(bconv2_new, num_outputs = 128, scope='SecondAMIN/conv4')
          pool2_new  = layers.max_pool2d(conv4_new, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool2')

    with tf.name_scope('convBlock-3_new') as scope:
          conv5_new  = layers.conv2d(pool2_new, num_outputs = 128, scope='SecondAMIN/conv5')
          bconv3_new = layers.conv2d(conv5_new, num_outputs = 196, scope='SecondAMIN/bconv3')
          conv6_new  = layers.conv2d(bconv3_new, num_outputs = 128, scope='SecondAMIN/conv6')
          pool3_new  = layers.avg_pool2d(conv6_new, kernel_size=[3, 3], stride=[2, 2], padding='SAME', scope='SecondAMIN/pool3')

    map1_new = tf.image.resize_images(pool1_new,[32,32])
    map2_new = tf.image.resize_images(pool2_new,[32,32])
    map3_new = tf.image.resize_images(pool3_new,[32,32])
      
    summap_new = tf.concat([map1_new, map2_new, map3_new],3)
          
    # Depth Map Branch
    with tf.name_scope('Depth-Map-Block_new') as scope:
      conv7_new = layers.conv2d(summap_new, num_outputs = 128, scope='SecondAMIN/conv7')
      dp1_new = tf.layers.dropout(conv7_new,rate = 0.2, training = training_nn, name = 'SecondAMIN/dropout1')
      conv8_new = layers.conv2d(dp1_new, num_outputs = 64, scope='SecondAMIN/conv8')
  

  with arg_scope( [layers.conv2d],
             kernel_size = 3,
             weights_initializer = tf.random_normal_initializer(stddev=0.02),
             biases_initializer  = tf.constant_initializer(0.0),
             activation_fn= None, 
             normalizer_fn= None,
             padding='SAME',
                     trainable = False,
             reuse=True,
             stride=1):   
    # Depth Map Branch
    conv11_new = layers.conv2d(conv8_new, num_outputs = 1, scope='SecondAMIN/conv11')






    label_Amin1=size
    LabelsWholeImage=tf.cast(np.ones([6,32,32,1]), tf.float32)
    LabelsWholeImage2=LabelsWholeImage*tf.reshape(tf.cast(1-label_Amin1,tf.float32),[6,1,1,1])
    LabelsWholeImage=labels*tf.reshape(tf.cast(label_Amin1,tf.float32),[6,1,1,1])

    Z_GT2=np.zeros([6,3,3,1])
    Z_GT2[:,1,1,:]=1
    GT2=tf.cast(Z_GT2, tf.float32)


    tf.summary.image('GT2', LabelsWholeImage[:,:,:,0:1], max_outputs=FLAGS.batch_size) 
    tf.summary.image('SC', tf.cast(256*conv11[:,:,:,0:1],tf.uint8), max_outputs=FLAGS.batch_size) 

    tf.summary.image('Live_SC', tf.cast(256*conv11_new[:,:,:,0:1],tf.uint8), max_outputs=FLAGS.batch_size) 
    tf.summary.image('Live', tf.cast(256*Live_est[:,:,:,3:6],tf.uint8), max_outputs=FLAGS.batch_size) 
    tf.summary.image('inputImage', tf.cast(256*images[:,:,:,3:6],tf.uint8), max_outputs=FLAGS.batch_size) 
    tf.summary.image('GT3_Artifact', LabelsWholeImage2[:,:,:,0:1], max_outputs=FLAGS.batch_size) 
    tf.summary.image('Artifact', conv17[:,:,:,3:6], max_outputs=FLAGS.batch_size)
    return Live_est, conv17, conv11, GT2,conv17,images,thirdPart_fft_1,LabelsWholeImage, conv11_new,conv11_new  , LabelsWholeImage2, sc, sc2, conv11_fir
Example #49
0
        if activation == "relu":
            return tf.nn.relu(z)
        else:
            return z


with tf.name_scope("dnn"):
    hidden1 = neuron_layer(X, n_hidden1, "hidden1", activation="relu")
    hidden2 = neuron_layer(hidden1, n_hidden2, "hidden2", activation="relu")
    # 进入到softmax之前的结果
    logits = neuron_layer(hidden2, n_outputs, "outputs")
'''

# Network body: two hidden layers feeding a linear output layer.
with tf.name_scope("dnn"):
    # fully_connected creates each layer's weights and biases and chooses a
    # suitable initialisation for them; unless told otherwise it applies ReLU.
    hidden1 = fully_connected(inputs=X, num_outputs=n_hidden1, scope="hidden1")
    hidden2 = fully_connected(inputs=hidden1, num_outputs=n_hidden2,
                              scope="hidden2")
    # activation_fn=None: keep the raw scores that go into the softmax.
    logits = fully_connected(inputs=hidden2, num_outputs=n_outputs,
                             activation_fn=None, scope="outputs")

with tf.name_scope("loss"):
    # Cross-entropy averaged over the samples.  This op is equivalent to
    # applying softmax and then computing the cross-entropy, but is more
    # efficient.  The "sparse" variant takes class indices (0-9) as labels,
    # whereas softmax_cross_entropy_with_logits takes one-hot labels.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=y)
    loss = tf.reduce_mean(input_tensor=xentropy, name="loss")

learning_rate = 0.01

with tf.name_scope("train"):
    # Plain gradient descent on the mean cross-entropy.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    training_op = optimizer.minimize(loss=loss)
Example #50
0
# Layer widths: the output layer has the same width as the input.
n_hidden1, n_hidden2, n_hidden3 = 300, 150, 300
n_outputs = n_inputs

learning_rate = 0.001  # learning rate handed to the gradient-based optimizer
l2_reg = 0.01

# Build the network: an MLP-based autoencoder.
x = tf.placeholder(dtype=tf.float32, shape=(None, n_inputs),
                   name="input_tensor")
# Every fully_connected call inside the scope shares the ELU activation, the
# variance-scaling initializer and the L2 weight regularizer.
with tf.contrib.framework.arg_scope(
        [fully_connected],
        activation_fn=tf.nn.elu,
        weights_initializer=tf.contrib.layers.variance_scaling_initializer(),
        weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg)):
    hidden1 = fully_connected(inputs=x, num_outputs=n_hidden1)
    hidden2 = fully_connected(inputs=hidden1, num_outputs=n_hidden2)
    hidden3 = fully_connected(inputs=hidden2, num_outputs=n_hidden3)
    # The output layer overrides the scope default and stays linear.
    outputs = fully_connected(inputs=hidden3, num_outputs=n_outputs,
                              activation_fn=None)

# Mean squared reconstruction error (MSE).
reconstruction_loss = tf.reduce_mean(input_tensor=tf.square(outputs - x))
# Total loss = reconstruction error + every collected regularization term.
reg_losses = tf.get_collection(
    key=tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
loss = tf.add_n(inputs=[reconstruction_loss] + reg_losses)

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss=loss)
init = tf.global_variables_initializer()

# Setup for the training session starts here.
n_epochs = 5
def main(unused_argv):
    """Run one task of a distributed MNIST training job.

    Loads MNIST, starts a `tf.train.Server` for this task and then either
    joins the server (job "ps") or, as a worker, builds a small conv net,
    trains it until the global step reaches `FLAGS.train_steps`, and prints
    the mean accuracy over ten 1000-example test batches.

    Args:
        unused_argv: not referenced anywhere in the body.

    Raises:
        ValueError: if `FLAGS.task_index >= FLAGS.num_workers` or
            `FLAGS.num_parameter_servers <= 0`.
    """
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    # Stop right after the data-set call when only a download was requested.
    if FLAGS.download_only:
        sys.exit(0)


    # Sanity check on the number of workers and the worker index #
    if FLAGS.task_index >= FLAGS.num_workers:
        raise ValueError("Worker index %d exceeds number of workers %d " % 
                         (FLAGS.task_index, FLAGS.num_workers))

    # Sanity check on the number of parameter servers #
    if FLAGS.num_parameter_servers <= 0:
        raise ValueError("Invalid num_parameter_servers value: %d" % 
                         FLAGS.num_parameter_servers)

    # Split the host flags into individual entries: each match is a run of
    # word characters, '.' and ':'.
    ps_hosts = re.findall(r'[\w\.:]+', FLAGS.ps_hosts)
    worker_hosts = re.findall(r'[\w\.:]+', FLAGS.worker_hosts)
    server = tf.train.Server({"ps":ps_hosts,"worker":worker_hosts}, job_name=FLAGS.job_name, task_index=FLAGS.task_index)

    print("GRPC URL: %s" % server.target)
    print("Task index = %d" % FLAGS.task_index)
    print("Number of workers = %d" % FLAGS.num_workers)

    # NOTE(review): is_chief is bound only on the worker branch; everything
    # below relies on server.join() not returning for "ps" tasks -- confirm.
    if FLAGS.job_name == "ps":
        server.join()
    else:
        is_chief = (FLAGS.task_index == 0)

    # NOTE(review): replicas_to_aggregate is not read again in this function;
    # the sync-replica code further down is disabled inside string literals.
    if FLAGS.sync_replicas:
        if FLAGS.replicas_to_aggregate is None:
            replicas_to_aggregate = FLAGS.num_workers
        else:
            replicas_to_aggregate = FLAGS.replicas_to_aggregate

    # Construct device setter object #
    device_setter = get_device_setter(FLAGS.num_parameter_servers,
                                      FLAGS.num_workers)

    # The device setter will automatically place Variables ops on separate        #
    # parameter servers (ps). The non-Variable ops will be placed on the workers. #
    with tf.device(device_setter):
        global_step = tf.Variable(0, name="global_step", trainable=False)
        with tf.name_scope('input'):
            # input #
            x = tf.placeholder(tf.float32, shape=[None, 784], name="x-input")
            x_image = tf.reshape(x, [-1,28,28,1])
            # label, 10 output classes #
            y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y-input")
            # Fed as the dropout keep probability (0.8 in training, 1.0 in
            # evaluation, see the feed dicts below).
            prob = tf.placeholder(tf.float32, name='keep_prob')

        # Model: two 3x3 conv layers (64 outputs each) -> 2x2 max-pool ->
        # flatten -> FC 512 -> dropout -> FC 128 -> dropout -> logistic
        # regression.  Weights and biases all carry an L2 regularizer (0.1).
        stack1_conv1 = layers.convolution2d(x_image,
                                            64,
                                            [3,3],
                                            weights_regularizer=layers.l2_regularizer(0.1),
                                            biases_regularizer=layers.l2_regularizer(0.1),
                                            scope='stack1_Conv1')
        stack1_conv2 = layers.convolution2d(stack1_conv1,
                                            64,
                                            [3,3],
                                            weights_regularizer=layers.l2_regularizer(0.1),
                                            biases_regularizer=layers.l2_regularizer(0.1),
                                            scope='stack1_Conv2')
        stack1_pool = layers.max_pool2d(stack1_conv2,
                                        [2,2],
                                        padding='SAME',
                                        scope='stack1_Pool')
        # Named "stack3" although it flattens the stack-1 pooling output.
        stack3_pool_flat = layers.flatten(stack1_pool, scope='stack3_pool_flat')
        fcl1 = layers.fully_connected(stack3_pool_flat, 
                                      512, 
                                      weights_regularizer=layers.l2_regularizer(0.1), 
                                      biases_regularizer=layers.l2_regularizer(0.1), 
                                      scope='FCL1')
        fcl1_d = layers.dropout(fcl1, keep_prob=prob, scope='dropout1')
        fcl2 = layers.fully_connected(fcl1_d, 
                                      128, 
                                      weights_regularizer=layers.l2_regularizer(0.1), 
                                      biases_regularizer=layers.l2_regularizer(0.1), 
                                      scope='FCL2')
        fcl2_d = layers.dropout(fcl2, keep_prob=prob, scope='dropout2')
        # y is the prediction compared against y_ in the accuracy scope;
        # cross_entropy is the loss that gets optimized.
        y, cross_entropy = skflow.models.logistic_regression(fcl2_d, y_, init_stddev=0.01)
        tf.scalar_summary('cross_entropy', cross_entropy)

        with tf.name_scope('train'):
            # Learning rate starts at 0.001 and decays by 0.5 per 1000 global
            # steps (staircase=False).
            start_l_rate = 0.001
            decay_step = 1000
            decay_rate = 0.5
            learning_rate = tf.train.exponential_decay(start_l_rate, global_step, decay_step, decay_rate, staircase=False)
            grad_op = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
            '''rep_op = tf.train.SyncReplicasOptimizer(grad_op, 
                                                    replicas_to_aggregate=len(workers),
                                                    replica_id=FLAGS.task_index, 
                                                    total_num_replicas=len(workers)) # also belong to the same class as other optimizers'''
            # NOTE(review): grad_op already carries the decaying rate, yet a
            # literal 0.001 is passed as well -- confirm which one
            # optimize_loss applies when it is given an optimizer instance.
            train_op = tf.contrib.layers.optimize_loss(loss=cross_entropy, 
                                                       global_step=global_step, 
                                                       learning_rate=0.001, 
                                                       optimizer=grad_op, 
                                                       clip_gradients=1)
            tf.scalar_summary('learning_rate', learning_rate)

        '''if FLAGS.sync_replicas and is_chief:
            # Initial token and chief queue runners required by the sync_replicas mode #
            chief_queue_runner = opt.get_chief_queue_runner()
            init_tokens_op = opt.get_init_tokens_op()'''

        with tf.name_scope('accuracy'):
            # Fraction of examples whose arg-max prediction equals the arg-max
            # of the label vector.
            correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
            tf.scalar_summary('accuracy', accuracy)


        merged = tf.merge_all_summaries()
        init_op = tf.initialize_all_variables()
        sv = tf.train.Supervisor(is_chief=is_chief,
                                 init_op=init_op,
                                 recovery_wait_secs=1,
                                 global_step=global_step)

        # Device filters restrict this session to the ps job and this task's
        # own worker device.
        sess_config = tf.ConfigProto(allow_soft_placement=True,
                                     log_device_placement=False,
                                     device_filters=["/job:ps", "/job:worker/task:%d" % FLAGS.task_index])

        # The chief worker (task_index==0) session will prepare the session,   #
        # while the remaining workers will wait for the preparation to complete. #
        if is_chief:
            print("Worker %d: Initializing session..." % FLAGS.task_index)
        else:
            print("Worker %d: Waiting for session to be initialized..." % FLAGS.task_index)

        sess = sv.prepare_or_wait_for_session(server.target,
                                              config=sess_config)


        # Start from an empty summary directory.
        # NOTE(review): every worker reaching this point performs the
        # delete/create, not only the chief -- confirm that is intended.
        if tf.gfile.Exists('./summary/train'):
            tf.gfile.DeleteRecursively('./summary/train')
        tf.gfile.MakeDirs('./summary/train')

        train_writer = tf.train.SummaryWriter('./summary/train', sess.graph)
        print("Worker %d: Session initialization complete." % FLAGS.task_index)

        '''if FLAGS.sync_replicas and is_chief:
            # Chief worker will start the chief queue runner and call the init op #
            print("Starting chief queue runner and running init_tokens_op")
            sv.start_queue_runners(sess, [chief_queue_runner])
            sess.run(init_tokens_op)'''

        ## Perform training ##
        time_begin = time.time()
        print("Training begins @ %s" % time.ctime(time_begin))

        local_step = 1
        while True:
            # Training feed #
            batch_xs, batch_ys = mnist.train.next_batch(FLAGS.batch_size)
            train_feed = {x: batch_xs,
                          y_: batch_ys,
                          prob: 0.8}
            # Every step runs with FULL_TRACE; the metadata and summaries are
            # only written out on even local steps (see below).
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            _, step, loss, summary = sess.run([train_op, global_step, cross_entropy, merged], feed_dict=train_feed, options=run_options, run_metadata=run_metadata)

            now = time.time()
            if(local_step % 2 == 0):
                print("%s: Worker %d: training step %d done (global step: %d), loss: %.6f" %
                   (time.ctime(now), FLAGS.task_index, local_step, step+1, loss))
                train_writer.add_run_metadata(run_metadata, 'step'+str(step+1))
                train_writer.add_summary(summary, step+1)

            # Stop once the shared global step reaches the requested total.
            if step+1 >= FLAGS.train_steps:
              break
            local_step += 1

        time_end = time.time()
        print("Training ends @ %s" % time.ctime(time_end))
        training_time = time_end - time_begin
        print("Training elapsed time: %f s" % training_time)


        # memory issue occured, split testing data into batch #
        # 10 batches of 1000 examples; prob is fed as 1.0 for evaluation and
        # the ten per-batch accuracies are averaged.
        acc_acu = 0.
        for i in xrange(int(10000/1000)):
            test_x, test_y = mnist.test.next_batch(1000)
            acc_batch = sess.run(accuracy, feed_dict={x: test_x, y_: test_y, prob: 1.0})
            print(acc_batch)
            acc_acu += acc_batch
        acc = acc_acu/10.0
        print ("test accuracy %g" % acc)
        sv.stop()
Example #52
0

with tf.name_scope('dnn'):
    hidden1 = my_fully_connected(X, 300, 'hidden1', activation='relu')
    hidden2 = my_fully_connected(hidden1, 100, 'hidden2', activation='relu')
    logits = my_fully_connected(hidden2, 10, 'outputs')
"""

## USING TF.CONTRIB.LAYERS FULLY CONNECTED LAYER
from tensorflow.contrib.layers import fully_connected

##########################
### CONSTRUCTION PHASE ###
##########################
# Two hidden layers (300 and 100 units, fully_connected's default activation)
# followed by a linear 10-unit output layer that produces the logits.
with tf.name_scope('dnn'):
    hidden1 = fully_connected(X, 300, scope='hidden1')
    hidden2 = fully_connected(hidden1, 100, scope='hidden2')
    logits = fully_connected(hidden2, 10, scope='outputs', activation_fn=None)

with tf.name_scope('loss'):
    # labels/logits must be passed by keyword: from TensorFlow 1.0 on the
    # first positional slot is a sentinel and a positional call raises
    # ValueError.  Before that the positional order was (logits, labels), so
    # the keyword form is the one spelling that works on both sides.
    xentropy = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

lr = 1e-3
with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    # y is a per-class label vector (it is arg-maxed along axis 1 here), so
    # reduce it to class indices before the top-1 membership test.
    correct = tf.nn.in_top_k(logits, tf.argmax(y, axis=1), 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    def __init__(self,
                 config: BaselineConfig,
                 is_training,
                 features,
                 init_embedding=None):
        """Constructor for BaselineModel (embedding -> BiLSTM -> CRF).

        Args:
          config: `BaselineConfig` instance. NOTE: when `is_training` is false
            its `embedding_dropout_prob` and `hidden_dropout_prob` fields are
            overwritten with 0.0 in place, so the caller's object is modified.
          is_training: bool. True for training model, false for eval model.
            Controls whether dropout will be applied.
          features: mapping that provides the entries below.
            "input_ids": int64 Tensor of shape
              [batch_size, seq_length, feat_size] (rank 3 is requested from
              `model_utils.get_shape_list`).
            "label_ids": int64 Tensor of shape [batch_size, seq_length].
            "seq_length": int64 Tensor of shape [batch_size].
          init_embedding: (optional) initial value for the embedding table.
            When None, a [vocab_size, embedding_size] variable is created with
            a truncated-normal initializer (stddev 0.02).

        Raises:
          ValueError: The config is invalid or one of the input tensor shapes
            is invalid (presumably raised by the rank check in
            `model_utils.get_shape_list` -- confirm).
        """

        # Two-argument form on purpose: `super(BaselineModel)` alone yields an
        # unbound super object, so the base-class initializer never ran.
        super(BaselineModel, self).__init__()
        input_ids = features["input_ids"]
        seq_length = features["seq_length"]
        label_ids = features["label_ids"]

        self.input_ids = input_ids
        self.label_ids = label_ids
        self.seq_length = seq_length
        self.is_training = is_training
        input_shape = model_utils.get_shape_list(input_ids, expected_rank=3)
        self.batch_size = input_shape[0]
        self.max_length = input_shape[1]
        self.window_size = input_shape[2]

        # Evaluation graphs run without dropout.
        if not is_training:
            config.embedding_dropout_prob = 0.0
            config.hidden_dropout_prob = 0.0

        if init_embedding is None:
            self.embedding = tf.get_variable(
                shape=[config.vocab_size, config.embedding_size],
                dtype=tf.float32,
                name='embedding',
                initializer=tf.truncated_normal_initializer(stddev=0.02))
        else:
            self.embedding = tf.Variable(init_embedding,
                                         dtype=tf.float32,
                                         name='embedding')

        with tf.variable_scope('embedding'):
            x = tf.nn.embedding_lookup(self.embedding, self.input_ids)
            feat_size = self.window_size
            # Merge the window axis into the embedding axis so every position
            # carries the concatenated embeddings of its whole window.
            x = tf.reshape(
                x, [self.batch_size, -1, feat_size * config.embedding_size])

        x = model_utils.dropout(x, config.embedding_dropout_prob)

        def lstm_cell(dim):
            # Stack of `config.num_hidden_layers` LSTM layers with output
            # dropout.  Each layer gets its own cell object: repeating one
            # instance (`[cell] * n`) makes every layer use the same weights,
            # and fails outright when the layer input widths differ.
            stacked = []
            for _ in range(config.num_hidden_layers):
                cell = tf.nn.rnn_cell.LSTMCell(dim, name='basic_lstm_cell')
                cell = rnn.DropoutWrapper(cell,
                                          output_keep_prob=1.0 -
                                          config.hidden_dropout_prob)
                stacked.append(cell)
            return tf.nn.rnn_cell.MultiRNNCell(stacked)

        with tf.variable_scope('rnn'):
            (forward_output,
             backward_output), _ = tf.nn.bidirectional_dynamic_rnn(
                 cell_fw=lstm_cell(config.hidden_size),
                 cell_bw=lstm_cell(config.hidden_size),
                 inputs=x,
                 sequence_length=self.seq_length,
                 dtype=tf.float32)
            # Concatenate both directions along the feature axis.
            output = tf.concat([forward_output, backward_output], axis=2)

        with tf.variable_scope('output'):
            # Per-position class scores, left linear for the CRF.
            scores = layers.fully_connected(inputs=output,
                                            num_outputs=config.num_classes,
                                            activation_fn=None)
            transition_param = tf.get_variable(
                "transitions", [config.num_classes, config.num_classes])
            self.prediction, _ = crf.crf_decode(scores, transition_param,
                                                self.seq_length)

        with tf.variable_scope('loss'):
            # crf
            self.log_likelihood, _ = crf.crf_log_likelihood(
                scores, self.label_ids, self.seq_length, transition_param)

            # Mean negative log-likelihood.
            self.loss = tf.reduce_mean(-self.log_likelihood)
    def __init__(self, args):
        """
        Assemble the CNN + GRU trajectory graph from the given settings.

        :param args: model configuration; the fields read here are batch_size,
            feature_size, mode, patch_size, target_size_cnn, filter_size,
            dropout, discrete_timestamps, rnn_size, seq_length, n_layers,
            keep_prob and learning_rate
        """
        # ---- general settings ----
        self._batch_size = args.batch_size
        self._feature_size = args.feature_size
        self._mode = args.mode

        # ---- CNN settings: single-channel, square context patch ----
        self._in_channel = 1
        self._in_width = args.patch_size  # context w
        self._in_height = args.patch_size  # context h
        self._target_size_cnn = args.target_size_cnn
        self._filter_size = args.filter_size
        self._dropout = args.dropout

        # ---- RNN settings ----
        self._discrete_steps = len(args.discrete_timestamps)
        # width of the fully connected layer that follows the rnn
        self._target_size = self._discrete_steps * args.feature_size

        self._rnn_size = args.rnn_size
        self._seq_length = args.seq_length
        self._n_layers = args.n_layers
        self._keep_prob = args.keep_prob
        self._lr = args.learning_rate

        # step counter handed to the optimizer
        self._global_step = tf.Variable(0, trainable=False)
        # every sequence in the batch is given the same, full length
        self._sequence_length = tf.convert_to_tensor(
            [self._seq_length for _ in range(self._batch_size)])

        # ---- graph inputs ----
        trajectory_shape = [
            self._batch_size, self._seq_length, self._feature_size
        ]
        context_shape = [
            self._batch_size * self._seq_length, self._in_width,
            self._in_height, self._in_channel
        ]
        target_shape = [
            self._batch_size, self._seq_length, self._discrete_steps,
            self._feature_size
        ]

        # observed (past) trajectory
        self._input_data = tf.placeholder(tf.float32,
                                          shape=trajectory_shape,
                                          name='input_data')
        # context patches, one per (sample, time step), fed to the CNN
        self._context_input = tf.placeholder(tf.float32,
                                             shape=context_shape,
                                             name='context_data')
        # ground-truth trajectory
        self._target_data = tf.placeholder(tf.float32,
                                           shape=target_shape,
                                           name='target_data')
        # delta form of the ground truth; this attribute is re-bound to a
        # computed tensor inside the 'loss' scope below
        self._target_data_delta = tf.placeholder(tf.float32,
                                                 shape=target_shape,
                                                 name='target_data_delta')

        def conv_relu_pool(tensor, channels, kernel, step):
            # one CNN unit on a 4D tensor: conv -> relu -> 2x2 max pool
            # (pool stride 1)
            activated = layers.conv2d(inputs=tensor,
                                      num_outputs=channels,
                                      kernel_size=kernel,
                                      stride=step,
                                      activation_fn=tf.nn.relu)
            return tf.contrib.layers.max_pool2d(activated,
                                                kernel_size=[2, 2],
                                                stride=1)

        # ---- CNN ----
        # (N * seq_length, patch_size, patch_size, 1) -> 4 conv units -> 3 fc
        # -> (N * seq_length, target_size_cnn)
        conv_specs = (
            # (num_channel, kernel_size, stride)
            (4, 11, 7),
            (8, 7, 5),
            (16, 5, 3),
            (8, 3, 1),
        )
        with tf.variable_scope("conv_net"):
            cnn_out = self._context_input
            for channels, kernel, step in conv_specs:
                cnn_out = conv_relu_pool(cnn_out, channels, kernel, step)

            cnn_out = tf.reshape(cnn_out,
                                 [self._batch_size * self._seq_length, -1])
            for width in (128, 64, self._target_size_cnn):
                cnn_out = layers.fully_connected(cnn_out, width)

            self._initial_output = cnn_out

        # (TensorBoard summaries of the CNN output distribution are not
        # created here.)

        # ---- RNN ----
        # assemble the rnn input
        with tf.variable_scope("build_complete_input"):

            # delta form of the observed trajectory
            input_delta = VRU2Model.input_data_delta_tf(self._input_data)
            self._tf_input_data = input_delta

            # cnn output back to (N, seq_length, target_size_cnn)
            context_features = tf.reshape(cnn_out,
                                          shape=[
                                              self._batch_size,
                                              self._seq_length,
                                              self._target_size_cnn
                                          ])

            # delta input joined with the cnn features, shape
            # (batch_size, seq_length, feature_size + target_size_cnn)
            complete_input = VRU2Model.concat_tensor(input_delta,
                                                     context_features)

            self._complete_input = complete_input

        def make_cell():
            # one GRU layer; output dropout is added only for training runs
            # whose keep probability is below 1
            gru = rnn.GRUCell(num_units=self._rnn_size)
            wants_dropout = self._mode == 'train' and self._keep_prob < 1.0
            if not wants_dropout:
                return gru
            return rnn.DropoutWrapper(cell=gru,
                                      output_keep_prob=self._keep_prob)

        with tf.variable_scope("rnn"):

            stacked_cell = rnn.MultiRNNCell(
                [make_cell() for _ in range(self._n_layers)])

            # all-zero starting state
            zero_state = stacked_cell.zero_state(batch_size=self._batch_size,
                                                 dtype=tf.float32)

            # rnn_output has shape (batch_size, seq_length, rnn_size)
            rnn_output, self._final_state = tf.nn.dynamic_rnn(
                cell=stacked_cell,
                inputs=complete_input,
                sequence_length=self._sequence_length,
                initial_state=zero_state,
                dtype=None,
                parallel_iterations=None,
                swap_memory=False,
                time_major=False,
                scope=None)

        with tf.variable_scope('fc_'):

            # (batch_size * seq_length, rnn_size)
            flat_rnn = tf.reshape(rnn_output, shape=[-1, self._rnn_size])

            # (batch_size * seq_length, target_size)
            projected = layers.fully_connected(flat_rnn, self._target_size)

            # (batch_size, seq_length, target_size)
            outputs = tf.reshape(
                projected,
                shape=[self._batch_size, self._seq_length, self._target_size])
            self._outputs = outputs

        with tf.variable_scope('loss'):

            # ground truth as deltas, shape
            # (batch_size, seq_length, discrete_steps, feature_size)
            self._target_data_delta = VRU2Model.target_data_delta_tf(
                self._input_data, self._target_data)

            # predictions in the same 4-D layout
            outputs = tf.reshape(outputs,
                                 shape=[
                                     -1, self._seq_length,
                                     self._discrete_steps, self._feature_size
                                 ])
            self._predicted_outputs = outputs

            # element-wise squared error: (x-x')^2, (y-y')^2
            squared_error = tf.square(
                tf.subtract(outputs, self._target_data_delta))
            # NOTE(review): the sum runs over axis 2, which in the 4-D layout
            # above is discrete_steps; a per-point distance
            # sqrt((x-x')^2 + (y-y')^2) would sum over the last axis instead
            # -- confirm which one is intended.
            distance = tf.sqrt(tf.reduce_sum(squared_error, 2))
            loss = tf.reduce_mean(distance)

            self._loss = loss

            # summary operations
            tf.summary.scalar('loss', self._loss)
            self._summary_op = tf.summary.merge_all()

        # predicted deltas converted back to absolute values
        with tf.variable_scope('predicted_final'):
            self._tf_predicted_outputs = VRU2Model.restore_delta_to_absolute(
                self._input_data, outputs)

        # inference graphs stop here: no optimizer is attached
        if self._mode == 'infer':
            return

        # parameter update op, used in the training phase
        with tf.variable_scope('optimizer'):
            adam = tf.train.AdamOptimizer(learning_rate=self._lr)
            self._train_op = adam.minimize(loss,
                                           global_step=self._global_step)
Example #55
0
def discriminator_lstm(self, inputs, lengths, reuse=False):
    """Build the LSTM discriminator graph inside the 'd_model' scope.

    Gaussian noise with std ``self.disc_noise_std`` is added to ``inputs``,
    the noisy tensor is fed through a stack of projected peephole LSTM
    layers via ``tf.nn.dynamic_rnn`` and a linear layer maps every RNN
    output to a single value.

    Args:
        inputs: Batch-major input tensor (``time_major=False``). The zero
            initial state is created for ``self.batch_size`` rows.
        lengths: Forwarded to ``dynamic_rnn`` as ``sequence_length``.
        reuse: When true the scope's variables are reused and the summary
            printing is skipped.

    Returns:
        The un-activated output of the final fully connected layer.
    """
    lstm_cell_size = 256
    num_projection = 40
    lstm_num_layer = 2

    # During cross validation BN should use the global mean / stddev.
    is_training = not self.cross_validation

    with tf.variable_scope('d_model') as scope:
        if reuse:
            scope.reuse_variables()

        # NOTE(review): the normalizer settings are currently referenced only
        # by the disabled projection layer kept below.
        if self.batch_norm:
            normalizer_fn = batch_norm
            normalizer_params = {
                "is_training": is_training,
                "scale": True,
                "renorm": True
            }
        else:
            normalizer_fn = None
            normalizer_params = None

        # L2 weight decay is only applied while training.
        if self.l2_scale > 0.0 and is_training:
            weights_regularizer = l2_regularizer(self.l2_scale)
        else:
            weights_regularizer = None

        sys.stdout.flush()
        if not reuse:
            print("*** Discriminator summary ***")
            print("D inputs shape: {}".format(inputs.get_shape()))

        # Apply input noise layer.
        inputs = gaussian_noise_layer(inputs, self.disc_noise_std)

        # Disabled input projection layer:
        # h = fully_connected(inputs, num_projection,
        #                     activation_fn=leakyrelu,
        #                     normalizer_fn=normalizer_fn,
        #                     normalizer_params=normalizer_params,
        #                     weights_initializer=xavier_initializer(),
        #                     weights_regularizer=weights_regularizer,
        #                     biases_initializer=tf.zeros_initializer())

        use_dropout = is_training and self.keep_prob < 1.0

        def make_layer():
            """Return one LSTM layer, dropout-wrapped when training with keep_prob < 1."""
            layer = tf.contrib.rnn.LSTMCell(lstm_cell_size,
                                            use_peepholes=True,
                                            initializer=xavier_initializer(),
                                            num_proj=num_projection,
                                            forget_bias=1.0,
                                            state_is_tuple=True,
                                            activation=tf.tanh,
                                            reuse=reuse)
            if use_dropout:
                layer = tf.contrib.rnn.DropoutWrapper(
                    layer, output_keep_prob=self.keep_prob)
            return layer

        cell = tf.contrib.rnn.MultiRNNCell(
            [make_layer() for _ in range(lstm_num_layer)],
            state_is_tuple=True)

        initial_states = cell.zero_state(self.batch_size, tf.float32)
        outputs, _ = tf.nn.dynamic_rnn(cell,
                                       inputs,
                                       sequence_length=lengths,
                                       initial_state=initial_states,
                                       dtype=tf.float32,
                                       time_major=False)

        if not reuse:
            print("D hidden layer number is {}".format(lstm_num_layer))
            print("D cell size is {}".format(lstm_cell_size))
            print("D projection num is {}".format(num_projection))
        sys.stdout.flush()

        # Output layer: one linear unit on top of every RNN output.
        y = fully_connected(outputs,
                            1,
                            activation_fn=None,
                            weights_initializer=xavier_initializer(),
                            weights_regularizer=weights_regularizer,
                            biases_initializer=tf.zeros_initializer())
        if not reuse:
            print("d output shape: {}".format(y.get_shape()))
            print("****************************************")
            sys.stdout.flush()
    return y
Example #56
0
    def infer(self, reuse):
        """Build the generator CNN graph inside the 'g_model' scope.

        ``self.inputs`` is reshaped to ``[rows, splice_dim, input_dim, 1]``
        where ``splice_dim = left_context + 1 + right_context``. It is then
        passed through two conv layers (32 and 64 filters, kernel
        ``[splice_dim, 11]``), two 512-unit fully connected layers and a
        linear output layer of ``cnn.output_dim`` units.

        Args:
            reuse: When true the scope's variables are reused and the layer
                summary printing is skipped.

        Returns:
            The un-activated output of the final fully connected layer.

        Raises:
            ValueError: If ``self.inputs`` is neither rank 2 nor rank 3.
        """
        cnn = self.cnn
        activation_fn = tf.nn.relu

        input_dim = cnn.input_dim
        splice_dim = cnn.left_context + 1 + cnn.right_context

        dims = self.inputs.get_shape().as_list()
        if len(dims) == 2:
            # shape format [batch, width]
            assert dims[0] == cnn.batch_size
            inputs = tf.reshape(self.inputs, [dims[0], splice_dim, input_dim])
        elif len(dims) == 3:
            # shape format [batch, length, width]; only batch == 1 is handled
            assert dims[0] == 1
            inputs = tf.squeeze(self.inputs, [0])
            inputs = tf.reshape(inputs, [-1, splice_dim, input_dim])
        else:
            raise ValueError(
                "inputs must have rank 2 or 3, got rank {}".format(len(dims)))
        # Add the trailing channel axis expected by conv2d.
        inputs = tf.expand_dims(inputs, -1)

        # During cross validation BN should use the global mean / stddev.
        is_training = not cnn.cross_validation

        with tf.variable_scope('g_model') as scope:
            if reuse:
                scope.reuse_variables()

            if cnn.batch_norm:
                normalizer_fn = batch_norm
                normalizer_params = {
                    "is_training": is_training,
                    "scale": True,
                    "renorm": True
                }
            else:
                normalizer_fn = None
                normalizer_params = None

            # L2 weight decay is only applied while training.
            if cnn.l2_scale > 0.0 and is_training:
                weights_regularizer = l2_regularizer(cnn.l2_scale)
            else:
                weights_regularizer = None

            if not reuse:
                print("*** Generator summary ***")
                print("G inputs shape: {}".format(inputs.get_shape()))

            # Convolution stack.
            # inputs format [batch, in_height, in_width, in_channels]
            filters_num = [32, 64]
            filters_width = [11, 11]
            assert len(filters_num) == len(filters_width)
            for i, (num_filters, width) in enumerate(
                    zip(filters_num, filters_width)):
                inputs = tf.contrib.layers.conv2d(
                    inputs,
                    num_filters, [splice_dim, width],
                    activation_fn=activation_fn,
                    normalizer_fn=normalizer_fn,
                    normalizer_params=normalizer_params,
                    weights_initializer=xavier_initializer(),
                    weights_regularizer=weights_regularizer,
                    biases_initializer=tf.zeros_initializer())
                if not reuse:
                    print("Conv{} layer output shape: {}".format(
                        i + 1, inputs.get_shape()),
                          end=" *** ")
                    self.nnet_info(normalizer_fn, cnn.keep_prob,
                                   weights_regularizer)

            # local3
            # Move everything into depth so we can perform a single matrix
            # multiply.
            reshape = tf.reshape(
                inputs, [-1, splice_dim * input_dim * filters_num[-1]])
            h = fully_connected(reshape,
                                512,
                                activation_fn=activation_fn,
                                normalizer_fn=normalizer_fn,
                                normalizer_params=normalizer_params,
                                weights_initializer=xavier_initializer(),
                                weights_regularizer=weights_regularizer,
                                biases_initializer=tf.zeros_initializer())
            if not reuse:
                print("Local3 layer output shape: {}".format(h.get_shape()),
                      end=" *** ")
                self.nnet_info(normalizer_fn, cnn.keep_prob,
                               weights_regularizer)

            # local4
            h = fully_connected(
                h,
                512,
                activation_fn=activation_fn,
                normalizer_fn=normalizer_fn,
                normalizer_params=normalizer_params,
                weights_initializer=xavier_initializer(),
                weights_regularizer=weights_regularizer,
                biases_initializer=tf.constant_initializer(0.1))
            if not reuse:
                print("Local4 layer output shape: {}".format(h.get_shape()),
                      end=" *** ")
                self.nnet_info(normalizer_fn, cnn.keep_prob,
                               weights_regularizer)

            # Linear output
            y = fully_connected(
                h,
                cnn.output_dim,
                activation_fn=None,
                weights_initializer=xavier_initializer(),
                weights_regularizer=weights_regularizer,
                biases_initializer=tf.constant_initializer(0.1))
            if not reuse:
                print("G output shape: {}".format(y.get_shape()))
                sys.stdout.flush()

        return y
def get_symble(input_image, **kargs):
    """Assemble the MobileNet-style graph and return its 'fc' layer output.

    Recognised keyword options (all optional, and all of ``kargs`` is also
    forwarded to every ``Conv_block`` / ``Inverted_residual_seq`` call):

    * ``w_decay`` (default 1e-5): scale of the L2 weight regulariser.
    * ``net_name`` (default 'mobilenet'): variable scope name; must be
      'mobilenet' or 'mobilenetv2' (case-insensitive).
    * ``width_mult`` (default 1.0): multiplier applied to the channel counts.
    * ``train_fg`` (default True): ``trainable`` flag of the final layer.
    * ``class_num`` (default 81): number of units in the final layer.

    Returns:
        The sigmoid-activated output of the final fully connected layer.
    """
    w_decay = kargs.get('w_decay', 1e-5)
    net_name = kargs.get('net_name', 'mobilenet')
    width_mult = kargs.get('width_mult', 1.0)
    train_fg = kargs.get('train_fg', True)
    class_num = kargs.get('class_num', 81)
    w_r = tfc.l2_regularizer(w_decay)

    allowed_names = ['mobilenet', 'mobilenetv2']
    assert net_name.lower() in allowed_names, \
        "Please sel netname: mobilenet or mobilenetv2"

    # Channel counts per stage, scaled by the width multiplier.
    base_channels = [32, 16, 24, 32, 64, 96, 160, 320, 1280]
    cn = [int(x * width_mult) for x in base_channels]

    with tf.variable_scope(net_name):
        net = Conv_block(input_image,
                         3,
                         filter_num=cn[0],
                         conv_stride=2,
                         relu_type='relu6',
                         name='cb1',
                         w_regular=w_r,
                         **kargs)
        # First residual sequence is built without seq_name / w_regular.
        net = Inverted_residual_seq(net, 1, cn[0], cn[1], 1, 1, **kargs)

        # Stages res2..res7 differ only in channels and the fifth positional
        # argument (presumably the stride -- confirm against
        # Inverted_residual_seq).
        fifth_args = (2, 2, 2, 2, 2, 1)
        for stage, fifth_arg in enumerate(fifth_args, start=2):
            net = Inverted_residual_seq(net,
                                        6,
                                        cn[stage - 1],
                                        cn[stage],
                                        fifth_arg,
                                        1,
                                        seq_name='res%d' % stage,
                                        w_regular=w_r,
                                        **kargs)

        net = Conv_block(net,
                         1,
                         filter_num=cn[8],
                         conv_stride=1,
                         relu_type='relu6',
                         name='cb2',
                         **kargs)
        pooled = GlobalAveragePooling2D(net, name='pool')
        return tfc.fully_connected(pooled,
                                   class_num,
                                   activation_fn=tf.sigmoid,
                                   trainable=train_fg,
                                   weights_regularizer=w_r,
                                   scope='fc')
    def __init__(self, max_seq_len, max_sent_len, num_classes, vocab_size,
                 embedding_size, max_grad_norm, dropout_keep_proba,
                 learning_rate):
        """Build the two-level (word, then sentence) classifier graph.

        Args:
            max_seq_len: Stored, and also used as the hidden size and the
                attention output size of the word-level encoder.
            max_sent_len: Used as the hidden size and the attention output
                size of the sentence-level encoder.
            num_classes: Width of ``input_y`` and of the logits layer.
            vocab_size: Number of rows of the embedding matrix.
            embedding_size: Number of columns of the embedding matrix.
            max_grad_norm: Stored on the instance; not used here.
            dropout_keep_proba: ``keep_prob`` of both dropout layers.
            learning_rate: Stored on the instance; not used here.
        """
        # Hyper-parameters
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.num_classes = num_classes
        self.max_seq_len = max_seq_len
        self.word_encoder_num_hidden = max_seq_len
        self.word_output_size = max_seq_len
        self.sentence_encoder_num_hidden = max_sent_len
        self.sentence_output_size = max_sent_len
        self.dropout_keep_proba = dropout_keep_proba
        self.max_grad_norm = max_grad_norm
        self.learning_rate = learning_rate

        # Graph inputs
        self.input_x = tf.placeholder(
            dtype=tf.int32, shape=[None, None, None], name="input_x")
        self.input_y = tf.placeholder(
            dtype=tf.int32, shape=[None, self.num_classes], name="input_y")
        self.word_lengths = tf.placeholder(
            dtype=tf.int32, shape=[None, None], name="word_lengths")
        self.sentence_lengths = tf.placeholder(
            dtype=tf.int32, shape=[None], name="sentence_lengths")
        self.train = tf.placeholder(dtype=tf.bool, name="train")

        # Dynamic sizes of the three input_x axes.
        input_dims = tf.unstack(tf.shape(self.input_x))
        self.document_size, self.sentence_size, self.word_size = input_dims

        with tf.device("/gpu:0"):
            with tf.name_scope("embedding_layer"):
                initial_table = tf.random_uniform(
                    [self.vocab_size, self.embedding_size], -1.0, 1.0)
                embedding_table = tf.Variable(
                    initial_table, dtype=tf.float32, name="w")
                self.input_x_embedded = tf.nn.embedding_lookup(
                    embedding_table, self.input_x)

        # Merge the document and sentence axes so that every sentence becomes
        # one row for the word-level encoder.
        embedded_shape = [
            self.document_size * self.sentence_size,
            self.word_size,
            self.embedding_size,
        ]
        self.input_x_embedded = tf.reshape(self.input_x_embedded,
                                           embedded_shape)
        lengths_shape = [self.document_size * self.sentence_size]
        self.input_x_embedded_lengths = tf.reshape(self.word_lengths,
                                                   lengths_shape)

        with tf.variable_scope("word_level"):
            self.word_encoder_outputs = self.bidirectional_RNN(
                num_hidden=self.word_encoder_num_hidden,
                inputs=self.input_x_embedded)
            word_level_output = self.attention(
                inputs=self.word_encoder_outputs,
                output_size=self.word_output_size)

            with tf.variable_scope("dropout"):
                # NOTE(review): tf.contrib.layers.dropout names this argument
                # `is_training`; confirm the `layers` module in scope accepts
                # `train`.
                word_level_output = layers.dropout(
                    word_level_output,
                    keep_prob=self.dropout_keep_proba,
                    train=self.train)

        # Split the merged axis back into (document, sentence).
        sentence_shape = [
            self.document_size, self.sentence_size, self.word_output_size
        ]
        self.sentence_encoder_inputs = tf.reshape(word_level_output,
                                                  sentence_shape)

        with tf.variable_scope("sentence_level"):
            self.sentence_encoder_outputs = self.bidirectional_RNN(
                num_hidden=self.sentence_encoder_num_hidden,
                inputs=self.sentence_encoder_inputs)
            sentence_level_output = self.attention(
                inputs=self.sentence_encoder_outputs,
                output_size=self.sentence_output_size)
            with tf.variable_scope("dropout"):
                # NOTE(review): same `train` vs `is_training` question as in
                # the word-level dropout.
                sentence_level_output = layers.dropout(
                    sentence_level_output,
                    keep_prob=self.dropout_keep_proba,
                    train=self.train)

        # Final model prediction
        with tf.variable_scope("classifier_output"):
            self.logits = layers.fully_connected(
                sentence_level_output, self.num_classes, activation_fn=None)
            self.predictions = tf.argmax(
                self.logits, axis=1, name="predictions")

        # Mean softmax cross-entropy loss
        with tf.variable_scope("loss"):
            per_example_xent = tf.nn.softmax_cross_entropy_with_logits(
                labels=self.input_y, logits=self.logits)
            self.loss = tf.reduce_mean(per_example_xent)

        # Fraction of rows whose arg-max prediction matches the label arg-max
        with tf.variable_scope("accuracy"):
            true_classes = tf.argmax(self.input_y, axis=1)
            hits = tf.equal(self.predictions, true_classes)
            self.accuracy = tf.reduce_mean(tf.cast(hits, "float"),
                                           name="accuracy")
    def apply(self, is_train, context_embed, answer, context_mask=None):
        """Score every (start, end) span plus a "no answer" option.

        ``self.predictor`` produces two feature tensors ``m1`` / ``m2``
        (also stored on ``self``). Each is projected to one logit per
        position unless its last dimension is already 1. A separate
        confidence branch produces a single "none" logit per example. The
        loss is the negative log of the softmax mass that falls on correct
        entries; it is added to ``tf.GraphKeys.LOSSES`` and the per-example
        values to the "PER_SAMPLE_LOSSES" collection.

        Args:
            is_train: Forwarded to the predictor, encoder and confidence
                predictor.
            context_embed: Forwarded to the predictor and the encoder.
            answer: Indexable pair of boolean tensors; ``answer[0]`` marks
                correct starts and ``answer[1]`` correct ends (presumably
                shaped (batch, length) -- confirm against the caller).
            context_mask: Forwarded to ``exp_mask`` and the sub-layers.

        Returns:
            A ``ConfidencePrediction`` built from the span probabilities,
            the masked start/end logits, the "none" probability, the "none"
            logit and ``context_mask``.
        """
        init_fn = get_keras_initialization(self.init)
        m1, m2 = self.predictor.apply(is_train, context_embed, context_mask)
        self.m1 = m1
        self.m2 = m2

        if m1.shape.as_list()[-1] != 1:
            with tf.variable_scope("start_pred"):
                start_logits = fully_connected(m1,
                                               1,
                                               activation_fn=None,
                                               weights_initializer=init_fn)
        else:
            start_logits = m1
        start_logits = tf.squeeze(start_logits, axis=[2])

        # The end branch must look at m2's own width (the original tested m1
        # here, which only worked when both tensors had the same last dim).
        if m2.shape.as_list()[-1] != 1:
            with tf.variable_scope("end_pred"):
                end_logits = fully_connected(m2,
                                             1,
                                             activation_fn=None,
                                             weights_initializer=init_fn)
        else:
            end_logits = m2
        end_logits = tf.squeeze(end_logits, axis=[2])

        masked_start_logits = exp_mask(start_logits, context_mask)
        masked_end_logits = exp_mask(end_logits, context_mask)

        # Attention-pool m1 / m2 over positions with the start / end softmax.
        start_atten = tf.einsum("ajk,aj->ak", m1,
                                tf.nn.softmax(masked_start_logits))
        end_atten = tf.einsum("ajk,aj->ak", m2,
                              tf.nn.softmax(masked_end_logits))
        with tf.variable_scope("encode_context"):
            enc = self.encoder.apply(is_train, context_embed, context_mask)
        if len(enc.shape) == 3:
            # Flatten the two trailing axes into one feature axis.
            _, encodings, fe = enc.shape.as_list()
            enc = tf.reshape(enc, (-1, encodings * fe))

        with tf.variable_scope("confidence"):
            conf = [start_atten, end_atten, enc]
            none_logit = self.confidence_predictor.apply(
                is_train, tf.concat(conf, axis=1))
        with tf.variable_scope("confidence_logits"):
            none_logit = fully_connected(none_logit,
                                         1,
                                         activation_fn=None,
                                         weights_initializer=init_fn)
            none_logit = tf.squeeze(none_logit, axis=1)

        batch_dim = tf.shape(start_logits)[0]

        # (batch, (l * l)) logits for each (start, end) pair
        all_logits = tf.reshape(
            tf.expand_dims(masked_start_logits, 1) +
            tf.expand_dims(masked_end_logits, 2), (batch_dim, -1))

        # (batch, (l * l) + 1) logits including the none option
        all_logits = tf.concat(
            [all_logits, tf.expand_dims(none_logit, 1)], axis=1)
        log_norms = tf.reduce_logsumexp(all_logits, axis=1)

        # Now build a "correctness" mask in the same format; the extra last
        # column is true when answer[0] marks no start at all.
        correct_mask = tf.logical_and(tf.expand_dims(answer[0], 1),
                                      tf.expand_dims(answer[1], 2))
        correct_mask = tf.reshape(correct_mask, (batch_dim, -1))
        correct_mask = tf.concat([
            correct_mask,
            tf.logical_not(tf.reduce_any(answer[0], axis=1, keep_dims=True))
        ],
                                 axis=1)

        # Note we are happily allowing the model to place weights on "backwards" spans, and also giving
        # it points for predicting spans that start and end at different answer spans. It would be easy to
        # fix by masking out some of the `all_logit` matrix and specify a more accuracy correct_mask, but I
        # in general left it this way to be consistent with the independent bound models that do the same.
        # Some early tests found properly masking things to not make much difference (or even to hurt), but it
        # still could be an avenue for improvement

        # Push incorrect entries to a very negative value before the
        # log-sum-exp so that only correct entries contribute.
        log_correct = tf.reduce_logsumexp(
            all_logits + VERY_NEGATIVE_NUMBER *
            (1 - tf.cast(correct_mask, tf.float32)),
            axis=1)
        per_sample_loss = -(log_correct - log_norms)
        tf.add_to_collection("PER_SAMPLE_LOSSES", per_sample_loss)
        loss = tf.reduce_mean(per_sample_loss)
        probs = tf.nn.softmax(all_logits)
        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
        return ConfidencePrediction(probs[:, :-1], masked_start_logits,
                                    masked_end_logits, probs[:, -1],
                                    none_logit, context_mask)
Example #60
0
def _dnn_model_fn(features, labels, mode, params, config=None):
  """Model function for a feed-forward deep neural network.

  Args:
    features: `Tensor` or dict of `Tensor`; normalised with
      `_get_feature_dict` before use.
    labels: Label `Tensor`, handed unchanged to the head.
    mode: One of the `ModeKeys`; dropout is only applied in `TRAIN`.
    params: Dict of hyperparameters. Keys read here:
      * head: required; supplies `logits_dimension` and
          `create_model_fn_ops`.
      * hidden_units: required; iterable of layer widths.
      * feature_columns: required; iterable of feature columns.
      * optimizer: optional; falls back to "Adagrad" when missing or falsy.
      * activation_fn: optional; resolved through `_get_activation_fn`.
      * dropout: optional; when not `None`, layers use
          `keep_prob = 1.0 - dropout` during training.
      * gradient_clip_norm: optional; passed on as `clip_gradients`.
      * embedding_lr_multipliers: optional dict, defaults to `{}`.
      * input_layer_min_slice_size: optional; falls back to `64 << 20` when
          missing or falsy.
    config: Optional `RunConfig`; only `num_ps_replicas` is read (0 when
      `config` is falsy).

  Returns:
    The result of `head.create_model_fn_ops` for the computed logits.
  """
  head = params["head"]
  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or "Adagrad"
  activation_fn = _get_activation_fn(params.get("activation_fn"))
  dropout_prob = params.get("dropout")
  clip_norm = params.get("gradient_clip_norm")
  min_slice_size = params.get("input_layer_min_slice_size")
  if not min_slice_size:
    min_slice_size = 64 << 20
  if config:
    ps_replicas = config.num_ps_replicas
  else:
    ps_replicas = 0
  lr_multipliers = params.get("embedding_lr_multipliers", {})

  features = _get_feature_dict(features)
  feature_tensors = tuple(six.itervalues(features))
  parent_scope = "dnn"

  with variable_scope.variable_scope(
      parent_scope,
      values=feature_tensors,
      partitioner=partitioned_variables.min_max_variable_partitioner(
          max_partitions=ps_replicas)):
    # The input layer gets its own partitioner with a minimum slice size.
    input_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=ps_replicas,
        min_slice_size=min_slice_size)
    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=feature_tensors,
        partitioner=input_partitioner) as input_layer_scope:
      # Use the contrib input layer only when every column is a contrib
      # `_FeatureColumn`; otherwise use the core input layer.
      all_contrib_columns = all(
          isinstance(column, feature_column._FeatureColumn)  # pylint: disable=protected-access
          for column in feature_columns)
      if all_contrib_columns:
        net = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope],
            scope=input_layer_scope)
      else:
        net = fc_core.input_layer(
            features=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope])

    apply_dropout = (
        dropout_prob is not None and mode == model_fn.ModeKeys.TRAIN)
    for index, width in enumerate(hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % index, values=(net,)) as hidden_layer_scope:
        net = layers.fully_connected(
            net,
            width,
            activation_fn=activation_fn,
            variables_collections=[parent_scope],
            scope=hidden_layer_scope)
        if apply_dropout:
          net = layers.dropout(net, keep_prob=(1.0 - dropout_prob))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope(
        "logits", values=(net,)) as logits_scope:
      logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      multipliers = dnn_linear_combined._extract_embedding_lr_multipliers(  # pylint: disable=protected-access
          lr_multipliers, parent_scope, input_layer_scope.name)
      return optimizers.optimize_loss(
          loss=loss,
          global_step=training_util.get_global_step(),
          learning_rate=_LEARNING_RATE,
          optimizer=_get_optimizer(optimizer),
          gradient_multipliers=multipliers,
          clip_gradients=clip_norm,
          name=parent_scope,
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)