def make_encoder(activation, num_topics, layer_sizes):
  """Create the encoder function.

  Args:
    activation: Activation function to use.
    num_topics: The number of topics.
    layer_sizes: The number of hidden units per layer in the encoder.

  Returns:
    encoder: A `callable` mapping a bag-of-words `Tensor` to a
      `tf.distributions.Distribution` instance over topics.
  """
  encoder_net = tf.keras.Sequential()
  for num_hidden_units in layer_sizes:
    encoder_net.add(tf.keras.layers.Dense(
        num_hidden_units, activation=activation,
        kernel_initializer=tf.glorot_normal_initializer()))
  encoder_net.add(tf.keras.layers.Dense(
      num_topics, activation=tf.nn.softplus,
      kernel_initializer=tf.glorot_normal_initializer()))

  def encoder(bag_of_words):
    net = _clip_dirichlet_parameters(encoder_net(bag_of_words))
    return tfd.Dirichlet(concentration=net,
                         name="topics_posterior")

  return encoder

def make_lda_variational(activation, num_topics, layer_sizes):
  """Creates the variational distribution for LDA.

  Args:
    activation: Activation function to use.
    num_topics: The number of topics.
    layer_sizes: The number of hidden units per layer in the encoder.

  Returns:
    lda_variational: A function that takes a bag-of-words Tensor as
      input and returns a distribution over topics.
  """
  encoder_net = tf.keras.Sequential()
  for num_hidden_units in layer_sizes:
    encoder_net.add(tf.keras.layers.Dense(
        num_hidden_units, activation=activation,
        kernel_initializer=tf.glorot_normal_initializer()))
  encoder_net.add(tf.keras.layers.Dense(
      num_topics, activation=tf.nn.softplus,
      kernel_initializer=tf.glorot_normal_initializer()))

  def lda_variational(bag_of_words):
    concentration = _clip_dirichlet_parameters(encoder_net(bag_of_words))
    return ed.Dirichlet(concentration=concentration, name="topics_posterior")

  return lda_variational

def make_decoder(num_topics, num_words):
  """Create the decoder function.

  Args:
    num_topics: The number of topics.
    num_words: The number of words.

  Returns:
    decoder: A `callable` mapping a `Tensor` of encodings to a
      `tf.distributions.Distribution` instance over words.
  """
  topics_words_logits = tf.get_variable(
      "topics_words_logits", shape=[num_topics, num_words],
      initializer=tf.glorot_normal_initializer())
  topics_words = tf.nn.softmax(topics_words_logits, axis=-1)

  def decoder(topics):
    word_probs = tf.matmul(topics, topics_words)
    # The observations are bag of words and therefore not one-hot. However,
    # log_prob of OneHotCategorical computes the probability correctly in
    # this case.
    return tfd.OneHotCategorical(probs=word_probs,
                                 name="bag_of_words")

  return decoder, topics_words
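
For orientation, here is a minimal sketch of how the encoder and decoder built above can be wired into the ELBO of the TFP LDA example; the sizes, activation, and placeholder below are illustrative assumptions, not part of the original code.

# Minimal wiring sketch (TF 1.x graph mode); the sizes below are illustrative.
num_topics, num_words = 50, 10000
bag_of_words = tf.placeholder(tf.float32, shape=[None, num_words])
encoder = make_encoder(tf.nn.relu, num_topics, layer_sizes=[300, 300])
decoder, topics_words = make_decoder(num_topics, num_words)

topics_posterior = encoder(bag_of_words)                  # Dirichlet over topics
topics = topics_posterior.sample()                        # [batch_size, num_topics]
reconstruction = decoder(topics).log_prob(bag_of_words)   # ELBO reconstruction term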
Example #4
def model_fn(features, labels, mode, params):
    """Build Model function f(x) for Estimator."""

    #------hyper parameters------
    field_size = params['field_size']
    feature_size = params['feature_size']
    embedding_size = params['embedding_size']
    l2_reg = params['l2_reg']
    learning_rate = params['learning_rate']

    dropout = params['dropout']
    attention_factor = params['attention_factor']

    #------build weights------
    Global_Bias = tf.get_variable("bias",
                                  shape=[1],
                                  initializer=tf.constant_initializer(0.0))
    Feat_Wgts = tf.get_variable("linear",
                                shape=[feature_size],
                                initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable("emb",
                               shape=[feature_size, embedding_size],
                               initializer=tf.glorot_normal_initializer())

    #------build feature------
    feat_ids = features['feat_ids']
    feat_vals = features['feat_vals']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])  # None * F

    #------build f(x)------

    # FM linear part: sum(w * x)
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Wgts, feat_ids)  # None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    # Deep part
    with tf.variable_scope("Embedding_Layer"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids)  # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size,
                                                 1])  # None * F * 1
        embeddings = tf.multiply(embeddings, feat_vals)  # None * F * K

    with tf.variable_scope("Pair-wise_Interaction_Layer"):
        num_interactions = field_size * (field_size - 1) // 2  # integer division so it can be used as a shape
        element_wise_product_list = []
        for i in range(0, field_size):
            for j in range(i + 1, field_size):
                element_wise_product_list.append(
                    tf.multiply(embeddings[:, i, :], embeddings[:, j, :]))
        element_wise_product_list = tf.stack(
            element_wise_product_list)  # (F*(F-1)/2) * None * K; tf.stack stacks the pairwise products
        element_wise_product_list = tf.transpose(
            element_wise_product_list, perm=[1, 0, 2])  # None * (F(F-1)/2) * K

    # Compute the attention scores
    with tf.variable_scope("Attention_Network"):

        deep_inputs = tf.reshape(element_wise_product_list,
                                 shape=[-1,
                                        embedding_size])  # (None*F(F-1)/2) * K

        deep_inputs = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=attention_factor, activation_fn=tf.nn.relu, \
                                             weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="attention_net_mlp")

        aij = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity, \
                                             weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="attention_net_out") # (None*F(F-1)/2) * 1

        # After obtaining the attention scores, normalize them with a softmax
        aij = tf.reshape(aij, shape=[-1, int(num_interactions), 1])
        aij_softmax = tf.nn.softmax(
            aij, axis=1,
            name="attention_net_softout")  # None * num_interactions * 1

        # TODO: why apply dropout to the attention scores? This is not entirely clear.
        if mode == tf.estimator.ModeKeys.TRAIN:
            aij_softmax = tf.nn.dropout(aij_softmax, keep_prob=dropout[0])

    with tf.variable_scope("Attention-based_Pooling_Layer"):
        deep_inputs = tf.multiply(element_wise_product_list,
                                  aij_softmax)  # None * (F(F-1)/2) * K
        deep_inputs = tf.reduce_sum(deep_inputs, axis=1)  # None * K Pooling操作

        # The output of the attention-based pooling layer also goes through dropout
        if mode == tf.estimator.ModeKeys.TRAIN:
            deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[1])

        # The output of this layer is a K-dimensional vector

    with tf.variable_scope("Prediction_Layer"):
        # Feed directly into a single output unit
        deep_inputs = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity, \
                                             weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="afm_out") # None * 1
        y_deep = tf.reshape(deep_inputs, shape=[-1])  # None

    with tf.variable_scope("AFM_overall"):
        y_bias = Global_Bias * tf.ones_like(y_deep, dtype=tf.float32)
        y = y_bias + y_linear + y_deep
        pred = tf.nn.sigmoid(y)

    # set predictions
    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
        tf.estimator.export.PredictOutput(predictions)
    }
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)
    ) + l2_reg * tf.nn.l2_loss(Feat_Wgts) + l2_reg * tf.nn.l2_loss(Feat_Emb)
    log_loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels))

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        # "logloss": tf.losses.log_loss(pred, labels, weights=1.0, scope=None, epsilon=1e-07,loss_collection=tf.GraphKeys.LOSSES, reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS),
        "auc": tf.metrics.auc(labels, pred),
    }

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                       beta1=0.9,
                                       beta2=0.999,
                                       epsilon=1e-8)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN`
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=log_loss,  # report only the pure log loss; training still uses the full regularized loss
            train_op=train_op)
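
For reference, a sketch of how this model_fn and its params dict might be handed to an Estimator; the hyperparameter values, model_dir, and my_input_fn below are made up for illustration.

# Illustrative only: values and my_input_fn are assumptions, not from the original.
params = {
    'field_size': 39,
    'feature_size': 117581,
    'embedding_size': 32,
    'attention_factor': 64,
    'l2_reg': 0.0001,
    'learning_rate': 0.001,
    'dropout': [0.8, 0.8],   # keep_prob for the attention scores and the pooled vector
}
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='./afm_model', params=params)
# estimator.train(input_fn=lambda: my_input_fn(train_files, batch_size=256))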
Example #5
    def __init__(self, config, vocab_size):
        # Placeholders for data, output and dropout
        self.config = config
        self.input_x1 = tf.placeholder(tf.int32,
                                       [None, self.config.max_document_length],
                                       name="input_x1")
        self.input_x2 = tf.placeholder(tf.int32,
                                       [None, self.config.max_document_length],
                                       name="input_x2")
        self.input_y = tf.placeholder(tf.float32, [None], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        self.attention_w = tf.Variable(tf.truncated_normal(
            [2 * self.config.hidden_units, self.config.attention_size],
            stddev=0.1),
                                       name='attention_w')
        self.attention_b = tf.Variable(tf.constant(
            0.1, shape=[self.config.attention_size]),
                                       name='attention_b')

        self.u_w = tf.Variable(tf.truncated_normal(
            [self.config.attention_size, 1]),
                               name='attention_uw')
        self.initializer = None
        if self.config.initializer == "normal":
            self.initializer = tf.random_normal_initializer(mean=0.0,
                                                            stddev=0.1)
        elif self.config.initializer == "glorot":
            self.initializer = tf.glorot_uniform_initializer()
        elif self.config.initializer == "xavier":
            self.initializer = tf.glorot_normal_initializer()
        else:
            raise ValueError("Unknown initializer")

        # Embedding layer
        with tf.name_scope("embedding"):
            self.W = tf.get_variable(
                'lookup_table',
                dtype=tf.float32,
                shape=[vocab_size, self.config.embedding_dim],
                initializer=self.initializer,
                trainable=True)
            self.embedded_chars1 = tf.nn.embedding_lookup(
                self.W, self.input_x1)
            self.embedded_chars2 = tf.nn.embedding_lookup(
                self.W, self.input_x2)

        with tf.name_scope("output"):
            # add cnn layer
            output1 = self.cnn_layer(self.embedded_chars1, "side1")
            output2 = self.cnn_layer(self.embedded_chars2, "side2")

            self.out1 = self.BiRNN(output1, self.dropout_keep_prob, "side1",
                                   self.config.max_document_length,
                                   self.config.hidden_units)
            self.out1 = self._highway_layer(self.out1,
                                            self.out1.get_shape()[1],
                                            num_layers=1,
                                            bias=0,
                                            scope="side1")
            self.out2 = self.BiRNN(output2, self.dropout_keep_prob, "side2",
                                   self.config.max_document_length,
                                   self.config.hidden_units)
            self.out2 = self._highway_layer(self.out2,
                                            self.out2.get_shape()[1],
                                            num_layers=1,
                                            bias=0,
                                            scope="side2")

            self.distance = tf.sqrt(
                tf.reduce_sum(tf.square(tf.subtract(self.out1, self.out2)),
                              1,
                              keepdims=True))
            self.distance = tf.div(
                self.distance,
                tf.add(
                    tf.sqrt(
                        tf.reduce_sum(tf.square(self.out1), 1, keepdims=True)),
                    tf.sqrt(
                        tf.reduce_sum(tf.square(self.out2), 1,
                                      keepdims=True))))
            self.distance = tf.reshape(self.distance, [-1], name="distance")
        with tf.name_scope("loss"):
            self.loss = self.contrastive_loss(self.input_y, self.distance,
                                              self.config.batch_size)
        with tf.name_scope("accuracy"):
            self.temp_sim = tf.subtract(tf.ones_like(self.distance),
                                        tf.round(self.distance),
                                        name="temp_sim")  # auto threshold 0.4
            self.correct_predictions = tf.equal(self.temp_sim, self.input_y)
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions,
                                                   "float"),
                                           name="accuracy")
Example #6
def masked_dense(inputs,
                 units,
                 num_blocks=None,
                 exclusive=False,
                 kernel_initializer=None,
                 reuse=None,
                 name=None,
                 *args,  # pylint: disable=keyword-arg-before-vararg
                 **kwargs):
  """A autoregressively masked dense layer. Analogous to `tf.layers.dense`.

  See [Germain et al. (2015)][1] for detailed explanation.

  Arguments:
    inputs: Tensor input.
    units: Python `int` scalar representing the dimensionality of the output
      space.
    num_blocks: Python `int` scalar representing the number of blocks for the
      MADE masks.
    exclusive: Python `bool` scalar representing whether to zero the diagonal of
      the mask, used for the first layer of a MADE.
    kernel_initializer: Initializer function for the weight matrix.
      If `None` (default), weights are initialized using
      `tf.glorot_normal_initializer`.
    reuse: Python `bool` scalar representing whether to reuse the weights of a
      previous layer by the same name.
    name: Python `str` used to describe ops managed by this function.
    *args: `tf.layers.dense` arguments.
    **kwargs: `tf.layers.dense` keyword arguments.

  Returns:
    Output tensor.

  Raises:
    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
      graph execution.

  #### References

  [1]: Mathieu Germain, Karol Gregor, Iain Murray, and Hugo Larochelle. MADE:
       Masked Autoencoder for Distribution Estimation. In _International
       Conference on Machine Learning_, 2015. https://arxiv.org/abs/1502.03509
  """
  # TODO(b/67594795): Better support of dynamic shape.
  input_depth = tf.dimension_value(inputs.shape.with_rank_at_least(1)[-1])
  if input_depth is None:
    raise NotImplementedError(
        "Rightmost dimension must be known prior to graph execution.")

  mask = _gen_mask(num_blocks, input_depth, units,
                   MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T

  if kernel_initializer is None:
    kernel_initializer = tf.glorot_normal_initializer()

  def masked_initializer(shape, dtype=None, partition_info=None):
    return mask * kernel_initializer(shape, dtype, partition_info)

  with tf.name_scope(name, "masked_dense", [inputs, units, num_blocks]):
    layer = tf.layers.Dense(
        units,
        kernel_initializer=masked_initializer,
        kernel_constraint=lambda x: mask * x,
        name=name,
        dtype=inputs.dtype.base_dtype,
        _scope=name,
        _reuse=reuse,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs)
    return layer.apply(inputs)
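
A brief usage sketch, loosely following the layering pattern of TFP's masked_autoregressive_default_template; the shapes and activation below are illustrative, and extra keyword arguments are simply forwarded to tf.layers.Dense.

# Illustrative shapes only (TF 1.x graph mode).
x = tf.random_normal([16, 5])                                    # event size 5
h = masked_dense(x, units=10, num_blocks=5, exclusive=True,
                 activation=tf.nn.relu)                          # first (exclusive) layer
shift = masked_dense(h, units=5, num_blocks=5)                   # autoregressive outputs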
Example #7
def fcn_paper(inputs_32s,
              inputs_16s,
              inputs_8s,
              img_height,
              img_width,
              is_training=True):
    #inputs: [batch,h,w,channels]. And can be (1, any_size, any_size, channels).
    #logits: [batch,h,w,classes]
    #upsampled_logits: [batch,H,W,classes]
    #annotation: [batch,H,W]
    #loss: [batch,H,W]
    #((11, 15, 512), (23, 31, 512), (46, 62, 256), (375, 500), b'2007_000645', '2007_000645')

    with tf.variable_scope('fcn/logits/32s') as scope:
        weights = tf.get_variable('weights',
                                  shape=[1, 1, 4096, num_classes],
                                  dtype=tf.float32,
                                  initializer=tf.glorot_normal_initializer())
        biases = tf.get_variable('biases',
                                 shape=[num_classes],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.0))
        conv = tf.nn.conv2d(inputs_32s,
                            weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        logits_32s = tf.nn.bias_add(conv, biases)

        convt_weights = get_deconv_filter([4, 4, num_classes, num_classes])
        inputs_16s_shape = tf.shape(inputs_16s)
        logits_32s_upsampled = tf.nn.conv2d_transpose(value=logits_32s,
                                                      filter=convt_weights,
                                                      output_shape=[
                                                          inputs_16s_shape[0],
                                                          inputs_16s_shape[1],
                                                          inputs_16s_shape[2],
                                                          num_classes
                                                      ],
                                                      strides=[1, 2, 2, 1],
                                                      padding='SAME',
                                                      data_format='NHWC',
                                                      name=None)

    with tf.variable_scope('fcn/logits/16s') as scope:
        weights = tf.get_variable('weights',
                                  shape=[1, 1, 512, num_classes],
                                  dtype=tf.float32,
                                  initializer=tf.glorot_normal_initializer())
        biases = tf.get_variable('biases',
                                 shape=[num_classes],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.0))
        conv = tf.nn.conv2d(inputs_16s,
                            weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        logits_16s = tf.nn.bias_add(conv, biases)

        fused_logits_16s = logits_16s + logits_32s_upsampled

        convt_weights = get_deconv_filter([4, 4, num_classes, num_classes])
        inputs_8s_shape = tf.shape(inputs_8s)
        logits_16s_upsampled = tf.nn.conv2d_transpose(value=fused_logits_16s,
                                                      filter=convt_weights,
                                                      output_shape=[
                                                          inputs_8s_shape[0],
                                                          inputs_8s_shape[1],
                                                          inputs_8s_shape[2],
                                                          num_classes
                                                      ],
                                                      strides=[1, 2, 2, 1],
                                                      padding='SAME',
                                                      data_format='NHWC',
                                                      name=None)

    with tf.variable_scope('fcn/logits/8s') as scope:
        weights = tf.get_variable('weights',
                                  shape=[1, 1, 256, num_classes],
                                  dtype=tf.float32,
                                  initializer=tf.glorot_normal_initializer())
        biases = tf.get_variable('biases',
                                 shape=[num_classes],
                                 dtype=tf.float32,
                                 initializer=tf.constant_initializer(0.0))
        conv = tf.nn.conv2d(inputs_8s,
                            weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        logits_8s = tf.nn.bias_add(conv, biases)

        fused_logits_8s = logits_8s + logits_16s_upsampled

        convt_weights = get_deconv_filter([16, 16, num_classes, num_classes])
        logits_8s_upsampled = tf.nn.conv2d_transpose(value=fused_logits_8s,
                                                     filter=convt_weights,
                                                     output_shape=[
                                                         inputs_8s_shape[0],
                                                         img_height, img_width,
                                                         num_classes
                                                     ],
                                                     strides=[1, 8, 8, 1],
                                                     padding='SAME',
                                                     data_format='NHWC',
                                                     name=None)

    return logits_8s_upsampled
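
get_deconv_filter is not part of this excerpt; the usual FCN choice is a bilinear-interpolation initialization for the transposed-convolution kernels, roughly as sketched below (one plausible implementation, not necessarily the author's; assumes numpy is imported as np).

def get_deconv_filter(f_shape):
    # Sketch: bilinear upsampling kernel of shape [k_h, k_w, out_ch, in_ch],
    # assuming a square kernel and out_ch == in_ch, as in the calls above.
    width = f_shape[0]
    height = f_shape[1]
    f = np.ceil(width / 2.0)
    c = (2 * f - 1 - f % 2) / (2.0 * f)
    bilinear = np.zeros([f_shape[0], f_shape[1]])
    for x in range(width):
        for y in range(height):
            bilinear[x, y] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
    weights = np.zeros(f_shape, dtype=np.float32)
    for i in range(f_shape[2]):
        weights[:, :, i, i] = bilinear
    init = tf.constant_initializer(value=weights, dtype=tf.float32)
    return tf.get_variable(name="up_filter", initializer=init, shape=weights.shape)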
Example #8
def model_fn(features, labels, mode, params):
    """Bulid Model function f(x) for Estimator."""
    #------hyperparameters----
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #batch_norm_decay = params["batch_norm_decay"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["deep_layers"].split(',')))
    dropout = list(map(float, params["dropout"].split(',')))

    #------build weights------
    FM_B = tf.get_variable(name='fm_bias', shape=[1], initializer=tf.constant_initializer(0.0))
    print("FM_B", FM_B.get_shape())
    FM_W = tf.get_variable(name='fm_w', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    print("FM_W", FM_W.get_shape())
    # F
    FM_V = tf.get_variable(name='fm_v', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())
    # F * E
    print("FM_V", FM_V.get_shape())
    #------build feature-------
    feat_ids  = features['feat_ids']
    print("feat_ids", feat_ids.get_shape())
    feat_ids = tf.reshape(feat_ids,shape=[-1,field_size]) # None * F
    print("feat_ids", feat_ids.get_shape())
    feat_vals = features['feat_vals']
    print("feat_vals", feat_vals.get_shape())
    feat_vals = tf.reshape(feat_vals,shape=[-1,field_size]) # None * F
    print("feat_vals", feat_vals.get_shape())

    #------build f(x)------
    with tf.variable_scope("First-order"):
        feat_wgts = tf.nn.embedding_lookup(FM_W, feat_ids) # None * f/K * K
        print "feat_wgts", feat_wgts.get_shape()
        y_w = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals),1)

    with tf.variable_scope("Second-order"):
        embeddings = tf.nn.embedding_lookup(FM_V, feat_ids) # None * f/K * K * E
        print "embeddings", embeddings.get_shape()
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1]) # None * f/K * K * 1 ?
        print "feat_vals", feat_vals.get_shape()
        embeddings = tf.multiply(embeddings, feat_vals) #vij*xi
        print "embeddings", embeddings.get_shape()
        sum_square = tf.square(tf.reduce_sum(embeddings,1)) # None * K * E
        print "sum_square", sum_square.get_shape()
        square_sum = tf.reduce_sum(tf.square(embeddings),1)
        print "square_sum", square_sum.get_shape()
        y_v = 0.5*tf.reduce_sum(tf.subtract(sum_square, square_sum),1)	# None * 1

    with tf.variable_scope("Deep-part"):
        if FLAGS.batch_norm:
            #normalizer_fn = tf.contrib.layers.batch_norm
            #normalizer_fn = tf.layers.batch_normalization
            if mode == tf.estimator.ModeKeys.TRAIN:
                train_phase = True
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': True, 'reuse': None}
            else:
                train_phase = False
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': False, 'reuse': True}
        else:
            normalizer_fn = None
            normalizer_params = None

        deep_inputs = tf.reshape(embeddings,shape=[-1,field_size*embedding_size]) # None * (F*K)
        for i in range(len(layers)):
            #if FLAGS.batch_norm:
            #    deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' %i)
                #normalizer_params.update({'scope': 'bn_%d' %i})
            deep_inputs = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=layers[i], \
                #normalizer_fn=normalizer_fn, normalizer_params=normalizer_params, \
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)
            if FLAGS.batch_norm:
                deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' %i)   # applied after the ReLU, see https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
            if mode == tf.estimator.ModeKeys.TRAIN:
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])                              #Apply Dropout after all BN layers and set dropout=0.8(drop_ratio=0.2)
                #deep_inputs = tf.layers.dropout(inputs=deep_inputs, rate=dropout[i], training=mode == tf.estimator.ModeKeys.TRAIN)

        y_deep = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity, \
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')
        y_d = tf.reshape(y_deep,shape=[-1])
        #sig_wgts = tf.get_variable(name='sigmoid_weights', shape=[layers[-1]], initializer=tf.glorot_normal_initializer())
        #sig_bias = tf.get_variable(name='sigmoid_bias', shape=[1], initializer=tf.constant_initializer(0.0))
        #deep_out = tf.nn.xw_plus_b(deep_inputs,sig_wgts,sig_bias,name='deep_out')

    with tf.variable_scope("DeepFM-out"):
        #y_bias = FM_B * tf.ones_like(labels, dtype=tf.float32)  # None * 1  warning: do not use labels here, otherwise predict/export fail while train/evaluate still work; the Estimator apparently optimizes the graph and does not feed labels when they are unused
        y_bias = FM_B * tf.ones_like(y_d, dtype=tf.float32)     # None * 1
        y = y_bias + y_w + y_v + y_d
        pred = tf.sigmoid(y)

    predictions={"prob": pred}
    export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(FM_W) + \
        l2_reg * tf.nn.l2_loss(FM_V) #+ \ l2_reg * tf.nn.l2_loss(sig_wgts)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                train_op=train_op)
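
batch_norm_layer is referenced here (and in the following examples) but not defined in the excerpt; a typical TF 1.x helper built on tf.contrib.layers.batch_norm looks roughly like this sketch (the decay value is a placeholder for batch_norm_decay).

def batch_norm_layer(x, train_phase, scope_bn):
    # Sketch only: build both the training and inference batch-norm ops in the
    # same scope and pick one with the Python-level train_phase flag.
    bn_train = tf.contrib.layers.batch_norm(x, decay=0.9, center=True, scale=True,
                                            updates_collections=None, is_training=True,
                                            reuse=None, scope=scope_bn)
    bn_infer = tf.contrib.layers.batch_norm(x, decay=0.9, center=True, scale=True,
                                            updates_collections=None, is_training=False,
                                            reuse=True, scope=scope_bn)
    return bn_train if train_phase else bn_infer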
Example #9
def model_fn(features, labels, mode, params):
    """Build Model function f(x) for Estimator."""
    #------hyperparameters----
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #batch_norm_decay = params["batch_norm_decay"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["deep_layers"].split(',')))
    dropout = list(map(float, params["dropout"].split(',')))

    #------build weights------
    #FM_B = tf.get_variable(name='fm_bias', shape=[1], initializer=tf.constant_initializer(0.0))
    #FM_W = tf.get_variable(name='fm_w', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    #FM_V = tf.get_variable(name='fm_v', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())
    MVM_W = tf.get_variable(name='mvm_w',
                            shape=[feature_size, embedding_size],
                            initializer=tf.glorot_normal_initializer())
    MVM_B = tf.get_variable(name='mvm_b',
                            shape=[field_size, embedding_size],
                            initializer=tf.glorot_normal_initializer())

    #------build feature-------
    feat_ids = features['feat_ids']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

    #------build f(x)------
    #with tf.variable_scope("First-order"):
    #    feat_wgts = tf.nn.embedding_lookup(FM_W, feat_ids) # None * F * 1
    #    y_w = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals),1)

    #with tf.variable_scope("Second-order"):
    #    embeddings = tf.nn.embedding_lookup(FM_V, feat_ids) # None * F * K
    #    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
    #    embeddings = tf.multiply(embeddings, feat_vals) #vij*xi
    #    sum_square = tf.square(tf.reduce_sum(embeddings,1))
    #    square_sum = tf.reduce_sum(tf.square(embeddings),1)
    #    y_v = 0.5*tf.reduce_sum(tf.subtract(sum_square, square_sum),1)	# None * 1

    with tf.variable_scope("Embedding-layer"):
        embeddings = tf.nn.embedding_lookup(MVM_W, feat_ids)  # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals)  # None * F * K

    with tf.variable_scope("MVM-part"):
        all_order = tf.add(embeddings, MVM_B)
        x_mvm = all_order[:, 0, :]  # None * K
        for i in range(1, field_size):
            x_mvm = tf.multiply(x_mvm, all_order[:, i, :])

        x_mvm = tf.reshape(x_mvm, shape=[-1, embedding_size])  # None * K

    with tf.variable_scope("Deep-part"):
        if FLAGS.batch_norm:
            #normalizer_fn = tf.contrib.layers.batch_norm
            #normalizer_fn = tf.layers.batch_normalization
            if mode == tf.estimator.ModeKeys.TRAIN:
                train_phase = True
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': True, 'reuse': None}
            else:
                train_phase = False
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': False, 'reuse': True}
        else:
            normalizer_fn = None
            normalizer_params = None

        x_deep = tf.reshape(embeddings,
                            shape=[-1, field_size * embedding_size
                                   ])  # None * (F*K)
        for i in range(len(layers)):
            #if FLAGS.batch_norm:
            #    deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' %i)
            #normalizer_params.update({'scope': 'bn_%d' %i})
            x_deep = tf.contrib.layers.fully_connected(inputs=x_deep, num_outputs=layers[i], \
                #normalizer_fn=normalizer_fn, normalizer_params=normalizer_params, \
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)
            if FLAGS.batch_norm:
                x_deep = batch_norm_layer(
                    x_deep, train_phase=train_phase, scope_bn='bn_%d' % i
                )  # applied after the ReLU, see https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
            if mode == tf.estimator.ModeKeys.TRAIN:
                x_deep = tf.nn.dropout(
                    x_deep, keep_prob=dropout[i]
                )  #Apply Dropout after all BN layers and set dropout=0.8(drop_ratio=0.2)
                #x_deep = tf.layers.dropout(inputs=x_deep, rate=dropout[i], training=mode == tf.estimator.ModeKeys.TRAIN)

    with tf.variable_scope("DeepMVM-out"):
        x_stack = tf.concat([x_mvm, x_deep],
                            axis=1)  # None * (K + deep_layers[-1])
        y_deep = tf.contrib.layers.fully_connected(inputs=x_stack, num_outputs=1, activation_fn=tf.identity, \
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')
        y = tf.reshape(y_deep, shape=[-1])
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
        tf.estimator.export.PredictOutput(predictions)
    }
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(MVM_W) + \
        l2_reg * tf.nn.l2_loss(MVM_B)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {"auc": tf.metrics.auc(labels, pred)}
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                           beta1=0.9,
                                           beta2=0.999,
                                           epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate,
                                              initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op)
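
A tiny NumPy check of the MVM interaction above: adding the per-field bias and multiplying element-wise across all fields is the same as a product over the field axis. The values are arbitrary.

import numpy as np

all_order = np.random.rand(2, 4, 3)          # batch=2, field_size=4, K=3 (illustrative)
x_mvm = all_order[:, 0, :]
for i in range(1, 4):
    x_mvm = x_mvm * all_order[:, i, :]
assert np.allclose(x_mvm, np.prod(all_order, axis=1))   # element-wise product over fields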
Example #10
def build_input(features, params):
    cat_columns = params['cat_columns']
    val_columns = params['val_columns']
    column_to_field = params['column_to_field']
    #dnn_columns = params['dnn_columns']
    dimension_config = params['dimension_config']
    reg = params['reg']
    embed_dim = params['embed_dim']
    embedding_table = EmbeddingTable()
    embedding_dict = OrderedDict()
    with tf.variable_scope("fm", reuse=tf.AUTO_REUSE,
                           values=[features]) as scope:
        with tf.device('/cpu:0'):
            for name, col in cat_columns.items():
                field = column_to_field.get(name, name)
                cur_dimension = dimension_config[
                    field] if field in dimension_config else embed_dim
                embedding_table.add_linear_weights(vocab_name=name,
                                                   vocab_size=col._num_buckets)
                embedding_table.add_embed_weights(vocab_name=field,
                                                  vocab_size=col._num_buckets,
                                                  embed_dim=cur_dimension,
                                                  reg=reg)
            for name, col in val_columns.items():
                field = column_to_field.get(name, name)
                cur_dimension = dimension_config[
                    field] if field in dimension_config else embed_dim
                embedding_table.add_linear_weights(vocab_name=name,
                                                   vocab_size=1)
                embedding_table.add_embed_weights(vocab_name=field,
                                                  vocab_size=1,
                                                  embed_dim=cur_dimension,
                                                  reg=reg)

            builder = _LazyBuilder(features)
            # linear part
            linear_outputs = []
            for name, col in cat_columns.items():
                # get sparse tensor of input feature from feature column
                sp_tensor = col._get_sparse_tensors(builder)
                sp_ids = sp_tensor.id_tensor
                linear_weights = embedding_table.get_linear_weights(name)

                # linear_weights: (vocab_size, 1)
                # sp_ids: (batch_size, max_tokens_per_example)
                # sp_values: (batch_size, max_tokens_per_example)
                linear_output = embedding_ops.safe_embedding_lookup_sparse(
                    linear_weights,
                    sp_ids,
                    None,
                    combiner='sum',
                    name='{}_linear_output'.format(name))

                linear_outputs.append(linear_output)
            for name, col in val_columns.items():
                dense_tensor = col._get_dense_tensor(builder)
                linear_weights = embedding_table.get_linear_weights(name)
                linear_output = tf.multiply(dense_tensor, linear_weights)
                linear_outputs.append(linear_output)
            # linear_outputs: (batch_size, nonzero_feature_num)
            linear_outputs = tf.concat(linear_outputs, axis=1)
            # poly part

            for name, col in cat_columns.items():
                # get sparse tensor of input feature from feature column
                field = column_to_field.get(name, name)
                sp_tensor = col._get_sparse_tensors(builder)
                sp_ids = sp_tensor.id_tensor
                embed_weights = embedding_table.get_embed_weights(field)

                # embeddings: (batch_size, embed_dim)
                # x_i * v_i
                embeddings = embedding_ops.safe_embedding_lookup_sparse(
                    embed_weights,
                    sp_ids,
                    None,
                    combiner='sum',
                    name='{}_{}_embedding'.format(field, name))
                embedding_dict[field] = embeddings
            for name, col in val_columns.items():
                field = column_to_field.get(name, name)
                dense_tensor = col._get_dense_tensor(builder)
                embed_weights = embedding_table.get_embed_weights(field)
                embeddings = tf.multiply(dense_tensor, embed_weights)
                embedding_dict[field] = embeddings
    with tf.variable_scope("dnn_embed"):
        x = tf.concat(list(embedding_dict.values()), axis=1)
        N = len(embedding_dict)
        T = sum([
            embedding.get_shape().as_list()[1]
            for embedding in embedding_dict.values()
        ])
        print("wkfm N:", N, " T:", T)
        indices = []
        for i, embeddings in enumerate(embedding_dict.values()):
            dim = embeddings.get_shape().as_list()[1]
            indices.extend([i] * dim)
        indices.extend([len(embedding_dict)] * shape)
        outputs = []
        for field, embeddings in embedding_dict.items():
            di = dimension_config[
                field] if field in dimension_config else embed_dim
            U = tf.get_variable('{}_wkfm'.format(field), [T, di],
                                initializer=tf.glorot_normal_initializer(),
                                trainable=True)
            wkfm_weights = tf.get_variable('{}_wkfm_weights'.format(field),
                                           [N],
                                           initializer=tf.ones_initializer,
                                           trainable=True)
            weights = tf.gather(wkfm_weights, indices)
            y = tf.matmul(weights * x, U)
            outputs.append(y)
        y = tf.concat(outputs, axis=1)
        y = x * y
        new_inputs = tf.concat([linear_outputs, y], 1)
        shared_weights = tf.get_variable(
            name="fm_share",
            dtype=tf.float32,
            shape=[new_inputs.get_shape().as_list()[1], 256],
            initializer=tf.glorot_normal_initializer(),
            regularizer=tf.contrib.layers.l2_regularizer(reg),
            trainable=True)
        new_inputs = tf.matmul(new_inputs, shared_weights)
        return new_inputs
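
EmbeddingTable is used above but not defined in this excerpt; a minimal sketch consistent with the add_*/get_* calls might look like the following (illustrative only, keyed by vocab name so repeated fields share weights).

class EmbeddingTable(object):
    # Minimal sketch consistent with the calls above; the real class is not
    # shown in this excerpt, so treat this as illustrative only.
    def __init__(self):
        self._linear = {}
        self._embed = {}

    def add_linear_weights(self, vocab_name, vocab_size):
        if vocab_name not in self._linear:
            self._linear[vocab_name] = tf.get_variable(
                '{}_linear'.format(vocab_name), [vocab_size, 1],
                initializer=tf.glorot_normal_initializer())

    def add_embed_weights(self, vocab_name, vocab_size, embed_dim, reg=None):
        if vocab_name not in self._embed:
            self._embed[vocab_name] = tf.get_variable(
                '{}_embed'.format(vocab_name), [vocab_size, embed_dim],
                initializer=tf.glorot_normal_initializer(),
                regularizer=tf.contrib.layers.l2_regularizer(reg) if reg else None)

    def get_linear_weights(self, vocab_name):
        return self._linear[vocab_name]

    def get_embed_weights(self, vocab_name):
        return self._embed[vocab_name]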
Example #11
def model_fn(features, labels, mode, params):
    """Bulid Model function f(x) for Estimator."""
    #------hyperparameters----
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["deep_layers"].split(',')))
    dropout = list(map(float, params["dropout"].split(',')))
    num_pairs = field_size * (field_size - 1) // 2  # integer division so it can be used as a shape

    #------build weights------
    Global_Bias = tf.get_variable(name='bias', shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Bias = tf.get_variable(name='linear', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable(name='emb', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())
    #Prod_Kernel = tf.get_variable(name='kernel', shape=[embedding_size, num_pairs, embedding_size], initializer=tf.glorot_normal_initializer())


    #------build feature-------
    feat_ids  = features['feat_ids']									# None * F * 1
    feat_ids = tf.reshape(feat_ids,shape=[-1,field_size])
    feat_vals = features['feat_vals']									# None * F * 1
    feat_vals = tf.reshape(feat_vals,shape=[-1,field_size])

    #------build f(x)------
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Bias, feat_ids) 		# None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals),1)

    with tf.variable_scope("Embedding-layer"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids) 		# None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals) 				# None * F * K

    with tf.variable_scope("Product-layer"):
		if FLAGS.model_type == 'FNN':
			deep_inputs = tf.reshape(embeddings,shape=[-1,field_size*embedding_size])
		elif FLAGS.model_type == 'Inner':
			row = []
			col = []
			for i in range(field_size-1):
				for j in range(i+1, field_size):
					row.append(i)
					col.append(j)
			p = tf.gather(embeddings, row, axis=1)
			q = tf.gather(embeddings, col, axis=1)
	        #p = tf.reshape(p, [-1, num_pairs, embedding_size])
            #q = tf.reshape(q, [-1, num_pairs, embedding_size])
			inner = tf.reshape(tf.reduce_sum(p * q, [-1]), [-1, num_pairs])										# None * (F*(F-1)/2)
			deep_inputs = tf.concat([tf.reshape(embeddings,shape=[-1,field_size*embedding_size]), inner], 1)	# None * ( F*K+F*(F-1)/2 )
		elif FLAGS.model_type == 'Outer':             #ERROR: NOT ready yet
			row = []
			col = []
			for i in range(field_size-1):
				for j in range(i+1, field_size):
					row.append(i)
					col.append(j)
			p = tf.gather(embeddings, row, axis=1)
			q = tf.gather(embeddings, col, axis=1)
	        #p = tf.reshape(p, [-1, num_pairs, embedding_size])
            #q = tf.reshape(q, [-1, num_pairs, embedding_size])
			#einsum('i,j->ij', p, q)  # output[i,j] = p[i]*q[j]				# Outer product
			outer = tf.reshape(tf.einsum('api,apj->apij', p, q), [-1, num_pairs*embedding_size*embedding_size])	# None * (F*(F-1)/2*K*K)
			deep_inputs = tf.concat([tf.reshape(embeddings,shape=[-1,field_size*embedding_size]), outer], 1)	# None * ( F*K+F*(F-1)/2*K*K )


    with tf.variable_scope("Deep-part"):
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_phase = True
        else:
            train_phase = False

        for i in range(len(layers)):
            deep_inputs = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=layers[i], \
            	weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)

            if FLAGS.batch_norm:
				deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' %i)   	#放在RELU之后 https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
            if mode == tf.estimator.ModeKeys.TRAIN:
				deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])                              	#Apply Dropout after all BN layers and set dropout=0.8(drop_ratio=0.2)
            	#deep_inputs = tf.layers.dropout(inputs=deep_inputs, rate=dropout[i], training=mode == tf.estimator.ModeKeys.TRAIN)

        y_deep = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity, \
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')
        y_d = tf.reshape(y_deep,shape=[-1])

    with tf.variable_scope("PNN-out"):
        #y_bias = Global_Bias * tf.ones_like(labels, dtype=tf.float32)  # None * 1  warning: do not use labels here, otherwise predict/export fail while train/evaluate still work; the Estimator apparently optimizes the graph and does not feed labels when they are unused
        y_bias = Global_Bias * tf.ones_like(y_d, dtype=tf.float32)      # None * 1
        y = y_bias + y_linear + y_d
        pred = tf.sigmoid(y)

    predictions={"prob": pred}
    export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(Feat_Bias) + l2_reg * tf.nn.l2_loss(Feat_Emb)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                train_op=train_op)
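
A small NumPy check of the 'Inner' product construction above: gathering the (i, j) index pairs and summing the element-wise product reproduces the pairwise inner products <e_i, e_j>. The values are arbitrary.

import numpy as np

F, K = 4, 3
e = np.random.rand(2, F, K)                             # batch of 2 examples (illustrative)
row = [i for i in range(F - 1) for j in range(i + 1, F)]
col = [j for i in range(F - 1) for j in range(i + 1, F)]
inner = np.sum(e[:, row, :] * e[:, col, :], axis=-1)    # (2, F*(F-1)/2)
assert np.allclose(inner[0, 0], np.dot(e[0, 0], e[0, 1]))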
Example #12
def test_matmul():
    ''' Run tests on the Wave custom matmul operator. 
    '''
    tf.reset_default_graph()

    a = tf.get_variable("a", [2, 3],
                        dtype=tf.float32,
                        initializer=tf.glorot_normal_initializer())
    b = tf.get_variable("b", [3, 4],
                        dtype=tf.float32,
                        initializer=tf.glorot_normal_initializer())

    t_init = tf.global_variables_initializer()
    debug = False

    iters = 100
    widgets = ["matmul test: ", pb.Percentage(), ' ', pb.Bar(), ' ', pb.ETA()]
    pbar = pb.ProgressBar(widgets=widgets, maxval=iters)
    pbar.start()

    for i in range(iters):
        pbar.update(i)

        # NN variant
        with tf.Session(''):
            t_init.run()
            if debug:
                print(
                    "Wave Kernel (NN):\n-------------------------------------------------"
                )
            if debug: print("a: %s" % (a.eval()))
            if debug: print("b: %s" % (b.eval()))
            # (2, 3) * (3, 4) = (2, 4)
            z = waveflow.wavecomp_ops_module.wave_mat_mul(a, b).eval()
            if debug: print("z: %s" % (z))

            # Convert to numpy
            a_np = np.array(a.eval())
            b_np = np.array(b.eval())
            z2 = np.matmul(a_np, b_np)
            if debug:
                print(
                    "\nNumpy:\n-------------------------------------------------"
                )
            if debug: print("a (np): %s" % (a_np))
            if debug: print("b (np): %s" % (b_np))
            if debug: print("z (np): %s" % (z2))
            if debug: print("\n\n")

            assert np.allclose(z, z2, atol=0.1)

        # TN variant
        with tf.Session(''):
            t_init.run()
            if debug:
                print(
                    "Wave Kernel (TN):\n-------------------------------------------------"
                )
            a_t = tf.transpose(a)
            if debug: print("a: %s" % (a_t.eval()))
            if debug: print("b: %s" % (b.eval()))
            # (3, 2).T * (3, 4) = (2, 4)
            z = waveflow.wavecomp_ops_module.wave_mat_mul(
                a_t, b, transpose_a=True).eval()
            if debug: print("z: %s" % (z))

            # Convert to numpy
            a_np = np.array(a_t.eval())
            b_np = np.array(b.eval())
            assert np.allclose(a.eval(), a_np.T)

            z2 = np.matmul(a_np.T, b_np)
            if debug:
                print(
                    "\nNumpy:\n-------------------------------------------------"
                )
            if debug: print("a (np): %s" % (a_np))
            if debug: print("b (np): %s" % (b_np))
            if debug: print("z (np): %s" % (z2))
            if debug: print("\n\n")

            assert np.allclose(z, z2, atol=0.1)

        # NT variant
        with tf.Session(''):
            t_init.run()
            if debug:
                print(
                    "Wave Kernel (NT):\n-------------------------------------------------"
                )
            b_t = tf.transpose(b)
            if debug: print("a: %s" % (a.eval()))
            if debug: print("b: %s" % (b_t.eval()))
            z = waveflow.wavecomp_ops_module.wave_mat_mul(
                a, b_t, transpose_b=True).eval()
            if debug: print("z: %s" % (z))

            # Convert to numpy
            a_np = np.array(a.eval())
            b_np = np.array(b_t.eval())
            z2 = np.matmul(a_np, b_np.T)
            if debug:
                print(
                    "\nNumpy:\n-------------------------------------------------"
                )
            if debug: print("a (np): %s" % (a_np))
            if debug: print("b (np): %s" % (b_np))
            if debug: print("z (np): %s" % (z2))
            if debug: print("\n\n")

            assert np.allclose(z, z2, atol=0.1)

        # TT variant
        with tf.Session(''):
            t_init.run()
            if debug:
                print(
                    "Wave Kernel (TT):\n-------------------------------------------------"
                )
            a_t = tf.transpose(a)
            b_t = tf.transpose(b)
            if debug: print("a: %s" % (a_t.eval()))
            if debug: print("b: %s" % (b_t.eval()))
            # (3, 2).T * (4, 3).T = (2, 4)
            z = waveflow.wavecomp_ops_module.wave_mat_mul(
                a_t, b_t, transpose_a=True, transpose_b=True).eval()
            if debug: print("z: %s" % (z))

            # Convert to numpy
            a_np = np.array(a_t.eval())
            b_np = np.array(b_t.eval())
            z2 = np.matmul(a_np.T, b_np.T)
            if debug:
                print(
                    "\nNumpy:\n-------------------------------------------------"
                )
            if debug: print("a (np): %s" % (a_np))
            if debug: print("b (np): %s" % (b_np))
            if debug: print("z (np): %s" % (z2))
            if debug: print("\n\n")

            assert np.allclose(z, z2, atol=0.1)

    pbar.finish()
    return True
Example #13
images = tf.placeholder(tf.float32, [None, 28 * 28])
image_labels = tf.placeholder(tf.float32, [None, 10])


def my_leaky_relu(x):
    return tf.nn.leaky_relu(x, alpha=.5)


for i in range(num_layers):
    if i == 0:
        layers.append(
            tf.layers.dense(images,
                            128,
                            activation=my_leaky_relu,
                            kernel_initializer=tf.glorot_normal_initializer(
                                seed=None, dtype=tf.float32),
                            name=("Layer" + str(i))))

    elif i == 63:
        layers.append(
            tf.layers.dense(layers[i - 1],
                            128,
                            activation=my_leaky_relu,
                            kernel_initializer=tf.glorot_normal_initializer(
                                seed=None, dtype=tf.float32),
                            name=("Layer_1_" + str(i))))
        layers[i] = layers[i] + tf.layers.dense(
            images,
            128,
            activation=None,
            use_bias=False,
Example #14
    def __init__(
      self, sequence_length, num_classes, vocab_size, tags_vocab_size, deps_vocab_size,
      embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
        self.input_tags = tf.placeholder(tf.int32, [None, sequence_length], name="input_tags")
        self.input_deps = tf.placeholder(tf.int32, [None, sequence_length], name="input_dependency")
        self.input_head = tf.placeholder(tf.int32, [None, sequence_length], name="input_head")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.is_training = tf.placeholder(tf.bool, name="is_training")
        self.temperature = tf.placeholder(tf.float32, name="temperature")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)
        #initializer = tf.contrib.layers.variance_scaling_initializer()
        initializer = tf.glorot_normal_initializer()

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding_words"):
            self.W = tf.get_variable("embed_W_words", [vocab_size, embedding_size], initializer=initializer)
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

        with tf.device('/cpu:0'), tf.name_scope("embedding_tags"):
            W_tags = tf.get_variable("embed_W_tags", [tags_vocab_size, embedding_size], initializer=initializer)
            embedded_tags = tf.nn.embedding_lookup(W_tags, self.input_tags)
            embedded_tags_expanded = tf.expand_dims(embedded_tags, -1)

        with tf.device('/cpu:0'), tf.name_scope("embedding_deps"):
            W_deps = tf.get_variable("embed_W_deps", [deps_vocab_size, embedding_size], initializer=initializer)
            embedded_deps = tf.nn.embedding_lookup(W_deps, self.input_deps)
            embedded_deps_expanded = tf.expand_dims(embedded_deps, -1)

        with tf.device('/cpu:0'), tf.name_scope("embedding_head"):
            W_head = tf.get_variable("embed_W_head", [vocab_size, embedding_size], initializer=initializer)
            embedded_head = tf.nn.embedding_lookup(W_head, self.input_head)
            embedded_head_expanded = tf.expand_dims(embedded_head, -1)

        cnn_inputs = tf.concat([self.embedded_chars_expanded, embedded_tags_expanded, embedded_deps_expanded, embedded_head_expanded], -1)
        print("Embedded Shape:", cnn_inputs.shape)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for i, filter_size in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 4, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                #W = tf.get_variable("conv_{}_W".format(filter_size), shape=filter_shape, initializer=initializer)
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    cnn_inputs,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply BN
                conv = tf.layers.batch_normalization(conv, axis=-1, training=self.is_training)  # axis specifies which dimension holds the channels
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                # Maxpooling over the outputs
                pooled = tf.nn.max_pool(
                    h,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled)

        # Combine all the pooled features
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")
            self.probabilities = tf.nn.softmax(self.scores / self.temperature)

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
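The output layer above divides the logits by a temperature placeholder before the softmax. A small NumPy sketch (not from the original class) of how temperature flattens or sharpens the resulting distribution:

import numpy as np

def temperature_softmax(logits, temperature=1.0):
    z = logits / temperature
    z = z - z.max()              # subtract the max for numerical stability
    e = np.exp(z)
    return e / e.sum()

logits = np.array([2.0, 1.0, 0.1])
print(temperature_softmax(logits, temperature=1.0))  # sharper, peaked on the largest logit
print(temperature_softmax(logits, temperature=5.0))  # flatter, closer to uniform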
Example #15
def dnn_layer(features):
    # para set
    feature_size = FLAGS.feature_size
    common_field_size = FLAGS.common_field_size
    embedding_size = FLAGS.embedding_size
    common_dims = common_field_size * embedding_size
    # Wrap in list() so len() and indexing below also work under Python 3.
    layers = list(map(int, (FLAGS.deep_layers).split(',')))
    dropout = list(map(float, (FLAGS.dropout).split(',')))
    l2_reg = FLAGS.l2_reg

    # {U-A-X-C features that need no special handling}
    feat_ids = features['feat_ids']
    feat_vals = features['feat_vals']
    #{multi-hot}
    video_ids = features['videoIdsids']

    # ------bulid weights------
    with tf.variable_scope("Embedding", reuse=tf.AUTO_REUSE):
        Feat_Emb = tf.get_variable(name='embeddings',
                                   shape=[feature_size, embedding_size],
                                   initializer=tf.glorot_normal_initializer())

    #------build f(x)------
    with tf.variable_scope("Embedding-layer", reuse=tf.AUTO_REUSE):
        common_embs = tf.nn.embedding_lookup(Feat_Emb,
                                             feat_ids)  # None * F' * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, common_field_size,
                                                 1])  # reshape for_warn
        uac_emb = tf.multiply(common_embs, feat_vals)
        video_emb = tf.nn.embedding_lookup_sparse(Feat_Emb,
                                                  sp_ids=video_ids,
                                                  sp_weights=None,
                                                  combiner="sum")

    with tf.variable_scope("DNN-layer", reuse=tf.AUTO_REUSE):
        if FLAGS.batch_norm:
            if FLAGS.task_type == 'train':
                train_phase = True
            else:
                train_phase = False
        else:
            normalizer_fn = None
            normalizer_params = None

        x_deep = tf.concat(
            [tf.reshape(uac_emb, shape=[-1, common_dims]), video_emb],
            axis=1)  # None * (F*K)
        for i in range(len(layers)):
            x_deep = tf.contrib.layers.fully_connected(
                inputs=x_deep,
                num_outputs=layers[i],
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
                scope='dnn%d' % i)
            if FLAGS.batch_norm:
                x_deep = batch_norm_layer(x_deep,
                                          train_phase=train_phase,
                                          scope_bn='bn_%d' % i)
            if FLAGS.task_type == 'train':
                x_deep = tf.nn.dropout(
                    x_deep, keep_prob=dropout[i]
                )  # Apply dropout after all BN layers; dropout=0.8 is the keep probability (drop ratio 0.2)
    return x_deep
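The embedding layer above mixes a dense lookup for the fixed fields with embedding_lookup_sparse for the multi-hot video ids. A minimal, hypothetical sketch of the sparse lookup in isolation (ids, names and shapes are made up):

import tensorflow as tf

emb_table = tf.get_variable("emb_demo", shape=[100, 8],
                            initializer=tf.glorot_normal_initializer())
# Two examples: the first watched videos 3 and 7, the second only video 42.
video_ids = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                            values=tf.constant([3, 7, 42], dtype=tf.int64),
                            dense_shape=[2, 2])
video_emb = tf.nn.embedding_lookup_sparse(emb_table, sp_ids=video_ids,
                                          sp_weights=None, combiner="sum")  # shape [2, 8]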
Example #16
def generator(inputs, is_train=True, reuse=False):
    image_size = 64
    s16 = image_size // 16
    gf_dim = 64  # Dimension of gen filters in first conv layer. [64]
    c_dim = FLAGS.c_dim  # n_color 3
    w_init = tf.glorot_normal_initializer()
    gamma_init = tf.random_normal_initializer(1., 0.02)

    with tf.variable_scope("generator", reuse=reuse):

        net_in = InputLayer(inputs, name='g/in')
        net_h0 = DenseLayer(net_in,
                            n_units=(gf_dim * 8 * s16 * s16),
                            W_init=w_init,
                            act=tf.identity,
                            name='g/h0/lin')
        net_h0 = ReshapeLayer(net_h0,
                              shape=[-1, s16, s16, gf_dim * 8],
                              name='g/h0/reshape')
        net_h0 = BatchNormLayer(net_h0,
                                decay=0.9,
                                act=tf.nn.relu,
                                is_train=is_train,
                                gamma_init=gamma_init,
                                name='g/h0/batch_norm')

        net_h1 = DeConv2d(net_h0,
                          gf_dim * 4, (5, 5),
                          strides=(2, 2),
                          padding='SAME',
                          act=None,
                          W_init=w_init,
                          name='g/h1/decon2d')
        net_h1 = BatchNormLayer(net_h1,
                                decay=0.9,
                                act=tf.nn.relu,
                                is_train=is_train,
                                gamma_init=gamma_init,
                                name='g/h1/batch_norm')

        net_h2 = DeConv2d(net_h1,
                          gf_dim * 2, (5, 5),
                          strides=(2, 2),
                          padding='SAME',
                          act=None,
                          W_init=w_init,
                          name='g/h2/decon2d')
        net_h2 = BatchNormLayer(net_h2,
                                decay=0.9,
                                act=tf.nn.relu,
                                is_train=is_train,
                                gamma_init=gamma_init,
                                name='g/h2/batch_norm')

        net_h3 = DeConv2d(net_h2,
                          gf_dim, (5, 5),
                          strides=(2, 2),
                          padding='SAME',
                          act=None,
                          W_init=w_init,
                          name='g/h3/decon2d')
        net_h3 = BatchNormLayer(net_h3,
                                decay=0.9,
                                act=tf.nn.relu,
                                is_train=is_train,
                                gamma_init=gamma_init,
                                name='g/h3/batch_norm')

        net_h4 = DeConv2d(net_h3,
                          c_dim, (5, 5),
                          strides=(2, 2),
                          padding='SAME',
                          act=None,
                          W_init=w_init,
                          name='g/h4/decon2d')
        net_h4.outputs = tf.nn.tanh(net_h4.outputs)
    return net_h4
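The generator above starts from an s16 x s16 feature map (s16 = 64 // 16 = 4) and doubles the spatial resolution in each of the four stride-2 deconvolutions, ending at the 64 x 64 output. A quick arithmetic check of those shapes:

image_size = 64
s16 = image_size // 16
sizes = [s16 * (2 ** k) for k in range(5)]
print(sizes)  # [4, 8, 16, 32, 64]; the last value matches image_size
assert sizes[-1] == image_size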
Example #17
def model_fn(features, labels, mode, params):
    """Build Model function f(x) for Estimator."""

    #------hyper parameters------
    field_size = params['field_size']
    feature_size = params['feature_size']
    embedding_size = params['embedding_size']
    l2_reg = params['l2_reg']
    learning_rate = params['learning_rate']

    dropout = params['dropout']
    attention_factor = params['attention_factor']

    #------build weights------
    Global_Bias = tf.get_variable("bias", shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Wgts = tf.get_variable("linear", shape=[feature_size], initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable("emb", shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())

    #------build feature------
    feat_ids = features['feat_ids']
    feat_vals = features['feat_vals']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size]) # None * F

    #------build f(x)------

    # FM linear part: sum(w * x)
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Wgts, feat_ids) # None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    # Deep part
    with tf.variable_scope("Embedding_Layer"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids) # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1]) # None * F * 1
        embeddings = tf.multiply(embeddings, feat_vals) # None * F * K


    with tf.variable_scope("Pair-wise_Interaction_Layer"):
        num_interactions = field_size * (field_size - 1) / 2
        element_wise_product_list = []
        for i in range(0, field_size):
            for j in range(i + 1, field_size):
                element_wise_product_list.append(tf.multiply(embeddings[:, i, :], embeddings[:, j, :]))
        element_wise_product_list = tf.stack(element_wise_product_list) # (F*(F-1)/2) * None * K; tf.stack stacks the pairwise products along a new leading axis
        element_wise_product_list = tf.transpose(element_wise_product_list, perm=[1,0,2]) # None * (F(F-1)/2) * K

    # Compute the attention scores
    with tf.variable_scope("Attention_Network"):

        deep_inputs = tf.reshape(element_wise_product_list, shape=[-1, embedding_size]) # (None*F(F-1)/2) * K

        deep_inputs = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=attention_factor, activation_fn=tf.nn.relu, \
                                             weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="attention_net_mlp")

        aij = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity, \
                                             weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="attention_net_out") # (None*F(F-1)/2) * 1

        # After computing the attention scores, normalize them with a softmax
        aij = tf.reshape(aij, shape=[-1, int(num_interactions), 1])
        aij_softmax = tf.nn.softmax(aij, dim=1, name="attention_net_softout") # None * num_interactions

        # TODO: why apply dropout to the attention scores? (the original author was unsure here)
        if mode == tf.estimator.ModeKeys.TRAIN:
            aij_softmax = tf.nn.dropout(aij_softmax, keep_prob=dropout[0])

    with tf.variable_scope("Attention-based_Pooling_Layer"):
        deep_inputs = tf.multiply(element_wise_product_list, aij_softmax) # None * (F(F-1)/2) * K
        deep_inputs = tf.reduce_sum(deep_inputs, axis=1) # None * K  (attention-weighted sum pooling)

        # The output of the attention-based pooling layer also goes through dropout
        if mode == tf.estimator.ModeKeys.TRAIN:
            deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[1])

        # The output of this layer is a K-dimensional vector

    with tf.variable_scope("Prediction_Layer"):
        # Followed directly by the output unit
        deep_inputs = contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity, \
                                             weights_regularizer=contrib.layers.l2_regularizer(l2_reg), scope="afm_out") # None * 1
        y_deep = tf.reshape(deep_inputs, shape=[-1]) # None

    with tf.variable_scope("AFM_overall"):
        y_bias = Global_Bias * tf.ones_like(y_deep, dtype=tf.float32)
        y = y_bias + y_linear + y_deep
        pred = tf.nn.sigmoid(y)

    # set predictions
    predictions = {"prob": pred}
    export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + l2_reg * tf.nn.l2_loss(Feat_Wgts) + l2_reg * tf.nn.l2_loss(Feat_Emb)
    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            eval_metric_ops=eval_metric_ops)


    #------build optimizer------
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN`
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op)
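The pair-wise interaction and attention-pooling layers above reduce F field embeddings to a single K-dimensional vector via F*(F-1)/2 element-wise products and softmax weights. A small NumPy sketch of that computation for one example (the attention MLP is replaced by random scores):

import numpy as np

F, K = 4, 3
emb = np.random.rand(F, K)                                       # field embeddings for one example
pairs = np.stack([emb[i] * emb[j]
                  for i in range(F) for j in range(i + 1, F)])   # (F*(F-1)/2, K) = (6, 3)
scores = np.random.rand(len(pairs), 1)                           # stand-in for the attention network
weights = np.exp(scores) / np.exp(scores).sum()                  # softmax over the interactions
pooled = (weights * pairs).sum(axis=0)                           # (K,) attention-weighted pooling
print(pooled.shape)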
Example #18
def model_fn(features, labels, mode, params):
    print('params', params)

    field_size = params['field_size']
    embedding_size = params['embedding_size']
    feature_size = params['feature_size']
    l2_reg = params['l2_reg']
    learning_rate = params['learning_rate']
    layers = list(map(int, params['deep_layer'].split(',')))
    dropout = list(map(float, params['dropout'].split(',')))

    Global_Bias = tf.get_variable('bias',
                                  shape=[1],
                                  initializer=tf.constant_initializer(0.0))
    Feat_Bias = tf.get_variable('linear',
                                shape=[feature_size],
                                initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable('emb',
                               shape=[feature_size, embedding_size],
                               initializer=tf.glorot_normal_initializer())

    feat_ids = features['feat_ids']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

    with tf.variable_scope('Linear-part'):
        feat_wgts = tf.nn.embedding_lookup(Feat_Bias, feat_ids)
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)

    with tf.variable_scope('BiInter-part'):
        embedding = tf.nn.embedding_lookup(Feat_Emb, feat_ids)
        feat_vals = tf.reshape(feat_vals, [-1, field_size, 1])
        embedding = tf.multiply(embedding, feat_vals)
        sum_square_emb = tf.square(tf.reduce_sum(embedding, 1))
        square_sum_emb = tf.reduce_sum(tf.square(embedding), 1)
        deep_input = 0.5 * tf.subtract(sum_square_emb, square_sum_emb)

    with tf.variable_scope('Deep-part'):
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_phase = True
        else:
            train_phase = False

        if mode == tf.estimator.ModeKeys.TRAIN:
            deep_input = tf.nn.dropout(deep_input, keep_prob=dropout[0])

        for i in range(len(layers)):
            deep_input = tf.contrib.layers.fully_connected(
                inputs=deep_input,
                num_outputs=layers[i],
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
                scope='mlp%d' % i)

            if FLAGS.batch_norm:
                deep_input = batch_norm_layer(deep_input,
                                              train_phase=train_phase,
                                              scope_bn='bn_%d' % i)

            if mode == tf.estimator.ModeKeys.TRAIN:
                deep_input = tf.nn.dropout(deep_input,
                                           keep_prob=dropout[i + 1])

        y_deep = tf.contrib.layers.fully_connected(
            inputs=deep_input,
            num_outputs=1,
            activation_fn=tf.identity,
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
            scope='deep_out')
        y_d = tf.reshape(y_deep, [-1])

    with tf.variable_scope('NFM-out'):
        y_bias = Global_Bias * tf.ones_like(y_d, dtype=tf.float32)
        y = y_bias + y_linear + y_d
        pred = tf.sigmoid(y)

    predictions = {'prob': pred}
    export_outputs = {
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
        tf.estimator.export.PredictOutput(predictions)
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=pred,
                                          export_outputs=export_outputs)

    loss = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=y) +
        l2_reg * tf.nn.l2_loss(Feat_Bias) + l2_reg * tf.nn.l2_loss(Feat_Emb))

    eval_metric_ops = {'auc': tf.metrics.auc(labels, pred)}
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode,
                                          predictions=predictions,
                                          loss=loss,
                                          eval_metric_ops=eval_metric_ops)

    if FLAGS.optimizer == 'Adam':
        opt = tf.train.AdamOptimizer(learning_rate,
                                     beta1=0.9,
                                     beta2=0.999,
                                     epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        opt = tf.train.AdagradOptimizer(learning_rate,
                                        initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        opt = tf.train.MomentumOptimizer(learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        opt = tf.train.FtrlOptimizer(learning_rate)

    train_op = opt.minimize(loss, global_step=tf.train.get_global_step())

    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op)
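The 'BiInter-part' above relies on the standard FM identity: the sum of all pair-wise element-wise products equals 0.5 * ((sum of embeddings)^2 - sum of squared embeddings), which is what sum_square_emb and square_sum_emb compute. A NumPy check of that identity:

import numpy as np

F, K = 5, 4
e = np.random.rand(F, K)
pairwise = sum(e[i] * e[j] for i in range(F) for j in range(i + 1, F))
identity = 0.5 * (e.sum(axis=0) ** 2 - (e ** 2).sum(axis=0))
assert np.allclose(pairwise, identity)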
Example #19
def discriminator(inputs, is_train=True, reuse=False):
    df_dim = 64  # Dimension of discrim filters in first conv layer. [64]
    w_init = tf.glorot_normal_initializer()
    gamma_init = tf.random_normal_initializer(1., 0.02)
    lrelu = lambda x: tf.nn.leaky_relu(x, 0.2)
    with tf.variable_scope("discriminator", reuse=reuse):

        net_in = InputLayer(inputs, name='d/in')
        net_h0 = Conv2d(net_in,
                        df_dim, (5, 5), (2, 2),
                        act=lrelu,
                        padding='SAME',
                        W_init=w_init,
                        name='d/h0/conv2d')

        net_h1 = Conv2d(net_h0,
                        df_dim * 2, (5, 5), (2, 2),
                        act=None,
                        padding='SAME',
                        W_init=w_init,
                        name='d/h1/conv2d')
        net_h1 = BatchNormLayer(net_h1,
                                decay=0.9,
                                act=lrelu,
                                is_train=is_train,
                                gamma_init=gamma_init,
                                name='d/h1/batch_norm')

        net_h2 = Conv2d(net_h1,
                        df_dim * 4, (5, 5), (2, 2),
                        act=None,
                        padding='SAME',
                        W_init=w_init,
                        name='d/h2/conv2d')
        net_h2 = BatchNormLayer(net_h2,
                                decay=0.9,
                                act=lrelu,
                                is_train=is_train,
                                gamma_init=gamma_init,
                                name='d/h2/batch_norm')

        net_h3 = Conv2d(net_h2,
                        df_dim * 8, (5, 5), (2, 2),
                        act=None,
                        padding='SAME',
                        W_init=w_init,
                        name='d/h3/conv2d')
        net_h3 = BatchNormLayer(net_h3,
                                decay=0.9,
                                act=lrelu,
                                is_train=is_train,
                                gamma_init=gamma_init,
                                name='d/h3/batch_norm')

        net_h4 = FlattenLayer(net_h3, name='d/h4/flatten')
        net_h4 = DenseLayer(net_h4,
                            n_units=1,
                            act=tf.identity,
                            W_init=w_init,
                            name='d/h4/lin_sigmoid')
        logits = net_h4.outputs
        net_h4.outputs = tf.nn.sigmoid(net_h4.outputs)
    return net_h4, logits
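The discriminator returns both the raw logits and the sigmoid output; the usual reason is that the GAN losses want un-squashed logits for tf.nn.sigmoid_cross_entropy_with_logits. A hypothetical sketch of how the two outputs would be consumed (placeholders stand in for the logits returned by discriminator(...) above):

import tensorflow as tf

real_logits = tf.placeholder(tf.float32, [None, 1])  # stand-in for the logits on real images
fake_logits = tf.placeholder(tf.float32, [None, 1])  # stand-in for the logits on generated images
d_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=real_logits,
                                            labels=tf.ones_like(real_logits)) +
    tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logits,
                                            labels=tf.zeros_like(fake_logits)))
g_loss = tf.reduce_mean(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_logits,
                                            labels=tf.ones_like(fake_logits)))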
Example #20
        def model_fn(features, labels, mode):
            """
            the model_fn feeds into Estimator
            """
            feature_columns = self.create_feature_columns(tf_transform_output)
            input_layer = tf.feature_column.input_layer(
                features=features, feature_columns=feature_columns)

            # Network structure
            # Batch norm after linear combination and before activation. Dropout after activation.
            h1 = tf.layers.Dense(
                units=MODEL_NUM_UNIT_SCALE * 4,
                activation=None,
                kernel_initializer=tf.glorot_normal_initializer(),
                bias_initializer=tf.zeros_initializer()
            )(input_layer)
            h1_bn = tf.layers.batch_normalization(h1, training=(mode == tf.estimator.ModeKeys.TRAIN))
            h1_act = tf.nn.relu(h1_bn)
            h1_do = tf.layers.dropout(
                inputs=h1_act,
                rate=DROPOUT_PROB,
                training=(mode == tf.estimator.ModeKeys.TRAIN))

            h2 = tf.layers.Dense(
                units=MODEL_NUM_UNIT_SCALE * 2,
                activation=None,
                kernel_initializer=tf.glorot_normal_initializer(),
                bias_initializer=tf.zeros_initializer()
            )(h1_do)
            h2_bn = tf.layers.batch_normalization(h2, training=(mode == tf.estimator.ModeKeys.TRAIN))
            h2_act = tf.nn.relu(h2_bn)
            h2_do = tf.layers.dropout(
                inputs=h2_act,
                rate=DROPOUT_PROB,
                training=(mode == tf.estimator.ModeKeys.TRAIN))

            # Head for label1
            h30 = tf.layers.Dense(
                units=MODEL_NUM_UNIT_SCALE,
                activation=None,
                kernel_initializer=tf.glorot_normal_initializer(),
                bias_initializer=tf.zeros_initializer()
            )(h2_do)
            h3_bn0 = tf.layers.batch_normalization(h30, training=(mode == tf.estimator.ModeKeys.TRAIN))
            h3_act0 = tf.nn.relu(h3_bn0)
            h3_do0 = tf.layers.dropout(
                inputs=h3_act0,
                rate=DROPOUT_PROB,
                training=(mode == tf.estimator.ModeKeys.TRAIN))
            logits0 = tf.layers.Dense(
                units=2,
                activation=None,
                kernel_initializer=tf.glorot_normal_initializer(),
                bias_initializer=tf.zeros_initializer()
            )(h3_do0)
            softmax0 = tf.contrib.layers.softmax(logits0)

            q_values = tf.div(softmax0[:, 1] - tf.reduce_min(softmax0[:, 1]),
                              tf.reduce_max(softmax0[:, 1]) - tf.reduce_min(softmax0[:, 1]))

            if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
                labels0 = labels  # int64. Note: use `labels` rather than `labels[0]`, since there is only one label now.
                onehot_labels0 = tf.one_hot(labels0,
                                            depth=2)  # one-hot labels have shape [batch_size, num_classes], matching logits0 of shape [batch_size, 2]
                # `ror_20_days_bool` loss definition: per-class weighting to correct for class
                # imbalances (the weights below are currently 1:1, i.e. no reweighting).
                unweighted_losses0 = tf.losses.softmax_cross_entropy(
                    onehot_labels=onehot_labels0, logits=logits0, reduction=Reduction.NONE)
                class_weights0 = tf.constant([[1., 1.]])
                sample_weights0 = tf.reduce_sum(tf.multiply(onehot_labels0, class_weights0), 1)
                loss0 = tf.reduce_mean(unweighted_losses0 * sample_weights0)

                loss = loss0

                # Metrics
                auroc0 = tf.metrics.auc(labels0, softmax0[:, 1], num_thresholds=10000, curve='ROC')
                prauc0 = tf.metrics.auc(labels0, softmax0[:, 1], num_thresholds=10000, curve='PR',
                                        summation_method='careful_interpolation')

            if mode == tf.estimator.ModeKeys.TRAIN:

                # Softmax cross-entropy loss (defined above), optimized with Adam
                optimizer = tf.train.AdamOptimizer(FIX_LEARNING_RATE)

                # This is to make sure we also update the rolling mean/var for `tf.layers.batch_normalization`
                # (which is stored outside of the Estimator scope).
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

                # TensorBoard performance metrics.
                with tf.name_scope('losses'):
                    tf.summary.scalar('loss_ror_20', loss0)

                # TensorBoard model evolution over time.
                with tf.name_scope('layer_1'):
                    weights = tf.get_default_graph().get_tensor_by_name(os.path.split(h1.name)[0] + '/kernel:0')
                    biases = tf.get_default_graph().get_tensor_by_name(os.path.split(h1.name)[0] + '/bias:0')
                    tf.summary.histogram('weights', weights)
                    tf.summary.histogram('biases', biases)
                    tf.summary.histogram('activations', h1_act)
                with tf.name_scope('layer_2'):
                    weights = tf.get_default_graph().get_tensor_by_name(os.path.split(h2.name)[0] + '/kernel:0')
                    biases = tf.get_default_graph().get_tensor_by_name(os.path.split(h2.name)[0] + '/bias:0')
                    tf.summary.histogram('weights', weights)
                    tf.summary.histogram('biases', biases)
                    tf.summary.histogram('activations', h2_act)
                with tf.name_scope('layer_3_ror_20'):
                    weights = tf.get_default_graph().get_tensor_by_name(os.path.split(h30.name)[0] + '/kernel:0')
                    biases = tf.get_default_graph().get_tensor_by_name(os.path.split(h30.name)[0] + '/bias:0')
                    tf.summary.histogram('weights', weights)
                    tf.summary.histogram('biases', biases)
                    tf.summary.histogram('activations', h3_act0)
                with tf.name_scope('logits_ror_20'):
                    weights = tf.get_default_graph().get_tensor_by_name(
                        os.path.split(logits0.name)[0] + '/kernel:0')
                    biases = tf.get_default_graph().get_tensor_by_name(os.path.split(logits0.name)[0] + '/bias:0')
                    tf.summary.histogram('weights', weights)
                    tf.summary.histogram('biases', biases)
                    tf.summary.histogram('activations', h3_act0)
                with tf.name_scope('q_values_ror_20'):
                    tf.summary.histogram('q0', softmax0[:, 0])
                    tf.summary.histogram('q1', softmax0[:, 1])

                # Log a few predictions (label0: ror_xxx_days_bool)
                # to watch the labels and softmax in training
                label_and_softmax0 = tf.stack([tf.cast(labels0, tf.float32), softmax0[:, 1]], axis=1)
                logging_hook = tf.train.LoggingTensorHook({
                    'label_and_softmax0': label_and_softmax0[0:10, :],  # label_and_softmax0 size is batch size in train_config "TRAIN_BATCH_SIZE"
                }, every_n_iter=LOG_FREQ_STEP)

                return tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=loss,
                    train_op=train_op,
                    training_hooks=[logging_hook])

            elif mode == tf.estimator.ModeKeys.EVAL:
                return tf.estimator.EstimatorSpec(
                    mode=mode,
                    loss=loss,
                    # These metrics are computed over the complete eval dataset.
                    eval_metric_ops={
                        'metrics_ror_20_days_bool/AUC_ROC': auroc0,
                        'metrics_ror_20_days_bool/AUC_PR': prauc0,
                    }, predictions={SignatureKeys.PREDICTIONS: q_values})

            elif mode == tf.estimator.ModeKeys.PREDICT:
                """
                A policy derived from the Q-value network. This epsilon-greedy policy
                takes the seeds with the `TOP_SEEDS_K` highest Q-values and, with probability
                `epsilon_greedy_probability`, replaces them with a random value in [0, 1000).
                """

                # Indices of top `p.TOP_SEEDS_K` Q-values.
                top_q_idx = tf.nn.top_k(q_values, k=TOP_SEEDS_K)[1]
                sel_q_idx = tf.random_shuffle(top_q_idx)[0:SEEDS_K_FINAL]
                # Since seeds are in [1, `p.SEEDS_K_FINAL`], we have to add 1 to the index.
                predictions = sel_q_idx + 1

                class_labels_ror_20 = tf.reshape(
                    tf.tile(tf.constant(['0', '1']), (tf.shape(softmax0)[0],)),
                    (tf.shape(softmax0)[0], 2))

                export_outputs = {
                    # Default output (used in serving-infra)
                    # * output: Seed list. Requires using `SignatureKeys.OUTPUT` dict key, since this is
                    #   used by the downstream SRS.
                    # * eps_rnd_selection: Boolean list of whether a random seed (with eps prob)
                    #   was recommend or a predicted seed.
                    # * q_values: Q-values for all `SEED_LIST_LENGTH` seeds.
                    SignatureDefs.DEFAULT: tf.estimator.export.PredictOutput(
                        {SignatureKeys.OUTPUT: predictions,
                         "q_values": tf.transpose(q_values)}),
                    # Analysis output
                    SignatureDefs.ANALYSIS_ROR_20: tf.estimator.export.ClassificationOutput(
                        scores=softmax0,
                        classes=class_labels_ror_20),
                    SignatureDefs.ANALYSIS_Q: tf.estimator.export.RegressionOutput(
                        value=q_values)
                }

                return tf.estimator.EstimatorSpec(
                    mode=mode,
                    predictions={SignatureKeys.PREDICTIONS: q_values},
                    export_outputs=export_outputs)
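In the PREDICT branch above, the policy keeps the TOP_SEEDS_K highest Q-values, shuffles them, and returns SEEDS_K_FINAL of them as 1-based seed ids. A NumPy sketch of that selection (the constants are assumptions):

import numpy as np

TOP_SEEDS_K, SEEDS_K_FINAL = 5, 2
q_values = np.random.rand(10)                              # one Q-value per candidate seed
top_idx = np.argsort(q_values)[::-1][:TOP_SEEDS_K].copy()  # indices of the TOP_SEEDS_K largest values
np.random.shuffle(top_idx)                                 # random selection among the top candidates
predictions = top_idx[:SEEDS_K_FINAL] + 1                  # shift to 1-based seed ids
print(predictions)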
def model_fn(features, labels, mode, params, config):
  """Builds the model function for use in an Estimator.

  Arguments:
    features: The input features for the Estimator.
    labels: The labels, unused here.
    mode: Signifies whether it is train or test or predict.
    params: Some hyperparameters as a dictionary.
    config: The RunConfig, unused here.

  Returns:
    EstimatorSpec: A tf.estimator.EstimatorSpec instance.
  """
  del labels, config

  # Set up the model's learnable parameters.
  logit_concentration = tf.get_variable(
      "logit_concentration",
      shape=[1, params["num_topics"]],
      initializer=tf.constant_initializer(
          _softplus_inverse(params["prior_initial_value"])))
  concentration = _clip_dirichlet_parameters(
      tf.nn.softplus(logit_concentration))

  num_words = features.shape[1]
  topics_words_logits = tf.get_variable(
      "topics_words_logits",
      shape=[params["num_topics"], num_words],
      initializer=tf.glorot_normal_initializer())
  topics_words = tf.nn.softmax(topics_words_logits, axis=-1)

  # Compute expected log-likelihood. First, sample from the variational
  # distribution; second, compute the log-likelihood given the sample.
  lda_variational = make_lda_variational(
      params["activation"],
      params["num_topics"],
      params["layer_sizes"])
  with ed.tape() as variational_tape:
    _ = lda_variational(features)

  with ed.tape() as model_tape:
    with ed.interception(
        make_value_setter(topics=variational_tape["topics_posterior"])):
      posterior_predictive = latent_dirichlet_allocation(concentration,
                                                         topics_words)

  log_likelihood = posterior_predictive.distribution.log_prob(features)
  tf.summary.scalar("log_likelihood", tf.reduce_mean(log_likelihood))

  # Compute the KL-divergence between two Dirichlets analytically.
  # The sampled KL does not work well for "sparse" distributions
  # (see Appendix D of [2]).
  kl = variational_tape["topics_posterior"].distribution.kl_divergence(
      model_tape["topics"].distribution)
  tf.summary.scalar("kl", tf.reduce_mean(kl))

  # Ensure that the KL is non-negative (up to a very small slack).
  # Negative KL can happen due to numerical instability.
  with tf.control_dependencies([tf.assert_greater(kl, -1e-3, message="kl")]):
    kl = tf.identity(kl)

  elbo = log_likelihood - kl
  avg_elbo = tf.reduce_mean(elbo)
  tf.summary.scalar("elbo", avg_elbo)
  loss = -avg_elbo

  # Perform variational inference by minimizing the -ELBO.
  global_step = tf.train.get_or_create_global_step()
  optimizer = tf.train.AdamOptimizer(params["learning_rate"])

  # This implements the "burn-in" for prior parameters (see Appendix D of [2]).
  # For the first prior_burn_in_steps steps they are fixed, and then trained
  # jointly with the other parameters.
  grads_and_vars = optimizer.compute_gradients(loss)
  grads_and_vars_except_prior = [
      x for x in grads_and_vars if x[1] != logit_concentration]

  def train_op_except_prior():
    return optimizer.apply_gradients(
        grads_and_vars_except_prior,
        global_step=global_step)

  def train_op_all():
    return optimizer.apply_gradients(
        grads_and_vars,
        global_step=global_step)

  train_op = tf.cond(
      global_step < params["prior_burn_in_steps"],
      true_fn=train_op_except_prior,
      false_fn=train_op_all)

  # The perplexity is an exponent of the average negative ELBO per word.
  words_per_document = tf.reduce_sum(features, axis=1)
  log_perplexity = -elbo / words_per_document
  tf.summary.scalar("perplexity", tf.exp(tf.reduce_mean(log_perplexity)))
  (log_perplexity_tensor, log_perplexity_update) = tf.metrics.mean(
      log_perplexity)
  perplexity_tensor = tf.exp(log_perplexity_tensor)

  # Obtain the topics summary. Implemented as a py_func for simplicity.
  topics = tf.py_func(
      functools.partial(get_topics_strings, vocabulary=params["vocabulary"]),
      [topics_words, concentration], tf.string, stateful=False)
  tf.summary.text("topics", topics)

  return tf.estimator.EstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=train_op,
      eval_metric_ops={
          "elbo": tf.metrics.mean(elbo),
          "log_likelihood": tf.metrics.mean(log_likelihood),
          "kl": tf.metrics.mean(kl),
          "perplexity": (perplexity_tensor, log_perplexity_update),
          "topics": (topics, tf.no_op()),
      },
  )
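As the comment in the Estimator above notes, perplexity is the exponent of the average negative ELBO per word. A short NumPy sketch of that bookkeeping with made-up numbers:

import numpy as np

elbo = np.array([-3500.0, -4200.0])              # per-document ELBOs (illustrative values)
words_per_document = np.array([500.0, 600.0])
log_perplexity = -elbo / words_per_document
perplexity = np.exp(log_perplexity.mean())
print(perplexity)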
Example #22
def icnr_weights(init=tf.glorot_normal_initializer(),
                 scale=2,
                 shape=[3, 3, 32, 4],
                 dtype=tf.float32):
    sess = tf.Session()
    return sess.run(ICNR(init, scale=scale)(shape=shape, dtype=dtype))
Example #23
 def get_instance(args):
     # pylint: disable=unused-argument
     """
     create an instance of the initializer
     """
     return tf.glorot_normal_initializer(seed=SEED)
Example #24
    
    image = tf.reshape(image,(32*32*3,))
    target  = tf.one_hot(label,NUM_CLASSES)


    
    min_after_dequeue = 1000
    capacity = min_after_dequeue + 3* BATCH_SIZE
    image_batch, target_batch = tf.train.shuffle_batch([image,target],
                                                       batch_size= BATCH_SIZE,
                                                       capacity=capacity,
                                                       min_after_dequeue
                                                       =min_after_dequeue)
    
    
    W = tf.get_variable("W", shape=(32*32,), initializer=tf.glorot_normal_initializer())
    b = tf.get_variable("b", shape=(32,), initializer=tf.constant_initializer(0))
    
    
    logits = tf.nn.xw_plus_b(image_batch,W,b)
    ce_loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=target_batch)

    opt = tf.train.AdamOptimizer().minimize(ce_loss)    
    
    
    
    
    init = (tf.global_variables_initializer(),
            tf.local_variables_initializer())
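For reference, tf.nn.xw_plus_b(x, W, b) computes x @ W + b, so the flattened 32*32*3 images require W of shape [32*32*3, NUM_CLASSES] and b of shape [NUM_CLASSES]. A NumPy shape check of that contract:

import numpy as np

NUM_CLASSES, BATCH_SIZE = 10, 4
x = np.random.rand(BATCH_SIZE, 32 * 32 * 3)
W = np.random.rand(32 * 32 * 3, NUM_CLASSES)
b = np.zeros(NUM_CLASSES)
logits = x @ W + b
assert logits.shape == (BATCH_SIZE, NUM_CLASSES)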
    
    def __init__(self, num_entities, num_relations):

        super(HyperER, self).__init__()

        self.entity_dim = 200
        self.relation_dim = 200
        self.num_entities = num_entities
        self.num_relations = num_relations

        self.in_channels = 1
        self.out_channels = 32
        self.kernal_h = 1
        self.kernal_w = 9

        self.dense1_size_out = self.in_channels * self.out_channels * self.kernal_h * self.kernal_w
        self.dense2_size_in = (1 - self.kernal_h + 1) * (
            self.entity_dim - self.kernal_w + 1) * self.out_channels

        # self.inp_drop = 0.2
        # self.feature_map_drop = 0.2
        # self.hidden_drop = 0.3

        self.weights_dense1 = tf.Variable(lambda: tf.glorot_normal_initializer(
        )([self.relation_dim, self.dense1_size_out]))
        self.bias_dense1 = tf.Variable(lambda: tf.glorot_normal_initializer()
                                       ([self.dense1_size_out]))

        self.weights_dense2 = tf.Variable(lambda: tf.glorot_normal_initializer(
        )([self.dense2_size_in, self.entity_dim]))
        self.bias_dense2 = tf.Variable(lambda: tf.glorot_normal_initializer()
                                       ([self.entity_dim]))

        self.bias_logits = tf.Variable(lambda: tf.glorot_normal_initializer()
                                       ([self.num_entities]))

        # Generate random embedding representations for entities and relations
        self.embedding_matrix_entities = tf.Variable(
            lambda: tf.glorot_normal_initializer()
            ([self.num_entities, self.entity_dim]))
        self.embedding_matrix_relations = tf.Variable(
            lambda: tf.glorot_normal_initializer()
            ([self.num_relations, self.relation_dim]))

        self.bn0 = tf.keras.layers.BatchNormalization(axis=3,
                                                      momentum=0.1,
                                                      epsilon=1e-05)
        self.bn1 = tf.keras.layers.BatchNormalization(axis=3,
                                                      momentum=0.1,
                                                      epsilon=1e-05)
        self.bn2 = tf.keras.layers.BatchNormalization(axis=1,
                                                      momentum=0.1,
                                                      epsilon=1e-05)

        self.inp_drop = tf.keras.layers.Dropout(0.2)
        self.feature_map_drop = tf.keras.layers.SpatialDropout2D(0.2)
        self.hidden_drop = tf.keras.layers.Dropout(0.3)

        # self.dense1 = tf.keras.layers.Dense(self.dense1_size_out)
        # self.dense2 = tf.keras.layers.Dense(self.entity_dim)

        self.add = tf.keras.layers.Add()
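dense2_size_in above is just the flattened output size of a VALID convolution: a kernal_h x kernal_w = 1 x 9 kernel sliding over a 1 x entity_dim map with out_channels filters. A quick arithmetic check of that formula:

entity_dim, kernal_h, kernal_w, out_channels = 200, 1, 9, 32
out_h = 1 - kernal_h + 1                        # 1
out_w = entity_dim - kernal_w + 1               # 192
dense2_size_in = out_h * out_w * out_channels   # 6144
print(dense2_size_in)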
Example #26
    def __init__(self, alpha=0.03):
        self.alpha = alpha
        self.global_step = tf.train.get_or_create_global_step()

        self.matrix_init = tf.glorot_normal_initializer()
        self.zeros_init = tf.constant_initializer(0.)
 def classifier_rot(self, x):
     with tf.variable_scope('classify_rot', reuse=tf.AUTO_REUSE):
         return tf.layers.dense(
             x, 4, kernel_initializer=tf.glorot_normal_initializer())
#-*- coding: utf-8 -*-
'''
Author: Haoran Chen
Initial Date: 9/11/2019
'''

import tensorflow as tf
from layers import *

global_kwargs = {
    'initializer': tf.glorot_normal_initializer(),
    'dtype': tf.float32,
}

class SGRU():
    def __init__(self, options):
        '''
        n_w is word embedding dimension.
        n_h is hidden state dimension.
        n_f is mid-input dimension.
        n_v is the size of vocabulary.
        n_t is the dimension of tagging.
        n_z is the total video dimension.
        n_z1 is the ECO dimension.
        n_z2 is the ResNeXt dimension.
        '''
        self.options = options
        self.n_w = options.n_w
        self.n_h = options.n_h
        self.n_f = options.n_f
        self.n_t = options.n_t
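The global_kwargs dict above is presumably splatted into every variable creation so that all SGRU parameters share the same initializer and dtype; a hypothetical usage sketch (variable name and shape are made up):

import tensorflow as tf

global_kwargs = {
    'initializer': tf.glorot_normal_initializer(),
    'dtype': tf.float32,
}
W_demo = tf.get_variable('sgru_demo_W', shape=[300, 512], **global_kwargs)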
Example #29
    def model_fn(self, features, labels, mode, params):
        field_size = params["training"]["field_size"]
        feature_size = params["training"]["feature_size"]
        embedding_size = params["training"]["embedding_size"]
        l2_reg = params["training"]["l2_reg"]
        learning_rate = params["training"]["learning_rate"]
        batch_norm = params["training"]["batch_norm"]
        batch_norm_decay = params["training"]["batch_norm_decay"]
        optimizer = params["training"]["optimizer"]
        seed = params["training"]["seed"]
        metric = params['output']['metric']
        layers = params["training"]["deep_layers"]
        dropout = params["training"]["dropout"]

        np.random.seed(seed)
        tf.set_random_seed(seed)

        fm_bias = tf.get_variable(name='fm_bias', shape=[1],
                                  initializer=tf.constant_initializer(0.0))
        fm_weight = tf.get_variable(name='fm_weight', shape=[feature_size],
                                    initializer=tf.glorot_normal_initializer())
        fm_vector = tf.get_variable(name='fm_vector', shape=[feature_size, embedding_size],
                                    initializer=tf.glorot_normal_initializer())

        with tf.variable_scope("Feature"):
            feat_ids = features['feat_ids']
            feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
            feat_vals = features['feat_vals']
            feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

        with tf.variable_scope("First_order"):
            feat_weights = tf.nn.embedding_lookup(fm_weight, feat_ids)
            y_w = tf.reduce_sum(tf.multiply(feat_weights, feat_vals), 1)

        with tf.variable_scope("Second_order"):
            embeddings = tf.nn.embedding_lookup(fm_vector, feat_ids)
            feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
            embeddings = tf.multiply(embeddings, feat_vals)
            sum_square = tf.square(tf.reduce_sum(embeddings, 1))
            square_sum = tf.reduce_sum(tf.square(embeddings), 1)
            y_v = 0.5 * tf.reduce_sum(tf.subtract(sum_square, square_sum), 1)

        with tf.variable_scope("Deep-part"):
            if batch_norm:
                if mode == tf.estimator.ModeKeys.TRAIN:
                    train_phase = True
                else:
                    train_phase = False

            deep_inputs = tf.reshape(embeddings, shape=[-1, field_size * embedding_size])
            for i in range(len(layers)):
                deep_inputs = tf.contrib.layers.fully_connected(
                    inputs=deep_inputs, num_outputs=layers[i],
                    weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
                    scope='mlp%d' % i)
                if batch_norm:
                    deep_inputs = batch_norm_layer(
                        deep_inputs, train_phase=train_phase,
                        scope_bn='bn_%d' % i, batch_norm_decay=batch_norm_decay)
                if mode == tf.estimator.ModeKeys.TRAIN:
                    deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])

            y_deep = tf.contrib.layers.fully_connected(
                inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity,
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg),
                scope='deep_out')
            y_d = tf.reshape(y_deep, shape=[-1])

        with tf.variable_scope("DeepFM-out"):
            y_bias = fm_bias * tf.ones_like(y_d, dtype=tf.float32)
            y = y_bias + y_w + y_v + y_d
            pred = tf.sigmoid(y)

        predictions = {"probabilities": pred}
        export_outputs = {
            tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                tf.estimator.export.PredictOutput(predictions)}
        # Provide an estimator spec for `ModeKeys.PREDICT`
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                export_outputs=export_outputs)

        with tf.name_scope("Loss"):
            loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
                   l2_reg * tf.nn.l2_loss(fm_weight) + l2_reg * tf.nn.l2_loss(fm_vector)

        # Provide an estimator spec for `ModeKeys.EVAL`
        eval_metric_ops = {}
        if metric == 'auc':
            eval_metric_ops['auc'] = tf.metrics.auc(labels, pred)
        else:
            raise TypeError("Can not find loss_type :", params['training']['loss_type'])

        if mode == tf.estimator.ModeKeys.EVAL:
            return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                eval_metric_ops=eval_metric_ops)

        with tf.name_scope("Optimizer"):
            if optimizer == 'adam':
                op = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                            beta1=0.9, beta2=0.999, epsilon=1e-8)
            elif optimizer == 'adagrad':
                op = tf.train.AdagradOptimizer(
                    learning_rate=learning_rate, initial_accumulator_value=1e-8)
            elif optimizer == 'momentum':
                op = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
            elif optimizer == 'ftrl':
                op = tf.train.FtrlOptimizer(learning_rate)
            else:
                raise TypeError("Can not find optimizer :", optimizer)

        train_op = op.minimize(loss, global_step=tf.train.get_global_step())

        # Provide an estimator spec for `ModeKeys.TRAIN` modes
        if mode == tf.estimator.ModeKeys.TRAIN:
            return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                train_op=train_op)
Example #30
    def construct_network(self):
        """
        Constructs a variant of the multi-head attention labeller (MHAL)
        that does not use keys, queries and values, but only a simple form
        of additive attention, as proposed by Yang et al. (2016).
        """
        self.word_ids = tf.placeholder(tf.int32, [None, None], name="word_ids")
        self.char_ids = tf.placeholder(tf.int32, [None, None, None],
                                       name="char_ids")
        self.sentence_lengths = tf.placeholder(tf.int32, [None],
                                               name="sentence_lengths")
        self.word_lengths = tf.placeholder(tf.int32, [None, None],
                                           name="word_lengths")
        self.sentence_labels = tf.placeholder(tf.float32, [None],
                                              name="sentence_labels")
        self.word_labels = tf.placeholder(tf.float32, [None, None],
                                          name="word_labels")

        self.word_objective_weights = tf.placeholder(
            tf.float32, [None, None], name="word_objective_weights")
        self.sentence_objective_weights = tf.placeholder(
            tf.float32, [None], name="sentence_objective_weights")

        self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")
        self.is_training = tf.placeholder(tf.int32, name="is_training")
        self.loss = 0.0

        if self.config["initializer"] == "normal":
            self.initializer = tf.random_normal_initializer(stddev=0.1)
        elif self.config["initializer"] == "glorot":
            self.initializer = tf.glorot_uniform_initializer()
        elif self.config["initializer"] == "xavier":
            self.initializer = tf.glorot_normal_initializer()

        zeros_initializer = tf.zeros_initializer()

        self.word_embeddings = tf.get_variable(
            name="word_embeddings",
            shape=[len(self.word2id), self.config["word_embedding_size"]],
            initializer=(zeros_initializer if self.config["emb_initial_zero"]
                         else self.initializer),
            trainable=(True if self.config["train_embeddings"] else False))
        word_input_tensor = tf.nn.embedding_lookup(self.word_embeddings,
                                                   self.word_ids)

        if self.config["char_embedding_size"] > 0 and self.config[
                "char_recurrent_size"] > 0:
            with tf.variable_scope("chars"), tf.control_dependencies([
                    tf.assert_equal(tf.shape(self.char_ids)[2],
                                    tf.reduce_max(self.word_lengths),
                                    message="Char dimensions don't match")
            ]):
                self.char_embeddings = tf.get_variable(
                    name="char_embeddings",
                    shape=[
                        len(self.char2id), self.config["char_embedding_size"]
                    ],
                    initializer=self.initializer,
                    trainable=True)
                char_input_tensor = tf.nn.embedding_lookup(
                    self.char_embeddings, self.char_ids)

                char_input_tensor_shape = tf.shape(char_input_tensor)
                char_input_tensor = tf.reshape(
                    char_input_tensor,
                    shape=[
                        char_input_tensor_shape[0] *
                        char_input_tensor_shape[1], char_input_tensor_shape[2],
                        self.config["char_embedding_size"]
                    ])
                _word_lengths = tf.reshape(self.word_lengths,
                                           shape=[
                                               char_input_tensor_shape[0] *
                                               char_input_tensor_shape[1]
                                           ])

                char_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(
                    self.config["char_recurrent_size"],
                    use_peepholes=self.config["lstm_use_peepholes"],
                    state_is_tuple=True,
                    initializer=self.initializer,
                    reuse=False)
                char_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(
                    self.config["char_recurrent_size"],
                    use_peepholes=self.config["lstm_use_peepholes"],
                    state_is_tuple=True,
                    initializer=self.initializer,
                    reuse=False)

                # Concatenate the final forward and the backward character contexts
                # to obtain a compact character representation for each word.
                _, ((_, char_output_fw),
                    (_, char_output_bw)) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw=char_lstm_cell_fw,
                        cell_bw=char_lstm_cell_bw,
                        inputs=char_input_tensor,
                        sequence_length=_word_lengths,
                        dtype=tf.float32,
                        time_major=False)

                char_output_tensor = tf.concat(
                    [char_output_fw, char_output_bw], axis=-1)
                char_output_tensor = tf.reshape(
                    char_output_tensor,
                    shape=[
                        char_input_tensor_shape[0], char_input_tensor_shape[1],
                        2 * self.config["char_recurrent_size"]
                    ])

                # Include a char-based language modelling loss, LMc.
                if self.config["lm_cost_char_gamma"] > 0.0:
                    self.loss += self.config["lm_cost_char_gamma"] * \
                                 self.construct_lm_cost(
                                     input_tensor_fw=char_output_tensor,
                                     input_tensor_bw=char_output_tensor,
                                     sentence_lengths=self.sentence_lengths,
                                     target_ids=self.word_ids,
                                     lm_cost_type="separate",
                                     name="lm_cost_char_separate")

                if self.config["lm_cost_joint_char_gamma"] > 0.0:
                    self.loss += self.config["lm_cost_joint_char_gamma"] * \
                                 self.construct_lm_cost(
                                     input_tensor_fw=char_output_tensor,
                                     input_tensor_bw=char_output_tensor,
                                     sentence_lengths=self.sentence_lengths,
                                     target_ids=self.word_ids,
                                     lm_cost_type="joint",
                                     name="lm_cost_char_joint")

                if self.config["char_hidden_layer_size"] > 0:
                    char_output_tensor = tf.layers.dense(
                        inputs=char_output_tensor,
                        units=self.config["char_hidden_layer_size"],
                        activation=tf.tanh,
                        kernel_initializer=self.initializer)

                if self.config["char_integration_method"] == "concat":
                    word_input_tensor = tf.concat(
                        [word_input_tensor, char_output_tensor], axis=-1)
                elif self.config["char_integration_method"] == "none":
                    word_input_tensor = word_input_tensor
                else:
                    raise ValueError("Unknown char integration method")

        if self.config["dropout_input"] > 0.0:
            dropout_input = (self.config["dropout_input"] *
                             tf.cast(self.is_training, tf.float32) +
                             (1.0 - tf.cast(self.is_training, tf.float32)))
            word_input_tensor = tf.nn.dropout(word_input_tensor,
                                              dropout_input,
                                              name="dropout_word")

        word_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(
            self.config["word_recurrent_size"],
            use_peepholes=self.config["lstm_use_peepholes"],
            state_is_tuple=True,
            initializer=self.initializer,
            reuse=False)
        word_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(
            self.config["word_recurrent_size"],
            use_peepholes=self.config["lstm_use_peepholes"],
            state_is_tuple=True,
            initializer=self.initializer,
            reuse=False)

        with tf.control_dependencies([
                tf.assert_equal(tf.shape(self.word_ids)[1],
                                tf.reduce_max(self.sentence_lengths),
                                message="Sentence dimensions don't match")
        ]):
            (lstm_outputs_fw, lstm_outputs_bw), ((_, lstm_output_fw), (_, lstm_output_bw)) = \
                tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=word_lstm_cell_fw, cell_bw=word_lstm_cell_bw, inputs=word_input_tensor,
                    sequence_length=self.sentence_lengths, dtype=tf.float32, time_major=False)

        lstm_output_states = tf.concat([lstm_output_fw, lstm_output_bw],
                                       axis=-1)

        if self.config["dropout_word_lstm"] > 0.0:
            dropout_word_lstm = (self.config["dropout_word_lstm"] *
                                 tf.cast(self.is_training, tf.float32) +
                                 (1.0 - tf.cast(self.is_training, tf.float32)))
            lstm_outputs_fw = tf.nn.dropout(
                lstm_outputs_fw,
                dropout_word_lstm,
                noise_shape=tf.convert_to_tensor([
                    tf.shape(self.word_ids)[0], 1,
                    self.config["word_recurrent_size"]
                ],
                                                 dtype=tf.int32))
            lstm_outputs_bw = tf.nn.dropout(
                lstm_outputs_bw,
                dropout_word_lstm,
                noise_shape=tf.convert_to_tensor([
                    tf.shape(self.word_ids)[0], 1,
                    self.config["word_recurrent_size"]
                ],
                                                 dtype=tf.int32))
            lstm_output_states = tf.nn.dropout(lstm_output_states,
                                               dropout_word_lstm)

        # The forward and backward states are concatenated at every token position.
        lstm_outputs_states = tf.concat([lstm_outputs_fw, lstm_outputs_bw],
                                        axis=-1)

        if self.config["whidden_layer_size"] > 0:
            lstm_outputs_states = tf.layers.dense(
                lstm_outputs_states,
                self.config["whidden_layer_size"],
                activation=tf.tanh,
                kernel_initializer=self.initializer)

        if self.config["model_type"] == "last":
            processed_tensor = lstm_output_states
            token_scores = tf.layers.dense(
                lstm_outputs_states,
                units=len(self.label2id_tok),
                kernel_initializer=self.initializer,
                name="token_scores_last_lstm_outputs_ff")
            if self.config["hidden_layer_size"] > 0:
                processed_tensor = tf.layers.dense(
                    processed_tensor,
                    units=self.config["hidden_layer_size"],
                    activation=tf.tanh,
                    kernel_initializer=self.initializer)
            sentence_scores = tf.layers.dense(
                processed_tensor,
                units=len(self.label2id_sent),
                kernel_initializer=self.initializer,
                name="sentence_scores_last_lstm_outputs_ff")
        else:
            with tf.variable_scope("attention"):
                token_scores_list = []
                sentence_scores_list = []

                for i in range(len(self.label2id_tok)):
                    keys = tf.layers.dense(
                        lstm_outputs_states,
                        units=self.config["attention_evidence_size"],
                        activation=tf.tanh,
                        kernel_initializer=self.initializer)
                    values = tf.layers.dense(
                        lstm_outputs_states,
                        units=self.config["attention_evidence_size"],
                        activation=tf.tanh,
                        kernel_initializer=self.initializer)

                    token_scores_head = tf.layers.dense(
                        keys, units=1,
                        kernel_initializer=self.initializer)  # [B, M, 1]
                    token_scores_head = tf.reshape(
                        token_scores_head,
                        shape=tf.shape(self.word_ids))  # [B, M]
                    token_scores_list.append(token_scores_head)

                    if self.config["attention_activation"] == "sharp":
                        attention_weights_unnormalized = tf.exp(
                            token_scores_head)
                    elif self.config["attention_activation"] == "soft":
                        attention_weights_unnormalized = tf.sigmoid(
                            token_scores_head)
                    elif self.config["attention_activation"] == "linear":
                        attention_weights_unnormalized = token_scores_head
                    else:
                        raise ValueError(
                            "Unknown/unsupported token scoring method: %s" %
                            self.config["attention_activation"])
                    attention_weights_unnormalized = tf.where(
                        tf.sequence_mask(self.sentence_lengths),
                        attention_weights_unnormalized,
                        tf.zeros_like(attention_weights_unnormalized))
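                    # Normalize so the masked attention weights sum to 1 over each sentence.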
                    attention_weights = attention_weights_unnormalized / tf.reduce_sum(
                        attention_weights_unnormalized, axis=1,
                        keep_dims=True)  # [B, M]
                    processed_tensor = tf.reduce_sum(
                        values * attention_weights[:, :, numpy.newaxis],
                        axis=1)  # [B, E]

                    if self.config["hidden_layer_size"] > 0:
                        processed_tensor = tf.layers.dense(
                            processed_tensor,
                            units=self.config["hidden_layer_size"],
                            activation=tf.tanh,
                            kernel_initializer=self.initializer)

                    sentence_score_head = tf.layers.dense(
                        processed_tensor,
                        units=1,
                        kernel_initializer=self.initializer,
                        name="output_ff_head_%d" % i)  # [B, 1]
                    sentence_score_head = tf.reshape(
                        sentence_score_head,
                        shape=[tf.shape(processed_tensor)[0]])  # [B]
                    sentence_scores_list.append(sentence_score_head)

                token_scores = tf.stack(token_scores_list,
                                        axis=-1)  # [B, M, H]
                all_sentence_scores = tf.stack(sentence_scores_list,
                                               axis=-1)  # [B, H]

                if len(self.label2id_tok) != len(self.label2id_sent):
                    if len(self.label2id_sent) == 2:
                        default_sentence_score = tf.gather(all_sentence_scores,
                                                           indices=[0],
                                                           axis=1)  # [B, 1]
                        maximum_non_default_sentence_score = tf.gather(
                            all_sentence_scores,
                            indices=list(range(1, len(self.label2id_tok))),
                            axis=1)  # [B, num_heads-1]
                        maximum_non_default_sentence_score = tf.reduce_max(
                            maximum_non_default_sentence_score,
                            axis=1,
                            keep_dims=True)  # [B, 1]
                        sentence_scores = tf.concat(
                            [
                                default_sentence_score,
                                maximum_non_default_sentence_score
                            ],
                            axis=-1,
                            name="sentence_scores_concatenation")  # [B, 2]
                    else:
                        sentence_scores = tf.layers.dense(
                            all_sentence_scores,
                            units=len(self.label2id_sent),
                            kernel_initializer=self.initializer
                        )  # [B, num_sent_labels]
                else:
                    sentence_scores = all_sentence_scores

        # Mask the token scores that do not fall in the range of the true sentence length.
        # Do this for each head (change shape from [B, M] to [B, M, num_heads]).
        tiled_sentence_lengths = tf.tile(
            input=tf.expand_dims(tf.sequence_mask(self.sentence_lengths),
                                 axis=-1),
            multiples=[1, 1, len(self.label2id_tok)])
        self.token_probabilities = tf.nn.softmax(token_scores, axis=-1)
        self.token_probabilities = tf.where(
            tiled_sentence_lengths, self.token_probabilities,
            tf.zeros_like(self.token_probabilities))
        self.token_predictions = tf.argmax(self.token_probabilities, axis=2)

        self.sentence_probabilities = tf.nn.softmax(sentence_scores)
        self.sentence_predictions = tf.argmax(self.sentence_probabilities,
                                              axis=1)

        if self.config["word_objective_weight"] > 0:
            word_objective_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=token_scores,
                labels=tf.cast(self.word_labels, tf.int32))
            word_objective_loss = tf.where(
                tf.sequence_mask(self.sentence_lengths), word_objective_loss,
                tf.zeros_like(word_objective_loss))
            self.loss += self.config["word_objective_weight"] * tf.reduce_sum(
                self.word_objective_weights * word_objective_loss)

        if self.config["sentence_objective_weight"] > 0:
            self.loss += self.config[
                "sentence_objective_weight"] * tf.reduce_sum(
                    self.sentence_objective_weights *
                    tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=sentence_scores,
                        labels=tf.cast(self.sentence_labels, tf.int32)))

        max_over_token_heads = tf.reduce_max(self.token_probabilities,
                                             axis=1)  # [B, H]
        one_hot_sentence_labels = tf.one_hot(tf.cast(self.sentence_labels,
                                                     tf.int32),
                                             depth=len(self.label2id_sent))
        if self.config["enable_label_smoothing"]:
            one_hot_sentence_labels_smoothed = label_smoothing(
                one_hot_sentence_labels,
                epsilon=self.config["smoothing_epsilon"])
        else:
            one_hot_sentence_labels_smoothed = one_hot_sentence_labels

        # At least one token has a label corresponding to the true sentence label.
        # This loss also pushes the maximums over the other heads towards 0 (but smoothed).
        if self.config["type1_attention_objective_weight"] > 0:
            this_max_over_token_heads = max_over_token_heads
            if len(self.label2id_tok) != len(self.label2id_sent):
                if len(self.label2id_sent) == 2:
                    max_default_head = tf.gather(max_over_token_heads,
                                                 indices=[0],
                                                 axis=-1)  # [B, 1]
                    max_non_default_head = tf.reduce_max(
                        tf.gather(max_over_token_heads,
                                  indices=list(range(1,
                                                     len(self.label2id_tok))),
                                  axis=-1),
                        axis=1,
                        keep_dims=True)  # [B, 1]
                    this_max_over_token_heads = tf.concat(
                        [max_default_head, max_non_default_head],
                        axis=-1)  # [B, 2]
                else:
                    raise ValueError(
                        "Unsupported attention loss for num_heads != num_sentence_labels "
                        "and num_sentence_labels != 2.")
            self.loss += self.config["type1_attention_objective_weight"] * (
                tf.reduce_sum(self.sentence_objective_weights * tf.reduce_sum(
                    tf.square(this_max_over_token_heads -
                              one_hot_sentence_labels_smoothed),
                    axis=-1)))

        # The predicted distribution over the token labels (heads) should be similar to the
        # predicted distribution over the sentence representations.
        if self.config["type2_attention_objective_weight"] > 0:
            all_sentence_scores_probabilities = tf.nn.softmax(
                all_sentence_scores)  # [B, H]
            self.loss += self.config["type2_attention_objective_weight"] * (
                tf.reduce_sum(self.sentence_objective_weights * tf.reduce_sum(
                    tf.square(max_over_token_heads -
                              all_sentence_scores_probabilities),
                    axis=-1)))

        # At least one token has a label corresponding to the true sentence label.
        if self.config["type3_attention_objective_weight"] > 0:
            this_max_over_token_heads = max_over_token_heads
            if len(self.label2id_tok) != len(self.label2id_sent):
                if len(self.label2id_sent) == 2:
                    max_default_head = tf.gather(max_over_token_heads,
                                                 indices=[0],
                                                 axis=-1)  # [B, 1]
                    max_non_default_head = tf.reduce_max(
                        tf.gather(max_over_token_heads,
                                  indices=list(range(1,
                                                     len(self.label2id_tok))),
                                  axis=-1),
                        axis=1,
                        keep_dims=True)  # [B, 1]
                    this_max_over_token_heads = tf.concat(
                        [max_default_head, max_non_default_head],
                        axis=-1)  # [B, 2]
                else:
                    raise ValueError(
                        "Unsupported attention loss for num_heads != num_sentence_labels "
                        "and num_sentence_labels != 2.")
            self.loss += self.config["type3_attention_objective_weight"] * (
                tf.reduce_sum(
                    self.sentence_objective_weights * tf.reduce_sum(tf.square(
                        (this_max_over_token_heads * one_hot_sentence_labels) -
                        one_hot_sentence_labels_smoothed),
                                                                    axis=-1)))

        # A sentence that has a default label, should only contain tokens labeled as default.
        if self.config["type4_attention_objective_weight"] > 0:
            default_head = tf.gather(self.token_probabilities,
                                     indices=[0],
                                     axis=-1)  # [B, M, 1]
            default_head = tf.squeeze(default_head, axis=-1)  # [B, M]
            self.loss += self.config["type4_attention_objective_weight"] * (
                tf.reduce_sum(
                    self.sentence_objective_weights *
                    tf.cast(tf.equal(self.sentence_labels, 0.0), tf.float32) *
                    tf.reduce_sum(
                        tf.square(default_head - tf.ones_like(default_head)),
                        axis=-1)))

        # Every sentence has at least one default label.
        if self.config["type5_attention_objective_weight"] > 0:
            default_head = tf.gather(self.token_probabilities,
                                     indices=[0],
                                     axis=-1)  # [B, M, 1]
            max_default_head = tf.reduce_max(tf.squeeze(default_head, axis=-1),
                                             axis=-1)  # [B]
            self.loss += self.config["type5_attention_objective_weight"] * (
                tf.reduce_sum(self.sentence_objective_weights *
                              tf.square(max_default_head -
                                        tf.ones_like(max_default_head))))

        # Include a word-based language modelling loss, LMw.
        if self.config["lm_cost_lstm_gamma"] > 0.0:
            self.loss += self.config[
                "lm_cost_lstm_gamma"] * self.construct_lm_cost(
                    input_tensor_fw=lstm_outputs_fw,
                    input_tensor_bw=lstm_outputs_bw,
                    sentence_lengths=self.sentence_lengths,
                    target_ids=self.word_ids,
                    lm_cost_type="separate",
                    name="lm_cost_lstm_separate")

        if self.config["lm_cost_joint_lstm_gamma"] > 0.0:
            self.loss += self.config[
                "lm_cost_joint_lstm_gamma"] * self.construct_lm_cost(
                    input_tensor_fw=lstm_outputs_fw,
                    input_tensor_bw=lstm_outputs_bw,
                    sentence_lengths=self.sentence_lengths,
                    target_ids=self.word_ids,
                    lm_cost_type="joint",
                    name="lm_cost_lstm_joint")

        self.train_op = self.construct_optimizer(
            opt_strategy=self.config["opt_strategy"],
            loss=self.loss,
            learning_rate=self.learning_rate,
            clip=self.config["clip"])
        print("Notwork built.")
size = 400

inp = tf.placeholder(tf.float32, shape=(None, 5))

seed = tf.get_variable('seed', (5, ),
                       initializer=tf.random_uniform_initializer(minval=-10,
                                                                 maxval=10))

batch_size = tf.shape(inp)[0]

h = tf.concat([inp, tf.tile(tf.expand_dims(seed, 0), [batch_size, 1])], 1)
h = tf.layers.dense(h,
                    20,
                    activation=tf.nn.sigmoid,
                    kernel_initializer=tf.glorot_normal_initializer(),
                    bias_initializer=tf.random_uniform_initializer(
                        minval=-0.1, maxval=0.1)) * 100
h = tf.layers.dense(h,
                    10,
                    activation=tf.nn.sigmoid,
                    kernel_initializer=tf.glorot_normal_initializer(),
                    bias_initializer=tf.random_uniform_initializer(
                        minval=-0.1, maxval=0.1)) * 8
h = tf.layers.dense(h,
                    1,
                    activation=tf.sin,
                    kernel_initializer=tf.glorot_normal_initializer(),
                    bias_initializer=tf.glorot_normal_initializer())

def masked_dense(inputs,
                 units,
                 num_blocks=None,
                 exclusive=False,
                 kernel_initializer=None,
                 reuse=None,
                 name=None,
                 *args,  # pylint: disable=keyword-arg-before-vararg
                 **kwargs):
  """A autoregressively masked dense layer. Analogous to `tf.layers.dense`.

  See [Germain et al. (2015)][1] for detailed explanation.

  Arguments:
    inputs: Tensor input.
    units: Python `int` scalar representing the dimensionality of the output
      space.
    num_blocks: Python `int` scalar representing the number of blocks for the
      MADE masks.
    exclusive: Python `bool` scalar representing whether to zero the diagonal of
      the mask, used for the first layer of a MADE.
    kernel_initializer: Initializer function for the weight matrix.
      If `None` (default), weights are initialized using the
      `tf.glorot_normal_initializer`.
    reuse: Python `bool` scalar representing whether to reuse the weights of a
      previous layer by the same name.
    name: Python `str` used to describe ops managed by this function.
    *args: `tf.layers.dense` arguments.
    **kwargs: `tf.layers.dense` keyword arguments.

  Returns:
    Output tensor.

  Raises:
    NotImplementedError: if rightmost dimension of `inputs` is unknown prior to
      graph execution.

  #### References

  [1]: Mathieu Germain, Karol Gregor, Iain Murray, and Hugo Larochelle. MADE:
       Masked Autoencoder for Distribution Estimation. In _International
       Conference on Machine Learning_, 2015. https://arxiv.org/abs/1502.03509
  """
  # TODO(b/67594795): Better support of dynamic shape.
  input_depth = inputs.shape.with_rank_at_least(1)[-1].value
  if input_depth is None:
    raise NotImplementedError(
        "Rightmost dimension must be known prior to graph execution.")

  mask = _gen_mask(num_blocks, input_depth, units,
                   MASK_EXCLUSIVE if exclusive else MASK_INCLUSIVE).T

  if kernel_initializer is None:
    kernel_initializer = tf.glorot_normal_initializer()

  def masked_initializer(shape, dtype=None, partition_info=None):
    return mask * kernel_initializer(shape, dtype, partition_info)

  with tf.name_scope(name, "masked_dense", [inputs, units, num_blocks]):
    layer = layers.Dense(
        units,
        kernel_initializer=masked_initializer,
        kernel_constraint=lambda x: mask * x,
        name=name,
        dtype=inputs.dtype.base_dtype,
        _scope=name,
        _reuse=reuse,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs)
    return layer.apply(inputs)
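
# A minimal usage sketch (illustrative, not part of the original module): it
# assumes TensorFlow 1.x with `tf` imported as above and the `masked_dense`
# function (plus its `_gen_mask` helpers) in scope. Two masked layers form a
# tiny MADE-style network over a 4-dimensional input; `exclusive=True` on the
# first layer keeps output i from depending on input i.
x = tf.placeholder(tf.float32, shape=[None, 4])
hidden = masked_dense(x, units=16, num_blocks=4, exclusive=True,
                      activation=tf.nn.relu, name="made_hidden")
made_logits = masked_dense(hidden, units=4, num_blocks=4, name="made_logits")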
Example #33
def model_fn(features, labels, mode, params):
    """Build Model function f(x) for Estimator."""

    #------hyper parameters------
    field_size = params['field_size']
    feature_size = params['feature_size']
    embedding_size = params['embedding_size']
    l2_reg = params['l2_reg']
    learning_rate = params['learning_rate']
    dropout = params['dropout']
    layers = params['layers']


    #------build weights------
    Global_Bias = tf.get_variable(name='bias', shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Wgts = tf.get_variable(name='linear', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable(name='emb', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())

    #------build feature------
    feat_ids = features['feat_ids']
    feat_ids = tf.reshape(feat_ids, shape=[-1, field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals, shape=[-1, field_size])

    #------build f(x)------
    # f(x) = bias + sum(wx) + MLP(BI(embed_vec))

    # FM part
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Wgts, feat_ids) # None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals), 1)  # None * 1


    with tf.variable_scope("BiInter-part"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids) # None * F * k
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1]) # None * F * 1
        embeddings = tf.multiply(embeddings, feat_vals) # vi * xi
        sum_square_emb = tf.square(tf.reduce_sum(embeddings, 1))
        square_sum_emb = tf.reduce_sum(tf.square(embeddings), 1)
        deep_inputs = 0.5 * tf.subtract(sum_square_emb, square_sum_emb) # None * k
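        # Bi-interaction identity: sum_{i<j} (v_i*x_i)(v_j*x_j)
        #   = 0.5 * ((sum_i v_i*x_i)^2 - sum_i (v_i*x_i)^2),
        # which is what the square-of-sum minus sum-of-squares above computes.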

    with tf.variable_scope("Deep-part"):

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_phase = True
        else:
            train_phase = False

        # Apply batch normalization to the BI (bilinear interaction) output
        deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn="bn_after_bi")

        # Apply dropout to the BI output
        if mode == tf.estimator.ModeKeys.TRAIN:
            deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[-1]) # dropout at bilinear interaction layer

        for i in range(len(layers)):
            deep_inputs = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=layers[i], weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope="mlp%d" % i)

            # Note: apply batch norm first, then dropout
            # Batch Normalization
            deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn="bn%d" % i)

            # Dropout
            if mode == tf.estimator.ModeKeys.TRAIN:
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])

        # Output
        y_deep = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity, weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope="deep_out")
        y_d = tf.reshape(y_deep, shape=[-1])

    with tf.variable_scope("NFM-out"):
        y_bias = Global_Bias * tf.ones_like(y_d, dtype=tf.float32)
        y = y_bias + y_linear + y_d
        pred = tf.sigmoid(y)

    predictions = {"prob": pred}

    export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + l2_reg * tf.nn.l2_loss(Feat_Wgts) + l2_reg * tf.nn.l2_loss(Feat_Emb)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())


    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=loss,
            train_op=train_op)
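
# A hypothetical wiring sketch (not from the original source): it assumes the
# NFM model_fn above, its batch_norm_layer helper, and a params dict with the
# keys read at the top of model_fn. The toy input_fn feeds random ids/values
# only to show the expected feature layout ("feat_ids", "feat_vals").
import numpy as np
import tensorflow as tf

nfm_params = {"field_size": 10, "feature_size": 1000, "embedding_size": 8,
              "l2_reg": 1e-4, "learning_rate": 1e-3,
              "dropout": [0.8, 0.8, 0.8], "layers": [64, 32]}
nfm = tf.estimator.Estimator(model_fn=model_fn, params=nfm_params)

def toy_input_fn():
    feat_ids = np.random.randint(0, 1000, size=(256, 10)).astype(np.int64)
    feat_vals = np.random.rand(256, 10).astype(np.float32)
    labels = np.random.randint(0, 2, size=(256,)).astype(np.float32)
    dataset = tf.data.Dataset.from_tensor_slices(
        ({"feat_ids": feat_ids, "feat_vals": feat_vals}, labels))
    return dataset.shuffle(256).batch(32)

nfm.train(input_fn=toy_input_fn)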
if not os.path.exists(TRAINLOG):
    os.makedirs(TRAINLOG)
if not os.path.exists(TESTLOG):
    os.makedirs(TESTLOG)

# general configs
epochs = 1
batch_size = 128

# network configs
config = dict()
config["tf"] = dict()

config["tf"]["dense"] = {"activation": tf.nn.relu,
                         "use_bias": True,
                         "kernel_initializer": tf.glorot_normal_initializer(),
                         "bias_initializer": tf.zeros_initializer(),
                         "kernel_regularizer": None,
                         "bias_regularizer": None,
                         "activity_regularizer": None,
                         "kernel_constraint": None,
                         "bias_constraint": None,
                         "trainable": True,
                         "name": None,
                         "reuse": None}

config["tf"]["conv2d"] = {"activation": tf.nn.relu,
                          "strides": (1, 1),
                          "padding": "same",
                          "data_format": "channels_last",
                          "dilation_rate": (1, 1),
Example #35
import gym
import tensorflow as tf
env = gym.make('CartPole-v0')
env.reset()
n_inputs = 4  # == env.observation_space.shape[0]
n_hidden1 = 10
n_hidden2 = 10
n_outputs = 1  # only outputs the probability of accelerating left
learning_rate = 0.01

X = tf.placeholder(tf.float32, shape=[None, n_inputs])
y = tf.placeholder(tf.float32, shape=[None, n_outputs])

hidden1 = tf.layers.dense(X,
                          n_hidden1,
                          activation=tf.nn.relu,
                          kernel_initializer=tf.glorot_normal_initializer())
hidden2 = tf.layers.dense(hidden1,
                          n_hidden2,
                          activation=tf.nn.relu,
                          kernel_initializer=tf.glorot_normal_initializer())

logits = tf.layers.dense(hidden2, n_outputs)
outputs = tf.nn.sigmoid(logits)
p_left_and_right = tf.concat(axis=1, values=[outputs, 1 - outputs])
action = tf.multinomial(tf.log(p_left_and_right), num_samples=1)

cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y,
                                                        logits=logits)
optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(cross_entropy)
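
# A minimal rollout sketch (illustrative, not part of the original example):
# sample a single action from the stochastic policy defined above and step the
# environment once.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    obs = env.reset()
    sampled = sess.run(action, feed_dict={X: obs.reshape(1, n_inputs)})
    obs, reward, done, _ = env.step(int(sampled[0][0]))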
    def train(self, batch_data, is_train=True):
        """ 1 定义输入数据 """
        print("1 定义输入数据")
        with tf.name_scope('input_data'):
            # Labels: [batch_size, 1]
            labels = batch_data['labels']
            # User feature vector: [batch_size, feature_size]
            dense_vector = tf.reshape(batch_data['dense_vector'],
                                      shape=[-1, self.feature_size
                                             ])  # None * feature_size
            print("%s: %s" % ("dense_vector", dense_vector))
            print("%s: %s" % ("labels", labels))
        """ 2 FM层网络输出 """
        print("2 FM层网络输出")
        with tf.name_scope("FM"):
            # FM参数,生成或者获取W V
            with tf.variable_scope("fm_layer", reuse=tf.AUTO_REUSE):
                self.FM_W = tf.get_variable(
                    name='fm_w',
                    shape=[self.feature_size, 1],
                    initializer=tf.glorot_normal_initializer())
                self.FM_V = tf.get_variable(
                    name='fm_v',
                    shape=[self.feature_size, self.fm_v_size],
                    initializer=tf.glorot_normal_initializer())
            print("%s: %s" % ("FM_W", self.FM_W))
            print("%s: %s" % ("FM_V", self.FM_V))

            # Prepare input samples
            Input_x = tf.reshape(dense_vector,
                                 shape=[-1, self.feature_size,
                                        1])  # None * feature_size
            print("%s: %s" % ("Input_x", Input_x))

            # ---------- W * X ----------
            Y_first = tf.reduce_sum(tf.multiply(self.FM_W, Input_x),
                                    2)  # None * F
            # Apply dropout to prevent overfitting
            if is_train and self.is_dropout_fm:
                Y_first = tf.nn.dropout(Y_first,
                                        self.dropout_fm[0])  # None * F
            print("%s: %s" % ("Y_first", Y_first))

            # ---------- Vij * Wij ---------------
            # sum_square part
            embeddings = tf.multiply(self.FM_V, Input_x)  # None * V * X
            print("%s: %s" % ("embeddings", embeddings))

            summed_features_emb = tf.reduce_sum(embeddings, 1)  # sum(v*x)
            summed_features_emb_square = tf.square(
                summed_features_emb)  # (sum(v*x))^2

            # square_sum part
            squared_features_emb = tf.square(embeddings)  # (v*x)^2
            squared_sum_features_emb = tf.reduce_sum(squared_features_emb,
                                                     1)  # sum((v*x)^2)

            # second order
            Y_second = 0.5 * tf.subtract(
                summed_features_emb_square,
                squared_sum_features_emb)  # 0.5*((sum(v*x))^2 - sum((v*x)^2))
            if is_train and self.is_dropout_fm:
                Y_second = tf.nn.dropout(Y_second,
                                         self.dropout_fm[1])  # None * K
            print("%s: %s" % ("Y_second", Y_second))

            # Regularization (default: L2)
            if self.reg_type == 'l1_reg':
                lr_regularization = tf.reduce_sum(tf.abs(self.FM_W))
                fm_regularization = tf.reduce_sum(tf.abs(self.FM_V))
            elif self.reg_type == 'l2_reg':
                lr_regularization = tf.nn.l2_loss(self.FM_W)
                fm_regularization = tf.nn.l2_loss(self.FM_V)
            else:
                lr_regularization = tf.nn.l2_loss(self.FM_W)
                fm_regularization = tf.nn.l2_loss(self.FM_V)
        """ 3 Deep层网络输出 """
        print("3 Deep层网络输出")
        with tf.name_scope("Deep"):
            # 第一层计算
            print("lay%s, input_size: %s, output_size: %s, active_fuc: %s" %
                  (1, self.feature_size * self.fm_v_size, self.dnn_layer[0],
                   self.dnn_active_fuc[0]))
            with tf.variable_scope("deep_layer1", reuse=tf.AUTO_REUSE):
                input_size = self.feature_size * self.fm_v_size
                output_size = self.dnn_layer[0]
                deep_inputs = tf.reshape(embeddings,
                                         shape=[-1,
                                                input_size])  # None * (F*K)
                print("%s: %s" % ("lay1, deep_inputs", deep_inputs))
                # Input dropout
                if is_train and self.is_dropout_dnn:
                    deep_inputs = tf.nn.dropout(deep_inputs,
                                                self.dropout_dnn[0])
                # Fully connected layer
                deep_outputs = self._udf_full_connect(deep_inputs, input_size,
                                                      output_size,
                                                      self.dnn_active_fuc[0])
                print("%s: %s" % ("lay1, deep_outputs", deep_outputs))
                # batch_norm
                if self.is_batch_norm:
                    deep_outputs = tf.layers.batch_normalization(
                        deep_outputs, axis=-1, training=is_train)
                # Output dropout
                if is_train and self.is_dropout_dnn:
                    deep_outputs = tf.nn.dropout(deep_outputs, self.dropout_dnn[1])
            # Intermediate layers
            for i in range(len(self.dnn_layer) - 1):
                with tf.variable_scope("deep_layer%d" % (i + 2),
                                       reuse=tf.AUTO_REUSE):
                    print(
                        "lay%s, input_size: %s, output_size: %s, active_fuc: %s"
                        % (i + 2, self.dnn_layer[i], self.dnn_layer[i + 1],
                           self.dnn_active_fuc[i + 1]))
                    # Fully connected layer
                    deep_outputs = self._udf_full_connect(
                        deep_outputs, self.dnn_layer[i], self.dnn_layer[i + 1],
                        self.dnn_active_fuc[i + 1])
                    print("lay%s, deep_outputs: %s" % (i + 2, deep_outputs))
                    # batch_norm
                    if self.is_batch_norm:
                        deep_outputs = tf.layers.batch_normalization(
                            deep_outputs, axis=-1, training=is_train)
                    # Output dropout
                    if is_train and self.is_dropout_dnn:
                        deep_outputs = tf.nn.dropout(deep_outputs,
                                                     self.dropout_dnn[i + 2])
            # Output layer
            print("lay_last, input_size: %s, output_size: %s, active_fuc: %s" %
                  (self.dnn_layer[-1], 1, self.dnn_active_fuc[-1]))
            with tf.variable_scope("deep_layer%d" % (len(dnn_layer) + 1),
                                   reuse=tf.AUTO_REUSE):
                deep_outputs = self._udf_full_connect(deep_outputs,
                                                      self.dnn_layer[-1], 1,
                                                      self.dnn_active_fuc[-1])
                print("lay_last, deep_outputs: %s" % (deep_outputs))

            # Regularization (default: L2)
            dnn_regularization = 0.0
            for j in range(len(self.dnn_layer) + 1):
                with tf.variable_scope("deep_layer%d" % (j + 1), reuse=True):
                    weights = tf.get_variable("weights")
                    if self.reg_type == 'l1_reg':
                        dnn_regularization = dnn_regularization + tf.reduce_sum(
                            tf.abs(weights))
                    elif self.reg_type == 'l2_reg':
                        dnn_regularization = dnn_regularization + tf.nn.l2_loss(
                            weights)
                    else:
                        dnn_regularization = dnn_regularization + tf.nn.l2_loss(
                            weights)
        # Deep output
        Y_deep = deep_outputs
        print("%s: %s" % ("Y_deep", Y_deep))
        """ 4 DeepFM层网络输出 """
        print("4 DeepFM层网络输出")
        # ---------- DeepFM ----------
        with tf.name_scope("Deep_FM"):
            # Prepare the input for the final layer
            concat_input = tf.concat([Y_first, Y_second, Y_deep], axis=1)
            if self.model_type == "deep_fm":
                concat_input = tf.concat([Y_first, Y_second, Y_deep], axis=1)
                print("%s: %s" % ("concat_input", concat_input))
                input_size = self.feature_size + self.fm_v_size + self.dnn_layer[
                    -1]
                regularization = self.reg_w * lr_regularization + self.reg_v * fm_regularization + self.reg_dnn * dnn_regularization
            elif self.model_type == "fm":
                concat_input = tf.concat([Y_first, Y_second], axis=1)
                print("%s: %s" % ("concat_input", concat_input))
                input_size = self.feature_size + self.fm_v_size
                regularization = self.reg_w * lr_regularization + self.reg_v * fm_regularization
            elif self.model_type == "dnn":
                concat_input = Y_deep
                print("%s: %s" % ("concat_input", concat_input))
                input_size = self.dnn_layer[-1]
                regularization = self.reg_dnn * dnn_regularization
            elif self.model_type == "lr":
                concat_input = tf.concat([Y_first], axis=1)
                print("%s: %s" % ("concat_input", concat_input))
                input_size = self.feature_size
                regularization = self.reg_w * lr_regularization
            else:
                concat_input = tf.concat([Y_first, Y_second, Y_deep], axis=1)
                print("%s: %s" % ("concat_input", concat_input))
                input_size = self.feature_size + self.fm_v_size + self.dnn_layer[
                    -1]
                regularization = self.reg_w * lr_regularization + self.reg_v * fm_regularization + self.reg_dnn * dnn_regularization

            # Final layer output: a fully connected w*concat_input + b; alternatively, simply sum over concat_input
            with tf.variable_scope("deepfm_out", reuse=tf.AUTO_REUSE):
                self.DF_W = tf.get_variable(
                    name='df_w',
                    shape=[input_size, 1],
                    initializer=tf.glorot_normal_initializer())
                self.DF_B = tf.get_variable(
                    name='df_bias',
                    shape=[1],
                    initializer=tf.constant_initializer(0.0))
            print("%s: %s" % ("DF_W", self.DF_W))
            print("%s: %s" % ("DF_B", self.DF_B))
            print("%s: %s" % ("out_lay_type", self.out_lay_type))
            if self.out_lay_type == "line":
                Y_sum = tf.reduce_sum(concat_input, 1)  # None * 1
                print("%s: %s" % ("Y_sum", Y_sum))
                Y_bias = self.DF_B * tf.ones_like(Y_sum, dtype=tf.float32)
                print("%s: %s" % ("Y_bias", Y_bias))
                Y_Out = tf.add(Y_sum, Y_bias, name='Y_Out')
            elif self.out_lay_type == "matmul":
                Y_Out = tf.add(tf.matmul(concat_input, self.DF_W),
                               self.DF_B,
                               name='Y_Out')
            else:
                Y_sum = tf.reduce_sum(concat_input, 1)  # None * 1
                print("%s: %s" % ("Y_sum", Y_sum))
                Y_bias = self.DF_B * tf.ones_like(Y_sum, dtype=tf.float32)
                print("%s: %s" % ("Y_bias", Y_bias))
                Y_Out = tf.add(Y_sum, Y_bias, name='Y_Out')
            print("%s: %s" % ("Y_Out", Y_Out))
        score = tf.nn.sigmoid(Y_Out, name='score')
        score = tf.reshape(score, shape=[-1, 1])
        print("%s: %s" % ("score", score))
        """ 5 定义损失函数和AUC指标 """
        print("5 定义损失函数和AUC指标")
        with tf.name_scope("loss"):
            # loss: Squared_error, Cross_entropy, FTLR
            if self.loss_fuc == 'Squared_error':
                loss = tf.reduce_mean(
                    tf.reduce_sum(tf.square(labels - score),
                                  reduction_indices=[1])) + regularization
            elif self.loss_fuc == 'Cross_entropy':
                loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=tf.reshape(Y_Out, [-1]),
                        labels=tf.reshape(labels, [-1]))) + regularization
            elif self.loss_fuc == 'FTLR':
                loss = tf.reduce_mean(
                    tf.reduce_sum(tf.square(labels - score),
                                  reduction_indices=[1])) + regularization
            # AUC
            auc = tf.metrics.auc(labels, score)
            print("%s: %s" % ("labels", labels))
        """ 6 设定optimizer """
        print("6 设定optimizer")
        with tf.name_scope("optimizer"):
            with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE):
                #------build optimizer------
                if self.train_optimizer == 'Adam':
                    optimizer = tf.train.AdamOptimizer(
                        learning_rate=self.learning_rate,
                        beta1=0.9,
                        beta2=0.999,
                        epsilon=1e-8)
                elif self.train_optimizer == 'Adagrad':
                    optimizer = tf.train.AdagradOptimizer(
                        learning_rate=self.learning_rate,
                        initial_accumulator_value=1e-8)
                elif self.train_optimizer == 'Momentum':
                    optimizer = tf.train.MomentumOptimizer(
                        learning_rate=self.learning_rate, momentum=0.95)
                elif self.train_optimizer == 'ftrl':
                    optimizer = tf.train.FtrlOptimizer(self.learning_rate)
                train_step = optimizer.minimize(loss,
                                                global_step=self.global_step)
        """7 设定summary,以便在Tensorboard里进行可视化 """
        print("7 设定summary")
        with tf.name_scope("summaries"):
            tf.summary.scalar("loss", loss)
            tf.summary.scalar("accumulate_auc", auc[0])
            tf.summary.histogram("FM_W", self.FM_W)
            tf.summary.histogram("FM_V", self.FM_V)
            for j in range(len(self.dnn_layer) + 1):
                with tf.variable_scope("deep_layer%d" % (j + 1), reuse=True):
                    weights = tf.get_variable("weights")
                    tf.summary.histogram("dnn_w_%d" % (j + 1), weights)
            # Several summaries, so merge them all here
            summary_op = tf.summary.merge_all()
        """8 返回结果 """
        return Y_Out, score, regularization, loss, auc, train_step, labels, score, summary_op
Example #37
def model_fn(features, labels, mode, params):
    """Bulid Model function f(x) for Estimator."""
    #------hyperparameters----
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #batch_norm_decay = params["batch_norm_decay"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["deep_layers"].split(',')))
    dropout = list(map(float, params["dropout"].split(',')))

    #------build weights------
    FM_B = tf.get_variable(name='fm_bias', shape=[1], initializer=tf.constant_initializer(0.0))
    FM_W = tf.get_variable(name='fm_w', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    FM_V = tf.get_variable(name='fm_v', shape=[feature_size, embedding_size], initializer=tf.glorot_normal_initializer())

    #------build feature-------
    feat_ids  = features['feat_ids']
    feat_ids = tf.reshape(feat_ids,shape=[-1,field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals,shape=[-1,field_size])

    #------build f(x)------
    with tf.variable_scope("First-order"):
        feat_wgts = tf.nn.embedding_lookup(FM_W, feat_ids) # None * F * 1
        y_w = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals),1)

    with tf.variable_scope("Second-order"):
        embeddings = tf.nn.embedding_lookup(FM_V, feat_ids) # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals) #vij*xi
        sum_square = tf.square(tf.reduce_sum(embeddings,1))
        square_sum = tf.reduce_sum(tf.square(embeddings),1)
        y_v = 0.5*tf.reduce_sum(tf.subtract(sum_square, square_sum),1)	# None * 1

    with tf.variable_scope("Deep-part"):
        if FLAGS.batch_norm:
            #normalizer_fn = tf.contrib.layers.batch_norm
            #normalizer_fn = tf.layers.batch_normalization
            if mode == tf.estimator.ModeKeys.TRAIN:
                train_phase = True
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': True, 'reuse': None}
            else:
                train_phase = False
                #normalizer_params = {'decay': batch_norm_decay, 'center': True, 'scale': True, 'updates_collections': None, 'is_training': False, 'reuse': True}
        else:
            normalizer_fn = None
            normalizer_params = None

        deep_inputs = tf.reshape(embeddings,shape=[-1,field_size*embedding_size]) # None * (F*K)
        for i in range(len(layers)):
            #if FLAGS.batch_norm:
            #    deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' %i)
                #normalizer_params.update({'scope': 'bn_%d' %i})
            deep_inputs = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=layers[i], \
                #normalizer_fn=normalizer_fn, normalizer_params=normalizer_params, \
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)
            if FLAGS.batch_norm:
                deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' %i)   # placed after ReLU: https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
            if mode == tf.estimator.ModeKeys.TRAIN:
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])                              #Apply Dropout after all BN layers and set dropout=0.8(drop_ratio=0.2)
                #deep_inputs = tf.layers.dropout(inputs=deep_inputs, rate=dropout[i], training=mode == tf.estimator.ModeKeys.TRAIN)

        y_deep = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity, \
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')
        y_d = tf.reshape(y_deep,shape=[-1])
        #sig_wgts = tf.get_variable(name='sigmoid_weights', shape=[layers[-1]], initializer=tf.glorot_normal_initializer())
        #sig_bias = tf.get_variable(name='sigmoid_bias', shape=[1], initializer=tf.constant_initializer(0.0))
        #deep_out = tf.nn.xw_plus_b(deep_inputs,sig_wgts,sig_bias,name='deep_out')

    with tf.variable_scope("DeepFM-out"):
        #y_bias = FM_B * tf.ones_like(labels, dtype=tf.float32)  # None * 1  warning: do not use labels here, otherwise predict/export fails while train/evaluate works; the Estimator apparently skips feeding labels when they are unused
        y_bias = FM_B * tf.ones_like(y_d, dtype=tf.float32)     # None * 1
        y = y_bias + y_w + y_v + y_d
        pred = tf.sigmoid(y)

    predictions={"prob": pred}
    export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(FM_W) + \
        l2_reg * tf.nn.l2_loss(FM_V) #+ \ l2_reg * tf.nn.l2_loss(sig_wgts)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                train_op=train_op)
Example #38
    def __init__(self, config):

        self.config = config
        self.N = config.N

        ######### avoid exhausting GPU memory ##########
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        self.sess = tf.Session(config=tf_config)

        ######### profiling #############################
        #self.options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        #self.run_metadata = tf.RunMetadata()

        ############ define variables ##################
        self.W = {}
        self.b = {}
        self.scale = {}
        self.beta = {}
        self.pop_mean = {}
        self.pop_var = {}
        self.alpha = {}

        self.dn_vars = []

        # pre
        name_block = "pre"
        self.W[name_block + "3_l_0"] = tf.get_variable(
            name_block + "3_l_0",
            [3, 3, config.patch_size[2], config.pre_Nfeat],
            dtype=tf.float32,
            initializer=tf.glorot_normal_initializer())
        #self.create_bn_variables(name_block+"3_0", config.pre_Nfeat)
        self.W[name_block + "5_l_0"] = tf.get_variable(
            name_block + "5_l_0",
            [5, 5, config.patch_size[2], config.pre_Nfeat],
            dtype=tf.float32,
            initializer=tf.glorot_normal_initializer())
        #self.create_bn_variables(name_block+"5_0", config.pre_Nfeat)
        self.W[name_block + "7_l_0"] = tf.get_variable(
            name_block + "7_l_0",
            [7, 7, config.patch_size[2], config.pre_Nfeat],
            dtype=tf.float32,
            initializer=tf.glorot_normal_initializer())
        #self.create_bn_variables(name_block+"7_0", config.pre_Nfeat)
        self.dn_vars = self.dn_vars + [
            self.W[name_block + "3_l_0"], self.W[name_block + "5_l_0"],
            self.W[name_block + "7_l_0"]
        ]
        for i in range(1, config.pre_n_layers):
            self.W[name_block + "3_l_" + str(i)] = tf.get_variable(
                name_block + "3_l_" + str(i),
                [3, 3, config.pre_Nfeat, config.pre_Nfeat],
                dtype=tf.float32,
                initializer=tf.glorot_normal_initializer())
            #self.create_bn_variables(name_block+"3_"+str(i), config.pre_Nfeat)
            self.W[name_block + "5_l_" + str(i)] = tf.get_variable(
                name_block + "5_l_" + str(i),
                [5, 5, config.pre_Nfeat, config.pre_Nfeat],
                dtype=tf.float32,
                initializer=tf.glorot_normal_initializer())
            #self.create_bn_variables(name_block+"5_"+str(i), config.pre_Nfeat)
            self.W[name_block + "7_l_" + str(i)] = tf.get_variable(
                name_block + "7_l_" + str(i),
                [7, 7, config.pre_Nfeat, config.pre_Nfeat],
                dtype=tf.float32,
                initializer=tf.glorot_normal_initializer())
            #self.create_bn_variables(name_block+"7_"+str(i), config.pre_Nfeat)
            self.dn_vars = self.dn_vars + [
                self.W[name_block + "3_l_" + str(i)],
                self.W[name_block + "5_l_" + str(i)],
                self.W[name_block + "7_l_" + str(i)]
            ]

        # pregconv
        name_block = "pregconv"
        for i in range(config.pregconv_n_layers):
            self.create_gconv_variables(name_block + "3", i, config.pre_Nfeat,
                                        config.pre_fnet_Nfeat,
                                        config.pre_Nfeat, config.rank_theta,
                                        config.stride_pregconv,
                                        config.stride_pregconv)
            self.create_gconv_variables(name_block + "5", i, config.pre_Nfeat,
                                        config.pre_fnet_Nfeat,
                                        config.pre_Nfeat, config.rank_theta,
                                        config.stride_pregconv,
                                        config.stride_pregconv)
            self.create_gconv_variables(name_block + "7", i, config.pre_Nfeat,
                                        config.pre_fnet_Nfeat,
                                        config.pre_Nfeat, config.rank_theta,
                                        config.stride_pregconv,
                                        config.stride_pregconv)
        #self.create_bn_variables(name_block, config.Nfeat)

        # hpf
        name_block = "hpf"
        self.create_conv_variables(name_block, 0, config.Nfeat, config.Nfeat)
        self.create_bn_variables(name_block + "_c_" + "_" + str(0),
                                 config.Nfeat)
        for i in range(config.hpf_n_layers):
            self.create_gconv_variables(name_block, i, config.Nfeat,
                                        config.hpf_fnet_Nfeat, config.Nfeat,
                                        config.rank_theta, config.stride,
                                        config.stride)
            #self.create_bn_variables(name_block+"_"+str(i), config.Nfeat)

        # prox
        name_block = "prox"
        for i in range(config.prox_n_layers):
            self.create_conv_variables(name_block, i, config.Nfeat,
                                       config.Nfeat)
            self.create_bn_variables(name_block + "_c_" + "_" + str(i),
                                     config.Nfeat)
            for j in range(config.lpf_n_layers):
                self.create_gconv_variables(name_block + str(i), j,
                                            config.Nfeat,
                                            config.prox_fnet_Nfeat,
                                            config.Nfeat, config.rank_theta,
                                            config.stride, config.stride)
                self.create_bn_variables(name_block + str(i) + "_" + str(j),
                                         config.Nfeat)
            self.alpha["alpha_" + str(i)] = tf.get_variable(
                "alpha_" + str(i), [],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.5))
            self.beta["beta_" + str(i)] = tf.get_variable(
                "beta_" + str(i), [],
                dtype=tf.float32,
                initializer=tf.constant_initializer(0.5))
            self.dn_vars = self.dn_vars + [
                self.alpha["alpha_" + str(i)], self.beta["beta_" + str(i)]
            ]

        # last
        name_block = "last"
        self.create_gconv_variables(name_block, 0, config.Nfeat,
                                    config.prox_fnet_Nfeat,
                                    config.patch_size[2], config.rank_theta,
                                    config.stride, config.patch_size[2])

        ############ define placeholders ##############
        self.x_clean = tf.placeholder("float", [
            None, config.patch_size[0], config.patch_size[1],
            config.patch_size[2]
        ],
                                      name="clean_image")
        self.x_noisy = tf.placeholder("float", [
            None, config.patch_size[0], config.patch_size[1],
            config.patch_size[2]
        ],
                                      name="noisy_image")
        self.is_training = tf.placeholder(tf.bool, (), name="is_training")
        self.local_mask = tf.placeholder("float", [
            config.searchN,
        ],
                                         name="local_mask")

        self.id_mat = 2 * tf.eye(config.searchN)

        ########### computational graph ###############
        self.__make_compute_graph()

        ################## losses #####################
        self.__make_loss()

        ################ optimizer ops ################
        #update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        #with tf.control_dependencies(update_ops):

        #global_step = tf.Variable(0, trainable=False)
        #l_r = tf.train.exponential_decay(config.starter_learning_rate, global_step, config.decay_step, config.decay_rate, staircase=True)

        #self.opt = tf.train.AdamOptimizer(l_r)
        # create a copy of all trainable variables with `0` as initial values
        #self.accum_vars = [tf.Variable(tf.zeros_like(t_var.initialized_value()),trainable=False) for t_var in dn_vars]
        # create a op to initialize all accums vars
        #self.zero_accum_vars = [tv.assign(tf.zeros_like(tv)) for tv in self.accum_vars]
        # compute gradients for a batch
        #batch_grads_vars = self.opt.compute_gradients(self.loss, dn_vars)
        # collect the batch gradient into accumulated vars
        #self.accum_op = self.my_accum_op(batch_grads_vars)
        #self.accum_op = [self.accum_vars[i].assign_add(batch_grad_var[0]) if batch_grad_var[0] is not None else self.accum_vars[i].assign_add(tf.zeros_like(self.accum_vars[i])) for i, batch_grad_var in enumerate(batch_grads_vars)]
        # apply accums gradients
        #print [(self.accum_vars[i], batch_grad_var[1]) for i, batch_grad_var in enumerate(batch_grads_vars)]
        #print batch_grads_vars
        #grad_and_vars_final = [(self.accum_vars[i], batch_grad_var[1]) if batch_grad_var[0] is not None else (None, batch_grad_var[1]) for i, batch_grad_var in enumerate(batch_grads_vars)]
        #self.apply_accum = self.opt.apply_gradients(grad_and_vars_final)
        #self.apply_accum = self.opt.apply_gradients(batch_grads_vars)
        self.opt = tf.train.AdamOptimizer(config.end_learning_rate).minimize(
            self.loss,
            var_list=self.dn_vars,
            aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)

        ################# summaries ###################
        tf.summary.scalar('loss', self.loss)
        tf.summary.scalar('PSNR', self.psnr)
        tf.summary.image('denoised_image',
                         tf.expand_dims(self.x_hat[0, :, :, :], 0))
        tf.summary.image('noisy_image',
                         tf.expand_dims(self.x_noisy[0, :, :, :], 0))
        tf.summary.image('clean_image',
                         tf.expand_dims(self.x_clean[0, :, :, :], 0))
        self.summaries = tf.summary.merge_all()
        # Check if log_dir exists, if so delete contents
        #if tf.gfile.Exists(self.config.log_dir):
        #	tf.gfile.DeleteRecursively(self.config.log_dir)
        #	tf.gfile.MkDir(self.config.log_dir+'train/')
        #	tf.gfile.MkDir(self.config.log_dir+'val/')
        self.train_summaries_writer = tf.summary.FileWriter(
            self.config.log_dir + 'train/', self.sess.graph)
        self.val_summaries_writer = tf.summary.FileWriter(
            self.config.log_dir + 'val/', self.sess.graph)
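# --- Hedged usage sketch (not part of the original example) ---
# One way the graph built above might be driven for a single training step.
# `model` stands for an instance of the (unnamed) class whose constructor this
# is, and the batch arrays are assumptions for illustration; the attributes
# touched below (sess, opt, loss, summaries, the placeholders and
# train_summaries_writer) are all created in the constructor above.
def run_train_step(model, clean_batch, noisy_batch, local_mask, step):
    _, summary, loss_value = model.sess.run(
        [model.opt, model.summaries, model.loss],
        feed_dict={model.x_clean: clean_batch,
                   model.x_noisy: noisy_batch,
                   model.local_mask: local_mask,
                   model.is_training: True})
    model.train_summaries_writer.add_summary(summary, step)
    return loss_value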
def get_instance(args):
    # pylint: disable=unused-argument
    """
    create an instance of the initializer
    """
    return tf.glorot_normal_initializer(seed=SEED)
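# --- Hedged usage sketch (not part of the original example) ---
# Assuming the module-level SEED constant referenced above exists, the returned
# initializer can be passed to tf.get_variable; the variable name and shape
# below are illustrative only.
import tensorflow as tf

def _example_variable():
    init = get_instance(args=None)
    return tf.get_variable("example_w", shape=[256, 64], initializer=init)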
    def graphing(self, features, labels, mode):
        self.logger.info('mode = {}'.format(mode))
        p = self.hparam
        self.features, self.labels = features, labels
        for name, tensor in self.features.items():
            setattr(self, name, tensor)

        with tf.variable_scope("init") as scope:
            init_fn = tf.glorot_normal_initializer()
            emb_init_fn = tf.glorot_uniform_initializer()
            self.b_global = tf.Variable(emb_init_fn(shape=[]), name="b_global")

            with tf.variable_scope("embedding") as scope:
                self.w_query_movie_ids = tf.Variable(
                    emb_init_fn(shape=[self.n_items, p.dim]),
                    name="w_query_movie_ids")
                self.b_query_movie_ids = tf.Variable(
                    emb_init_fn(shape=[p.dim]), name="b_query_movie_ids")
                self.w_candidate_movie_id = tf.Variable(
                    init_fn(shape=[self.n_items, p.dim]),
                    name="w_candidate_movie_id")
                self.b_candidate_movie_id = tf.Variable(
                    init_fn(shape=[p.dim + 8 + 2]),
                    name="b_candidate_movie_id")
                # self.b_candidate_movie_id = tf.Variable(init_fn(shape=[self.n_items]), name="b_candidate_movie_id")
                self.w_genres = tf.Variable(
                    emb_init_fn(shape=[self.n_genres, 8]), name="w_genres")

        with tf.variable_scope("user_encoding") as scope:
            # query_movie embedding
            self.emb_query = tf.nn.embedding_lookup(self.w_query_movie_ids,
                                                    self.query_movie_ids)
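            # The L2-normalized 0/1 mask built below zeroes out padded
            # positions and scales the masked sum by 1/sqrt(number of valid
            # query movies).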
            query_movie_mask = tf.expand_dims(
                tf.nn.l2_normalize(
                    tf.to_float(tf.sequence_mask(self.query_movie_ids_len)),
                    1), -1)
            self.emb_query = tf.reduce_sum(self.emb_query * query_movie_mask,
                                           1)
            self.query_bias = tf.matmul(self.emb_query,
                                        self.b_query_movie_ids[:, tf.newaxis])
            self.emb_query = tf.layers.dense(self.emb_query,
                                             128,
                                             kernel_initializer=init_fn,
                                             activation=tf.nn.selu)
            self.emb_query = tf.layers.dense(self.emb_query,
                                             64,
                                             kernel_initializer=init_fn,
                                             activation=tf.nn.selu)
            self.emb_query = tf.layers.dense(self.emb_query,
                                             32,
                                             kernel_initializer=init_fn,
                                             activation=tf.nn.selu)
            # self.emb_query = tf.layers.dense(self.emb_query, 16, kernel_initializer=init_fn, activation=tf.nn.selu)

        # encode [item embedding + item metadata]
        with tf.variable_scope("item_encoding") as scope:
            # candidate_movie embedding
            self.candidate_emb = tf.nn.embedding_lookup(
                self.w_candidate_movie_id, self.candidate_movie_id)
            # genres embedding
            self.emb_genres = tf.nn.embedding_lookup(self.w_genres,
                                                     tf.to_int32(self.genres))
            genres_mask = tf.expand_dims(
                tf.nn.l2_normalize(
                    tf.to_float(tf.sequence_mask(self.genres_len)), 1), -1)
            self.emb_genres = tf.reduce_sum(self.emb_genres * genres_mask, 1)

            self.emb_item = tf.concat([
                self.candidate_emb, self.emb_genres,
                self.avg_rating[:, tf.newaxis], self.year[:, tf.newaxis]
            ], 1)
            self.candidate_bias = tf.matmul(
                self.emb_item, self.b_candidate_movie_id[:, tf.newaxis])
            self.emb_item = tf.layers.dense(self.emb_item,
                                            128,
                                            kernel_initializer=init_fn,
                                            activation=tf.nn.selu)
            self.emb_item = tf.layers.dense(self.emb_item,
                                            64,
                                            kernel_initializer=init_fn,
                                            activation=tf.nn.selu)
            self.emb_item = tf.layers.dense(self.emb_item,
                                            32,
                                            kernel_initializer=init_fn,
                                            activation=tf.nn.selu)
            # self.emb_item = tf.layers.dense(self.emb_item, 16, kernel_initializer=init_fn, activation=tf.nn.selu)

        # element-wise product of user and item embeddings, summed to a dot-product (GMF) score
        with tf.variable_scope("gmf") as scope:
            self.gmf = tf.reduce_sum(self.emb_query * self.emb_item,
                                     1,
                                     keep_dims=True)
            self.gmf = tf.add(self.gmf, self.b_global)
            self.gmf = tf.add(self.gmf, self.query_bias)
            self.gmf = tf.add(self.gmf, self.candidate_bias)
            self.infer = tf.nn.sigmoid(self.gmf, name="infer")

            # score one query against all candidate items with a single matmul, for prediction speed; see the sketch after this example
            self.pred = tf.matmul(self.emb_query, tf.transpose(self.emb_item)) + \
                        tf.reshape(self.candidate_bias, (1, -1)) + \
                        self.query_bias + \
                        self.b_global
            self.pred = tf.nn.sigmoid(self.pred, name='pred')

        # Provide an estimator spec for `ModeKeys.PREDICT`
        if mode == tf.estimator.ModeKeys.PREDICT:
            export_outputs = {
                'outputs':
                tf.estimator.export.PredictOutput({
                    'emb_query': self.emb_query,
                    'emb_item': self.emb_item,
                    'predictions': self.infer
                })
            }
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions=self.pred,
                                              export_outputs=export_outputs)

        with tf.variable_scope("loss") as scope:
            # self.alter_rating = tf.to_float(self.label >= 4)[:, tf.newaxis]
            self.ans = tf.to_float(self.labels)[:, tf.newaxis]
            self.loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(labels=self.ans,
                                                        logits=self.gmf))
            tf.summary.scalar('loss', self.loss)

        with tf.variable_scope("metrics") as scope:
            self.auc = tf.metrics.auc(tf.cast(self.labels, tf.bool),
                                      tf.reshape(self.infer, [-1]))
            # tf.summary.scalar('auc', self.auc)

        self.train_op = None
        self.global_step = tf.train.get_or_create_global_step()
        if mode == tf.estimator.ModeKeys.TRAIN:
            with tf.variable_scope("train"):
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                with tf.control_dependencies(update_ops):
                    self.train_op = tf.train.AdamOptimizer().minimize(
                        self.loss, self.global_step)
                    # self.train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(self.loss)
        # self.merge = tf.summary.merge_all()

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=self.loss,
                                          train_op=self.train_op,
                                          eval_metric_ops={'auc': self.auc},
                                          evaluation_hooks=[])
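# --- Hedged sketch (not part of the original example) ---
# The "gmf" scope above scores one query against every candidate item with a
# single matmul; the NumPy check below illustrates that this is equivalent to
# taking the dot product pair by pair (names and sizes are illustrative only).
import numpy as np

_rng = np.random.RandomState(0)
_emb_query = _rng.randn(1, 32)        # one encoded query
_emb_items = _rng.randn(1000, 32)     # all candidate item encodings

_scores_matmul = _emb_query @ _emb_items.T                        # shape (1, 1000)
_scores_loop = np.array([_emb_query[0] @ v for v in _emb_items])  # shape (1000,)
assert np.allclose(_scores_matmul.ravel(), _scores_loop)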
Example #41
def model_fn(features, labels, mode, params):
    """Bulid Model function f(x) for Estimator."""
    #------hyperparameters----
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["attention_layers"].split(',')))   # list() so len()/indexing work under Python 3
    dropout = list(map(float, params["dropout"].split(',')))

    #------build weights------
    Global_Bias = tf.get_variable(name='bias', shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Bias = tf.get_variable(name='linear', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable(name='emb', shape=[feature_size,embedding_size], initializer=tf.glorot_normal_initializer())

    #------build features-------
    feat_ids  = features['feat_ids']
    feat_ids = tf.reshape(feat_ids,shape=[-1,field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals,shape=[-1,field_size])

    #------build f(x)------
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Bias, feat_ids) # None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals),1)

    with tf.variable_scope("Pairwise-Interaction-Layer"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids) # None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals) #vij*xi

        num_interactions = field_size * (field_size - 1) // 2          # integer division: used as a static reshape dimension below
        element_wise_product_list = []
        for i in range(0, field_size):
            for j in range(i+1, field_size):
                element_wise_product_list.append(tf.multiply(embeddings[:,i,:], embeddings[:,j,:]))
        element_wise_product = tf.stack(element_wise_product_list) 								# (F*(F-1)) * None * K
        element_wise_product = tf.transpose(element_wise_product, perm=[1,0,2]) 				# None * (F*(F-1)) * K
        #interactions = tf.reduce_sum(element_wise_product, 2, name="interactions")

    with tf.variable_scope("Attention-part"):
        deep_inputs = tf.reshape(element_wise_product, shape=[-1, embedding_size]) 				# (None * (F*(F-1))) * K
        for i in range(len(layers)):
            deep_inputs = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=layers[i], \
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)

        aij = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity, \
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='attention_out')# (None * (F*(F-1))) * 1

        #aij_reshape = tf.reshape(aij, shape=[-1, num_interactions, 1])							# None * (F*(F-1)) * 1
        aij_softmax = tf.nn.softmax(tf.reshape(aij, shape=[-1, num_interactions, 1]), dim=1, name='attention_soft')
        if mode == tf.estimator.ModeKeys.TRAIN:
            aij_softmax = tf.nn.dropout(aij_softmax, keep_prob=dropout[0])

    with tf.variable_scope("Attention-based-Pooling"):
        y_emb = tf.reduce_sum(tf.multiply(aij_softmax, element_wise_product), 1) 				# None * K
        if mode == tf.estimator.ModeKeys.TRAIN:
            y_emb = tf.nn.dropout(y_emb, keep_prob=dropout[1])

        y_d = tf.contrib.layers.fully_connected(inputs=y_emb, num_outputs=1, activation_fn=tf.identity, \
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')		# None * 1
        y_deep = tf.reshape(y_d,shape=[-1])

    with tf.variable_scope("AFM-out"):
        #y_bias = Global_Bias * tf.ones_like(labels, dtype=tf.float32)  # None * 1  warning: labels must not be used here, otherwise predict/export fails while train/evaluate work; the Estimator apparently skips feeding labels when they are not needed
        y_bias = Global_Bias * tf.ones_like(y_deep, dtype=tf.float32)   # None * 1
        y = y_bias + y_linear + y_deep
        pred = tf.sigmoid(y)

    predictions={"prob": pred}
    export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(Feat_Bias) + l2_reg * tf.nn.l2_loss(Feat_Emb)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                train_op=train_op)
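# --- Hedged sketch (not part of the original example) ---
# NumPy illustration of the attention-based pooling performed in the
# "Pairwise-Interaction-Layer" / "Attention-part" scopes above: every pair of
# field embeddings is multiplied element-wise, the pairs are weighted by a
# softmax over attention logits, and the weighted pairs are summed into one
# K-dimensional vector. The function name and sizes below are illustrative only.
import numpy as np

def afm_attention_pooling(embeddings, attention_logits):
    """embeddings: (F, K); attention_logits: (F*(F-1)//2,); returns (K,)."""
    num_fields, _ = embeddings.shape
    pairs = np.array([embeddings[i] * embeddings[j]
                      for i in range(num_fields)
                      for j in range(i + 1, num_fields)])        # (P, K)
    weights = np.exp(attention_logits - attention_logits.max())
    weights = weights / weights.sum()                            # softmax over the P pairs
    return (weights[:, None] * pairs).sum(axis=0)

_rng = np.random.RandomState(1)
_pooled = afm_attention_pooling(_rng.randn(4, 8), _rng.randn(4 * 3 // 2))
assert _pooled.shape == (8,)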
Example #42
def model_fn(features, labels, mode, params):
    """Bulid Model function f(x) for Estimator."""
    #------hyperparameters----
    field_size = params["field_size"]
    feature_size = params["feature_size"]
    embedding_size = params["embedding_size"]
    l2_reg = params["l2_reg"]
    learning_rate = params["learning_rate"]
    #optimizer = params["optimizer"]
    layers = list(map(int, params["deep_layers"].split(',')))        # list() so len()/indexing work under Python 3
    dropout = list(map(float, params["dropout"].split(',')))

    #------build weights------
    Global_Bias = tf.get_variable(name='bias', shape=[1], initializer=tf.constant_initializer(0.0))
    Feat_Bias = tf.get_variable(name='linear', shape=[feature_size], initializer=tf.glorot_normal_initializer())
    Feat_Emb = tf.get_variable(name='emb', shape=[feature_size,embedding_size], initializer=tf.glorot_normal_initializer())

    #------build features-------
    feat_ids  = features['feat_ids']
    feat_ids = tf.reshape(feat_ids,shape=[-1,field_size])
    feat_vals = features['feat_vals']
    feat_vals = tf.reshape(feat_vals,shape=[-1,field_size])

    #------build f(x)------
    with tf.variable_scope("Linear-part"):
        feat_wgts = tf.nn.embedding_lookup(Feat_Bias, feat_ids) 		# None * F * 1
        y_linear = tf.reduce_sum(tf.multiply(feat_wgts, feat_vals),1)

    with tf.variable_scope("BiInter-part"):
        embeddings = tf.nn.embedding_lookup(Feat_Emb, feat_ids) 		# None * F * K
        feat_vals = tf.reshape(feat_vals, shape=[-1, field_size, 1])
        embeddings = tf.multiply(embeddings, feat_vals) 				# vij * xi
        sum_square_emb = tf.square(tf.reduce_sum(embeddings,1))
        square_sum_emb = tf.reduce_sum(tf.square(embeddings),1)
        deep_inputs = 0.5*tf.subtract(sum_square_emb, square_sum_emb)	# None * K

    with tf.variable_scope("Deep-part"):
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_phase = True
        else:
            train_phase = False

        if mode == tf.estimator.ModeKeys.TRAIN:
            deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[0]) 						# None * K
        for i in range(len(layers)):
            deep_inputs = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=layers[i], \
                weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='mlp%d' % i)

            if FLAGS.batch_norm:
                deep_inputs = batch_norm_layer(deep_inputs, train_phase=train_phase, scope_bn='bn_%d' %i)   # batch norm placed after the ReLU, see https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md#bn----before-or-after-relu
            if mode == tf.estimator.ModeKeys.TRAIN:
                deep_inputs = tf.nn.dropout(deep_inputs, keep_prob=dropout[i])                              # apply dropout after all BN layers; dropout[i] is a keep probability, e.g. 0.8 keeps 80% (drop ratio 0.2)
                #deep_inputs = tf.layers.dropout(inputs=deep_inputs, rate=dropout[i], training=mode == tf.estimator.ModeKeys.TRAIN)

        y_deep = tf.contrib.layers.fully_connected(inputs=deep_inputs, num_outputs=1, activation_fn=tf.identity, \
            weights_regularizer=tf.contrib.layers.l2_regularizer(l2_reg), scope='deep_out')
        y_d = tf.reshape(y_deep,shape=[-1])

    with tf.variable_scope("NFM-out"):
        #y_bias = Global_Bias * tf.ones_like(labels, dtype=tf.float32)  # None * 1  warning: labels must not be used here, otherwise predict/export fails while train/evaluate work; the Estimator apparently skips feeding labels when they are not needed
        y_bias = Global_Bias * tf.ones_like(y_d, dtype=tf.float32)     	# None * 1
        y = y_bias + y_linear + y_d
        pred = tf.sigmoid(y)

    predictions={"prob": pred}
    export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
    # Provide an estimator spec for `ModeKeys.PREDICT`
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                export_outputs=export_outputs)

    #------build loss------
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y, labels=labels)) + \
        l2_reg * tf.nn.l2_loss(Feat_Bias) + l2_reg * tf.nn.l2_loss(Feat_Emb)

    # Provide an estimator spec for `ModeKeys.EVAL`
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, pred)
    }
    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                eval_metric_ops=eval_metric_ops)

    #------build optimizer------
    if FLAGS.optimizer == 'Adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8)
    elif FLAGS.optimizer == 'Adagrad':
        optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate, initial_accumulator_value=1e-8)
    elif FLAGS.optimizer == 'Momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.95)
    elif FLAGS.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(learning_rate)

    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    # Provide an estimator spec for `ModeKeys.TRAIN` modes
    if mode == tf.estimator.ModeKeys.TRAIN:
        return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                train_op=train_op)
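# --- Hedged sketch (not part of the original example) ---
# NumPy check of the bi-interaction identity used in the "BiInter-part" scope
# above:  0.5 * ((sum_i v_i*x_i)^2 - sum_i (v_i*x_i)^2) equals the sum of all
# pairwise element-wise products sum_{i<j} (v_i*x_i) * (v_j*x_j).
import numpy as np

_rng = np.random.RandomState(2)
_vx = _rng.randn(5, 8)                # one example: F=5 fields, K=8 embedding dims

_lhs = 0.5 * (_vx.sum(axis=0) ** 2 - (_vx ** 2).sum(axis=0))
_rhs = sum(_vx[i] * _vx[j] for i in range(5) for j in range(i + 1, 5))
assert np.allclose(_lhs, _rhs)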