Example #1
def model():
    print("building model ...")
    with tf.variable_scope('train'):
        print("building model ...")
        X_pl = tf.placeholder(tf.float32, [None, num_features])
        X_expand = tf.expand_dims(X_pl, axis=2)
        print("X_pl", X_pl.get_shape())
        t_pl = tf.placeholder(tf.int32, [None,])
        print("t_pl", t_pl.get_shape())
        is_training_pl = tf.placeholder(tf.bool)
        cell_fw = tf.nn.rnn_cell.GRUCell(205)
        cell_bw = tf.nn.rnn_cell.GRUCell(205)
        # every example uses its full feature length (num_features) as its sequence length
        seq_len = tf.reduce_sum(tf.ones(tf.shape(X_pl), dtype=tf.int32), axis=1)
        _, enc_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,
            cell_bw=cell_bw, inputs=X_expand, sequence_length=seq_len,
            dtype=tf.float32)
        enc_states = tf.concat(enc_states, axis=1)  # TF >= 1.0 argument order: values first, then axis
        enc_states_drop = dropout(enc_states, is_training=is_training_pl) 
        l1 = fully_connected(enc_states_drop, 200, activation_fn=None)
        l1 = batch_norm(l1, is_training=is_training_pl)
        l1_relu = relu(l1)
        l1_dropout = dropout(l1_relu, is_training=is_training_pl)
        l2 = fully_connected(l1_dropout, 200, activation_fn=None)
        l2 = batch_norm(l2, is_training=is_training_pl)
        l2_relu = relu(l2)
        l_out = fully_connected(l2_relu, num_outputs=num_classes, activation_fn=None)
        l_out_softmax = tf.nn.softmax(l_out)
        tf.contrib.layers.summarize_variables()

    with tf.variable_scope('metrics'):
        loss = sparse_softmax_cross_entropy_with_logits(logits=l_out, labels=t_pl)
        print("loss", loss.get_shape())
        loss = tf.reduce_mean(loss)
        print("loss", loss.get_shape())
        tf.summary.scalar('train/loss', loss)
        argmax = tf.to_int32(tf.argmax(l_out, 1))
        print("argmax", argmax.get_shape())
        correct = tf.to_float(tf.equal(argmax, t_pl))
        print("correct,", correct.get_shape())
        accuracy = tf.reduce_mean(correct)
        print("accuracy", accuracy.get_shape())

    with tf.variable_scope('optimizer'):
        print("building optimizer ...")
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss)
        gradients, variables = zip(*grads_and_vars)
        clipped_gradients, global_norm = (
            tf.clip_by_global_norm(gradients, clip_norm))
        clipped_grads_and_vars = zip(clipped_gradients, variables)

        tf.summary.scalar('train/global_gradient_norm', global_norm)

        train_op = optimizer.apply_gradients(clipped_grads_and_vars, global_step=global_step)

    return X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy, train_op, global_step
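
A minimal, self-contained sketch (toy shapes and names, not taken from the example above) of the pattern this snippet relies on: tf.contrib.layers.dropout is an identity op when is_training is fed as False and, when True, zeroes units with probability 1 - keep_prob and scales the survivors by 1/keep_prob.

# Sketch only: illustrates the is_training gating of tf.contrib.layers.dropout.
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import dropout

x = tf.placeholder(tf.float32, [None, 4])
is_training = tf.placeholder(tf.bool)
y = dropout(x, keep_prob=0.5, is_training=is_training)

with tf.Session() as sess:
    data = np.ones((2, 4), dtype=np.float32)
    print(sess.run(y, {x: data, is_training: False}))  # identical to the input
    print(sess.run(y, {x: data, is_training: True}))   # ~half the units zeroed, the rest scaled to 2.0
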
Example #2
 def define_sequence_model(self):
     seed=12345
     np.random.seed(12345)
     layer_list=[]
     with self.graph.as_default() as g:
         utt_length=tf.placeholder(tf.int32,shape=(None))
         g.add_to_collection(name="utt_length",value=utt_length)
         with tf.name_scope("input"):
              input_layer=tf.placeholder(dtype=tf.float32,shape=(None,None,self.n_in),name="input_layer")
              if self.dropout_rate!=0.0:
                 print "Using dropout to avoid overfitting and the dropout rate is",self.dropout_rate
                 is_training_drop=tf.placeholder(dtype=tf.bool,shape=(),name="is_training_drop")
                 input_layer_drop=dropout(input_layer,self.dropout_rate,is_training=is_training_drop)
                 layer_list.append(input_layer_drop)
                 g.add_to_collection(name="is_training_drop",value=is_training_drop)
              else:
                 layer_list.append(input_layer)
         g.add_to_collection("input_layer",layer_list[0])
         with tf.name_scope("hidden_layer"):
            basic_cell=[]
            if "tanh" in self.hidden_layer_type:
                is_training_batch=tf.placeholder(dtype=tf.bool,shape=(),name="is_training_batch")
                bn_params={"is_training":is_training_batch,"decay":0.99,"updates_collections":None}
                g.add_to_collection("is_training_batch",is_training_batch)
            for i in xrange(len(self.hidden_layer_type)):
                if self.dropout_rate!=0.0:
                    if self.hidden_layer_type[i]=="tanh":
                        new_layer=fully_connected(layer_list[-1],self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,normalizer_params=bn_params)
                        new_layer_drop=dropout(new_layer,self.dropout_rate,is_training=is_training_drop)
                        layer_list.append(new_layer_drop)
                    if self.hidden_layer_type[i]=="lstm":
                        basic_cell.append(MyDropoutWrapper(BasicLSTMCell(num_units=self.hidden_layer_size[i]),self.dropout_rate,self.dropout_rate,is_training=is_training_drop))
                    if self.hidden_layer_type[i]=="gru":
                        basic_cell.append(MyDropoutWrapper(GRUCell(num_units=self.hidden_layer_size[i]),self.dropout_rate,self.dropout_rate,is_training=is_training_drop))
                else:
                    if self.hidden_layer_type[i]=="tanh":
                       new_layer=fully_connected(layer_list[-1],self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,normalizer_params=bn_params)
                       layer_list.append(new_layer)
                    if self.hidden_layer_type[i]=="lstm":
                       basic_cell.append(LayerNormBasicLSTMCell(num_units=self.hidden_layer_size[i]))
                    if self.hidden_layer_type[i]=="gru":
                       basic_cell.append(LayerNormGRUCell(num_units=self.hidden_layer_size[i]))
            multi_cell=MultiRNNCell(basic_cell)
            rnn_outputs,rnn_states=tf.nn.dynamic_rnn(multi_cell,layer_list[-1],dtype=tf.float32,sequence_length=utt_length)
            layer_list.append(rnn_outputs)
         with tf.name_scope("output_layer"):
              if self.output_type=="linear" :
                  output_layer=tf.layers.dense(rnn_outputs,self.n_out)
               #  stacked_rnn_outputs=tf.reshape(rnn_outputs,[-1,self.n_out])
               #  stacked_outputs=tf.layers.dense(stacked_rnn_outputs,self.n_out)
               #  output_layer=tf.reshape(stacked_outputs,[-1,utt_length,self.n_out])
              g.add_to_collection(name="output_layer",value=output_layer)
         with tf.name_scope("training_op"):
              if self.optimizer=="adam":
                  self.training_op=tf.train.AdamOptimizer()
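
MyDropoutWrapper above appears to be a project-specific class; with stock TF 1.x the equivalent building block is tf.contrib.rnn.DropoutWrapper. A hedged sketch (cell sizes, keep probabilities and placeholder names are illustrative, not taken from the code above):

# Sketch with the stock wrapper (tf.contrib.rnn.DropoutWrapper), not the project's MyDropoutWrapper.
import tensorflow as tf
from tensorflow.contrib.rnn import GRUCell, DropoutWrapper, MultiRNNCell

keep_prob = tf.placeholder_with_default(1.0, shape=())  # feed a value < 1.0 only while training
cells = [DropoutWrapper(GRUCell(num_units=128),
                        input_keep_prob=keep_prob,
                        output_keep_prob=keep_prob)
         for _ in range(2)]
multi_cell = MultiRNNCell(cells)
inputs = tf.placeholder(tf.float32, [None, None, 64])   # [batch, time, features]
seq_len = tf.placeholder(tf.int32, [None])
outputs, state = tf.nn.dynamic_rnn(multi_cell, inputs, sequence_length=seq_len, dtype=tf.float32)
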
  def _init_body(self, scope):
    with tf.variable_scope(scope):

      word_level_inputs = tf.reshape(self.inputs_embedded, [
        self.document_size * self.sentence_size,
        self.word_size,
        self.embedding_size
      ])
      word_level_lengths = tf.reshape(
        self.word_lengths, [self.document_size * self.sentence_size])

      with tf.variable_scope('word') as scope:
        word_encoder_output, _ = bidirectional_rnn(
          self.word_cell, self.word_cell,
          word_level_inputs, word_level_lengths,
          scope=scope)

        with tf.variable_scope('attention') as scope:
          word_level_output = task_specific_attention(
            word_encoder_output,
            self.word_output_size,
            scope=scope)

        with tf.variable_scope('dropout'):
          word_level_output = layers.dropout(
            word_level_output, keep_prob=self.dropout_keep_proba,
            is_training=self.is_training,
          )

      # sentence_level

      sentence_inputs = tf.reshape(
        word_level_output, [self.document_size, self.sentence_size, self.word_output_size])

      with tf.variable_scope('sentence') as scope:
        sentence_encoder_output, _ = bidirectional_rnn(
          self.sentence_cell, self.sentence_cell, sentence_inputs, self.sentence_lengths, scope=scope)

        with tf.variable_scope('attention') as scope:
          sentence_level_output = task_specific_attention(
            sentence_encoder_output, self.sentence_output_size, scope=scope)

        with tf.variable_scope('dropout'):
          sentence_level_output = layers.dropout(
            sentence_level_output, keep_prob=self.dropout_keep_proba,
            is_training=self.is_training,
          )

      with tf.variable_scope('classifier'):
        self.logits = layers.fully_connected(
          sentence_level_output, self.classes, activation_fn=None)

        self.prediction = tf.argmax(self.logits, axis=-1)
Example #4
def conv_model(X, Y_, mode):
    XX = tf.reshape(X, [-1, 28, 28, 1])
    biasInit = tf.constant_initializer(0.1, dtype=tf.float32)
    Y1 = layers.conv2d(XX,  num_outputs=6,  kernel_size=[6, 6], biases_initializer=biasInit)
    Y2 = layers.conv2d(Y1, num_outputs=12, kernel_size=[5, 5], stride=2, biases_initializer=biasInit)
    Y3 = layers.conv2d(Y2, num_outputs=24, kernel_size=[4, 4], stride=2, biases_initializer=biasInit)
    Y4 = layers.flatten(Y3)
    Y5 = layers.relu(Y4, 200, biases_initializer=biasInit)
    # to deactivate dropout on the dense layer, set keep_prob=1
    Y5d = layers.dropout(Y5, keep_prob=0.75, noise_shape=None, is_training=mode==learn.ModeKeys.TRAIN)
    Ylogits = layers.linear(Y5d, 10)
    predict = tf.nn.softmax(Ylogits)
    classes = tf.cast(tf.argmax(predict, 1), tf.uint8)

    loss = conv_model_loss(Ylogits, Y_, mode)
    train_op = conv_model_train_op(loss, mode)
    eval_metrics = conv_model_eval_metrics(classes, Y_, mode)

    return learn.ModelFnOps(
        mode=mode,
        # You can name the fields of your predictions dictionary as you like.
        predictions={"predictions": predict, "classes": classes},
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metrics
    )
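
The comment in conv_model above points at the keep_prob convention used throughout these examples: keep_prob is the probability of keeping a unit, so keep_prob=1 disables dropout, and a dropout rate p corresponds to keep_prob = 1 - p (the estimator-style snippets below compute exactly keep_prob=(1.0 - dropout)). A tiny illustrative helper, not part of any of these code bases:

# Illustrative only: convert a dropout *rate* (fraction of units dropped) to the
# keep_prob expected by layers.dropout / tf.nn.dropout.
def as_keep_prob(dropout_rate):
    return 1.0 - dropout_rate

assert as_keep_prob(0.25) == 0.75  # drop 25% of the units -> keep 75%
assert as_keep_prob(0.0) == 1.0    # rate 0, i.e. keep_prob=1, disables dropout
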
 def _dnn_logits(self, features, is_training=False):
   net = layers.input_from_feature_columns(
       features,
       self._get_dnn_feature_columns(),
       weight_collections=[self._dnn_weight_collection])
   for layer_id, num_hidden_units in enumerate(self._dnn_hidden_units):
     net = layers.legacy_fully_connected(
         net,
         num_hidden_units,
         activation_fn=self._dnn_activation_fn,
         weight_collections=[self._dnn_weight_collection],
         bias_collections=[self._dnn_weight_collection],
         name="hiddenlayer_%d" % layer_id)
     if self._dnn_dropout is not None and is_training:
       net = layers.dropout(
           net,
           keep_prob=(1.0 - self._dnn_dropout))
     self._add_hidden_layer_summary(net, "hiddenlayer_%d" % layer_id)
   logit = layers.legacy_fully_connected(
       net,
       self._num_label_columns(),
       weight_collections=[self._dnn_weight_collection],
       bias_collections=[self._dnn_weight_collection],
       name="dnn_logit")
   self._add_hidden_layer_summary(logit, "dnn_logit")
   return logit
def model_fn(x, target, mode, params):
    """Model function for Estimator."""

    y_ = tf.cast(target, tf.float32)

    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # first convolutional layer
    h_conv1 = layers.convolution2d(x_image, 32, [5,5])
    h_pool1 = layers.max_pool2d(h_conv1, [2,2])

    # second convolutional layer
    h_conv2 = layers.convolution2d(h_pool1, 64, [5,5])
    h_pool2 = layers.max_pool2d(h_conv2, [2,2])

    # densely connected layer
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    h_fc1 = layers.fully_connected(h_pool2_flat, 1024)
    h_fc1_drop = layers.dropout(
        h_fc1, keep_prob=params["dropout"],
        is_training=(mode == ModeKeys.TRAIN))

    # readout layer
    y_conv = layers.fully_connected(h_fc1_drop, 10, activation_fn=None)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=y_))
    train_op = tf.contrib.layers.optimize_loss(
        loss=cross_entropy,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=params["learning_rate"],
        optimizer="Adam")

    predictions = tf.argmax(y_conv, 1)
    return predictions, cross_entropy, train_op
 def dnn_logits_fn():
   """Builds the logits from the input layer."""
   previous_layer = input_layer
   for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
     with variable_scope.variable_scope(
         "hiddenlayer_%d" % layer_id,
         values=(previous_layer,)) as hidden_layer_scope:
       net = layers.fully_connected(
           previous_layer,
           num_hidden_units,
           activation_fn=dnn_activation_fn,
           variables_collections=[dnn_parent_scope],
           scope=hidden_layer_scope)
       if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
         net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
     _add_hidden_layer_summary(net, hidden_layer_scope.name)
     previous_layer = net
   with variable_scope.variable_scope(
       "logits", values=(previous_layer,)) as logits_scope:
     dnn_logits = layers.fully_connected(
         previous_layer,
         head.logits_dimension,
         activation_fn=None,
         variables_collections=[dnn_parent_scope],
         scope=logits_scope)
   _add_hidden_layer_summary(dnn_logits, logits_scope.name)
   return dnn_logits
Example #8
 def define_feedforward_model(self):
     layer_list=[]
     with self.graph.as_default() as g:
         is_training_batch=tf.placeholder(tf.bool,shape=(),name="is_training_batch")
         bn_params={"is_training":is_training_batch,"decay":0.99,"updates_collections":None}
         g.add_to_collection("is_training_batch",is_training_batch)
         with tf.name_scope("input"):
             input_layer=tf.placeholder(dtype=tf.float32,shape=(None,self.n_in),name="input_layer")
             if self.dropout_rate!=0.0:
                print "Using dropout to avoid overfitting and the dropout rate is",self.dropout_rate
                is_training_drop=tf.placeholder(dtype=tf.bool,shape=(),name="is_training_drop")
                input_layer_drop=dropout(input_layer,self.dropout_rate,is_training=is_training_drop)
                layer_list.append(input_layer_drop)
                g.add_to_collection(name="is_training_drop",value=is_training_drop)
             else:
                layer_list.append(input_layer)
         g.add_to_collection("input_layer",layer_list[0])
         for i in xrange(len(self.hidden_layer_size)):
             with tf.name_scope("hidden_layer_"+str(i+1)):
               if self.dropout_rate!=0.0:
                   last_layer=layer_list[-1]
                   if self.hidden_layer_type[i]=="tanh":
                      new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,\
                                 normalizer_params=bn_params)
                   if self.hidden_layer_type[i]=="sigmoid":
                       new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.sigmoid,normalizer_fn=batch_norm,\
                                 normalizer_params=bn_params)
                   new_layer_drop=dropout(new_layer,self.dropout_rate,is_training=is_training_drop)
                   layer_list.append(new_layer_drop)
               else:
                   last_layer=layer_list[-1]
                   if self.hidden_layer_type[i]=="tanh":
                      new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,\
                                normalizer_params=bn_params)
                   if self.hidden_layer_type[i]=="sigmoid":
                      new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.sigmoid,normalizer_fn=batch_norm,\
                                normalizer_params=bn_params)
                   layer_list.append(new_layer)
         with tf.name_scope("output_layer"):
             if self.output_type=="linear":
                output_layer=fully_connected(layer_list[-1],self.n_out,activation_fn=None)
             if self.output_type=="tanh":
                output_layer=fully_connected(layer_list[-1],self.n_out,activation_fn=tf.nn.tanh)
             g.add_to_collection(name="output_layer",value=output_layer)
         with tf.name_scope("training_op"):
              if self.optimizer=="adam":
                 self.training_op=tf.train.AdamOptimizer()
  def build_model(self, features, feature_columns, is_training):
    """See base class."""
    self._feature_columns = feature_columns

    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=self._num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
        self._scope + "/input_from_feature_columns",
        values=features.values(),
        partitioner=input_layer_partitioner) as scope:
      net = layers.input_from_feature_columns(
          features,
          self._get_feature_columns(),
          weight_collections=[self._scope],
          trainable=self._trainable,
          scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=self._num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(self._hidden_units):
      with variable_scope.variable_scope(
          self._scope + "/hiddenlayer_%d" % layer_id,
          values=[net],
          partitioner=hidden_layer_partitioner) as scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=self._activation_fn,
            variables_collections=[self._scope],
            trainable=self._trainable,
            scope=scope)
        if self._dropout is not None and is_training:
          net = layers.dropout(
              net,
              keep_prob=(1.0 - self._dropout))
      self._add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
        self._scope + "/logits",
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      logits = layers.fully_connected(
          net,
          self._num_label_columns,
          activation_fn=None,
          variables_collections=[self._scope],
          trainable=self._trainable,
          scope=scope)
    self._add_hidden_layer_summary(logits, "logits")
    return logits
Example #10
def model():
    tf.set_random_seed(1)
    print("building model ...")
    with tf.variable_scope('train'):
        print("building model ...")
        X_pl = tf.placeholder(tf.float32, [None, num_features])
        print("X_pl", X_pl.get_shape())
        t_pl = tf.placeholder(tf.int32, [None,])
        print("t_pl", t_pl.get_shape())
        is_training_pl = tf.placeholder(tf.bool)
        X_bn = batch_norm(X_pl, is_training=is_training_pl)
        print("X_bn", X_bn.get_shape())
        # note: the first dense layer is fed X_pl directly, so X_bn above is unused
        l1 = fully_connected(X_pl, num_outputs=100, activation_fn=relu)#, normalizer_fn=batch_norm)
        print("l1", l1.get_shape())
        l1_drop = dropout(l1, is_training=is_training_pl)
        print("l1_drop", l1_drop.get_shape())
        l_out = fully_connected(l1_drop, num_outputs=num_classes, activation_fn=None)
        print("l_out", l_out.get_shape())
        l_out_softmax = tf.nn.softmax(l_out)
        tf.contrib.layers.summarize_variables()

    with tf.variable_scope('metrics'):
        loss = sparse_softmax_cross_entropy_with_logits(logits=l_out, labels=t_pl)
        print("loss", loss.get_shape())
        loss = tf.reduce_mean(loss)
        print("loss", loss.get_shape())
        tf.summary.scalar('train/loss', loss)
        argmax = tf.to_int32(tf.argmax(l_out, 1))
        print("argmax", argmax.get_shape())
        correct = tf.to_float(tf.equal(argmax, t_pl))
        print("correct,", correct.get_shape())
        accuracy = tf.reduce_mean(correct)
        print("accuracy", accuracy.get_shape())

    with tf.variable_scope('optimizer'):
        print("building optimizer ...")
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss)
        gradients, variables = zip(*grads_and_vars)
        clipped_gradients, global_norm = (
            tf.clip_by_global_norm(gradients, clip_norm))
        clipped_grads_and_vars = zip(clipped_gradients, variables)

        tf.summary.scalar('train/global_gradient_norm', global_norm)

        train_op = optimizer.apply_gradients(clipped_grads_and_vars, global_step=global_step)

    return X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy, train_op, global_step
Example #11
    def general_module_end_operations(self, tensor, dropout_on, strided_max_pool_on):
        """
        Common end of module operations.

        :param tensor: The tensor being processed.
        :type tensor: tf.Tensor
        :param dropout_on: Whether to include dropout or not.
        :type dropout_on: bool
        :param strided_max_pool_on: Whether to include a strided max pool at the end of the module.
        :type strided_max_pool_on: bool
        :return: The processed tensor.
        :rtype: tf.Tensor
        """
        if strided_max_pool_on:
            tensor = max_pool2d(tensor, kernel_size=3, stride=2, padding='VALID')
        if dropout_on:
            tensor = dropout(tensor, self.dropout_keep_probability_tensor)
        return tensor
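
A standalone, hedged sketch of the two operations the helper applies (placeholder names and shapes are assumptions): a 3x3 VALID max pool with stride 2 shrinks a 32x32 feature map to 15x15, and the keep probability is fed as a tensor so it can be set to 1.0 at evaluation time.

# Standalone sketch of the module-end operations above; names and shapes are assumptions.
import tensorflow as tf
from tensorflow.contrib.layers import dropout, max_pool2d

keep_prob = tf.placeholder(tf.float32, name="dropout_keep_probability")
feature_map = tf.placeholder(tf.float32, [None, 32, 32, 16])

pooled = max_pool2d(feature_map, kernel_size=3, stride=2, padding='VALID')  # -> [?, 15, 15, 16]
regularized = dropout(pooled, keep_prob)  # feed keep_prob=1.0 at eval time to disable dropout
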
Example #12
def conv_model(feature, target, mode):
  """2-layer convolution model."""
  # Convert the target to a one-hot tensor of shape (batch_size, 10) and
  # with a on-value of 1 for each one-hot vector of length 10.
  target = tf.one_hot(tf.cast(target, tf.int32), 10, 1, 0)

  # Reshape feature to 4d tensor with 2nd and 3rd dimensions being
  # image width and height final dimension being the number of color channels.
  feature = tf.reshape(feature, [-1, 28, 28, 1])

  # First conv layer will compute 32 features for each 5x5 patch
  with tf.variable_scope('conv_layer1'):
    h_conv1 = layers.convolution(feature, 32, kernel_size=[5, 5],
                                 activation_fn=tf.nn.relu)
    h_pool1 = max_pool_2x2(h_conv1)

  # Second conv layer will compute 64 features for each 5x5 patch.
  with tf.variable_scope('conv_layer2'):
    h_conv2 = layers.convolution(h_pool1, 64, kernel_size=[5, 5],
                                 activation_fn=tf.nn.relu)
    h_pool2 = max_pool_2x2(h_conv2)
    # reshape tensor into a batch of vectors
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])

  # Densely connected layer with 1024 neurons.
  h_fc1 = layers.dropout(
      layers.fully_connected(
          h_pool2_flat, 1024, activation_fn=tf.nn.relu), keep_prob=0.5,
      is_training=mode == tf.contrib.learn.ModeKeys.TRAIN)

  # Compute logits (1 per class) and compute loss.
  logits = layers.fully_connected(h_fc1, 10, activation_fn=None)
  loss = tf.contrib.losses.softmax_cross_entropy(logits, target)

  # Create a tensor for training op.
  train_op = layers.optimize_loss(
      loss, tf.contrib.framework.get_global_step(), optimizer='SGD',
      learning_rate=0.001)

  return tf.argmax(logits, 1), loss, train_op
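
A model_fn with this (predictions, loss, train_op) return shape was typically handed to tf.contrib.learn.Estimator. A hedged usage sketch (the MNIST loader shown is an assumption, and conv_model together with its max_pool_2x2 helper must be in scope):

# Hedged usage sketch; assumes conv_model and its max_pool_2x2 helper are defined/importable.
from tensorflow.contrib import learn

mnist = learn.datasets.load_dataset('mnist')
classifier = learn.Estimator(model_fn=conv_model)
classifier.fit(mnist.train.images, mnist.train.labels, batch_size=100, steps=1000)
predicted_classes = list(classifier.predict(mnist.test.images))
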
  def build_model(self, features, feature_columns, is_training):
    """See base class."""
    features = self._get_feature_dict(features)
    self._feature_columns = feature_columns

    net = layers.input_from_feature_columns(
        features,
        self._get_feature_columns(),
        weight_collections=[self._weight_collection_name])
    for layer_id, num_hidden_units in enumerate(self._hidden_units):
      with variable_scope.variable_op_scope(
          [net], "hiddenlayer_%d" % layer_id,
          partitioner=partitioned_variables.min_max_variable_partitioner(
              max_partitions=self._config.num_ps_replicas)) as scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=self._activation_fn,
            variables_collections=[self._weight_collection_name],
            scope=scope)
        if self._dropout is not None and is_training:
          net = layers.dropout(
              net,
              keep_prob=(1.0 - self._dropout))
      self._add_hidden_layer_summary(net, scope.name)
    with variable_scope.variable_op_scope(
        [net], "dnn_logits",
        partitioner=partitioned_variables.min_max_variable_partitioner(
            max_partitions=self._config.num_ps_replicas)) as scope:
      logits = layers.fully_connected(
          net,
          self._num_label_columns,
          activation_fn=None,
          variables_collections=[self._weight_collection_name],
          scope=scope)
    self._add_hidden_layer_summary(logits, "dnn_logits")
    return logits
 def _dnn_logits(self, features, is_training=False):
     net = layers.input_from_feature_columns(
         features, self._get_dnn_feature_columns(), weight_collections=[self._dnn_weight_collection]
     )
     for layer_id, num_hidden_units in enumerate(self._dnn_hidden_units):
         with variable_scope.variable_op_scope(
             [net],
             "hiddenlayer_%d" % layer_id,
             partitioner=partitioned_variables.min_max_variable_partitioner(
                 max_partitions=self._config.num_ps_replicas
             ),
         ) as scope:
             net = layers.fully_connected(
                 net,
                 num_hidden_units,
                 activation_fn=self._dnn_activation_fn,
                 variables_collections=[self._dnn_weight_collection],
                 scope=scope,
             )
             if self._dnn_dropout is not None and is_training:
                 net = layers.dropout(net, keep_prob=(1.0 - self._dnn_dropout))
         self._add_hidden_layer_summary(net, scope.name)
     with variable_scope.variable_op_scope(
         [net],
         "dnn_logit",
         partitioner=partitioned_variables.min_max_variable_partitioner(max_partitions=self._config.num_ps_replicas),
     ) as scope:
         logit = layers.fully_connected(
             net,
             self._target_column.num_label_columns,
             activation_fn=None,
             variables_collections=[self._dnn_weight_collection],
             scope=scope,
         )
     self._add_hidden_layer_summary(logit, "dnn_logit")
     return logit
    def __init__(self, max_seq_len, max_sent_len, num_classes, vocab_size,
                 embedding_size, max_grad_norm, dropout_keep_proba,
                 learning_rate):
        # Parameters
        self.learning_rate = learning_rate
        self.vocab_size = vocab_size
        self.num_classes = num_classes
        self.max_seq_len = max_seq_len
        self.embedding_size = embedding_size
        self.word_encoder_num_hidden = max_seq_len
        self.word_output_size = max_seq_len
        self.sentence_encoder_num_hidden = max_sent_len
        self.sentence_output_size = max_sent_len
        self.max_grad_norm = max_grad_norm
        self.dropout_keep_proba = dropout_keep_proba

        # tf graph input
        self.input_x = tf.placeholder(shape=[None, None, None],
                                      dtype=tf.int32,
                                      name="input_x")
        self.input_y = tf.placeholder(shape=[None, self.num_classes],
                                      dtype=tf.int32,
                                      name="input_y")
        self.word_lengths = tf.placeholder(shape=[None, None],
                                           dtype=tf.int32,
                                           name="word_lengths")
        self.sentence_lengths = tf.placeholder(shape=[
            None,
        ],
                                               dtype=tf.int32,
                                               name="sentence_lengths")
        self.is_training = tf.placeholder(dtype=tf.bool, name="is_training")

        # input_x dims
        (self.document_size, self.sentence_size,
         self.word_size) = tf.unstack(tf.shape(self.input_x))

        with tf.device("/gpu:0"), tf.name_scope("embedding_layer"):
            w = tf.Variable(
                tf.random_uniform([self.vocab_size, self.embedding_size], -1.0,
                                  1.0),
                dtype=tf.float32,
                name="w"
            )  # TODO check if this needs to be marked as untrainable
            self.input_x_embedded = tf.nn.embedding_lookup(w, self.input_x)

        # reshape input_x after embedding
        self.input_x_embedded = tf.reshape(self.input_x_embedded, [
            self.document_size * self.sentence_size, self.word_size,
            self.embedding_size
        ])
        self.input_x_embedded_lengths = tf.reshape(
            self.word_lengths, [self.document_size * self.sentence_size])

        with tf.variable_scope("word_level"):
            self.word_encoder_outputs = self.bidirectional_RNN(
                num_hidden=self.word_encoder_num_hidden,
                inputs=self.input_x_embedded)
            word_level_output = self.attention(
                inputs=self.word_encoder_outputs,
                output_size=self.word_output_size)

            with tf.variable_scope("dropout"):
                print('self.is_training: {}'.format(self.is_training))
                word_level_output = layers.dropout(
                    word_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training)

        # reshape word_level output
        self.sentence_encoder_inputs = tf.reshape(
            word_level_output,
            [self.document_size, self.sentence_size, self.word_output_size])

        with tf.variable_scope("sentence_level"):
            self.sentence_encoder_outputs = self.bidirectional_RNN(
                num_hidden=self.sentence_encoder_num_hidden,
                inputs=self.sentence_encoder_inputs)
            sentence_level_output = self.attention(
                inputs=self.sentence_encoder_outputs,
                output_size=self.sentence_output_size)
            with tf.variable_scope("dropout"):
                sentence_level_output = layers.dropout(
                    sentence_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training)

        # Final model prediction
        with tf.variable_scope("classifier_output"):
            self.logits = layers.fully_connected(
                sentence_level_output, self.num_classes,
                activation_fn=None)  # trainable=self.is_training)
            self.predictions = tf.argmax(self.logits,
                                         axis=1,
                                         name="predictions")

        # Calculate mean cross-entropy loss
        with tf.variable_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                labels=self.input_y, logits=self.logits)
            self.loss = tf.reduce_mean(losses)
            tf.summary.scalar("Loss", self.loss)

        # Accuracy
        with tf.variable_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, axis=1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   "float"),
                                           name="accuracy")
            tf.summary.scalar("Accuracy", self.accuracy)
def _dnn_tree_combined_model_fn(
    features, labels, mode, head, dnn_hidden_units,
    dnn_feature_columns, tree_learner_config, num_trees,
    tree_examples_per_layer,
    config=None, dnn_optimizer="Adagrad",
    dnn_activation_fn=nn.relu, dnn_dropout=None,
    dnn_input_layer_partitioner=None,
    dnn_input_layer_to_tree=True, dnn_steps_to_train=10000,
    tree_feature_columns=None,
    tree_center_bias=True):
  """DNN and GBDT combined model_fn.

  Args:
    features: `dict` of `Tensor` objects.
    labels: Labels used to train on.
    mode: Mode we are in. (TRAIN/EVAL/INFER)
    head: A `Head` instance.
    dnn_hidden_units: List of hidden units per layer.
    dnn_feature_columns: An iterable containing all the feature columns
      used by the model's DNN.
    tree_learner_config: A config for the tree learner.
    num_trees: Number of trees to grow model to after training DNN.
    tree_examples_per_layer: Number of examples to accumulate before
      growing the tree a layer. This value has a big impact on model
      quality and should be set equal to the number of examples in
      training dataset if possible. It can also be a function that computes
      the number of examples based on the depth of the layer that's
      being built.
    config: `RunConfig` of the estimator.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN. If `None`, will use the Adagrad
      optimizer with default learning rate of 0.001.
    dnn_activation_fn: Activation function applied to each layer of the DNN.
      If `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability to drop out a given
      unit in the DNN.
    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
      as a feature to the tree.
    dnn_steps_to_train: Number of steps to train dnn for before switching
      to gbdt.
    tree_feature_columns: An iterable containing all the feature columns
      used by the model's boosted trees. If dnn_input_layer_to_tree is
      set to True, these features are in addition to dnn_feature_columns.
    tree_center_bias: Whether a separate tree should be created for
      first fitting the bias.

  Returns:
    A `ModelFnOps` object.

  Raises:
    ValueError: if inputs are not valid.
  """
  if not isinstance(features, dict):
    raise ValueError("features should be a dictionary of `Tensor`s. "
                     "Given type: {}".format(type(features)))

  if not dnn_feature_columns:
    raise ValueError("dnn_feature_columns must be specified")

  # Build DNN Logits.
  dnn_parent_scope = "dnn"
  dnn_partitioner = dnn_input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=config.num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      dnn_parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=dnn_partitioner):

    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner) as input_layer_scope:
      input_layer = layers.input_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=dnn_feature_columns,
          weight_collections=[dnn_parent_scope],
          scope=input_layer_scope)
    previous_layer = input_layer
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(previous_layer,)) as hidden_layer_scope:
        net = layers.fully_connected(
            previous_layer,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=hidden_layer_scope)
        if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)
      previous_layer = net
    with variable_scope.variable_scope(
        "logits",
        values=(previous_layer,)) as logits_scope:
      dnn_logits = layers.fully_connected(
          previous_layer,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(dnn_logits, logits_scope.name)

    def _dnn_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=training_util.get_global_step(),
          learning_rate=_DNN_LEARNING_RATE,
          optimizer=_get_optimizer(dnn_optimizer),
          name=dnn_parent_scope,
          variables=ops.get_collection(
              ops.GraphKeys.TRAINABLE_VARIABLES,
              scope=dnn_parent_scope),
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

  # Build Tree Logits.
  global_step = training_util.get_global_step()
  with ops.device(global_step.device):
    ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=0,
        tree_ensemble_config="",  # Initialize an empty ensemble.
        name="ensemble_model")

  tree_features = features.copy()
  if dnn_input_layer_to_tree:
    tree_features["dnn_input_layer"] = input_layer
    tree_feature_columns.append(layers.real_valued_column("dnn_input_layer"))
  gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
      is_chief=config.is_chief,
      num_ps_replicas=config.num_ps_replicas,
      ensemble_handle=ensemble_handle,
      center_bias=tree_center_bias,
      examples_per_layer=tree_examples_per_layer,
      learner_config=tree_learner_config,
      feature_columns=tree_feature_columns,
      logits_dimension=head.logits_dimension,
      features=tree_features)

  with ops.name_scope("gbdt"):
    predictions_dict = gbdt_model.predict(mode)
    tree_logits = predictions_dict["predictions"]

    def _tree_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      update_op = gbdt_model.train(loss, predictions_dict, labels)
      with ops.control_dependencies(
          [update_op]), (ops.colocate_with(global_step)):
        update_op = state_ops.assign_add(global_step, 1).op
        return update_op

  tree_train_logits = dnn_logits + tree_logits

  def _no_train_op_fn(loss):
    """Returns a no-op."""
    del loss
    return control_flow_ops.no_op()

  model_fn_ops = head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_no_train_op_fn,
      logits=tree_train_logits)
  dnn_train_op = head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_dnn_train_op_fn,
      logits=dnn_logits).train_op
  tree_train_op = head.create_model_fn_ops(
      features=tree_features,
      mode=mode,
      labels=labels,
      train_op_fn=_tree_train_op_fn,
      logits=tree_train_logits).train_op

  if tree_center_bias:
    num_trees += 1
  finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()

  model_fn_ops.training_hooks.extend([
      trainer_hooks.SwitchTrainOp(
          dnn_train_op, dnn_steps_to_train, tree_train_op),
      trainer_hooks.StopAfterNTrees(
          num_trees, attempted_trees, finalized_trees)])

  return model_fn_ops
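
The combination above is additive at scoring time: an example's logits are dnn_logits + tree_logits, and training alternates between the DNN and tree train ops via SwitchTrainOp. A toy sketch of just the scoring side (shapes are illustrative; nothing here comes from the gbdt library itself):

# Toy sketch of the additive scoring used above (tree_train_logits = dnn_logits + tree_logits).
import tensorflow as tf

dnn_logits = tf.placeholder(tf.float32, [None, 3])
tree_logits = tf.placeholder(tf.float32, [None, 3])
combined_logits = dnn_logits + tree_logits
class_probabilities = tf.nn.softmax(combined_logits)
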
 def my_drop_out(output):
   return tf.where(self.is_training, tcl.dropout(output, keep_prob = keep_prob_, is_training=True), output)
Example #18
def _dnn_model_fn(features, labels, mode, params):
  """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `_Head` instance.
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use the Adagrad
          optimizer with a default learning rate of 0.05.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`.
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  head = params["head"]
  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or "Adagrad"
  activation_fn = params.get("activation_fn")
  dropout = params.get("dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = params.get("num_ps_replicas", 0)

  features = _get_feature_dict(features)
  parent_scope = "dnn"

  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      values=features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id,
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(
          net,
          num_hidden_units,
          activation_fn=activation_fn,
          variables_collections=[parent_scope],
          scope=scope)
      if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
        net = layers.dropout(
            net,
            keep_prob=(1.0 - dropout))
    _add_hidden_layer_summary(net, scope.name)

  with variable_scope.variable_scope(
      parent_scope + "/logits",
      values=[net],
      partitioner=hidden_layer_partitioner) as scope:
    logits = layers.fully_connected(
        net,
        head.logits_dimension,
        activation_fn=None,
        variables_collections=[parent_scope],
        scope=scope)
  _add_hidden_layer_summary(logits, scope.name)

  def _train_op_fn(loss):
    """Returns the op to optimize the loss."""
    return optimizers.optimize_loss(
        loss=loss,
        global_step=contrib_variables.get_global_step(),
        learning_rate=_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer),
        clip_gradients=gradient_clip_norm,
        name=parent_scope,
        # Empty summaries to prevent optimizers from logging the training_loss.
        summaries=[])

  return head.head_ops(features, labels, mode, _train_op_fn, logits)
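
A hedged sketch of the params dict the docstring above describes (values are illustrative; the head entry is left as a placeholder because its construction depends on the task and on tf.contrib.learn's head library):

# Illustrative params for _dnn_model_fn; replace the None head with a real _Head instance.
import tensorflow as tf
from tensorflow.contrib import layers

feature_columns = [layers.real_valued_column("x", dimension=784)]
params = {
    "head": None,                      # a _Head instance for the task (construction omitted)
    "hidden_units": [256, 128],        # one entry per hidden layer
    "feature_columns": feature_columns,
    "optimizer": "Adagrad",            # string, Optimizer object, or callable
    "activation_fn": tf.nn.relu,
    "dropout": 0.2,                    # probability of dropping a given coordinate
    "gradient_clip_norm": 5.0,
    "num_ps_replicas": 0,
}
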
Example #19
def main(unused_argv):
  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
  if FLAGS.download_only:
    sys.exit(0)


  # Sanity check on the number of workers and the worker index
  #if FLAGS.worker_index >= FLAGS.num_workers:
  # raise ValueError("Worker index %d exceeds number of workers %d " %
  #                   (FLAGS.worker_index, FLAGS.num_workers))

  # Sanity check on the number of parameter servers
  if FLAGS.num_parameter_servers <= 0:
    raise ValueError("Invalid num_parameter_servers value: %d" %
                     FLAGS.num_parameter_servers)
  # air
  ps_hosts = re.findall(r'[\w\.:]+', FLAGS.ps_hosts)
  worker_hosts = re.findall(r'[\w\.:]+', FLAGS.worker_hosts)
  server = tf.train.Server({"ps":ps_hosts,"worker":worker_hosts}, job_name = FLAGS.job_name, task_index = FLAGS.worker_index)
  
  print("Worker GRPC URL: %s" % server.target)
  print("Worker index = %d" % FLAGS.worker_index)
  print("Number of workers = %d" % FLAGS.num_workers)

  if FLAGS.job_name == "ps":
    server.join()
  
  # air
  else:
    is_chief = (FLAGS.worker_index == 0)

  if FLAGS.sync_replicas:
    if FLAGS.replicas_to_aggregate is None:
      replicas_to_aggregate = FLAGS.num_workers
    else:
      replicas_to_aggregate = FLAGS.replicas_to_aggregate

  # Construct device setter object
  device_setter = get_device_setter(FLAGS.num_parameter_servers,
                                    FLAGS.num_workers)

  # The device setter will automatically place Variables ops on separate
  # parameter servers (ps). The non-Variable ops will be placed on the workers.
  with tf.device(device_setter):
    global_step = tf.Variable(0, name="global_step", trainable=False)
    '''
    # Variables of the hidden layer
    hid_w = tf.Variable(
        tf.truncated_normal([IMAGE_PIXELS * IMAGE_PIXELS, FLAGS.hidden_units],
                            stddev=1.0 / IMAGE_PIXELS), name="hid_w")
    hid_b = tf.Variable(tf.zeros([FLAGS.hidden_units]), name="hid_b")

    # Variables of the softmax layer
    sm_w = tf.Variable(
        tf.truncated_normal([FLAGS.hidden_units, 10],
                            stddev=1.0 / math.sqrt(FLAGS.hidden_units)),
        name="sm_w")
    sm_b = tf.Variable(tf.zeros([10]), name="sm_b")
    '''
    #air
    '''
    W1 = tf.Variable(tf.truncated_normal([784,1024], stddev=0.01))
    b1 = tf.Variable(tf.zeros([1024]))
    W2 = tf.Variable(tf.truncated_normal([1024,1024], stddev=0.01))
    b2 = tf.Variable(tf.zeros([1024]))
    W3 = tf.Variable(tf.truncated_normal([1024,512], stddev=0.01))
    b3 = tf.Variable(tf.zeros([512]))
    W4 = tf.Variable(tf.truncated_normal([512,10], stddev=0.01))
    b4 = tf.Variable(tf.zeros([10])) '''
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, shape=[None, 784], name="x-input")
        # target 10 output classes
        y_ = tf.placeholder(tf.float32, shape=[None, 10], name="y-input")
        prob = tf.placeholder(tf.float32, name='keep_prob')
        x_image = tf.reshape(x, [-1,28,28,1])

    stack1_conv1 = layers.convolution2d(x_image,
                                            64,
                                            [3,3],
                                            weights_regularizer=layers.l2_regularizer(0.1),
                                            biases_regularizer=layers.l2_regularizer(0.1),
                                            scope='stack1_Conv1')
    stack1_conv2 = layers.convolution2d(stack1_conv1,
                                            64,
                                            [3,3],
                                            weights_regularizer=layers.l2_regularizer(0.1),
                                            biases_regularizer=layers.l2_regularizer(0.1),
                                            scope='stack1_Conv2')
    stack1_pool = layers.max_pool2d(stack1_conv2,
                                        [2,2],
                                        padding='SAME',
                                        scope='stack1_Pool')
    stack3_pool_flat = layers.flatten(stack1_pool, scope='stack3_pool_flat')
    fcl1 = layers.fully_connected(stack3_pool_flat, 
                                      512, 
                                      weights_regularizer=layers.l2_regularizer(0.1), 
                                      biases_regularizer=layers.l2_regularizer(0.1), 
                                      scope='FCL1')
    fcl1_d = layers.dropout(fcl1, keep_prob=prob, scope='dropout1')
    fcl2 = layers.fully_connected(fcl1_d, 
                                      128, 
                                      weights_regularizer=layers.l2_regularizer(0.1), 
                                      biases_regularizer=layers.l2_regularizer(0.1), 
                                      scope='FCL2')
    fcl2_d = layers.dropout(fcl2, keep_prob=prob, scope='dropout2')
    y, cross_entropy = skflow.models.logistic_regression(fcl2_d, y_, init_stddev=0.01)

    '''with tf.name_scope('Softmax'):
        fcl_softmax = layers.fully_connected(fcl2_d, 
                                      10, 
                                      weights_regularizer=layers.l2_regularizer(0.1), 
                                      biases_regularizer=layers.l2_regularizer(0.1), 
                                      scope='Softmax')
        y = tf.nn.softmax(fcl_softmax, name='y-output')
        cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), reduction_indices=[1]))'''

        

    with tf.name_scope('train'):
        start_l_rate = 0.001
        decay_step = 1000
        decay_rate = 0.5
        learning_rate = tf.train.exponential_decay(start_l_rate, global_step, decay_step, decay_rate, staircase=False)
        grad_op = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
        '''rep_op = tf.train.SyncReplicasOptimizer(grad_op, 
                                                    replicas_to_aggregate=len(workers),
                                                    replica_id=FLAGS.task_index, 
                                                    total_num_replicas=len(workers))'''
        train_op = tf.contrib.layers.optimize_loss(loss=cross_entropy, 
                                                       global_step=global_step, 
                                                       learning_rate=0.001, 
                                                       optimizer=grad_op, 
                                                       clip_gradients=1)
    #air

    # Ops: located on the worker specified with FLAGS.worker_index
    #x = tf.placeholder(tf.float32, [None, IMAGE_PIXELS * IMAGE_PIXELS])
    #y = tf.placeholder(tf.float32, [None, 10])
    #y_ = None

    '''
    hid_lin = tf.nn.xw_plus_b(x, hid_w, hid_b)
    hid = tf.nn.relu(hid_lin)

    y = tf.nn.softmax(tf.nn.xw_plus_b(hid, sm_w, sm_b))
    cross_entropy = -tf.reduce_sum(y_*tf.log(tf.clip_by_value(y, 1e-10, 1.0)))
    '''
    #air
    '''
    h1 = tf.nn.sigmoid(tf.matmul(x, W1) + b1)
    h1d = tf.nn.dropout(h1, 0.7)
    h2 = tf.nn.sigmoid(tf.matmul(h1d, W2) + b2)
    h2d = tf.nn.dropout(h2, 0.7)
    h3 = tf.nn.sigmoid(tf.matmul(h2d, W3) + b3)
    h3d = tf.nn.dropout(h3, 0.7)
    y_ = tf.nn.softmax(tf.matmul(h3d, W4) + b4)

    cost = -tf.reduce_sum(y*tf.log(tf.clip_by_value(y_, 1e-10, 1.0)))
    #air
    opt = tf.train.AdamOptimizer(FLAGS.learning_rate)'''
    '''if FLAGS.sync_replicas:
      opt = tf.train.SyncReplicasOptimizer(
          opt,
          replicas_to_aggregate=replicas_to_aggregate,
          total_num_replicas=FLAGS.num_workers,
          replica_id=FLAGS.worker_index,
          name="mnist_sync_replicas")'''

    '''train_step = opt.minimize(cost,
                              global_step=global_step)'''

    '''if FLAGS.sync_replicas and is_chief:
      # Initial token and chief queue runners required by the sync_replicas mode
      chief_queue_runner = opt.get_chief_queue_runner()
      init_tokens_op = opt.get_init_tokens_op()'''

#air
    correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#air

    init_op = tf.initialize_all_variables()
    #train_dir = tempfile.mkdtemp()
    sv = tf.train.Supervisor(is_chief=is_chief,
                             #logdir=train_dir,
                             init_op=init_op,
                             recovery_wait_secs=1,
                             global_step=global_step)

    sess_config = tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=False,
        device_filters=["/job:ps", "/job:worker/task:%d" % FLAGS.worker_index])

    # The chief worker (worker_index==0) session will prepare the session,
    # while the remaining workers will wait for the preparation to complete.
    if is_chief:
      print("Worker %d: Initializing session..." % FLAGS.worker_index)
    else:
      print("Worker %d: Waiting for session to be initialized..." %
            FLAGS.worker_index)

    '''sess = sv.prepare_or_wait_for_session(FLAGS.worker_grpc_url,
                                          config=sess_config)'''
    sess = sv.prepare_or_wait_for_session(server.target,
                                          config=sess_config)

    print("Worker %d: Session initialization complete." % FLAGS.worker_index)

    '''if FLAGS.sync_replicas and is_chief:
      # Chief worker will start the chief queue runner and call the init op
      print("Starting chief queue runner and running init_tokens_op")
      sv.start_queue_runners(sess, [chief_queue_runner])
      sess.run(init_tokens_op)'''

    # Perform training
    time_begin = time.time()
    print("Training begins @ %s" % time.ctime(time_begin))

    local_step = 1
    while True:
      # Training feed
      batch_xs, batch_ys = mnist.train.next_batch(FLAGS.batch_size)
      train_feed = {x: batch_xs,
                    y_: batch_ys,
                    prob: 0.8}

      _, step, loss = sess.run([train_op, global_step, cross_entropy], feed_dict=train_feed)

      now = time.time()
      if(local_step % 2 == 0):
          print("%s: Worker %d: training step %d done (global step: %d), loss: %.6f" %
              (time.ctime(now), FLAGS.worker_index, local_step, step+1, loss))

      if step+1 >= FLAGS.train_steps:
        break
      local_step += 1

    time_end = time.time()
    print("Training ends @ %s" % time.ctime(time_end))
    training_time = time_end - time_begin
    print("Training elapsed time: %f s" % training_time)


    acc_acu = 0.
    for i in xrange(int(10000/1000)):
        test_x, test_y = mnist.test.next_batch(1000)
        #print(test_x.shape)
        acc_batch = sess.run(accuracy, feed_dict={x: test_x, y_: test_y, prob: 1.0})
        print(acc_batch)
        acc_acu += acc_batch
    acc = acc_acu/10.0
    print ("test accuracy %g" % acc)
    sv.stop()
Example #20
 def define_sequence_model(self):
     seed = 12345
     np.random.seed(12345)
     layer_list = []
     with self.graph.as_default() as g:
         utt_length = tf.placeholder(tf.int32, shape=(None))
         g.add_to_collection(name="utt_length", value=utt_length)
         with tf.name_scope("input"):
             input_layer = tf.placeholder(dtype=tf.float32,
                                          shape=(None, None, self.n_in),
                                          name="input_layer")
             if self.dropout_rate != 0.0:
                 print "Using dropout to avoid overfitting and the dropout rate is", self.dropout_rate
                 is_training_drop = tf.placeholder(dtype=tf.bool,
                                                   shape=(),
                                                   name="is_training_drop")
                 input_layer_drop = dropout(input_layer,
                                            self.dropout_rate,
                                            is_training=is_training_drop)
                 layer_list.append(input_layer_drop)
                 g.add_to_collection(name="is_training_drop",
                                     value=is_training_drop)
             else:
                 layer_list.append(input_layer)
         g.add_to_collection("input_layer", layer_list[0])
         with tf.name_scope("hidden_layer"):
             basic_cell = []
             if "tanh" in self.hidden_layer_type:
                 is_training_batch = tf.placeholder(
                     dtype=tf.bool, shape=(), name="is_training_batch")
                 bn_params = {
                     "is_training": is_training_batch,
                     "decay": 0.99,
                     "updates_collections": None
                 }
                 g.add_to_collection("is_training_batch", is_training_batch)
             for i in xrange(len(self.hidden_layer_type)):
                 if self.dropout_rate != 0.0:
                     if self.hidden_layer_type[i] == "tanh":
                         new_layer = fully_connected(
                             layer_list[-1],
                             self.hidden_layer_size[i],
                             activation_fn=tf.nn.tanh,
                             normalizer_fn=batch_norm,
                             normalizer_params=bn_params)
                         new_layer_drop = dropout(
                             new_layer,
                             self.dropout_rate,
                             is_training=is_training_drop)
                         layer_list.append(new_layer_drop)
                     if self.hidden_layer_type[i] == "lstm":
                         basic_cell.append(
                             MyDropoutWrapper(BasicLSTMCell(
                                 num_units=self.hidden_layer_size[i]),
                                              self.dropout_rate,
                                              self.dropout_rate,
                                              is_training=is_training_drop))
                     if self.hidden_layer_type[i] == "gru":
                         basic_cell.append(
                             MyDropoutWrapper(GRUCell(
                                 num_units=self.hidden_layer_size[i]),
                                              self.dropout_rate,
                                              self.dropout_rate,
                                              is_training=is_training_drop))
                 else:
                     if self.hidden_layer_type[i] == "tanh":
                         new_layer = fully_connected(
                             layer_list[-1],
                             self.hidden_layer_size[i],
                             activation_fn=tf.nn.tanh,
                             normalizer_fn=batch_norm,
                             normalizer_params=bn_params)
                         layer_list.append(new_layer)
                     if self.hidden_layer_type[i] == "lstm":
                         basic_cell.append(
                             LayerNormBasicLSTMCell(
                                 num_units=self.hidden_layer_size[i]))
                     if self.hidden_layer_type[i] == "gru":
                         basic_cell.append(
                             LayerNormGRUCell(
                                 num_units=self.hidden_layer_size[i]))
             multi_cell = MultiRNNCell(basic_cell)
             rnn_outputs, rnn_states = tf.nn.dynamic_rnn(
                 multi_cell,
                 layer_list[-1],
                 dtype=tf.float32,
                 sequence_length=utt_length)
             layer_list.append(rnn_outputs)
         with tf.name_scope("output_layer"):
             if self.output_type == "linear":
                 output_layer = tf.layers.dense(rnn_outputs, self.n_out)
             #  stacked_rnn_outputs=tf.reshape(rnn_outputs,[-1,self.n_out])
             #  stacked_outputs=tf.layers.dense(stacked_rnn_outputs,self.n_out)
             #  output_layer=tf.reshape(stacked_outputs,[-1,utt_length,self.n_out])
             g.add_to_collection(name="output_layer", value=output_layer)
         with tf.name_scope("training_op"):
             if self.optimizer == "adam":
                 self.training_op = tf.train.AdamOptimizer()
Beispiel #21
0
    def define_feedforward_model_utt(self):
        """
            Feedforward model with an utterance-index embedding.
            The last dimension of the input is the utterance index.
            TODO: the embedding matrix size is fixed rather than fitted to the data.
        """
        layer_list = []
        with self.graph.as_default() as g:
            self.global_step = tf.Variable(0, trainable=False)
            self.is_training_batch = tf.placeholder(tf.bool, shape=(), name="is_training_batch")
            # bn_params={"is_training":is_training_batch,"decay":0.99,"updates_collections":None}
            # g.add_to_collection("is_training_batch", is_training_batch)

            with tf.name_scope("input"):
                # shape (N, 319)
                self.input_lin_layer = tf.placeholder(dtype=tf.float32, shape=(None, self.n_in), name="input_layer")
                # embedding shape (UTT, 10)
                self.utt_embeddings = tf.get_variable("utt-embeddings", [1000, 10], dtype=tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.5))
                # label (N, 1)
                self.utt_index_t = tf.placeholder(dtype=tf.int32, shape=(None, 1), name="utt_index")

                # embedding result (N, 1, 10)
                embedding_utt = tf.nn.embedding_lookup(self.utt_embeddings, self.utt_index_t)
                # concatenate embedding result and linguistic features, shape (N, 329)
                # shape (N, 10)
                embedding_utt = tf.squeeze(embedding_utt, axis=-2)
                self.input_layer = tf.concat([self.input_lin_layer, embedding_utt], 1)

                if self.dropout_rate != 0.0:
                    print("Using dropout to avoid overfitting and the dropout rate is", self.dropout_rate)
                    is_training_drop = tf.placeholder(dtype=tf.bool, shape=(), name="is_training_drop")
                    input_layer_drop = dropout(self.input_layer, self.dropout_rate, is_training=is_training_drop)
                    layer_list.append(input_layer_drop)
                    g.add_to_collection(name="is_training_drop", value=is_training_drop)
                else:
                    layer_list.append(self.input_layer)

            # hidden layer
            for i in range(len(self.hidden_layer_size)):
                with tf.name_scope("hidden_layer_" + str(i + 1)):
                    if self.dropout_rate != 0.0:
                        last_layer = layer_list[-1]
                        if self.hidden_layer_type[i] == "tanh":
                            new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=None)
                            new_layer = tf.contrib.layers.batch_norm(new_layer, is_training=self.is_training_batch)
                            new_layer = tf.nn.tanh(new_layer)
                        if self.hidden_layer_type[i] == "sigmoid":
                            new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=tf.nn.sigmoid)
                        if self.hidden_layer_type[i] == "relu":
                            new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=tf.nn.relu)
                        if self.hidden_layer_type[i] == "selu":
                            new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=tf.nn.selu)
                        new_layer_drop = dropout(new_layer, self.dropout_rate, is_training=is_training_drop)
                        layer_list.append(new_layer_drop)
                    else:
                        # pdb.set_trace()
                        last_layer = layer_list[-1]
                        if self.hidden_layer_type[i] == "tanh":
                            new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=None)
                            new_layer = tf.nn.tanh(new_layer)
                            tf.summary.histogram("%s th layer activation" % str(i), new_layer)
                        if self.hidden_layer_type[i] == "sigmoid":
                            new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=tf.nn.sigmoid)
                        if self.hidden_layer_type[i] == "relu":
                            new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=tf.nn.relu)
                        if self.hidden_layer_type[i] == "selu":
                            new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=tf.nn.selu)
                        layer_list.append(new_layer)

            with tf.name_scope("output_layer"):
                if self.output_type == "linear":
                    self.output_layer = fully_connected(layer_list[-1], self.n_out, activation_fn=None)
                if self.output_type == "tanh":
                    self.output_layer = fully_connected(layer_list[-1], self.n_out, activation_fn=tf.nn.tanh)
    def __init__(self,
                 feature_num,
                 class_num,
                 is_training,
                 step=1e-4,
                 size=64,
                 batch_size=100):
        self.weight_decay = 5.0
        self.bn_params = {
            # Decay for the moving averages.
            'decay': 0.999,
            'center': True,
            'scale': True,
            # epsilon to prevent 0s in variance.
            'epsilon': 0.001,
            # None to force the updates during train_op
            'updates_collections': None,
            'is_training': is_training
        }

        self.batch_size = batch_size
        self.feature_num = feature_num
        self.class_num = class_num

        self.X = tf.placeholder(tf.float32, [None, feature_num])
        self.y_ = tf.placeholder(tf.float32, [None, class_num])

        with tf.contrib.framework.arg_scope(
            [layers.convolution2d],
                kernel_size=3,
                stride=1,
                padding='SAME',
                activation_fn=tf.nn.relu,
                normalizer_fn=layers.batch_norm,
                #normalizer_params=self.bn_params,
                #weights_initializer=layers.variance_scaling_initializer(),
                weights_regularizer=layers.l2_regularizer(self.weight_decay)):
            self.X = tf.reshape(self.X, [-1, size, size, 3])
            self.keep_prob = tf.placeholder(tf.float32)

            net = layers.convolution2d(self.X, num_outputs=8)
            net = layers.max_pool2d(net, kernel_size=2)
            net = layers.relu(net, num_outputs=8)

            net = layers.convolution2d(net, num_outputs=16)
            net = layers.convolution2d(net, num_outputs=16)
            net = layers.max_pool2d(net, kernel_size=2)
            net = layers.relu(net, num_outputs=16)

            net = layers.convolution2d(net, num_outputs=32)
            net = layers.convolution2d(net, num_outputs=32)
            net = layers.max_pool2d(net, kernel_size=2)
            net = layers.dropout(net, keep_prob=self.keep_prob)
            net = layers.relu(net, num_outputs=32)

            net = layers.convolution2d(net, num_outputs=64)
            net = layers.convolution2d(net, num_outputs=64)
            net = layers.max_pool2d(net, kernel_size=2)
            net = layers.dropout(net, keep_prob=self.keep_prob)
            net = layers.relu(net, num_outputs=64)

            net = layers.flatten(net)  # flatten conv features to (batch, features)
            net = layers.fully_connected(net,
                                         num_outputs=64,
                                         activation_fn=tf.nn.relu)
            net = layers.dropout(net, keep_prob=self.keep_prob)

            net = layers.fully_connected(net, num_outputs=self.class_num)
            self.y = layers.softmax(net)

        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=self.y_))
        self.optimizer = tf.train.RMSPropOptimizer(step).minimize(self.loss)

        pred = tf.equal(tf.argmax(self.y, 1), tf.argmax(self.y_, 1))
        self.acc = tf.reduce_mean(tf.cast(pred, tf.float32))

        self.sess = tf.Session()
Beispiel #23
0
    def __init__(self, max_seq_len, max_sent_len, num_classes, vocab_size,
                 embedding_size, max_grad_norm, dropout_proba, learn_rate):
        # Params
        self.learning_rate = learn_rate
        self.vocab_size = vocab_size
        self.num_classes = num_classes
        self.max_seq_len = max_seq_len
        self.embedding_size = embedding_size
        self.word_encoder_num_hidden = max_seq_len
        self.word_output_size = max_seq_len
        self.sentence_encoder_num_hidden = max_sent_len
        self.sentence_output_size = max_sent_len
        self.max_grad_norm = max_grad_norm
        self.dropout_keep_proba = dropout_proba

        # Input
        self.input_x = tf.placeholder(shape=[None, None, None],
                                      dtype=tf.int32,
                                      name='input_x')
        self.input_y = tf.placeholder(shape=[None, self.num_classes],
                                      dtype=tf.int32,
                                      name='input_y')
        self.word_lengths = tf.placeholder(shape=[None, None],
                                           dtype=tf.int32,
                                           name='word_lengths')
        self.sentence_lengths = tf.placeholder(shape=[None],
                                               dtype=tf.int32,
                                               name='sentence_lengths')
        self.is_training = tf.placeholder(dtype=tf.bool, name='is_training')

        # Input_x dim
        self.document_size, self.sentence_size, self.word_size = tf.unstack(
            tf.shape(self.input_x))

        with tf.device('/gpu:0'), tf.name_scope('embedding_layer'):
            w = tf.Variable(tf.random_uniform(
                [self.vocab_size, self.embedding_size], -1., 1.),
                            dtype=tf.float32,
                            name='W')
            self.input_x_embedded = tf.nn.embedding_lookup(w, self.input_x)

        # reshape input_x after embedding
        self.input_x_embedded = tf.reshape(self.input_x_embedded, [
            self.document_size * self.sentence_size, self.word_size,
            self.embedding_size
        ])
        self.input_x_embedded_lengths = tf.reshape(
            self.word_lengths, [self.document_size * self.sentence_size])

        with tf.variable_scope("word_level"):
            self.word_encoder_outputs = self.bidirectional_RNN(
                num_hidden=self.word_encoder_num_hidden,
                inputs=self.input_x_embedded)
            word_level_output = self.attention(
                inputs=self.word_encoder_outputs,
                output_size=self.word_output_size)

            with tf.variable_scope('dropout'):
                print('self.is_training:{}'.format(self.is_training))
                word_level_output = layers.dropout(
                    word_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training)

        # reshape word level output
        self.sentence_encoder_inputs = tf.reshape(
            word_level_output,
            [self.document_size, self.sentence_size, self.word_output_size])

        with tf.variable_scope('sentence_level'):
            self.sentence_encoder_outputs = self.bidirectional_RNN(
                num_hidden=self.sentence_encoder_num_hidden,
                inputs=self.sentence_encoder_inputs)
            sentence_level_output = self.attention(
                inputs=self.sentence_encoder_outputs,
                output_size=self.sentence_output_size)
            with tf.variable_scope('dropout'):
                sentence_level_output = layers.dropout(
                    sentence_level_output,
                    keep_prob=self.dropout_keep_proba,
                    is_training=self.is_training)
        # Final model prediction
        with tf.variable_scope('classifier_output'):
            self.logits = layers.fully_connected(sentence_level_output,
                                                 self.num_classes,
                                                 activation_fn=None)
            self.predictions = tf.argmax(self.logits,
                                         axis=1,
                                         name='predictions')

        # Calculate mean cross-entropy loss
        with tf.variable_scope('loss'):
            losses = tf.nn.softmax_cross_entropy_with_logits(
                labels=self.input_y, logits=self.logits)
            self.loss = tf.reduce_mean(losses)
            tf.summary.scalar('Loss', self.loss)

        # Accuracy
        with tf.variable_scope('accuracy'):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_y, axis=1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions,
                                                   'float'),
                                           name='accuracy')
            tf.summary.scalar('Accuracy', self.accuracy)
Beispiel #24
0
 def define_feedforward_model(self):
     """
         the basic deep feedforward dnn model
     """
     layer_list=[]
     with self.graph.as_default() as g:
         #pdb.set_trace()
         self.global_step  = tf.Variable(0,trainable=False)
         g.add_to_collection(name='global_step',value=self.global_step)
         is_training_batch=tf.placeholder(tf.bool,shape=(),name="is_training_batch")
         # bn_params={"is_training":is_training_batch,"decay":0.99,"updates_collections":None}
         g.add_to_collection("is_training_batch",is_training_batch)
         with tf.name_scope("input"):
             input_layer=tf.placeholder(dtype=tf.float32,shape=(None,self.n_in),name="input_layer")
             if self.dropout_rate!=0.0:
                 print("Using dropout to avoid overfitting and the dropout rate is",self.dropout_rate)
                 is_training_drop=tf.placeholder(dtype=tf.bool,shape=(),name="is_training_drop")
                 input_layer_drop=dropout(input_layer,self.dropout_rate,is_training=is_training_drop)
                 layer_list.append(input_layer_drop)
                 g.add_to_collection(name="is_training_drop",value=is_training_drop)
             else:
                 layer_list.append(input_layer)
         g.add_to_collection("input_layer",layer_list[0])
        
         for i in range(len(self.hidden_layer_size)):
             with tf.name_scope("hidden_layer_"+str(i+1)):
                 if self.dropout_rate!=0.0:
                     last_layer=layer_list[-1]
                     if self.hidden_layer_type[i]=="tanh":
                         new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=None)
                         new_layer = tf.contrib.layers.batch_norm(new_layer,is_training=is_training_batch)
                         new_layer = tf.nn.tanh(new_layer)
                     if self.hidden_layer_type[i]=="sigmoid":
                         new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.sigmoid)
                     if self.hidden_layer_type[i]=="relu":
                         new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.relu)
                     if self.hidden_layer_type[i]=="selu":
                         new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.selu)
                     new_layer_drop=dropout(new_layer,self.dropout_rate,is_training=is_training_drop)
                     layer_list.append(new_layer_drop)
                 else:
                     # pdb.set_trace()
                     last_layer=layer_list[-1]
                     if self.hidden_layer_type[i]=="tanh":
                         new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=None)
                         # new_layer = tf.contrib.layers.batch_norm(new_layer, is_training=is_training_batch)
                         new_layer = tf.nn.tanh(new_layer)
                     if self.hidden_layer_type[i]=="sigmoid":
                         new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.sigmoid)
                     if self.hidden_layer_type[i]=="relu":
                         new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.relu)
                     if self.hidden_layer_type[i]=="selu":
                         new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.selu)
                     layer_list.append(new_layer)
         # pdb.set_trace()
         with tf.name_scope("output_layer"):
             if self.output_type=="linear":
                 output_layer=fully_connected(layer_list[-1],self.n_out,activation_fn=None)
             if self.output_type=="tanh":
                 output_layer=fully_connected(layer_list[-1],self.n_out,activation_fn=tf.nn.tanh)
             g.add_to_collection(name="output_layer",value=output_layer)
### DENOISING AUTOENCODERS
# force the AE to learn useful features by adding noise to the input
# noise is added to the inputs; the reconstruction loss is computed against the original, clean inputs
# v1: add Gaussian noise
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
X_noisy = X + tf.random_normal(tf.shape(X))
[...]
hidden1 = activation(tf.matmul(X_noisy, weights1) + biases1)
[...]
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
# v2: "dropout" (inputs randomly switched off)
from tensorflow.contrib.layers import dropout
keep_prob = 0.7
is_training = tf.placeholder_with_default(False, shape=(), name="is_training")
X = tf.placeholder(tf.float32, shape=[None, n_inputs])
X_drop = dropout(X, keep_prob, is_training=is_training)
[...]
hidden1 = activation(tf.matmul(X_drop, weights1) + biases1)
[...]
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
[...]
# -> during training, do not forget to set is_training to True
sess.run(training_op, feed_dict={X: X_batch, is_training: True})
# for testing it must be False; no need to set it explicitly since False is the default
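
# A minimal, self-contained sketch of the Gaussian-noise variant above
# (an illustration, not the source's exact code; layer sizes, noise level and
# learning rate are assumptions)
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected

n_inputs = 28 * 28      # e.g. flattened MNIST images
n_hidden = 300
noise_level = 1.0

X = tf.placeholder(tf.float32, shape=[None, n_inputs])
X_noisy = X + noise_level * tf.random_normal(tf.shape(X))

hidden = fully_connected(X_noisy, n_hidden)                      # ReLU by default
outputs = fully_connected(hidden, n_inputs, activation_fn=None)

# the reconstruction is compared against the clean inputs, not the noisy ones
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
training_op = tf.train.AdamOptimizer(0.01).minimize(reconstruction_loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    X_batch = np.random.rand(32, n_inputs).astype(np.float32)    # stand-in batch
    _, loss_val = sess.run([training_op, reconstruction_loss], feed_dict={X: X_batch})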

### SPARSE AUTOENCODERS
# add terms to the cost function, e.g. to limit the number of significantly active neurons
# 1) compute the actual sparsity = average activation of each neuron in the coding layer over the whole training batch
# 2) penalize neurons that are too active: add a sparsity loss to the cost function
# for the sparsity loss, the KL divergence works better than the MSE because it gives stronger gradients
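
# A minimal sketch of the sparsity penalty described above (layer sizes, the
# sparsity target and the weights are assumptions, not from the source). The
# coding layer uses a sigmoid activation so its mean activation lies in (0, 1).
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected

n_inputs = 28 * 28
n_hidden = 1000                 # oversized coding layer, kept sparse by the penalty
sparsity_target = 0.1
sparsity_weight = 0.2

X = tf.placeholder(tf.float32, shape=[None, n_inputs])
codings = fully_connected(X, n_hidden, activation_fn=tf.nn.sigmoid)
outputs = fully_connected(codings, n_inputs, activation_fn=None)

def kl_divergence(p, q):
    # KL(p || q) between two Bernoulli distributions with means p and q
    return p * tf.log(p / q) + (1 - p) * tf.log((1 - p) / (1 - q))

# 1) actual sparsity: average activation of each coding neuron over the batch
hidden_mean = tf.reduce_mean(codings, axis=0)
# 2) penalize neurons whose average activation drifts from the target
sparsity_loss = tf.reduce_sum(kl_divergence(sparsity_target, hidden_mean))
reconstruction_loss = tf.reduce_mean(tf.square(outputs - X))
loss = reconstruction_loss + sparsity_weight * sparsity_loss
training_op = tf.train.AdamOptimizer(0.01).minimize(loss)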
Beispiel #26
0
def _dnn_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `_Head` instance.
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use the Adagrad
          optimizer with a default learning rate of 0.05.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`. Note that a string containing the unqualified
          name of the op may also be provided, e.g., "relu", "tanh", or
          "sigmoid".
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_min_slice_size: Optional. The min slice size of input layer
          partitions. If not provided, will use the default of 64M.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  head = params["head"]
  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or "Adagrad"
  activation_fn = _get_activation_fn(params.get("activation_fn"))
  dropout = params.get("dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  input_layer_min_slice_size = (
      params.get("input_layer_min_slice_size") or 64 << 20)
  num_ps_replicas = config.num_ps_replicas if config else 0
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

  features = _get_feature_dict(features)
  parent_scope = "dnn"

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=input_layer_min_slice_size))
    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as input_layer_scope:
      if all([
          isinstance(fc, feature_column._FeatureColumn)  # pylint: disable=protected-access
          for fc in feature_columns
      ]):
        net = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope],
            scope=input_layer_scope)
      else:
        net = fc_core.input_layer(
            features=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope])

    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(net,)) as hidden_layer_scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=activation_fn,
            variables_collections=[parent_scope],
            scope=hidden_layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope(
        "logits",
        values=(net,)) as logits_scope:
      logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=training_util.get_global_step(),
          learning_rate=_LEARNING_RATE,
          optimizer=_get_optimizer(optimizer),
          gradient_multipliers=(
              dnn_linear_combined._extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, parent_scope,
                  input_layer_scope.name)),
          clip_gradients=gradient_clip_norm,
          name=parent_scope,
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
def build_larger_lenet5(X_train_shape,
                        n_outputs,
                        use_batch_norm=False,
                        use_dropout=False):
    lprint('Mimicking LeNet-5')
    print('X_train_shape', X_train_shape)
    X = tf.placeholder(tf.float32,
                       shape=(None, X_train_shape[1], X_train_shape[2],
                              X_train_shape[3]),
                       name='X')
    y = tf.placeholder(tf.int64, shape=(None), name='y')

    #fake_is_training = tf.placeholder(tf.bool, shape=(), name='is_training')
    last_output = X
    layers = []
    he_init = tf.contrib.layers.variance_scaling_initializer()
    norm_fn = None
    norm_params = None
    is_training = tf.placeholder(tf.bool, shape=(), name='is_training')
    keep_prob = 0.5
    lprint('Use Batch Normalization:', use_batch_norm)
    lprint('Use Dropout:', use_dropout, ', keep_prob:', keep_prob)
    if use_batch_norm:
        norm_fn = batch_norm
        norm_params = {
            'is_training': is_training,
            'decay': 0.99,
            'updates_collections': None
        }

    with tf.name_scope('cnn'):
        with tf.contrib.framework.arg_scope(
            [fully_connected, conv2d],
                activation_fn=tf.nn.relu,
                #normalizer_fn=norm_fn,
                #normalizer_params=norm_params,
                weights_initializer=he_init):
            C1 = conv2d(inputs=X,
                        num_outputs=64,
                        kernel_size=5,
                        stride=1,
                        padding='SAME',
                        normalizer_fn=norm_fn,
                        normalizer_params=norm_params)
            P1 = tf.nn.max_pool(C1,
                                ksize=[1, 3, 3, 1],
                                strides=[1, 2, 2, 1],
                                padding='SAME')
            C2 = conv2d(inputs=P1,
                        num_outputs=64,
                        kernel_size=5,
                        stride=1,
                        padding='SAME',
                        normalizer_fn=norm_fn,
                        normalizer_params=norm_params)
            P2 = tf.nn.max_pool(C2,
                                ksize=[1, 3, 3, 1],
                                strides=[1, 2, 2, 1],
                                padding='SAME')
            C3 = conv2d(inputs=P1,
                        num_outputs=128,
                        kernel_size=4,
                        stride=1,
                        padding='SAME',
                        normalizer_fn=norm_fn,
                        normalizer_params=norm_params)
            P3 = tf.nn.max_pool(C2,
                                ksize=[1, 2, 2, 1],
                                strides=[1, 2, 2, 1],
                                padding='SAME')
            lprint('Pool3 shape:', P3)
            pool_shape = P2.get_shape().as_list()
            #shaped = tf.reshape(last_pool, [-1, 10])
            reshape = tf.reshape(
                P3, [-1, pool_shape[1] * pool_shape[2] * pool_shape[3]])
            F1 = fully_connected(reshape, 2048)
            if use_dropout:
                hidden_drop = dropout(F1, keep_prob, is_training=is_training)
                last_output = hidden_drop
            else:
                last_output = F1
        logits = fully_connected(last_output,
                                 n_outputs,
                                 scope='outputs',
                                 activation_fn=None,
                                 weights_initializer=he_init)
    lprint(C1)
    lprint(last_output)
    lprint(logits)
    return X, y, logits, is_training
Beispiel #28
0
keep_prob_h2 = 0.9
keep_prob_h3 = 1

# Regularizer: L2
Ln_reg = tf.contrib.layers.l2_regularizer(lambda_ln)

# Per-class weights: class 0, class 1
class_weights = tf.constant([1.0, 2.04])

# Definition of network architecture a: batch norm
with tf.variable_scope("dnn"):
    with tf.contrib.framework.arg_scope([fully_connected],
                                        normalizer_fn=batch_norm,
                                        normalizer_params=bn_params,
                                        weights_regularizer=Ln_reg):
        X_drop = dropout(X, keep_prob_h1, is_training=phase)
        hidden1 = fully_connected(inputs=X_drop,
                                  num_outputs=n_hidden1,
                                  scope='hidden1')
        hidden1_drop = dropout(hidden1, keep_prob_h1, is_training=phase)
        hidden2 = fully_connected(inputs=hidden1_drop,
                                  num_outputs=n_hidden2,
                                  scope='hidden2')
        hidden2_drop = dropout(hidden2, keep_prob_h2, is_training=phase)
        hidden3 = fully_connected(inputs=hidden2_drop,
                                  num_outputs=n_hidden3,
                                  scope='hidden3')
        hidden3_drop = dropout(hidden3, keep_prob_h3, is_training=phase)
        logits = fully_connected(inputs=hidden3_drop,
                                 num_outputs=n_outputs,
                                 scope='outputs')
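
# The class_weights above are defined but not used in this excerpt. A common
# way to apply them (an assumption here, not shown in the source) is to scale
# each example's cross-entropy by the weight of its true class:
y = tf.placeholder(tf.int32, shape=[None], name="y")        # assumed integer class labels
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
example_weights = tf.gather(class_weights, y)                # weight of each example's class
loss = tf.reduce_mean(xentropy * example_weights)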
Beispiel #29
0
def build_model(x_pl, input_width, input_height, output_dim,
                batch_size):
    
    # make distributed representation of input image for localization network
    loc_l1 = pool(x_pl, kernel_size=[2, 2], scope="localization_l1")
    loc_l2 = conv(loc_l1, num_outputs=8, kernel_size=[5, 5], stride=[1, 1], padding="SAME", scope="localization_l2")
    loc_l3 = pool(loc_l2, kernel_size=[2, 2], scope="localization_l3")
    loc_l4 = conv(loc_l3, num_outputs=8, kernel_size=[5, 5], stride=[1, 1], padding="SAME", scope="localization_l4")
    loc_l4_flatten = flatten(loc_l4, scope="localization_l4-flatten")
    loc_l5 = dense(loc_l4_flatten, num_outputs=50, activation_fn=relu, scope="localization_l5")
    
    # set up weights for transformation (notice we always need 6 output neurons)
    with tf.name_scope("localization"):
        W_loc_out = tf.get_variable("localization_loc-out", [50, 6], initializer=tf.constant_initializer(0.0))
        initial = np.array([[0.45, 0, 0], [0, 0.45, 0]])
        initial = initial.astype('float32')
        initial = initial.flatten()
        b_loc_out = tf.Variable(initial_value=initial, name='b-loc-out')
        loc_out = tf.matmul(loc_l5, W_loc_out) + b_loc_out


    # spatial transformer
    l_trans1 = transformer(x_pl, loc_out, out_size=(OUT_HEIGHT, OUT_WIDTH))
    l_trans1.set_shape([None, OUT_HEIGHT, OUT_WIDTH, NUM_COL_CHANNELS])

    print("Transformer network output shape:", l_trans1.get_shape())

    # classification network
    # Block 1
    conv_l11 = conv(l_trans1, num_outputs=64, kernel_size=[3, 3])
    conv_l12 = conv(conv_l11, num_outputs=64, kernel_size=[3, 3])
    pool_l13 = pool(conv_l12, kernel_size=[2, 2], stride=[2,2])
    # Block 2
    #conv_l21 = conv(pool_l13, num_outputs=128, kernel_size=[3, 3])
    #conv_l22 = conv(conv_l21, num_outputs=128, kernel_size=[3, 3])
    #pool_l23 = pool(conv_l22, kernel_size=[2, 2], stride=[2,2])
    # Block 3
    #conv_l31 = conv(pool_l13, num_outputs=128, kernel_size=[3, 3])
    conv_l32 = conv(pool_l13, num_outputs=64, kernel_size=[3, 3])
    conv_l33 = conv(conv_l32, num_outputs=64, kernel_size=[3, 3])
    pool_l34 = pool(conv_l33, kernel_size=[2, 2], stride=[2,2])
    # Block 4
    conv_l41 = conv(pool_l34, num_outputs=128, kernel_size=[3, 3])
    conv_l42 = conv(conv_l41, num_outputs=128, kernel_size=[3, 3])
    conv_l43 = conv(conv_l42, num_outputs=128, kernel_size=[3, 3])
    pool_l44 = pool(conv_l43, kernel_size=[2, 2], stride=[2,2])
    # Block 5
    conv_l51 = conv(pool_l44, num_outputs=256, kernel_size=[3, 3])
    conv_l52 = conv(conv_l51, num_outputs=256, kernel_size=[3, 3])
    conv_l53 = conv(conv_l52, num_outputs=256, kernel_size=[3, 3])
    pool_l54 = pool(conv_l53, kernel_size=[2, 2], stride=[2,2])
     
    dense_flatten = flatten(pool_l54)
    dense_1 = dense(dense_flatten, num_outputs=2048, activation_fn=relu)
    dropout_l4 = dropout(dense_1)
    dense_2 = dense(dropout_l4, num_outputs=2048, activation_fn=relu)
    dropout_l5 = dropout(dense_2)
    logit = dense(dropout_l5, num_outputs=output_dim, activation_fn=None)
    l_out = tf.nn.softmax(logit)

    return l_out, logit, l_trans1, loc_out
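
# Hedged illustration (not part of the source): the transformer reads the 6
# localization outputs as a 2x3 affine matrix applied to the normalized
# sampling-grid coordinates, which is why the localization head always needs
# exactly 6 output units. With W_loc_out initialized to zero, loc_out starts
# at the bias [[0.45, 0, 0], [0, 0.45, 0]], i.e. a ~0.45x zoom into the centre.
import numpy as np

theta = np.array([[0.45, 0.0, 0.0],
                  [0.0, 0.45, 0.0]], dtype=np.float32)
corner = np.array([1.0, 1.0, 1.0], dtype=np.float32)  # top-right corner, homogeneous coords
print(theta.dot(corner))                               # -> [0.45 0.45]: the grid samples the central region
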
def model(inputs,
          dropout_keep_prob=0.5,
          num_classes=43,
          is_training=True,
          scope=''):
    """
    This is the implementation of the current model: 
        2DConvolution
        Inception module 
        Inception module
        Max Pooling
        Fully Connected Layer, Relu, Xavier initialization
        Dropout
        Fully Connected Layer, Relu, Xavier initialization
        Dropout
        Fully Connected Layer, Relu, Xavier initialization
        Dropout
        Softmax

    `inputs` Input data 
    `dropout_keep_prob` : Float, The probability that each element is kept.
    `num_classes` : Integer, Number of data classes.
    `is_training` : Bool, indicating whether or not the model is in training mode.
                    If so, dropout is applied and values are scaled. Otherwise, the inputs are returned unchanged.
    `scope` : String, scope of the current model
    """

    end_points = {}
    with tf.name_scope(scope, "model", [inputs]):
        with ops.arg_scope([layers.max_pool2d], padding='SAME'):
            end_points['conv0'] = layers.conv2d(inputs,
                                                64, [7, 7],
                                                stride=2,
                                                scope='conv0')

            with tf.variable_scope("inception_3a"):
                end_points['inception_3a'] = get_inception_layer(
                    end_points['conv0'], 64, 96, 128, 16, 32, 32)

            with tf.variable_scope("inception_3b"):
                end_points['inception_3b'] = get_inception_layer(
                    end_points['inception_3a'], 128, 128, 192, 32, 96, 64)

            end_points['pool2'] = layers.max_pool2d(end_points['inception_3b'],
                                                    [3, 3],
                                                    scope='pool2')

            #print(end_points['pool2'].shape)

            end_points['reshape'] = tf.reshape(end_points['pool2'],
                                               [-1, 8 * 8 * 480])

            end_points['fully_2'] = layers.fully_connected(
                end_points['reshape'],
                200,
                activation_fn=tf.nn.relu,
                scope='fully_2')
            end_points['dropout1'] = layers.dropout(end_points['fully_2'],
                                                    dropout_keep_prob,
                                                    is_training=is_training)

            end_points['fully_3'] = layers.fully_connected(
                end_points['dropout1'],
                400,
                activation_fn=tf.nn.relu,
                scope='fully_3')
            end_points['dropout2'] = layers.dropout(end_points['fully_3'],
                                                    dropout_keep_prob,
                                                    is_training=is_training)

            end_points['fully_4'] = layers.fully_connected(
                end_points['dropout2'],
                300,
                activation_fn=tf.nn.relu,
                scope='fully_4')
            end_points['dropout3'] = layers.dropout(end_points['fully_4'],
                                                    dropout_keep_prob,
                                                    is_training=is_training)

            end_points['logits'] = layers.fully_connected(
                end_points['dropout3'],
                num_classes,
                activation_fn=None,
                scope='logits')
            end_points['predictions'] = tf.nn.softmax(end_points['logits'],
                                                      name='predictions')

    return end_points['logits'], end_points
Beispiel #31
0
def build_model(embedding, options):
    """ Builds the entire computational graph used for training
    """
    # description string: #words x #samples
    with tf.device('/gpu:0'):
        with tf.variable_scope('input'):
            x = tf.placeholder(tf.int64, shape=[None, None, None],
                               name='x')  # 3D tensor: batch, news, sequence (before embedding), e.g. 40*32*13
            x_mask = tf.placeholder(tf.float32, shape=[None, None], name='x_mask')  # mask batch,news
            y = tf.placeholder(tf.int64, shape=[None], name='y')
            x_d1 = tf.placeholder(tf.int64, shape=[None, None, None, None], name='x_d1')
            x_d1_mask = tf.placeholder(tf.float32, shape=[None, None, None], name='x_d1_mask')
            x_d2 = tf.placeholder(tf.int64, shape=[None, None, None, None], name='x_d2')
            x_d2_mask = tf.placeholder(tf.float32, shape=[None, None, None], name='x_d2_mask')
            final_mask = tf.placeholder(tf.float32, shape=[None, None], name='final_mask')
            tech = tf.placeholder(tf.float32, shape=[None, None, 7], name='technical')  # shape is (batch, time, unit)
            # final_mask shape is day*n_samples
            ##TODO important    
            keep_prob = tf.placeholder(tf.float32, [], name='keep_prob')
            is_training = tf.placeholder(tf.bool, name='is_training')
            ##TODO important
            sequence_mask = tf.cast(tf.abs(tf.sign(x)), tf.float32)  # 3D
            sequence_d1_mask = tf.cast(tf.abs(tf.sign(x_d1)), tf.float32)  # 4D
            sequence_d2_mask = tf.cast(tf.abs(tf.sign(x_d2)), tf.float32)  # 4D
            n_timesteps = tf.shape(x)[0]  # time steps
            n_samples = tf.shape(x)[1]  # n samples
            # # word embedding
            ##TODO word embedding
            emb = tf.nn.embedding_lookup(embedding, x)
            emb_d1 = tf.nn.embedding_lookup(embedding, x_d1)
            emb_d2 = tf.nn.embedding_lookup(embedding, x_d2)
            '''if options['use_dropout']:
            emb = layers.dropout(emb, keep_prob=keep_prob, is_training=is_training)
            '''
    with tf.device('/gpu:0'):
        # fed into the input of BILSTM from the official document
        ##TODO word level LSTM
        with tf.name_scope('news'):
            att = news(emb, sequence_mask, x_mask, keep_prob, is_training, options)
        ##TODO att shape 32*600 att_day1 32*3*600 att_day2 32*4*600
        with tf.name_scope('day1'):
            att_day1 = days(emb_d1, sequence_d1_mask, x_d1_mask, keep_prob, is_training, options)
        # TODO bilstm layers
        # Change the time step and batch
    with tf.device('/gpu:0'):
        with tf.name_scope('day2'):
            att_day2 = days(emb_d2, sequence_d2_mask, x_d2_mask, keep_prob, is_training, options)
        with tf.name_scope('final'):
            final = tf.concat([att_day2, att_day1, tf.expand_dims(att, 1)], 1)
            '''if options['use_dropout']:
                final = layers.dropout(final, keep_prob=keep_prob, is_training=is_training)
            '''
            # final shape is 8*32*600
            if options['last_layer'] == 'LSTM':
                final = bilstm_filter(final, final_mask, keep_prob, prefix='day_lstm', dim=100,
                                    is_training=is_training)  # output shape: batch,time_step,2*lstm_unit(concate) 32*7*600
                #tech_ind = lstm_filter(tech, tf.ones(shape=[tf.shape(tech)[0],tf.shape(tech)[1]]), keep_prob, prefix='tech_lstm', dim=50,
                #                    is_training=is_training)
                ##TODO day level attention
                att_final = attention_v2(tf.concat(final, 2), final_mask, name='day_attention', keep=keep_prob,r=4,
                                is_training=is_training)  # already masked after attention
                ##TODO take day lstm average
                # att_final = tf.reduce_mean(tf.concat(final,2),1)
                # tech_att = tf.reduce_mean(tf.concat(tech_ind,2),1)
                ##TODO take the lasts
                #tech_att=tech_ind[:,-1,:]
                #att_final = tf.concat([att_final,tech_att],axis=1)
                logit = tf.layers.dense(att_final, 100, activation=tf.nn.tanh, use_bias=True,
                                        kernel_initializer=layers.xavier_initializer(uniform=True, seed=None,
                                                                                    dtype=tf.float32),
                                        name='ff', reuse=tf.AUTO_REUSE)
                # logit = tf.layers.batch_normalization(logit, training=is_training)
                # logit=tf.nn.tanh(logit)

                '''
                # logit1 = tf.reduce_sum(tf.concat(final,2) * tf.expand_dims(final_mask,-1),0) / tf.expand_dims(tf.reduce_sum(final_mask,0),1)
                # logit2 = tf.reduce_max(ctx3 * tf.expand_dims(x1_mask,2),0)
                '''
            if options['last_layer'] == 'CNN':
                att_ctx = tf.concat([att_day1, tf.expand_dims(att, 1)], 1)
                xavier = layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32)
                conv1 = tf.layers.conv1d(att_ctx, filters=options['CNN_filter'],
                                        kernel_size=options['CNN_kernel'], padding='same', strides=1,
                                        activation=tf.nn.relu, kernel_initializer=xavier, name='conv1')
                conv2 = tf.layers.conv1d(final, filters=options['CNN_filter'],
                                        kernel_size=options['CNN_kernel'], padding='same',
                                        strides=1, activation=tf.nn.relu,
                                        kernel_initializer=xavier,
                                        name='conv2')

                pool1 = tf.layers.max_pooling1d(conv1, pool_size=2, strides=2, padding='same',
                                                data_format='channels_last', name='pool1')
                pool2 = tf.layers.max_pooling1d(conv2, pool_size=2, strides=2, padding='same',
                                                data_format='channels_last', name='pool2')
                d1size = math.ceil(options['delay1'] / 2) * options['CNN_filter']
                d2size = math.ceil(options['delay2'] / 2) * options['CNN_filter']
                pool1_flat = tf.reshape(pool1, [-1, d1size])
                pool2_flat = tf.reshape(pool2, [-1, d2size])
                cnn_final = tf.concat([att, pool1_flat, pool2_flat], -1)
                logit = tf.layers.dense(cnn_final, 300, activation=tf.nn.tanh, use_bias=True,
                                        kernel_initializer=layers.xavier_initializer(uniform=True, seed=None,
                                                                                    dtype=tf.float32),
                                        name='ff', reuse=tf.AUTO_REUSE)
                # logit = tf.layers.batch_normalization(logit, training=is_training)
                # logit=tf.nn.tanh(logit)

            if options['use_dropout']:
                logit = layers.dropout(logit, keep_prob=keep_prob, is_training=is_training,seed=None)
            pred = tf.layers.dense(logit, 2, activation=None, use_bias=True,
                                kernel_initializer=layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32),
                                name='fout', reuse=tf.AUTO_REUSE)
            logger.info('Building f_cost...')
            # todo not same
            labels = tf.one_hot(y, depth=2, axis=1)
            # labels = y
            preds = tf.nn.softmax(pred, 1,name='softmax')
            # preds = tf.nn.sigmoid(pred)
            # pred=tf.reshape(pred,[-1])
            cost = tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=labels)
            # cost = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,logits=pred),1)
            # cost = -tf.reduce_sum((tf.cast(labels, tf.float32) * tf.log(preds + 1e-8)),axis=1)
            #cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred, labels=y)
        logger.info('Done')
        '''
        logit1 = tf.reduce_sum(ctx1 * tf.expand_dims(x_mask, 2), 0) / tf.expand_dims(tf.reduce_sum(x_mask, 0), 1)
        logit2 = tf.reduce_max(ctx1 * tf.expand_dims(x_mask, 2), 0)
        logit = tf.concat([logit1, logit2], 1)
        '''

        with tf.variable_scope('logging'):
            tf.summary.scalar('current_cost', tf.reduce_mean(cost))
            tf.summary.histogram('predicted_value', preds)
            summary = tf.summary.merge_all()

    return is_training, cost, x, x_mask, y, n_timesteps, preds, summary
Beispiel #32
0
def _dnn_classifier_model_fn(features, targets, mode, params):
    """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`.
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
    hidden_units = params["hidden_units"]
    feature_columns = params["feature_columns"]
    n_classes = params["n_classes"]
    weight_column_name = params["weight_column_name"]
    optimizer = params["optimizer"]
    activation_fn = params["activation_fn"]
    dropout = params["dropout"]
    gradient_clip_norm = params["gradient_clip_norm"]
    enable_centered_bias = params["enable_centered_bias"]
    num_ps_replicas = params["num_ps_replicas"]

    features = _get_feature_dict(features)
    parent_scope = "dnn"
    num_label_columns = 1 if n_classes == 2 else n_classes
    if n_classes == 2:
        loss_fn = loss_ops.sigmoid_cross_entropy
    else:
        loss_fn = loss_ops.sparse_softmax_cross_entropy

    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas, min_slice_size=64 << 20))
    with variable_scope.variable_scope(
            parent_scope + "/input_from_feature_columns",
            values=features.values(),
            partitioner=input_layer_partitioner) as scope:
        net = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope],
            scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(hidden_units):
        with variable_scope.variable_scope(
                parent_scope + "/hiddenlayer_%d" % layer_id,
                values=[net],
                partitioner=hidden_layer_partitioner) as scope:
            net = layers.fully_connected(net,
                                         num_hidden_units,
                                         activation_fn=activation_fn,
                                         variables_collections=[parent_scope],
                                         scope=scope)
            if dropout is not None and mode == estimator.ModeKeys.TRAIN:
                net = layers.dropout(net, keep_prob=(1.0 - dropout))
        _add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
            parent_scope + "/logits",
            values=[net],
            partitioner=hidden_layer_partitioner) as scope:
        logits = layers.fully_connected(net,
                                        num_label_columns,
                                        activation_fn=None,
                                        variables_collections=[parent_scope],
                                        scope=scope)
    _add_hidden_layer_summary(logits, scope.name)

    if enable_centered_bias:
        logits = nn.bias_add(logits, _centered_bias(num_label_columns))

    if mode == estimator.ModeKeys.TRAIN:
        loss = loss_fn(logits,
                       targets,
                       weight=_get_weight_tensor(features, weight_column_name))

        train_ops = [
            optimizers.optimize_loss(
                loss=loss,
                global_step=contrib_variables.get_global_step(),
                learning_rate=_LEARNING_RATE,
                optimizer=_get_optimizer(optimizer),
                clip_gradients=gradient_clip_norm,
                name=parent_scope)
        ]
        if enable_centered_bias:
            train_ops.append(
                _centered_bias_step(targets, loss_fn, num_label_columns))

        return None, loss, control_flow_ops.group(*train_ops)

    elif mode == estimator.ModeKeys.EVAL:
        predictions = _predictions(logits=logits, n_classes=n_classes)

        weight = _get_weight_tensor(features, weight_column_name)
        training_loss = loss_fn(logits, targets, weight=weight)
        loss = _rescale_eval_loss(training_loss, weight)

        return predictions, loss, []

    else:  # mode == estimator.ModeKeys.INFER:
        predictions = _predictions(logits=logits, n_classes=n_classes)

        return predictions, None, []
bn_params = {
    'is_training': train_mode,
    'decay': 0.9,
    'updates_collections': None
}

# We can keep the code short by using 'arg_scope' to avoid duplicating arguments:
# the same function is called repeatedly with shared defaults
with arg_scope([fully_connected],
               activation_fn=tf.nn.relu,
               weights_initializer=xavier_init,
               biases_initializer=None,
               normalizer_fn=batch_norm,
               normalizer_params=bn_params):
    hidden_layer1 = fully_connected(X, hidden_output_size, scope="h1")
    h1_drop = dropout(hidden_layer1, keep_prob, is_training=train_mode)
    hidden_layer2 = fully_connected(h1_drop, hidden_output_size, scope="h2")
    h2_drop = dropout(hidden_layer2, keep_prob, is_training=train_mode)
    hidden_layer3 = fully_connected(h2_drop, hidden_output_size, scope="h3")
    h3_drop = dropout(hidden_layer3, keep_prob, is_training=train_mode)
    hidden_layer4 = fully_connected(h3_drop, hidden_output_size, scope="h4")
    h4_drop = dropout(hidden_layer4, keep_prob, is_training=train_mode)
    hypothesis = fully_connected(h4_drop,
                                 final_output_size,
                                 activation_fn=None,
                                 scope="hypothesis")

# define cost/loss & optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
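
# Hedged usage sketch: the accuracy op and the feeding code below are
# assumptions, not part of the source; train_mode is assumed to be a tf.bool
# placeholder so batch norm and dropout are active only while training.
correct = tf.equal(tf.argmax(hypothesis, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # one assumed training step; x_batch / y_batch would come from the data pipeline
    sess.run(optimizer, feed_dict={X: x_batch, Y: y_batch, train_mode: True})
    # evaluation: batch norm uses its moving averages and dropout is disabled
    print(sess.run(accuracy, feed_dict={X: x_test, Y: y_test, train_mode: False}))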
Beispiel #34
0
                                     weights_regularizer=layers.l2_regularizer(1.0),
                                     biases_regularizer=layers.l2_regularizer(1.0),
                                     scope='stack3_Conv3')
 stack3_pool = layers.max_pool2d(stack3_conv3, 
                                 [2,2],
                                 padding='SAME',
                                 scope='stack3_Pool')'''
 stack3_pool_flat = layers.flatten(stack1_pool,
                                   scope='stack3_pool_flat')
 fcl1 = layers.fully_connected(
     stack3_pool_flat,
     512,
     weights_regularizer=layers.l2_regularizer(0.1),
     biases_regularizer=layers.l2_regularizer(0.1),
     scope='FCL1')
 fcl1_d = layers.dropout(fcl1, keep_prob=0.5, scope='dropout1')
 fcl2 = layers.fully_connected(
     fcl1_d,
     128,
     weights_regularizer=layers.l2_regularizer(0.1),
     biases_regularizer=layers.l2_regularizer(0.1),
     scope='FCL2')
 fcl2_d = layers.dropout(fcl2, keep_prob=0.5, scope='dropout2')
 y, cross_entropy = skflow.models.logistic_regression(fcl2_d,
                                                      y_,
                                                      init_stddev=0.01)
 '''train_op = tf.contrib.layers.optimize_loss(loss=cross_entropy, 
                                                global_step=global_step, 
                                                learning_rate=0.001, 
                                                optimizer='Adam', 
                                                clip_gradients=1, 
def _dnn_tree_combined_model_fn(
    features, labels, mode, head, dnn_hidden_units,
    dnn_feature_columns, tree_learner_config, num_trees,
    tree_examples_per_layer,
    config=None, dnn_optimizer="Adagrad",
    dnn_activation_fn=nn.relu, dnn_dropout=None,
    dnn_input_layer_partitioner=None,
    dnn_input_layer_to_tree=True, dnn_steps_to_train=10000,
    tree_feature_columns=None,
    tree_center_bias=True):
  """DNN and GBDT combined model_fn.

  Args:
    features: `dict` of `Tensor` objects.
    labels: Labels used to train on.
    mode: Mode we are in. (TRAIN/EVAL/INFER)
    head: A `Head` instance.
    dnn_hidden_units: List of hidden units per layer.
    dnn_feature_columns: An iterable containing all the feature columns
      used by the model's DNN.
    tree_learner_config: A config for the tree learner.
    num_trees: Number of trees to grow model to after training DNN.
    tree_examples_per_layer: Number of examples to accumulate before
      growing the tree a layer. This value has a big impact on model
      quality and should be set equal to the number of examples in
      training dataset if possible. It can also be a function that computes
      the number of examples based on the depth of the layer that's
      being built.
    config: `RunConfig` of the estimator.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN. If `None`, will use the Adagrad
      optimizer with default learning rate of 0.001.
    dnn_activation_fn: Activation function applied to each layer of the DNN.
      If `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability to drop out a given
      unit in the DNN.
    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
      as a feature to the tree.
    dnn_steps_to_train: Number of steps to train dnn for before switching
      to gbdt.
    tree_feature_columns: An iterable containing all the feature columns
      used by the model's boosted trees. If dnn_input_layer_to_tree is
      set to True, these features are in addition to dnn_feature_columns.
    tree_center_bias: Whether a separate tree should be created for
      first fitting the bias.

  Returns:
    A `ModelFnOps` object.
  Raises:
    ValueError: if inputs are not valid.
  """
  if not isinstance(features, dict):
    raise ValueError("features should be a dictionary of `Tensor`s. "
                     "Given type: {}".format(type(features)))

  if not dnn_feature_columns:
    raise ValueError("dnn_feature_columns must be specified")

  # Build DNN Logits.
  dnn_parent_scope = "dnn"
  dnn_partitioner = dnn_input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=config.num_ps_replicas,
          min_slice_size=64 << 20))

  with variable_scope.variable_scope(
      dnn_parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=dnn_partitioner):

    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner) as input_layer_scope:
      input_layer = layers.input_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=dnn_feature_columns,
          weight_collections=[dnn_parent_scope],
          scope=input_layer_scope)
    previous_layer = input_layer
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(previous_layer,)) as hidden_layer_scope:
        net = layers.fully_connected(
            previous_layer,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=hidden_layer_scope)
        if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)
      previous_layer = net
    with variable_scope.variable_scope(
        "logits",
        values=(previous_layer,)) as logits_scope:
      dnn_logits = layers.fully_connected(
          previous_layer,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(dnn_logits, logits_scope.name)

    def _dnn_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=training_util.get_global_step(),
          learning_rate=_DNN_LEARNING_RATE,
          optimizer=_get_optimizer(dnn_optimizer),
          name=dnn_parent_scope,
          variables=ops.get_collection(
              ops.GraphKeys.TRAINABLE_VARIABLES,
              scope=dnn_parent_scope),
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

  # Build Tree Logits.
  global_step = training_util.get_global_step()
  with ops.device(global_step.device):
    ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=0,
        tree_ensemble_config="",  # Initialize an empty ensemble.
        name="ensemble_model")

  tree_features = features.copy()
  if dnn_input_layer_to_tree:
    tree_features["dnn_input_layer"] = input_layer
    tree_feature_columns.append(layers.real_valued_column("dnn_input_layer"))
  gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
      is_chief=config.is_chief,
      num_ps_replicas=config.num_ps_replicas,
      ensemble_handle=ensemble_handle,
      center_bias=tree_center_bias,
      examples_per_layer=tree_examples_per_layer,
      learner_config=tree_learner_config,
      feature_columns=tree_feature_columns,
      logits_dimension=head.logits_dimension,
      features=tree_features)

  with ops.name_scope("gbdt"):
    predictions_dict = gbdt_model.predict(mode)
    tree_logits = predictions_dict["predictions"]

    def _tree_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      update_op = gbdt_model.train(loss, predictions_dict, labels)
      with ops.control_dependencies(
          [update_op]), (ops.colocate_with(global_step)):
        update_op = state_ops.assign_add(global_step, 1).op
        return update_op

  tree_train_logits = dnn_logits + tree_logits

  def _no_train_op_fn(loss):
    """Returns a no-op."""
    del loss
    return control_flow_ops.no_op()

  model_fn_ops = head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_no_train_op_fn,
      logits=tree_train_logits)
  dnn_train_op = head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_dnn_train_op_fn,
      logits=dnn_logits).train_op
  tree_train_op = head.create_model_fn_ops(
      features=tree_features,
      mode=mode,
      labels=labels,
      train_op_fn=_tree_train_op_fn,
      logits=tree_train_logits).train_op

  if tree_center_bias:
    num_trees += 1
  finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()

  model_fn_ops.training_hooks.extend([
      trainer_hooks.SwitchTrainOp(
          dnn_train_op, dnn_steps_to_train, tree_train_op),
      trainer_hooks.StopAfterNTrees(
          num_trees, attempted_trees, finalized_trees)])

  return model_fn_ops
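# A hypothetical wrapper showing how this model_fn could be handed to a
# tf.contrib.learn.Estimator. my_head, my_dnn_feature_columns and
# my_learner_config stand for objects built elsewhere (a Head, the DNN feature
# columns and a boosted-trees learner config); they are not defined above.
import tensorflow as tf

run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

def _combined_model_fn(features, labels, mode):
    return _dnn_tree_combined_model_fn(
        features=features,
        labels=labels,
        mode=mode,
        head=my_head,
        dnn_hidden_units=[128, 64],
        dnn_feature_columns=my_dnn_feature_columns,
        tree_learner_config=my_learner_config,
        num_trees=100,
        tree_examples_per_layer=1000,
        config=run_config,
        tree_feature_columns=[],  # tree-only columns; the DNN input layer is appended automatically
        dnn_steps_to_train=10000)

estimator = tf.contrib.learn.Estimator(
    model_fn=_combined_model_fn,
    model_dir="/tmp/dnn_tree_combined",  # hypothetical directory
    config=run_config)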
Example #36
0
    def __init__(self,
                 num_classes=20,
                 pretrained_embed=None,
                 embedding_size=100,
                 hidden_size=64,
                 dropout_keep_proba=0.8,
                 max_word_num=200,
                 train_embed=True):

        self.num_classes = int(num_classes)
        self.embedding_size = int(embedding_size)
        self.pretrained_embed = pretrained_embed  # [vocab_size, embedding_size]
        self.hidden_size = int(hidden_size)
        self.dropout_keep_proba = dropout_keep_proba
        self.max_word_num = int(max_word_num)
        self.train_embed = train_embed

        with tf.variable_scope('placeholder'):
            self.input_x = tf.placeholder(dtype=tf.int32,
                                          shape=[None, self.max_word_num],
                                          name='input_x_rnn')
            if self.num_classes > 0:
                self.input_y = tf.placeholder(dtype=tf.float32,
                                              shape=[None, self.num_classes],
                                              name='input_y_label')
            else:
                self.input_y = tf.placeholder(dtype=tf.float32,
                                              shape=[
                                                  None,
                                              ],
                                              name='input_y_label')
            self.is_training = tf.placeholder(dtype=tf.bool,
                                              name='is_training')

        with tf.variable_scope("word_embedding"):
            word_embedding_valid = tf.Variable(
                initial_value=self.pretrained_embed,
                trainable=self.train_embed,
                dtype=tf.float32)
            word_embedding_pad = tf.constant(value=0,
                                             dtype=tf.float32,
                                             shape=[1, self.embedding_size])
            self.word_embedding_mat = tf.concat(
                [word_embedding_pad, word_embedding_valid], axis=0)
            #shape: [batch_size, max_word_num, embedding_size]
            self.embedded_input = tf.nn.embedding_lookup(
                self.word_embedding_mat, self.input_x)

        with tf.variable_scope("doc2vec"):
            # doc_encoded: [batch_size, max_word_num, hidden_size*2]
            doc_encoded = self.BidirectionalGRUEncoder(self.embedded_input,
                                                       self.hidden_size,
                                                       name='bi-gru')
            print("bi-GRU out shape: ", doc_encoded.shape)
            # doc_vec: [batch_size, hidden_size*2]
            doc_vec, self.weights = self.AttentionLayer(doc_encoded,
                                                        self.hidden_size,
                                                        name='attention')
            print("attention out shape: ", doc_vec.shape)
            doc_vec_dropped = layers.dropout(doc_vec,
                                             keep_prob=self.dropout_keep_proba,
                                             is_training=self.is_training)
            if self.num_classes > 0:
                out = layers.fully_connected(inputs=doc_vec_dropped,
                                             num_outputs=self.num_classes,
                                             activation_fn=None)
            else:
                out = layers.fully_connected(inputs=doc_vec_dropped,
                                             num_outputs=1,
                                             activation_fn=None)
            print("logit shape: ", out.shape)

        if self.num_classes > 0:
            with tf.variable_scope('cross_entro_loss'):
                # cross-entropy loss
                self.cross_entro = tf.losses.softmax_cross_entropy(
                    onehot_labels=self.input_y,
                    logits=out,
                    reduction=tf.losses.Reduction.MEAN)
        else:
            with tf.variable_scope('mse_loss'):
                # mse loss
                self.mse = tf.losses.mean_squared_error(
                    labels=self.input_y,
                    predictions=tf.squeeze(out),
                    reduction=tf.losses.Reduction.MEAN)

        self.predict = tf.argmax(out, axis=1, name='predict')

        if self.num_classes > 0:
            with tf.variable_scope('accuracy'):
                self.label = tf.argmax(self.input_y, axis=1, name='label')
                self.acc = tf.reduce_mean(
                    tf.cast(tf.equal(self.predict, self.label), tf.float32))
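# A hypothetical usage sketch for the class defined above. Its name is not shown
# in the snippet, so DocAttentionClassifier is a made-up stand-in; likewise
# embedding_matrix, batch_x and batch_y are assumed to exist (a [vocab, 100]
# embedding array, [batch, 200] word ids and [batch, 20] one-hot labels).
import tensorflow as tf

model = DocAttentionClassifier(num_classes=20,
                               pretrained_embed=embedding_matrix,
                               embedding_size=100,
                               hidden_size=64,
                               max_word_num=200)
train_op = tf.train.AdamOptimizer(1e-3).minimize(model.cross_entro)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, loss, acc = sess.run([train_op, model.cross_entro, model.acc],
                            feed_dict={model.input_x: batch_x,
                                       model.input_y: batch_y,
                                       model.is_training: True})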
def _dnn_linear_combined_model_fn(features, labels, mode, params):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    `estimator.ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
      are empty at the same time.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer")
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer")
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn")
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = params["num_ps_replicas"]

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
        dnn_parent_scope + "/input_from_feature_columns",
        values=features.values(),
        partitioner=input_layer_partitioner) as scope:
      net = layers.input_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=dnn_feature_columns,
          weight_collections=[dnn_parent_scope],
          scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          dnn_parent_scope + "/hiddenlayer_%d" % layer_id,
          values=[net],
          partitioner=hidden_layer_partitioner) as scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=scope)
        if dnn_dropout is not None and mode == estimator.ModeKeys.TRAIN:
          net = layers.dropout(
              net,
              keep_prob=(1.0 - dnn_dropout))
      # TODO(b/31209633): Consider adding summary before dropout.
      _add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
        dnn_parent_scope + "/logits",
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      dnn_logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=scope)
    _add_hidden_layer_summary(dnn_logits, scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=features.values(),
        partitioner=linear_partitioner) as scope:
      if joint_linear_weights:
        linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)
      else:
        linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=_get_optimizer(dnn_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=_get_optimizer(linear_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))

    return control_flow_ops.group(*train_ops)

  return head.head_ops(
      features, labels, mode, _make_training_op, logits=logits)
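# A hypothetical params dict for the wide-and-deep model_fn above, wired into a
# tf.contrib.learn.Estimator. my_head, my_linear_columns and my_dnn_columns are
# placeholders for objects built elsewhere.
import tensorflow as tf

params = {
    "head": my_head,
    "linear_feature_columns": my_linear_columns,
    "linear_optimizer": "Ftrl",
    "joint_linear_weights": False,
    "dnn_feature_columns": my_dnn_columns,
    "dnn_optimizer": "Adagrad",
    "dnn_hidden_units": [128, 64],
    "dnn_activation_fn": tf.nn.relu,
    "dnn_dropout": 0.5,          # probability of dropping a DNN coordinate
    "gradient_clip_norm": None,
    "num_ps_replicas": 0,
}

estimator = tf.contrib.learn.Estimator(
    model_fn=_dnn_linear_combined_model_fn,
    model_dir="/tmp/wide_and_deep",  # hypothetical directory
    params=params)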
Example #38
0
def build_model(embedding, options):
    """ Builds the entire computational graph used for training
    """
    # description string: #words x #samples
    with tf.device('/gpu:0'):
        with tf.variable_scope('input'):
            x = tf.placeholder(
                tf.int64, shape=[None, None, None], name='x'
            )  # 3D vector batch,N and instances(before embedding)40*32*13
            x_mask = tf.placeholder(tf.float32,
                                    shape=[None, None],
                                    name='x_mask')  # mask batch,N
            y = tf.placeholder(tf.int64, shape=[None], name='y')  #group actual
            tech = tf.placeholder(tf.float32,
                                  shape=[None, None, 7],
                                  name='technical')  #shape is batch time unit
            ##TODO important
            keep_prob = tf.placeholder(tf.float32, [], name='keep_prob')
            is_training = tf.placeholder(tf.bool, name='is_training')
            #alpha_balance = tf.placeholder(tf.float32,[],name = 'alpha_balance')
            ##TODO important
            sequence_mask = tf.cast(tf.abs(tf.sign(x)), tf.float32)  # 3D
            n_timesteps = tf.shape(x)[0]  # time steps
            ##TODO word embedding
            emb = tf.nn.embedding_lookup(embedding, x)

    with tf.device('/gpu:0'):
        # fed into the BiLSTM input (following the official documentation)
        with tf.name_scope('sentence_enc'):
            batch = tf.shape(emb)[0]  #32
            N = tf.shape(emb)[1]  #40 N instances in a group
            word = tf.shape(emb)[2]  #13
            ##TODO make instances prediction through attention encoding and MLP
            with tf.variable_scope(name_or_scope='sentence_enc',
                                   reuse=tf.AUTO_REUSE):
                word_level_inputs = tf.reshape(
                    emb, [batch * N, word, options['dim_word']])
                word_level_mask = tf.reshape(sequence_mask, [batch * N, word])
                ##TODO word level LSTM
                word_encoder_out = bilstm_filter(
                    word_level_inputs,
                    word_level_mask,
                    keep_prob,
                    prefix='sequence_encode',
                    dim=options['dim'],
                    is_training=is_training
                )  # output shape: batch*news,sequence,2*lstm_units(32*40)*12*600
                word_encoder_out = tf.concat(
                    word_encoder_out, 2) * tf.expand_dims(word_level_mask, -1)
                ################################### TODO word-attention
                word_level_output = attention_v2(word_encoder_out,
                                                 word_level_mask,
                                                 name='word_attention',
                                                 keep=keep_prob,
                                                 r=10,
                                                 is_training=is_training)

                if options['use_dropout']:
                    word_level_output = layers.dropout(word_level_output,
                                                       keep_prob=keep_prob,
                                                       is_training=is_training,
                                                       seed=None)
                #32*N,D

                att = tf.reshape(word_level_output,
                                 [batch, N, 2 * options['dim']])
                ##TODO att shape 32*40*200
        with tf.name_scope('instance_prediction'):
            temp = tf.layers.dense(
                word_level_output,
                150,
                activation=tf.nn.tanh,
                use_bias=True,
                kernel_initializer=layers.xavier_initializer(uniform=True,
                                                             seed=None,
                                                             dtype=tf.float32),
                name='inst_temp',
                reuse=tf.AUTO_REUSE)
            if options['use_dropout']:
                temp = layers.dropout(temp,
                                      keep_prob=keep_prob,
                                      is_training=is_training,
                                      seed=None)

            pred_sig_ = tf.layers.dense(
                temp,
                1,
                activation=None,
                use_bias=True,
                kernel_initializer=layers.xavier_initializer(uniform=True,
                                                             seed=None,
                                                             dtype=tf.float32),
                name='inst_pred',
                reuse=tf.AUTO_REUSE)
            inst_pred = tf.nn.tanh(pred_sig_)  #32*N,1 NOT 32,N,1, float32
            ## changed tf.sigmoid to tf.relu
            L = tf.reshape(inst_pred, [batch, N, 1])
            L_input = L * tf.expand_dims(x_mask, -1)  # mask before attention
            #coef = tf.concat([L for i in range(2*options['dim'])], 2)
            coef = tf.concat([L_input for i in range(2 * options['dim'])], 2)

        with tf.name_scope('Group_prediction'):
            bag_repre = tf.multiply(coef, att)
            bag_repre = tf.reduce_mean(bag_repre, axis=1)  #32,200
            tech_ind = lstm_filter(
                tech,
                tf.ones(shape=[tf.shape(tech)[0],
                               tf.shape(tech)[1]]),
                keep_prob,
                prefix='tech_lstm',
                dim=50,
                is_training=is_training)  #32,N,50
            #TODO take day lstm average
            tech_att = tf.reduce_mean(tf.concat(tech_ind, 2), 1)  #32,50
            bag_repre = tf.concat([bag_repre, tech_att], axis=1)  #32,250
            ##TODO take the lasts
            #tech_att=tech_ind[:,-1,:]
            logit = tf.layers.dense(
                bag_repre,
                300,
                activation=tf.nn.tanh,
                use_bias=True,
                kernel_initializer=layers.xavier_initializer(uniform=True,
                                                             seed=None,
                                                             dtype=tf.float32),
                name='group_mil',
                reuse=tf.AUTO_REUSE)

            ##TODO new cost
            logger.info('Building f_cost...')
            """x_simil = Euclidean_distance(att)  #32,N,N   有placeholder
            l_diff = instance_diff(L) #32,N,N   有placeholder
            simil_cost = tf.reduce_sum(tf.multiply(x_simil,l_diff),[1,2])/tf.cast(N*N,tf.float32)  #32,
            group_cost = tf.cast(tf.square(y-group_pred),tf.float32)  #32
            # cost is cast from int64 to float32
            total_cost = simil_cost + alpha_balance * group_cost  #[32,1]
            cost = tf.reshape(total_cost,(1,-1))  #1,32"""
            group_ = tf.layers.dense(
                logit,
                2,
                activation=None,
                use_bias=True,
                kernel_initializer=layers.xavier_initializer(uniform=True,
                                                             seed=None,
                                                             dtype=tf.float32),
                name='fout',
                reuse=tf.AUTO_REUSE)  #32,2
            labels = tf.one_hot(y, depth=2, axis=1)  #32,2
            group_pred = tf.nn.softmax(group_, 1, name='softmax')  #32,2
            cost = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=group_, labels=labels)  # 1,32
            """pred = tf.layers.dense(logit, 2, activation=None, use_bias=True,
                                kernel_initializer=layers.xavier_initializer(uniform=True, seed=None, dtype=tf.float32),
                                name='fout', reuse=tf.AUTO_REUSE)#32,2
            labels = tf.one_hot(y, depth=2, axis=1)#32,2
            preds = tf.nn.softmax(pred, 1,name='softmax')  #32,2
            cost = tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=labels)  #1,32"""

        logger.info('Done')

        with tf.variable_scope('logging'):
            tf.summary.scalar('current_cost', tf.reduce_mean(cost))
            tf.summary.histogram('predicted_value', group_pred)
            summary = tf.summary.merge_all()

    return is_training, cost, x, x_mask, y, n_timesteps, group_pred, summary
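# A hypothetical training step for the graph returned above. build_model also
# creates 'keep_prob' and 'technical' placeholders that it does not return, so
# they are looked up by name here (assuming the 'input' scope name is unique).
# embedding, options and the batch_* arrays are assumed inputs.
import tensorflow as tf

is_training, cost, x, x_mask, y, n_timesteps, group_pred, summary = \
    build_model(embedding, options)
train_op = tf.train.AdamOptimizer(1e-3).minimize(tf.reduce_mean(cost))

graph = tf.get_default_graph()
keep_prob = graph.get_tensor_by_name('input/keep_prob:0')
tech = graph.get_tensor_by_name('input/technical:0')

with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    sess.run(tf.global_variables_initializer())
    _, batch_cost = sess.run([train_op, cost],
                             feed_dict={x: batch_x, x_mask: batch_x_mask,
                                        y: batch_y, tech: batch_tech,
                                        keep_prob: 0.5, is_training: True})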
def get_conv_model(features, labels, mode, params):
    parent_scope = "cnn"  # TODO Need to have two: one for expand, one for conquer

    # features = _get_feature_dict(features)
    head = params.get("head")
    feature_columns = params.get("feature_columns")
    activation_fn = params.get("activation_fn")
    dropout = params.get("dropout")
    learning_rate = params.get("learning_rate")
    optimizer = params.get("optimizer")

    # with variable_scope.variable_scope(
    #                 parent_scope + "/input_from_feature_columns",
    #         values=features.values()) as scope:
    #     net = layers.input_from_feature_columns(
    #         columns_to_tensors=features,
    #         feature_columns=feature_columns,
    #         weight_collections=[parent_scope],
    #         scope=scope)

    with variable_scope.variable_scope(
                    parent_scope + "/convlayer_1",
            values=[features]) as scope:
        net = layers.conv2d(features,
                            num_outputs=32,
                            kernel_size=3,
                            variables_collections=[parent_scope],
                            scope=scope)
        net = layers.max_pool2d(net, 2,
                                stride=1,
                                padding='SAME')

    with variable_scope.variable_scope(
                    parent_scope + "/convlayer_2",
            values=[features]) as scope:
        net = layers.conv2d(features,
                            num_outputs=64,
                            kernel_size=5,
                            padding='VALID',
                            variables_collections=[parent_scope],
                            scope=scope)
        # net = layers.max_pool2d(net, 1,
        #                         stride=1,
        #                         padding='SAME')
    #
    # with variable_scope.variable_scope(
    #                 parent_scope + "/max_pool_1",
    #         values=[net]) as scope:

    shape = net.get_shape()
    net = tf.reshape(net, [-1, shape[3].value], name="reshape_1")

    hidden_units = [256, 128]
    for layer_id, num_hidden_units in enumerate(hidden_units):
        with variable_scope.variable_scope(
                        parent_scope + "/hiddenlayer_%d" % layer_id,
                values=[net]) as scope:
            net = layers.fully_connected(
                net,
                num_hidden_units,
                activation_fn=activation_fn,
                variables_collections=[parent_scope],
                scope=scope)
            if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
                net = layers.dropout(
                    net,
                    keep_prob=(1.0 - dropout))

    with variable_scope.variable_scope(
                    parent_scope + "/logits",
            values=[net]) as scope:
        logits = layers.fully_connected(
            net,
            head.logits_dimension,
            activation_fn=None,
            variables_collections=[parent_scope],
            scope=scope)

    def _train_op_fn(loss):
        """Returns the op to optimize the loss."""
        return optimizers.optimize_loss(
            loss=loss,
            global_step=contrib_variables.get_global_step(),
            learning_rate=learning_rate,
            optimizer=optimizer,
            name=parent_scope,
            # Empty summaries to prevent optimizers from logging the training_loss.
            summaries=[])

    return head.head_ops(features, labels, mode, _train_op_fn, logits)
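# A hypothetical wiring of get_conv_model into tf.contrib.learn. my_head stands
# for a Head built elsewhere; 'features' is expected to be an image batch of
# shape [batch, height, width, channels] since it is fed straight into conv2d.
import tensorflow as tf

params = {
    "head": my_head,
    "feature_columns": None,   # fetched but unused by the current body
    "activation_fn": tf.nn.relu,
    "dropout": 0.5,
    "learning_rate": 1e-3,
    "optimizer": "Adam",
}

estimator = tf.contrib.learn.Estimator(model_fn=get_conv_model, params=params)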
def _dnn_tree_combined_model_fn(
    features,
    labels,
    mode,
    head,
    dnn_hidden_units,
    dnn_feature_columns,
    tree_learner_config,
    num_trees,
    tree_examples_per_layer,
    config=None,
    dnn_optimizer="Adagrad",
    dnn_activation_fn=nn.relu,
    dnn_dropout=None,
    dnn_input_layer_partitioner=None,
    dnn_input_layer_to_tree=True,
    dnn_steps_to_train=10000,
    predict_with_tree_only=False,
    tree_feature_columns=None,
    tree_center_bias=False,
    dnn_to_tree_distillation_param=None,
    use_core_versions=False,
    output_type=model.ModelBuilderOutputType.MODEL_FN_OPS):
  """DNN and GBDT combined model_fn.

  Args:
    features: `dict` of `Tensor` objects.
    labels: Labels used to train on.
    mode: Mode we are in. (TRAIN/EVAL/INFER)
    head: A `Head` instance.
    dnn_hidden_units: List of hidden units per layer.
    dnn_feature_columns: An iterable containing all the feature columns
      used by the model's DNN.
    tree_learner_config: A config for the tree learner.
    num_trees: Number of trees to grow model to after training DNN.
    tree_examples_per_layer: Number of examples to accumulate before
      growing the tree a layer. This value has a big impact on model
      quality and should be set equal to the number of examples in
      training dataset if possible. It can also be a function that computes
      the number of examples based on the depth of the layer that's
      being built.
    config: `RunConfig` of the estimator.
    dnn_optimizer: string, `Optimizer` object, or callable that defines the
      optimizer to use for training the DNN. If `None`, will use the Adagrad
      optimizer with default learning rate of 0.001.
    dnn_activation_fn: Activation function applied to each layer of the DNN.
      If `None`, will use `tf.nn.relu`.
    dnn_dropout: When not `None`, the probability to drop out a given
      unit in the DNN.
    dnn_input_layer_partitioner: Partitioner for input layer of the DNN.
      Defaults to `min_max_variable_partitioner` with `min_slice_size` 64 << 20.
    dnn_input_layer_to_tree: Whether to provide the DNN's input layer
      as a feature to the tree.
    dnn_steps_to_train: Number of steps to train dnn for before switching
      to gbdt.
    predict_with_tree_only: Whether to use only the tree model output as the
      final prediction.
    tree_feature_columns: An iterable containing all the feature columns
      used by the model's boosted trees. If dnn_input_layer_to_tree is
      set to True, these features are in addition to dnn_feature_columns.
    tree_center_bias: Whether a separate tree should be created for
      first fitting the bias.
    dnn_to_tree_distillation_param: A Tuple of (float, loss_fn), where the
      float defines the weight of the distillation loss, and the loss_fn, for
      computing distillation loss, takes dnn_logits, tree_logits and weight
      tensor. If the entire tuple is None, no distillation will be applied. If
      only the loss_fn is None, we will take the sigmoid/softmax cross entropy
      loss by default. When distillation is applied, `predict_with_tree_only`
      will be set to True.
    use_core_versions: Whether feature columns and loss are from the core (as
      opposed to contrib) version of tensorflow.
    output_type: Whether to return a `ModelFnOps` object (the default) or an
      `EstimatorSpec`, chosen via `model.ModelBuilderOutputType`. Using
      `ESTIMATOR_SPEC` requires `use_core_versions=True`.

  Returns:
    A `ModelFnOps` object.
  Raises:
    ValueError: if inputs are not valid.
  """
  if not isinstance(features, dict):
    raise ValueError("features should be a dictionary of `Tensor`s. "
                     "Given type: {}".format(type(features)))

  if not dnn_feature_columns:
    raise ValueError("dnn_feature_columns must be specified")

  if dnn_to_tree_distillation_param:
    if not predict_with_tree_only:
      logging.warning("update predict_with_tree_only to True since distillation"
                      "is specified.")
      predict_with_tree_only = True

  # Build DNN Logits.
  dnn_parent_scope = "dnn"
  dnn_partitioner = dnn_input_layer_partitioner or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=config.num_ps_replicas, min_slice_size=64 << 20))

  if (output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC and
      not use_core_versions):
    raise ValueError("You must use core versions with Estimator Spec")

  with variable_scope.variable_scope(
      dnn_parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=dnn_partitioner):

    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner) as input_layer_scope:
      if use_core_versions:
        input_layer = feature_column_lib.input_layer(
            features=features,
            feature_columns=dnn_feature_columns,
            weight_collections=[dnn_parent_scope])
      else:
        input_layer = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=dnn_feature_columns,
            weight_collections=[dnn_parent_scope],
            scope=input_layer_scope)
    previous_layer = input_layer
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(previous_layer,)) as hidden_layer_scope:
        net = layers.fully_connected(
            previous_layer,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=hidden_layer_scope)
        if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dnn_dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)
      previous_layer = net
    with variable_scope.variable_scope(
        "logits", values=(previous_layer,)) as logits_scope:
      dnn_logits = layers.fully_connected(
          previous_layer,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(dnn_logits, logits_scope.name)

    def _dnn_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=training_util.get_global_step(),
          learning_rate=_DNN_LEARNING_RATE,
          optimizer=_get_optimizer(dnn_optimizer),
          name=dnn_parent_scope,
          variables=ops.get_collection(
              ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope),
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

  # Build Tree Logits.
  global_step = training_util.get_global_step()
  with ops.device(global_step.device):
    ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=0,
        tree_ensemble_config="",  # Initialize an empty ensemble.
        name="ensemble_model")

  tree_features = features.copy()
  if dnn_input_layer_to_tree:
    tree_features["dnn_input_layer"] = input_layer
    tree_feature_columns.append(layers.real_valued_column("dnn_input_layer"))
  gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
      is_chief=config.is_chief,
      num_ps_replicas=config.num_ps_replicas,
      ensemble_handle=ensemble_handle,
      center_bias=tree_center_bias,
      examples_per_layer=tree_examples_per_layer,
      learner_config=tree_learner_config,
      feature_columns=tree_feature_columns,
      logits_dimension=head.logits_dimension,
      features=tree_features,
      use_core_columns=use_core_versions)

  with ops.name_scope("gbdt"):
    predictions_dict = gbdt_model.predict(mode)
    tree_logits = predictions_dict["predictions"]

    def _tree_train_op_fn(loss):
      """Returns the op to optimize the loss."""
      if dnn_to_tree_distillation_param:
        loss_weight, loss_fn = dnn_to_tree_distillation_param
        weight_tensor = head_lib._weight_tensor(  # pylint: disable=protected-access
            features, head.weight_column_name)
        dnn_logits_fixed = array_ops.stop_gradient(dnn_logits)

        if loss_fn is None:
          # we create the loss_fn similar to the head loss_fn for
          # multi_class_head used previously as the default one.
          n_classes = 2 if head.logits_dimension == 1 else head.logits_dimension
          loss_fn = distillation_loss.create_dnn_to_tree_cross_entropy_loss_fn(
              n_classes)

        dnn_to_tree_distillation_loss = loss_weight * loss_fn(
            dnn_logits_fixed, tree_logits, weight_tensor)
        summary.scalar("dnn_to_tree_distillation_loss",
                       dnn_to_tree_distillation_loss)
        loss += dnn_to_tree_distillation_loss

      update_op = gbdt_model.train(loss, predictions_dict, labels)
      with ops.control_dependencies(
          [update_op]), (ops.colocate_with(global_step)):
        update_op = state_ops.assign_add(global_step, 1).op
        return update_op

  if predict_with_tree_only:
    if mode == model_fn.ModeKeys.TRAIN or mode == model_fn.ModeKeys.INFER:
      tree_train_logits = tree_logits
    else:
      tree_train_logits = control_flow_ops.cond(
          global_step > dnn_steps_to_train,
          lambda: tree_logits,
          lambda: dnn_logits)
  else:
    tree_train_logits = dnn_logits + tree_logits

  def _no_train_op_fn(loss):
    """Returns a no-op."""
    del loss
    return control_flow_ops.no_op()

  if tree_center_bias:
    num_trees += 1
  finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()

  if output_type == model.ModelBuilderOutputType.MODEL_FN_OPS:
    if use_core_versions:
      model_fn_ops = head.create_estimator_spec(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_no_train_op_fn,
          logits=tree_train_logits)
      dnn_train_op = head.create_estimator_spec(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_dnn_train_op_fn,
          logits=dnn_logits)
      dnn_train_op = estimator_utils.estimator_spec_to_model_fn_ops(
          dnn_train_op).train_op

      tree_train_op = head.create_estimator_spec(
          features=tree_features,
          mode=mode,
          labels=labels,
          train_op_fn=_tree_train_op_fn,
          logits=tree_train_logits)
      tree_train_op = estimator_utils.estimator_spec_to_model_fn_ops(
          tree_train_op).train_op

      model_fn_ops = estimator_utils.estimator_spec_to_model_fn_ops(
          model_fn_ops)
    else:
      model_fn_ops = head.create_model_fn_ops(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_no_train_op_fn,
          logits=tree_train_logits)
      dnn_train_op = head.create_model_fn_ops(
          features=features,
          mode=mode,
          labels=labels,
          train_op_fn=_dnn_train_op_fn,
          logits=dnn_logits).train_op
      tree_train_op = head.create_model_fn_ops(
          features=tree_features,
          mode=mode,
          labels=labels,
          train_op_fn=_tree_train_op_fn,
          logits=tree_train_logits).train_op

    # Add the hooks
    model_fn_ops.training_hooks.extend([
        trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train,
                                    tree_train_op),
        trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                      finalized_trees)
    ])
    return model_fn_ops

  elif output_type == model.ModelBuilderOutputType.ESTIMATOR_SPEC:
    fusion_spec = head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_no_train_op_fn,
        logits=tree_train_logits)
    dnn_spec = head.create_estimator_spec(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_dnn_train_op_fn,
        logits=dnn_logits)
    tree_spec = head.create_estimator_spec(
        features=tree_features,
        mode=mode,
        labels=labels,
        train_op_fn=_tree_train_op_fn,
        logits=tree_train_logits)

    training_hooks = [
        trainer_hooks.SwitchTrainOp(dnn_spec.train_op, dnn_steps_to_train,
                                    tree_spec.train_op),
        trainer_hooks.StopAfterNTrees(num_trees, attempted_trees,
                                      finalized_trees)
    ]
    fusion_spec = fusion_spec._replace(training_hooks=training_hooks +
                                       list(fusion_spec.training_hooks))
    return fusion_spec
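# A hypothetical distillation configuration for the model_fn above. The tuple's
# first element weights the distillation term; the second is a loss_fn taking
# (dnn_logits, tree_logits, weight_tensor). Passing (0.5, None) instead falls
# back to the default sigmoid/softmax cross-entropy described in the docstring.
import tensorflow as tf

def _distill_loss_fn(dnn_logits, tree_logits, weight_tensor):
    # Use the (already stop-gradient'ed) DNN probabilities as soft targets.
    soft_targets = tf.nn.sigmoid(dnn_logits)
    per_example = tf.nn.sigmoid_cross_entropy_with_logits(labels=soft_targets,
                                                          logits=tree_logits)
    weights = 1.0 if weight_tensor is None else weight_tensor
    return tf.losses.compute_weighted_loss(per_example, weights=weights)

dnn_to_tree_distillation_param = (0.5, _distill_loss_fn)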
Example #41
0
    def _init_network(self):
        """Defines the tensorflow network."""
        # Placeholders for dataset
        self.state_data = tf.placeholder(tf.float32, (None, None, self.dX))
        self.K_data = tf.placeholder(tf.float32, (None, self.dU, self.dX))
        self.k_data = tf.placeholder(tf.float32, (None, self.dU))
        self.precision_data = tf.placeholder(tf.float32, (None, self.dU, self.dU))
        dataset = tf.data.Dataset.from_tensor_slices(
            (
                self.state_data,
                self.K_data,
                self.k_data,
                self.precision_data,
            )
        ).shuffle(10000).batch(self.batch_size).repeat()

        # Batch iterator
        self.iterator = dataset.make_initializable_iterator()
        state_batch, self.K_batch, self.k_batch, self.precision_batch = self.iterator.get_next()

        # Compose and normalize state batch
        state_batch = tf.concat(
            values=[
                state_batch,
            ], axis=1
        )

        # Other placeholders
        self.state_batch = tf.reshape(state_batch, (-1, self.dX))
        self.is_training = tf.placeholder(tf.bool, ())
        self.K_center = tf.placeholder(tf.float32, (self.dU, self.dX))
        self.K_scale = tf.placeholder(tf.float32, (self.dU, self.dX))

        with tf.variable_scope('state_normalization'):
            state_batch_normalized = tf.layers.batch_normalization(
                self.state_batch, training=self.is_training, center=False, scale=False, renorm=True
            )

        # Action estimator
        with tf.variable_scope('action_estimator'), arg_scope(
            [layers.fully_connected],
            activation_fn=tf.nn.leaky_relu,
            weights_regularizer=layers.l2_regularizer(scale=self.weight_decay)
        ):
            h = layers.fully_connected(state_batch_normalized, self.N_hidden)
            h = layers.fully_connected(h, self.N_hidden)
            h = layers.fully_connected(h, self.N_hidden)
            self.action_estimation = layers.fully_connected(h, self.dU, activation_fn=None)

        # Stabilizer estimator
        with tf.variable_scope('stabilizer_estimator'), arg_scope(
            [layers.fully_connected],
            activation_fn=tf.nn.leaky_relu,
            weights_regularizer=layers.l2_regularizer(scale=self.weight_decay),
        ):
            # Encoder
            h = layers.fully_connected(state_batch_normalized, self.N_hidden * self.dX)
            self.latent = layers.fully_connected(h, self.dZ, activation_fn=None)

            # Stabilizer Translation
            h = layers.fully_connected(self.latent, self.N_hidden * self.dX, biases_initializer=None)
            h = layers.dropout(h, keep_prob=1 - self.dropout_rate, is_training=self.is_training)
            h = layers.fully_connected(h, self.N_hidden * self.dX, biases_initializer=None)
            h = layers.dropout(h, keep_prob=1 - self.dropout_rate, is_training=self.is_training)
            self.stabilizer_estimation = tf.reshape(
                layers.fully_connected(h, self.dX * self.dU, activation_fn=None, biases_initializer=None),
                (-1, self.dU, self.dX)
            )

        self.action_regulation = tf.einsum(
            'inm,im->in',
            self.stabilizer_estimation * self.K_scale + self.K_center,  # Reverse K standardization
            self.state_batch,
        )
        self.action_out = self.action_estimation + self.action_regulation
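# A hypothetical session sketch for the network above. The owning class name is
# not shown, so 'policy' stands for an already-constructed instance; the numpy
# arrays (states, K_gains, k_biases, precisions, K_mean, K_std) are assumed to
# match the placeholder shapes.
import tensorflow as tf

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Feed the raw data once to initialize the shuffled/batched iterator.
    sess.run(policy.iterator.initializer,
             feed_dict={policy.state_data: states,
                        policy.K_data: K_gains,
                        policy.k_data: k_biases,
                        policy.precision_data: precisions})
    # K_center / K_scale undo the K standardization applied during training.
    actions = sess.run(policy.action_out,
                       feed_dict={policy.is_training: False,
                                  policy.K_center: K_mean,
                                  policy.K_scale: K_std})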
def dnn_sampled_softmax_classifier_model_fn(features, target_indices,
                                            mode, params):
  """model_fn that uses candidate sampling.

  Args:
    features: Single Tensor or dict of Tensor (depends on data passed to `fit`)
    target_indices: A single Tensor of shape [batch_size, n_labels] containing
      the target indices.
    mode: Represents if this training, evaluation or prediction. See `ModeKeys`.
    params: A dict of hyperparameters that are listed below.
      hidden_units- List of hidden units per layer. All layers are fully
        connected. Ex. `[64, 32]` means first layer has 64 nodes and second one
        has 32.
      feature_columns- An iterable containing all the feature columns used by
        the model. All items in the set should be instances of classes derived
        from `FeatureColumn`.
      n_classes- number of target classes. It must be greater than 2.
      n_samples- number of sample target classes. Needs to be tuned - A good
        starting point could be 2% of n_classes.
      n_labels- number of labels in each example.
      top_k- The number of classes to predict.
      optimizer- An instance of `tf.Optimizer` used to train the model. If
        `None`, will use an Adagrad optimizer.
      dropout- When not `None`, the probability we will drop out a given
        coordinate.
      gradient_clip_norm- A float > 0. If provided, gradients are
        clipped to their global norm with this clipping ratio. See
        tf.clip_by_global_norm for more details.
      num_ps_replicas- The number of parameter server replicas.

  Returns:
    predictions: A single Tensor or a dict of Tensors.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """

  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  n_samples = params["n_samples"]
  n_labels = params["n_labels"]
  top_k = params["top_k"]
  optimizer = params["optimizer"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  num_ps_replicas = params["num_ps_replicas"]

  parent_scope = "dnn_ss"

  # Setup the input layer partitioner.
  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))

  # Create the input layer.
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        features,
        feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  # Setup the hidden layer partitioner.
  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))

  final_hidden_layer_dim = None
  # Create hidden layers using fully_connected.
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id, [net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(net,
                                   num_hidden_units,
                                   variables_collections=[parent_scope],
                                   scope=scope)
      final_hidden_layer_dim = num_hidden_units
      # Add dropout if it is enabled.
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(net, keep_prob=(1.0 - dropout))

  # Create the weights and biases for the logit layer.
  with variable_scope.variable_scope(
      parent_scope + "/logits", [net],
      partitioner=hidden_layer_partitioner) as scope:
    dtype = net.dtype.base_dtype
    weights_shape = [n_classes, final_hidden_layer_dim]
    weights = variables.model_variable(
        "weights",
        shape=weights_shape,
        dtype=dtype,
        initializer=initializers.xavier_initializer(),
        trainable=True,
        collections=[parent_scope])
    biases = variables.model_variable(
        "biases",
        shape=[n_classes,],
        dtype=dtype,
        initializer=init_ops.zeros_initializer,
        trainable=True,
        collections=[parent_scope])

  if mode == estimator.ModeKeys.TRAIN:
    # Call the candidate sampling APIs and calculate the loss.
    sampled_values = nn.learned_unigram_candidate_sampler(
        true_classes=math_ops.to_int64(target_indices),
        num_true=n_labels,
        num_sampled=n_samples,
        unique=True,
        range_max=n_classes)

    sampled_softmax_loss = nn.sampled_softmax_loss(
        weights=weights,
        biases=biases,
        inputs=net,
        labels=math_ops.to_int64(target_indices),
        num_sampled=n_samples,
        num_classes=n_classes,
        num_true=n_labels,
        sampled_values=sampled_values)

    loss = math_ops.reduce_mean(sampled_softmax_loss, name="loss")

    train_op = optimizers.optimize_loss(
        loss=loss, global_step=contrib_framework.get_global_step(),
        learning_rate=_DEFAULT_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer), clip_gradients=gradient_clip_norm,
        name=parent_scope)
    return None, loss, train_op

  elif mode == estimator.ModeKeys.EVAL:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    # Since the targets have multiple labels, setup the target probabilities
    # as 1.0/n_labels for each of the labels.
    target_one_hot = array_ops.one_hot(
        indices=target_indices,
        depth=n_classes,
        on_value=1.0 / n_labels)
    target_one_hot = math_ops.reduce_sum(
        input_tensor=target_one_hot,
        reduction_indices=[1])

    loss = math_ops.reduce_mean(
        nn.softmax_cross_entropy_with_logits(logits, target_one_hot))

    return predictions, loss, None

  elif mode == estimator.ModeKeys.INFER:
    logits = nn.bias_add(standard_ops.matmul(net, array_ops.transpose(weights)),
                         biases)
    predictions = {}
    predictions[_PROBABILITIES] = nn.softmax(logits)
    predictions[_CLASSES] = math_ops.argmax(logits, 1)
    _, predictions[_TOP_K] = nn.top_k(logits, top_k)

    return predictions, None, None
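# A hypothetical params dict for the candidate-sampling model_fn above.
# my_feature_columns is a placeholder for feature columns built elsewhere.
params = {
    "hidden_units": [256, 128],
    "feature_columns": my_feature_columns,
    "n_classes": 100000,
    "n_samples": 2000,          # roughly 2% of n_classes, per the docstring
    "n_labels": 5,
    "top_k": 10,
    "optimizer": None,          # falls back to Adagrad
    "dropout": 0.5,
    "gradient_clip_norm": None,
    "num_ps_replicas": 0,
}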
# batch_norm arguments shared through arg_scope below
bn_params = {
    'is_training': train_mode,
    'decay': 0.9,
    'updates_collections': None
}

# We can use 'arg_scope' to avoid repeating the same arguments across
# multiple calls to the same function.
with arg_scope([fully_connected],
               activation_fn=tf.nn.relu,
               weights_initializer=xavier_init,
               biases_initializer=None,
               normalizer_fn=batch_norm,
               normalizer_params=bn_params
               ):
    hidden_layer1 = fully_connected(X, hidden_output_size, scope="h1")
    h1_drop = dropout(hidden_layer1, keep_prob, is_training=train_mode)
    hidden_layer2 = fully_connected(h1_drop, hidden_output_size, scope="h2")
    h2_drop = dropout(hidden_layer2, keep_prob, is_training=train_mode)
    hidden_layer3 = fully_connected(h2_drop, hidden_output_size, scope="h3")
    h3_drop = dropout(hidden_layer3, keep_prob, is_training=train_mode)
    hidden_layer4 = fully_connected(h3_drop, hidden_output_size, scope="h4")
    h4_drop = dropout(hidden_layer4, keep_prob, is_training=train_mode)
    hypothesis = fully_connected(h4_drop, final_output_size, activation_fn=None, scope="hypothesis")


# define cost/loss & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=hypothesis, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# initialize
Example #44
0
with tf.variable_scope("M-LSTM-1", reuse=tf.AUTO_REUSE):
    cell_1 = SkipLSTMCell(num_units=64)
    initial_state_1 = cell_1.trainable_initial_state(batch_size=batch_size)

    hidden_1 = conv1d(x_m_lstm,
                      num_outputs=1,
                      kernel_size=1,
                      padding='VALID',
                      stride=1,
                      weights_regularizer=l2_regularizer(scale=1.0e-3))
    rnn_outputs_1, _ = tf.nn.dynamic_rnn(cell_1,
                                         hidden_1,
                                         dtype=tf.float32,
                                         initial_state=initial_state_1)
    rnn_outputs_1 = rnn_outputs_1.h[:, -1, :]
    hidden_2 = dropout(inputs=rnn_outputs_1, keep_prob=0.7)
    output_1 = fully_connected(hidden_2, num_outputs=32)

# M-LSTM (2)
with tf.variable_scope("M-LSTM-2", reuse=tf.AUTO_REUSE):
    cell_2 = SkipLSTMCell(num_units=64)
    initial_state_2 = cell_2.trainable_initial_state(batch_size=batch_size)

    hidden_3 = conv1d(x_m_lstm,
                      num_outputs=1,
                      kernel_size=4,
                      padding='VALID',
                      stride=2,
                      weights_regularizer=l2_regularizer(scale=1.0e-3))
    rnn_outputs_2, _ = tf.nn.dynamic_rnn(cell_2,
                                         hidden_3,
def _dnn_linear_combined_model_fn(features, labels, mode, params):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    `estimator.ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
      are empty at the same time.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer")
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer")
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn")
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = params["num_ps_replicas"]

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=64 << 20))
    with variable_scope.variable_scope(
        dnn_parent_scope + "/input_from_feature_columns",
        values=features.values(),
        partitioner=input_layer_partitioner) as scope:
      net = layers.input_from_feature_columns(
          columns_to_tensors=features,
          feature_columns=dnn_feature_columns,
          weight_collections=[dnn_parent_scope],
          scope=scope)

    hidden_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
      with variable_scope.variable_scope(
          dnn_parent_scope + "/hiddenlayer_%d" % layer_id,
          values=[net],
          partitioner=hidden_layer_partitioner) as scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=dnn_activation_fn,
            variables_collections=[dnn_parent_scope],
            scope=scope)
        if dnn_dropout is not None and mode == estimator.ModeKeys.TRAIN:
          net = layers.dropout(
              net,
              keep_prob=(1.0 - dnn_dropout))
      # TODO(b/31209633): Consider adding summary before dropout.
      _add_hidden_layer_summary(net, scope.name)

    with variable_scope.variable_scope(
        dnn_parent_scope + "/logits",
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      dnn_logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[dnn_parent_scope],
          scope=scope)
    _add_hidden_layer_summary(dnn_logits, scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=features.values(),
        partitioner=linear_partitioner) as scope:
      if joint_linear_weights:
        linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)
      else:
        linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=linear_feature_columns,
            num_outputs=head.logits_dimension,
            weight_collections=[linear_parent_scope],
            scope=scope)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=_get_optimizer(dnn_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=contrib_variables.get_global_step(),
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=_get_optimizer(linear_optimizer),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[]))

    return control_flow_ops.group(*train_ops)

  return head.head_ops(
      features, labels, mode, _make_training_op, logits=logits)
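# A hedged sketch of the `params` dict this model_fn expects, with purely
# illustrative values; `my_head`, `linear_columns`, and `dnn_columns` are
# assumed to be constructed elsewhere (e.g. via tf.contrib.layers column builders).
params = {
    "head": my_head,  # a `Head` instance, assumed to exist
    "linear_feature_columns": linear_columns,
    "linear_optimizer": "Ftrl",
    "joint_linear_weights": False,
    "dnn_feature_columns": dnn_columns,
    "dnn_optimizer": "Adagrad",
    "dnn_hidden_units": [256, 128],
    "dnn_activation_fn": tf.nn.relu,
    "dnn_dropout": 0.5,
    "gradient_clip_norm": 5.0,
    "num_ps_replicas": 0,
}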
    def __init__(self, word_embeddings, setting):

        self.vocab_size = setting.vocab_size
        self.len_sentence = len_sentence = setting.len_sentence
        self.num_epochs = setting.num_epochs
        self.num_classes = num_classes = setting.num_classes
        self.cnn_size = setting.cnn_size
        self.num_layers = setting.num_layers
        self.pos_size = setting.pos_size
        self.pos_num = setting.pos_num
        self.word_embedding = setting.word_embedding
        self.lr = setting.lr


        word_embedding = tf.get_variable(initializer=word_embeddings, name='word_embedding')
        pos1_embedding = tf.get_variable('pos1_embedding', [self.pos_num, self.pos_size])
        pos2_embedding = tf.get_variable('pos2_embedding', [self.pos_num, self.pos_size])
        #relation_embedding = tf.get_variable('relation_embedding', [self.num_classes, self.cnn_size])

        self.input_word = tf.placeholder(dtype=tf.int32, shape=[None, len_sentence], name='input_word')
        self.input_pos1 = tf.placeholder(dtype=tf.int32, shape=[None, len_sentence], name='input_pos1')
        self.input_pos2 = tf.placeholder(dtype=tf.int32, shape=[None, len_sentence], name='input_pos2')
        self.input_y = tf.placeholder(dtype=tf.float32, shape=[None, num_classes], name='input_y')
        self.keep_prob = tf.placeholder(tf.float32)


        self.input_word_ebd = tf.nn.embedding_lookup(word_embedding, self.input_word)
        self.input_pos1_ebd = tf.nn.embedding_lookup(pos1_embedding, self.input_pos1)
        self.input_pos2_ebd = tf.nn.embedding_lookup(pos2_embedding, self.input_pos2)


        self.inputs = tf.concat(axis=2, values=[self.input_word_ebd, self.input_pos1_ebd, self.input_pos2_ebd])
        self.inputs = tf.reshape(self.inputs, [-1, self.len_sentence, self.word_embedding + self.pos_size * 2, 1])

        conv = layers.conv2d(inputs=self.inputs, num_outputs=self.cnn_size,
                             kernel_size=[3, 60], stride=[1, 60], padding='SAME')

        max_pool = layers.max_pool2d(conv, kernel_size=[70, 1], stride=[1, 1])
        self.sentence = tf.reshape(max_pool, [-1, self.cnn_size])

        tanh = tf.nn.tanh(self.sentence)
        drop = layers.dropout(tanh, keep_prob=self.keep_prob)

        self.outputs = layers.fully_connected(inputs=drop, num_outputs=self.num_classes,
                                              activation_fn=tf.nn.softmax)

        '''
        self.y_index =  tf.argmax(self.input_y,1,output_type=tf.int32)
        self.indexes = tf.range(0, tf.shape(self.outputs)[0]) * tf.shape(self.outputs)[1] + self.y_index
        self.responsible_outputs = - tf.reduce_mean(tf.log(tf.gather(tf.reshape(self.outputs, [-1]),self.indexes)))
        '''
        # loss
        self.cross_loss = -tf.reduce_mean(tf.log(tf.reduce_sum(self.input_y * self.outputs, axis=1)))
        self.reward = tf.log(tf.reduce_sum(self.input_y * self.outputs, axis=1))

        self.l2_loss = tf.contrib.layers.apply_regularization(regularizer=tf.contrib.layers.l2_regularizer(0.0001),
                                                              weights_list=tf.trainable_variables())

        self.final_loss = self.cross_loss + self.l2_loss


        # accuracy
        self.pred = tf.argmax(self.outputs, axis=1)
        self.pred_prob = tf.reduce_max(self.outputs, axis=1)

        self.y_label = tf.argmax(self.input_y, axis=1)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.pred, self.y_label), 'float'))

        # minimize loss
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.minimize(self.final_loss)


        self.tvars = tf.trainable_variables()

        # manual update parameters
        self.tvars_holders = []
        for idx, var in enumerate(self.tvars):
            placeholder = tf.placeholder(tf.float32, name=str(idx) + '_holder')
            self.tvars_holders.append(placeholder)

        self.update_tvar_holder = []
        for idx, var in enumerate(self.tvars):
            update_tvar = tf.assign(var, self.tvars_holders[idx])
            self.update_tvar_holder.append(update_tvar)
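# A hedged usage sketch: one training step for the model above. The class name
# (`CNN`), `word_embeddings`, `setting`, and the `batch_*` arrays are assumptions,
# since the original snippet does not show how the class is instantiated or fed.
model = CNN(word_embeddings, setting)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed_dict = {
        model.input_word: batch_word,  # int32, shape [batch, len_sentence]
        model.input_pos1: batch_pos1,
        model.input_pos2: batch_pos2,
        model.input_y: batch_y,        # one-hot, shape [batch, num_classes]
        model.keep_prob: 0.5,
    }
    _, loss, acc = sess.run([model.train_op, model.final_loss, model.accuracy],
                            feed_dict=feed_dict)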
Beispiel #47
0
    def __call__(self, inputs, is_training=False, reuse=None):
        with tf.variable_scope(self.name, reuse=reuse):
            with arg_scope([layers.batch_norm],
                           scale=True,
                           fused=True,
                           data_format=self.data_format,
                           is_training=is_training):
                with arg_scope([layers.conv2d],
                               activation_fn=tf.nn.relu,
                               normalizer_fn=layers.batch_norm,
                               biases_initializer=None,
                               weights_regularizer=layers.l2_regularizer(
                                   self.weight_decay),
                               data_format=self.data_format):

                    if self.data_format == 'NCHW':
                        inputs = tf.transpose(inputs, [0, 3, 1, 2])

                    with tf.variable_scope('conv1'):
                        net = layers.conv2d(inputs,
                                            num_outputs=64,
                                            kernel_size=7,
                                            stride=2)
                        net = layers.max_pool2d(net,
                                                kernel_size=3,
                                                stride=2,
                                                padding='SAME',
                                                data_format=self.data_format)

                    with tf.variable_scope('conv2'):
                        net = layers.repeat(net, self.num_block[0],
                                            self.SEresBlock,
                                            self.num_outputs[0])

                    with tf.variable_scope('conv3'):
                        net = self.resBlock(net,
                                            num_outputs=self.num_outputs[1],
                                            stride=2)
                        net = layers.repeat(net, self.num_block[1] - 1,
                                            self.SEresBlock,
                                            self.num_outputs[1])

                    with tf.variable_scope('conv4'):
                        net = self.resBlock(net,
                                            num_outputs=self.num_outputs[2],
                                            stride=2)
                        net = layers.repeat(net, self.num_block[2] - 1,
                                            self.SEresBlock,
                                            self.num_outputs[2])

                    with tf.variable_scope('conv5'):
                        net = self.resBlock(net,
                                            num_outputs=self.num_outputs[3],
                                            stride=2)
                        net = layers.repeat(net, self.num_block[3] - 1,
                                            self.SEresBlock,
                                            self.num_outputs[3])

                    if self.data_format == 'NCHW':
                        net = tf.reduce_mean(net, [2, 3])
                        net = tf.reshape(net,
                                         [-1, net.get_shape().as_list()[1]])
                    else:
                        net = tf.reduce_mean(net, [1, 2])
                        net = tf.reshape(
                            net, [-1, net.get_shape().as_list()[-1]])

                    if is_training:
                        net = layers.dropout(net, keep_prob=0.5)

                    pre_logits = layers.fully_connected(
                        net,
                        num_outputs=128,
                        activation_fn=None,
                        weights_regularizer=layers.l2_regularizer(
                            self.weight_decay))

        return pre_logits
Beispiel #48
0
def _dnn_classifier_model_fn(features, targets, mode, params):
  """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    targets: `Tensor` of shape [batch_size, 1] or [batch_size] target labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * n_classes: number of target classes.
      * weight_column_name: A string defining the weight feature column, or
          None if there are no weights.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`.
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * enable_centered_bias: A bool. If True, estimator will learn a centered
          bias variable for each class. Rest of the model structure learns the
          residual after centered bias.
      * num_ps_replicas: The number of parameter server replicas.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  n_classes = params["n_classes"]
  weight_column_name = params["weight_column_name"]
  optimizer = params["optimizer"]
  activation_fn = params["activation_fn"]
  dropout = params["dropout"]
  gradient_clip_norm = params["gradient_clip_norm"]
  enable_centered_bias = params["enable_centered_bias"]
  num_ps_replicas = params["num_ps_replicas"]

  features = _get_feature_dict(features)
  parent_scope = "dnn"
  num_label_columns = 1 if n_classes == 2 else n_classes
  if n_classes == 2:
    loss_fn = loss_ops.sigmoid_cross_entropy
  else:
    loss_fn = loss_ops.sparse_softmax_cross_entropy

  input_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  with variable_scope.variable_scope(
      parent_scope + "/input_from_feature_columns",
      values=features.values(),
      partitioner=input_layer_partitioner) as scope:
    net = layers.input_from_feature_columns(
        columns_to_tensors=features,
        feature_columns=feature_columns,
        weight_collections=[parent_scope],
        scope=scope)

  hidden_layer_partitioner = (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas))
  for layer_id, num_hidden_units in enumerate(hidden_units):
    with variable_scope.variable_scope(
        parent_scope + "/hiddenlayer_%d" % layer_id,
        values=[net],
        partitioner=hidden_layer_partitioner) as scope:
      net = layers.fully_connected(
          net,
          num_hidden_units,
          activation_fn=activation_fn,
          variables_collections=[parent_scope],
          scope=scope)
      if dropout is not None and mode == estimator.ModeKeys.TRAIN:
        net = layers.dropout(
            net,
            keep_prob=(1.0 - dropout))
    _add_hidden_layer_summary(net, scope.name)

  with variable_scope.variable_scope(
      parent_scope + "/logits",
      values=[net],
      partitioner=hidden_layer_partitioner) as scope:
    logits = layers.fully_connected(
        net,
        num_label_columns,
        activation_fn=None,
        variables_collections=[parent_scope],
        scope=scope)
  _add_hidden_layer_summary(logits, scope.name)

  if enable_centered_bias:
    logits = nn.bias_add(logits, _centered_bias(num_label_columns))

  if mode == estimator.ModeKeys.TRAIN:
    targets = _reshape_targets(targets)
    weight = _get_weight_tensor(features, weight_column_name)
    training_loss = loss_fn(logits, targets, weight=weight)
    loss = _rescale_eval_loss(training_loss, weight)

    train_ops = [optimizers.optimize_loss(
        loss=training_loss,
        global_step=contrib_variables.get_global_step(),
        learning_rate=_LEARNING_RATE,
        optimizer=_get_optimizer(optimizer),
        clip_gradients=gradient_clip_norm,
        name=parent_scope,
        # Empty summaries to prevent optimizers from logging the training_loss.
        summaries=[])]
    if enable_centered_bias:
      train_ops.append(_centered_bias_step(targets, loss_fn, num_label_columns))

    logging_ops.scalar_summary("loss", loss)

    return None, loss, control_flow_ops.group(*train_ops)

  elif mode == estimator.ModeKeys.EVAL:
    predictions = _predictions(logits=logits, n_classes=n_classes)

    targets = _reshape_targets(targets)
    weight = _get_weight_tensor(features, weight_column_name)
    training_loss = loss_fn(logits, targets, weight=weight)
    loss = _rescale_eval_loss(training_loss, weight)

    return predictions, loss, []

  else:  # mode == estimator.ModeKeys.INFER:
    predictions = _predictions(logits=logits, n_classes=n_classes)

    return predictions, None, []
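# A hedged sketch of the `params` dict described in the docstring above; the
# values are illustrative only, and `tf.contrib.layers.real_valued_column` is
# used here as one plausible way to build the feature columns.
params = {
    "hidden_units": [128, 64],
    "feature_columns": [tf.contrib.layers.real_valued_column("x", dimension=784)],
    "n_classes": 10,
    "weight_column_name": None,
    "optimizer": "Adagrad",
    "activation_fn": tf.nn.relu,
    "dropout": 0.5,
    "gradient_clip_norm": None,
    "enable_centered_bias": False,
    "num_ps_replicas": 0,
}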
Beispiel #49
0
def model():
    print("building model ...")
    with tf.variable_scope('train'):
        print("building model ...")
        X_pl = tf.placeholder(tf.float32, [None, num_features])
        X_expand = tf.expand_dims(X_pl, axis=2)
        print("X_pl", X_pl.get_shape())
        t_pl = tf.placeholder(tf.int32, [
            None,
        ])
        print("t_pl", t_pl.get_shape())
        is_training_pl = tf.placeholder(tf.bool)
        cell_fw = tf.nn.rnn_cell.GRUCell(205)
        cell_bw = tf.nn.rnn_cell.GRUCell(205)
        seq_len = tf.reduce_sum(tf.ones(tf.shape(X_pl), dtype=tf.int32),
                                axis=1)
        _, enc_states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=X_expand,
            sequence_length=seq_len,
            dtype=tf.float32)
        enc_states = tf.concat(1, enc_states)
        enc_states_drop = dropout(enc_states, is_training=is_training_pl)
        l1 = fully_connected(enc_states_drop, 200, activation_fn=None)
        l1 = batch_norm(l1, is_training=is_training_pl)
        l1_relu = relu(l1)
        l1_dropout = dropout(l1_relu, is_training=is_training_pl)
        l2 = fully_connected(l1_dropout, 200, activation_fn=None)
        l2 = batch_norm(l2, is_training=is_training_pl)
        l2_relu = relu(l2)
        l_out = fully_connected(l2_relu,
                                num_outputs=num_classes,
                                activation_fn=None)
        l_out_softmax = tf.nn.softmax(l_out)
        tf.contrib.layers.summarize_variables()

    with tf.variable_scope('metrics'):
        loss = sparse_softmax_cross_entropy_with_logits(l_out, t_pl)
        print("loss", loss.get_shape())
        loss = tf.reduce_mean(loss)
        print("loss", loss.get_shape())
        tf.summary.scalar('train/loss', loss)
        argmax = tf.to_int32(tf.argmax(l_out, 1))
        print("argmax", argmax.get_shape())
        correct = tf.to_float(tf.equal(argmax, t_pl))
        print("correct,", correct.get_shape())
        accuracy = tf.reduce_mean(correct)
        print("accuracy", accuracy.get_shape())

    with tf.variable_scope('optimizer'):
        print("building optimizer ...")
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss)
        gradients, variables = zip(*grads_and_vars)
        clipped_gradients, global_norm = (tf.clip_by_global_norm(
            gradients, clip_norm))
        clipped_grads_and_vars = zip(clipped_gradients, variables)

        tf.summary.scalar('train/global_gradient_norm', global_norm)

        train_op = optimizer.apply_gradients(clipped_grads_and_vars,
                                             global_step=global_step)

    return X_pl, t_pl, is_training_pl, l_out, l_out_softmax, loss, accuracy, train_op, global_step
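# A hedged sketch of driving the graph returned above; `num_epochs`, `batch_X`,
# and `batch_t` are assumed to come from the surrounding training script.
(X_pl, t_pl, is_training_pl, l_out, l_out_softmax,
 loss, accuracy, train_op, global_step) = model()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(num_epochs):
        _, batch_loss, batch_acc, step = sess.run(
            [train_op, loss, accuracy, global_step],
            feed_dict={X_pl: batch_X, t_pl: batch_t, is_training_pl: True})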
Beispiel #50
0
    def _encoder(self,
                 images,
                 embedding,
                 scope_name="encoder",
                 reuse_variables=False):
        with tf.variable_scope(scope_name) as scope:
            if reuse_variables:
                scope.reuse_variables()
            # Encode image
            # 32 * 32 * 64
            images = ly.dropout(images,
                                keep_prob=0.9,
                                is_training=self.is_training)
            node1 = tf_utils.cust_conv2d(images,
                                         64,
                                         h_f=4,
                                         w_f=4,
                                         batch_norm=False,
                                         scope_name="node1")
            # 16 * 16 * 128
            node1 = tf_utils.cust_conv2d(node1,
                                         128,
                                         h_f=4,
                                         w_f=4,
                                         is_training=self.is_training,
                                         scope_name="node1_1")
            # 8 * 8 * 256
            node1 = tf_utils.cust_conv2d(node1,
                                         256,
                                         h_f=4,
                                         w_f=4,
                                         is_training=self.is_training,
                                         scope_name="node1_2")
            # 4 * 4 * 512
            node1 = tf_utils.cust_conv2d(node1,
                                         512,
                                         h_f=4,
                                         w_f=4,
                                         activation_fn=None,
                                         is_training=self.is_training,
                                         scope_name="node1_3")
            node1 = ly.dropout(node1,
                               keep_prob=0.7,
                               is_training=self.is_training)

            # 4 * 4 * 128
            node2 = tf_utils.cust_conv2d(node1,
                                         256,
                                         h_f=1,
                                         w_f=1,
                                         h_s=1,
                                         w_s=1,
                                         is_training=self.is_training,
                                         scope_name="node2_1")
            # 4 * 4 * 128
            node2 = tf_utils.cust_conv2d(node2,
                                         256,
                                         h_f=3,
                                         w_f=3,
                                         h_s=1,
                                         w_s=1,
                                         is_training=self.is_training,
                                         scope_name="node2_2")
            # 4 * 4 * 512
            node2 = tf_utils.cust_conv2d(node2,
                                         512,
                                         h_f=3,
                                         w_f=3,
                                         h_s=1,
                                         w_s=1,
                                         activation_fn=None,
                                         is_training=self.is_training,
                                         scope_name="node2_3")
            node2 = ly.dropout(node2,
                               keep_prob=0.7,
                               is_training=self.is_training)

            # 4 * 4 * 512
            node = tf.add(node1, node2)
            node = tf_utils.leaky_rectify(node)

            # Encode embedding
            # 1 x 1 x nb_emb
            emb = tf.expand_dims(tf.expand_dims(embedding, 1), 1)
            # 4 x 4 x nb_emb
            emb = tf.tile(emb, [1, 4, 4, 1])

            # 4 x 4 x 356
            comb = tf.concat([node, emb], axis=3)

            # Compress embedding
            # 4 * 4 * 256
            result = tf_utils.cust_conv2d(comb,
                                          512,
                                          h_f=3,
                                          w_f=3,
                                          w_s=1,
                                          h_s=1,
                                          scope_name="node3")
            result = tf_utils.cust_conv2d(result,
                                          256,
                                          h_f=3,
                                          w_f=3,
                                          w_s=1,
                                          h_s=1,
                                          scope_name="node4")

            if scope_name == "discriminator":
                result = tf_utils.cust_conv2d(result,
                                              128,
                                              h_f=3,
                                              w_f=3,
                                              w_s=1,
                                              h_s=1,
                                              scope_name="node5")
                result = tf_utils.cust_conv2d(result,
                                              64,
                                              h_f=3,
                                              w_f=3,
                                              w_s=2,
                                              h_s=2,
                                              scope_name="node6")

                # 1 x 1 x 16
                result = tf_utils.cust_conv2d(result,
                                              16,
                                              h_f=3,
                                              w_f=3,
                                              w_s=2,
                                              h_s=2,
                                              scope_name="node7")
            return result
Beispiel #51
0
 def define_feedforward_model(self):
     layer_list = []
     with self.graph.as_default() as g:
         is_training_batch = tf.placeholder(tf.bool,
                                            shape=(),
                                            name="is_training_batch")
         bn_params = {
             "is_training": is_training_batch,
             "decay": 0.99,
             "updates_collections": None
         }
         g.add_to_collection("is_training_batch", is_training_batch)
         with tf.name_scope("input"):
             input_layer = tf.placeholder(dtype=tf.float32,
                                          shape=(None, self.n_in),
                                          name="input_layer")
             if self.dropout_rate != 0.0:
                 print "Using dropout to avoid overfitting and the dropout rate is", self.dropout_rate
                 is_training_drop = tf.placeholder(dtype=tf.bool,
                                                   shape=(),
                                                   name="is_training_drop")
                 input_layer_drop = dropout(input_layer,
                                            self.dropout_rate,
                                            is_training=is_training_drop)
                 layer_list.append(input_layer_drop)
                 g.add_to_collection(name="is_training_drop",
                                     value=is_training_drop)
             else:
                 layer_list.append(input_layer)
         g.add_to_collection("input_layer", layer_list[0])
         for i in range(len(self.hidden_layer_size)):
             with tf.name_scope("hidden_layer_" + str(i + 1)):
                 if self.dropout_rate != 0.0:
                     last_layer = layer_list[-1]
                     if self.hidden_layer_type[i] == "tanh":
                         new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,\
                                    normalizer_params=bn_params)
                     if self.hidden_layer_type[i] == "sigmoid":
                         new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.sigmoid,normalizer_fn=batch_norm,\
                                   normalizer_params=bn_params)
                     new_layer_drop = dropout(new_layer,
                                              self.dropout_rate,
                                              is_training=is_training_drop)
                     layer_list.append(new_layer_drop)
                 else:
                     last_layer = layer_list[-1]
                     if self.hidden_layer_type[i] == "tanh":
                         new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.tanh,normalizer_fn=batch_norm,\
                                   normalizer_params=bn_params)
                     if self.hidden_layer_type[i] == "sigmoid":
                         new_layer=fully_connected(last_layer,self.hidden_layer_size[i],activation_fn=tf.nn.sigmoid,normalizer_fn=batch_norm,\
                                   normalizer_params=bn_params)
                     layer_list.append(new_layer)
         with tf.name_scope("output_layer"):
             if self.output_type == "linear":
                 output_layer = fully_connected(layer_list[-1],
                                                self.n_out,
                                                activation_fn=None)
             if self.output_type == "tanh":
                 output_layer = fully_connected(layer_list[-1],
                                                self.n_out,
                                                activation_fn=tf.nn.tanh)
             g.add_to_collection(name="output_layer", value=output_layer)
         with tf.name_scope("training_op"):
             if self.optimizer == "adam":
                 self.training_op = tf.train.AdamOptimizer()
Beispiel #52
0
def _dnn_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of
      dtype `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `_Head` instance.
      * hidden_units: List of hidden units per layer.
      * feature_columns: An iterable containing all the feature columns used by
          the model.
      * optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training. If `None`, will use the Adagrad
          optimizer with a default learning rate of 0.05.
      * activation_fn: Activation function applied to each layer. If `None`,
          will use `tf.nn.relu`.
      * dropout: When not `None`, the probability we will drop out a given
          coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_min_slice_size: Optional. The min slice size of input layer
          partitions. If not provided, will use the default of 64M.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    predictions: A dict of `Tensor` objects.
    loss: A scalar containing the loss of the step.
    train_op: The op for training.
  """
  head = params["head"]
  hidden_units = params["hidden_units"]
  feature_columns = params["feature_columns"]
  optimizer = params.get("optimizer") or "Adagrad"
  activation_fn = params.get("activation_fn")
  dropout = params.get("dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  input_layer_min_slice_size = (
      params.get("input_layer_min_slice_size") or 64 << 20)
  num_ps_replicas = config.num_ps_replicas if config else 0
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})

  features = _get_feature_dict(features)
  parent_scope = "dnn"

  partitioner = partitioned_variables.min_max_variable_partitioner(
      max_partitions=num_ps_replicas)
  with variable_scope.variable_scope(
      parent_scope,
      values=tuple(six.itervalues(features)),
      partitioner=partitioner):
    input_layer_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas,
            min_slice_size=input_layer_min_slice_size))
    with variable_scope.variable_scope(
        "input_from_feature_columns",
        values=tuple(six.itervalues(features)),
        partitioner=input_layer_partitioner) as input_layer_scope:
      if all([
          isinstance(fc, feature_column._FeatureColumn)  # pylint: disable=protected-access
          for fc in feature_columns
      ]):
        net = layers.input_from_feature_columns(
            columns_to_tensors=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope],
            scope=input_layer_scope)
      else:
        net = fc_core.input_layer(
            features=features,
            feature_columns=feature_columns,
            weight_collections=[parent_scope])

    for layer_id, num_hidden_units in enumerate(hidden_units):
      with variable_scope.variable_scope(
          "hiddenlayer_%d" % layer_id,
          values=(net,)) as hidden_layer_scope:
        net = layers.fully_connected(
            net,
            num_hidden_units,
            activation_fn=activation_fn,
            variables_collections=[parent_scope],
            scope=hidden_layer_scope)
        if dropout is not None and mode == model_fn.ModeKeys.TRAIN:
          net = layers.dropout(net, keep_prob=(1.0 - dropout))
      _add_hidden_layer_summary(net, hidden_layer_scope.name)

    with variable_scope.variable_scope(
        "logits",
        values=(net,)) as logits_scope:
      logits = layers.fully_connected(
          net,
          head.logits_dimension,
          activation_fn=None,
          variables_collections=[parent_scope],
          scope=logits_scope)
    _add_hidden_layer_summary(logits, logits_scope.name)

    def _train_op_fn(loss):
      """Returns the op to optimize the loss."""
      return optimizers.optimize_loss(
          loss=loss,
          global_step=contrib_variables.get_global_step(),
          learning_rate=_LEARNING_RATE,
          optimizer=_get_optimizer(optimizer),
          gradient_multipliers=(
              dnn_linear_combined._extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, parent_scope,
                  input_layer_scope.name)),
          clip_gradients=gradient_clip_norm,
          name=parent_scope,
          # Empty summaries to prevent optimizers from logging training_loss.
          summaries=[])

    return head.create_model_fn_ops(
        features=features,
        mode=mode,
        labels=labels,
        train_op_fn=_train_op_fn,
        logits=logits)
Beispiel #53
0
    def _inference(self, x, dropout, is_training=True):
        with tf.variable_scope('pretrain_model', reuse=None) as training_scope:
            if self.freeze_opt is None:
                weights = {}
                weights = self.build_emb_weights(weights)
                weights = self.build_lstm_weights(weights)
                weights = self.build_fc_weights(self.n_hidden, weights)

                # embedding
                with tf.variable_scope("embedding"):
                    xemb = self.embedding(x, weights["emb_W"],
                                          weights["emb_mask_W"])

                # recurrent neural networks
                with tf.variable_scope("rnn"):
                    lstm_cell = LSTMCell(self.n_hidden, weights["lstm_W_xh"],
                                         weights["lstm_W_hh"],
                                         weights["lstm_b"])
                    # lstm_cell = LSTMCell(self.n_hidden)
                    xemb = tf.unstack(xemb, self.timesteps, 1)

                    #c, h
                    W_state_c = tf.random_normal(
                        [self.batch_size, self.n_hidden], stddev=0.1)
                    W_state_h = tf.random_normal(
                        [self.batch_size, self.n_hidden], stddev=0.1)
                    outputs, state = tf.nn.static_rnn(
                        lstm_cell,
                        xemb,
                        initial_state=(W_state_c, W_state_h),
                        dtype=tf.float32)
                    _, hout = state

                with tf.variable_scope("dropout"):
                    h_ = layers.dropout(hout, keep_prob=dropout)

                for i, dim in enumerate(self.dim_hidden[:-1]):
                    h_ = self.fc(h_, weights["fc_W" + str(i)],
                                 weights["fc_b" + str(i)])
                    h_ = tf.nn.dropout(h_, dropout)

                # Logits linear layer, i.e. softmax without normalization.
                N, Min = h_.get_shape()
                i = len(self.dim_hidden) - 1
                logits = self.fc(h_,
                                 weights["fc_W" + str(i)],
                                 weights["fc_b" + str(i)],
                                 relu=False)

            else:
                with tf.variable_scope("embedding"):
                    Wemb = self.finetune_weights["emb_W"]
                    Wemb_mask = tf.get_variable("mask_padding",
                                                initializer=MASK_ARRAY,
                                                dtype="float32",
                                                trainable=False)
                    xemb = self.embedding(x, Wemb, Wemb_mask)

                # convolutional network
                with tf.variable_scope("rnn"):
                    lstm_cell = LSTMCell(self.n_hidden,
                                         self.finetune_weights["lstm_W_xh"],
                                         self.finetune_weights["lstm_W_hh"],
                                         self.finetune_weights["lstm_b"])
                    xemb = tf.unstack(xemb, self.timesteps, 1)
                    W_state_c = tf.random_normal(
                        [self.batch_size, self.n_hidden], stddev=0.1)
                    W_state_h = tf.random_normal(
                        [self.batch_size, self.n_hidden], stddev=0.1)
                    outputs, state = tf.nn.static_rnn(
                        lstm_cell,
                        xemb,
                        initial_state=(W_state_c, W_state_h),
                        dtype=tf.float32)
                    _, hout = state

                with tf.variable_scope("dropout"):
                    h_ = layers.dropout(hout, keep_prob=dropout)

                for i, dim in enumerate(self.dim_hidden[:-1]):
                    Wfc = self.finetune_weights["fc_W" + str(i)]
                    bfc = self.finetune_weights["fc_b" + str(i)]
                    h_ = self.fc(h_, Wfc, bfc)
                    h_ = tf.nn.dropout(h_, dropout)

                # finetune the last layer
                i = len(self.dim_hidden) - 1
                weights = {}
                dim_in = self.n_hidden_2
                weights["fc_W" + str(i)] = self.weight_variable(
                    [int(dim_in), FLAGS.n_classes], name="fc_W" + str(i))
                weights["fc_b" + str(i)] = self.bias_variable(
                    [FLAGS.n_classes], name="fc_b" + str(i))

                # Logits linear layer, i.e. softmax without normalization.
                N, Min = h_.get_shape()
                i = len(self.dim_hidden) - 1
                logits = self.fc(h_,
                                 weights["fc_W" + str(i)],
                                 weights["fc_b" + str(i)],
                                 relu=False)
        return logits
def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None):
  """Deep Neural Net and Linear combined model_fn.

  Args:
    features: `Tensor` or dict of `Tensor` (depends on data passed to `fit`).
    labels: `Tensor` of shape [batch_size, 1] or [batch_size] labels of dtype
      `int32` or `int64` in the range `[0, n_classes)`.
    mode: Defines whether this is training, evaluation or prediction.
      See `ModeKeys`.
    params: A dict of hyperparameters.
      The following hyperparameters are expected:
      * head: A `Head` instance.
      * linear_feature_columns: An iterable containing all the feature columns
          used by the Linear model.
      * linear_optimizer: string, `Optimizer` object, or callable that defines
          the optimizer to use for training the Linear model. Defaults to the
          Ftrl optimizer.
      * joint_linear_weights: If True a single (possibly partitioned) variable
          will be used to store the linear model weights. It's faster, but
          requires all columns are sparse and have the 'sum' combiner.
      * dnn_feature_columns: An iterable containing all the feature columns used
          by the DNN model.
      * dnn_optimizer: string, `Optimizer` object, or callable that defines the
          optimizer to use for training the DNN model. Defaults to the Adagrad
          optimizer.
      * dnn_hidden_units: List of hidden units per DNN layer.
      * dnn_activation_fn: Activation function applied to each DNN layer. If
          `None`, will use `tf.nn.relu`.
      * dnn_dropout: When not `None`, the probability we will drop out a given
          DNN coordinate.
      * gradient_clip_norm: A float > 0. If provided, gradients are
          clipped to their global norm with this clipping ratio.
      * embedding_lr_multipliers: Optional. A dictionary from
          `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
          multiply with learning rate for the embedding variables.
      * input_layer_partitioner: Optional. Partitioner for input layer.
    config: `RunConfig` object to configure the runtime settings.

  Returns:
    `ModelFnOps`

  Raises:
    ValueError: If both `linear_feature_columns` and `dnn_features_columns`
      are empty at the same time, or `input_layer_partitioner` is missing.
  """
  head = params["head"]
  linear_feature_columns = params.get("linear_feature_columns")
  linear_optimizer = params.get("linear_optimizer") or "Ftrl"
  joint_linear_weights = params.get("joint_linear_weights")
  dnn_feature_columns = params.get("dnn_feature_columns")
  dnn_optimizer = params.get("dnn_optimizer") or "Adagrad"
  dnn_hidden_units = params.get("dnn_hidden_units")
  dnn_activation_fn = params.get("dnn_activation_fn") or nn.relu
  dnn_dropout = params.get("dnn_dropout")
  gradient_clip_norm = params.get("gradient_clip_norm")
  num_ps_replicas = config.num_ps_replicas if config else 0
  input_layer_partitioner = params.get("input_layer_partitioner") or (
      partitioned_variables.min_max_variable_partitioner(
          max_partitions=num_ps_replicas,
          min_slice_size=64 << 20))
  embedding_lr_multipliers = params.get("embedding_lr_multipliers", {})
  fix_global_step_increment_bug = params.get(
      "fix_global_step_increment_bug", True)

  if not linear_feature_columns and not dnn_feature_columns:
    raise ValueError(
        "Either linear_feature_columns or dnn_feature_columns must be defined.")

  features = _get_feature_dict(features)

  linear_optimizer = _get_optimizer(linear_optimizer)
  _check_no_sync_replicas_optimizer(linear_optimizer)
  dnn_optimizer = _get_optimizer(dnn_optimizer)
  _check_no_sync_replicas_optimizer(dnn_optimizer)

  # Build DNN Logits.
  dnn_parent_scope = "dnn"

  if not dnn_feature_columns:
    dnn_logits = None
  else:
    if not dnn_hidden_units:
      raise ValueError(
          "dnn_hidden_units must be defined when dnn_feature_columns is "
          "specified.")
    dnn_partitioner = (
        partitioned_variables.min_max_variable_partitioner(
            max_partitions=num_ps_replicas))
    with variable_scope.variable_scope(
        dnn_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=dnn_partitioner):
      with variable_scope.variable_scope(
          "input_from_feature_columns",
          values=tuple(six.itervalues(features)),
          partitioner=input_layer_partitioner) as dnn_input_scope:
        if all(
            isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
            for fc in dnn_feature_columns
        ):
          net = layers.input_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope],
              scope=dnn_input_scope)
        else:
          net = fc_core.input_layer(
              features=features,
              feature_columns=dnn_feature_columns,
              weight_collections=[dnn_parent_scope])

      for layer_id, num_hidden_units in enumerate(dnn_hidden_units):
        with variable_scope.variable_scope(
            "hiddenlayer_%d" % layer_id,
            values=(net,)) as dnn_hidden_layer_scope:
          net = layers.fully_connected(
              net,
              num_hidden_units,
              activation_fn=dnn_activation_fn,
              variables_collections=[dnn_parent_scope],
              scope=dnn_hidden_layer_scope)
          if dnn_dropout is not None and mode == model_fn.ModeKeys.TRAIN:
            net = layers.dropout(
                net,
                keep_prob=(1.0 - dnn_dropout))
        # TODO(b/31209633): Consider adding summary before dropout.
        _add_layer_summary(net, dnn_hidden_layer_scope.name)

      with variable_scope.variable_scope(
          "logits",
          values=(net,)) as dnn_logits_scope:
        dnn_logits = layers.fully_connected(
            net,
            head.logits_dimension,
            activation_fn=None,
            variables_collections=[dnn_parent_scope],
            scope=dnn_logits_scope)
      _add_layer_summary(dnn_logits, dnn_logits_scope.name)

  # Build Linear logits.
  linear_parent_scope = "linear"

  if not linear_feature_columns:
    linear_logits = None
  else:
    linear_partitioner = partitioned_variables.min_max_variable_partitioner(
        max_partitions=num_ps_replicas,
        min_slice_size=64 << 20)
    with variable_scope.variable_scope(
        linear_parent_scope,
        values=tuple(six.itervalues(features)),
        partitioner=linear_partitioner) as scope:
      if all(isinstance(fc, feature_column_lib._FeatureColumn)  # pylint: disable=protected-access
             for fc in linear_feature_columns):
        if joint_linear_weights:
          linear_logits, _, _ = layers.joint_weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
        else:
          linear_logits, _, _ = layers.weighted_sum_from_feature_columns(
              columns_to_tensors=features,
              feature_columns=linear_feature_columns,
              num_outputs=head.logits_dimension,
              weight_collections=[linear_parent_scope],
              scope=scope)
      else:
        linear_logits = fc_core.linear_model(
            features=features,
            feature_columns=linear_feature_columns,
            units=head.logits_dimension,
            weight_collections=[linear_parent_scope])

      _add_layer_summary(linear_logits, scope.name)

  # Combine logits and build full model.
  if dnn_logits is not None and linear_logits is not None:
    logits = dnn_logits + linear_logits
  elif dnn_logits is not None:
    logits = dnn_logits
  else:
    logits = linear_logits

  def _make_training_op(training_loss):
    """Training op for the DNN linear combined model."""
    train_ops = []
    global_step = training_util.get_global_step()
    if dnn_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_DNN_LEARNING_RATE,
              optimizer=dnn_optimizer,
              gradient_multipliers=_extract_embedding_lr_multipliers(  # pylint: disable=protected-access
                  embedding_lr_multipliers, dnn_parent_scope,
                  dnn_input_scope.name),
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(dnn_parent_scope),
              name=dnn_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))
    if linear_logits is not None:
      train_ops.append(
          optimizers.optimize_loss(
              loss=training_loss,
              global_step=global_step,
              learning_rate=_linear_learning_rate(len(linear_feature_columns)),
              optimizer=linear_optimizer,
              clip_gradients=gradient_clip_norm,
              variables=ops.get_collection(linear_parent_scope),
              name=linear_parent_scope,
              # Empty summaries, because head already logs "loss" summary.
              summaries=[],
              increment_global_step=not fix_global_step_increment_bug))

    train_op = control_flow_ops.group(*train_ops)
    if fix_global_step_increment_bug:
      with ops.control_dependencies([train_op]):
        with ops.colocate_with(global_step):
          return state_ops.assign_add(global_step, 1).op
    return train_op

  return head.create_model_fn_ops(
      features=features,
      mode=mode,
      labels=labels,
      train_op_fn=_make_training_op,
      logits=logits)
Beispiel #55
0
    def _inference(self, x, dropout, is_training=True):
        with tf.variable_scope('pretrain_model', reuse=None) as training_scope:
            weights = {}
            if self.freeze_opt is None:
                weights = self.build_emb_weights(weights)
                weights = self.build_conv_weights(weights)
                weights = self.build_fc_weights(
                    self.n_filters * len(self.filter_sizes), weights)

                with tf.variable_scope("embedding"):
                    self.embedding(x, weights["emb_W"], weights["emb_mask_W"])

                # convolutional network
                with tf.variable_scope("conv"):
                    hout = self.conv(weights, is_training)

                with tf.variable_scope("dropout"):
                    h_ = layers.dropout(hout, keep_prob=dropout)

                for i, dim in enumerate(self.dim_hidden[:-1]):
                    h_ = self.fc(h_, weights["fc_W" + str(i)],
                                 weights["fc_b" + str(i)])
                    h_ = tf.nn.dropout(h_, dropout)

                # Logits linear layer, i.e. softmax without normalization.
                N, Min = h_.get_shape()
                i = len(self.dim_hidden) - 1
                logits = self.fc(h_,
                                 weights["fc_W" + str(i)],
                                 weights["fc_b" + str(i)],
                                 relu=False)

            else:
                with tf.variable_scope("embedding"):
                    Wemb = self.finetune_weights["emb_W"]
                    Wemb_mask = tf.get_variable("mask_padding",
                                                initializer=MASK_ARRAY,
                                                dtype="float32",
                                                trainable=False)
                    self.embedding(x, Wemb, Wemb_mask)

                # convolutional network
                with tf.variable_scope("conv"):
                    # w = {}
                    # for i, filter_size in enumerate(self.filter_sizes):
                    #     w["conv_W"+str(filter_size)] = self.finetune_weights["conv_W"+str(filter_size)]
                    #     w["conv_b"+str(filter_size)] = self.finetune_weights["conv_b"+str(filter_size)]
                    hout = self.conv(self.finetune_weights, is_training)

                with tf.variable_scope("dropout"):
                    h_ = layers.dropout(hout, keep_prob=dropout)

                for i, dim in enumerate(self.dim_hidden[:-1]):
                    Wfc = self.finetune_weights["fc_W" + str(i)]
                    bfc = self.finetune_weights["fc_b" + str(i)]
                    h_ = self.fc(h_, Wfc, bfc)
                    h_ = tf.nn.dropout(h_, dropout)

                # finetune the last layer
                i = len(self.dim_hidden) - 1
                weights = {}
                dim_in = self.n_hidden_2
                weights["fc_W" + str(i)] = self.weight_variable(
                    [int(dim_in), FLAGS.n_classes], name="fc_W" + str(i))
                weights["fc_b" + str(i)] = self.bias_variable(
                    [FLAGS.n_classes], name="fc_b" + str(i))

                # Logits linear layer, i.e. softmax without normalization.
                N, Min = h_.get_shape()
                i = len(self.dim_hidden) - 1
                logits = self.fc(h_,
                                 weights["fc_W" + str(i)],
                                 weights["fc_b" + str(i)],
                                 relu=False)

        return logits