Example 1
    def _build_train(self):
        print("Build train graph")
        # Forward pass over the training batch; all_h holds the hidden states
        # and train_reset resets the recurrent state.
        all_h, self.train_reset = self._model(self.x_train, True, False)
        log_probs = self._get_log_probs(all_h,
                                        self.y_train,
                                        batch_size=self.batch_size,
                                        is_training=True)
        # Per-example loss; perplexity is exp of the mean per-token log-loss.
        self.loss = tf.reduce_sum(log_probs) / tf.to_float(self.batch_size)
        self.train_ppl = tf.exp(tf.reduce_mean(log_probs))

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        self.num_vars = count_model_params(tf_variables)
        print("-" * 80)
        print("Model has {} parameters".format(self.num_vars))

        loss = self.loss
        # Optional activation regularizers: an L2 penalty on the hidden states
        # and a slowness penalty on differences between consecutive states.
        if self.rnn_l2_reg is not None:
            loss += (self.rnn_l2_reg * tf.reduce_sum(all_h**2) /
                     tf.to_float(self.batch_size))
        if self.rnn_slowness_reg is not None:
            loss += (self.rnn_slowness_reg * self.all_h_diff /
                     tf.to_float(self.batch_size))
        self.global_step = tf.Variable(0,
                                       dtype=tf.int32,
                                       trainable=False,
                                       name="global_step")
        (self.train_op, self.lr, self.grad_norm, self.optimizer,
         self.grad_norms) = get_train_ops(
             loss,
             tf_variables,
             self.global_step,
             clip_mode=self.clip_mode,
             grad_bound=self.grad_bound,
             l2_reg=self.l2_reg,
             lr_warmup_val=self.lr_warmup_val,
             lr_warmup_steps=self.lr_warmup_steps,
             lr_init=self.lr_init,
             lr_dec_start=self.lr_dec_start,
             lr_dec_every=self.lr_dec_every,
             lr_dec_rate=self.lr_dec_rate,
             lr_dec_min=self.lr_dec_min,
             optim_algo=self.optim_algo,
             moving_average=self.optim_moving_average,
             sync_replicas=self.sync_replicas,
             num_aggregate=self.num_aggregate,
             num_replicas=self.num_replicas,
             get_grad_norms=True,
         )
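
This snippet (and the two that follow) relies on a count_model_params helper
that is not shown. A minimal sketch of what such a helper typically does,
assuming it just multiplies out each variable's static shape and sums the
results (the actual utility may differ):

import numpy as np

def count_model_params(tf_variables):
    """Total number of scalar parameters across the given variables."""
    num_vars = 0
    for var in tf_variables:
        # Product of the dimensions in the variable's static shape (TF1-style).
        num_vars += np.prod([dim.value for dim in var.get_shape()])
    return num_vars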
Example 2
  def _build_train(self):
    print("-" * 80)
    print("Build train graph")
    logits = self._model(self.x_train, is_training=True)
    log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=self.y_train)
    self.loss = tf.reduce_mean(log_probs)

    if self.use_aux_heads:
      # Auxiliary-classifier loss; the 0.4 weight follows NASNet.
      log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.aux_logits, labels=self.y_train)
      self.aux_loss = tf.reduce_mean(log_probs)
      train_loss = self.loss + 0.4 * self.aux_loss
    else:
      train_loss = self.loss

    # train_acc counts correct predictions in the batch (a count, not a fraction).
    self.train_preds = tf.argmax(logits, axis=1)
    self.train_preds = tf.to_int32(self.train_preds)
    self.train_acc = tf.equal(self.train_preds, self.y_train)
    self.train_acc = tf.to_int32(self.train_acc)
    self.train_acc = tf.reduce_sum(self.train_acc)

    # Exclude aux-head variables from the set handed to the optimizer.
    tf_variables = [
      var for var in tf.trainable_variables() if (
        var.name.startswith(self.name) and "aux_head" not in var.name)]
    self.num_vars = count_model_params(tf_variables)
    print("Model has {0} params".format(self.num_vars))

    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
      train_loss,
      tf_variables,
      self.global_step,
      clip_mode=self.clip_mode,
      grad_bound=self.grad_bound,
      l2_reg=self.l2_reg,
      lr_init=self.lr_init,
      lr_dec_start=self.lr_dec_start,
      lr_dec_every=self.lr_dec_every,
      lr_dec_rate=self.lr_dec_rate,
      lr_cosine=self.lr_cosine,
      lr_max=self.lr_max,
      lr_min=self.lr_min,
      lr_T_0=self.lr_T_0,
      lr_T_mul=self.lr_T_mul,
      num_train_batches=self.num_train_batches,
      optim_algo=self.optim_algo,
      sync_replicas=self.sync_replicas,
      num_aggregate=self.num_aggregate,
      num_replicas=self.num_replicas)
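
get_train_ops itself is not shown. When lr_cosine is set, the lr_max, lr_min,
lr_T_0, and lr_T_mul arguments point to an SGDR-style cosine schedule with
warm restarts (Loshchilov & Hutter). A plain-Python sketch of that schedule;
the function name and per-epoch granularity are assumptions for illustration:

import math

def cosine_lr_with_restarts(epoch, lr_max, lr_min, T_0, T_mul):
    # Locate the current cycle: the first cycle lasts T_0 epochs and each
    # restart multiplies the cycle length by T_mul.
    T_i, t = T_0, epoch
    while t >= T_i:
        t -= T_i
        T_i *= T_mul
    # Anneal from lr_max down to lr_min over the course of the cycle.
    return lr_min + 0.5 * (lr_max - lr_min) * (1.0 + math.cos(math.pi * t / T_i))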
Example 3
    def _build_train(self):
        print("Build train graph")
        logits = self._model(self.x_train, True)  # is_training=True
        log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.y_train)
        self.loss = tf.reduce_mean(log_probs)

        self.train_preds = tf.argmax(logits, axis=1)
        self.train_preds = tf.to_int32(self.train_preds)
        self.train_acc = tf.equal(self.train_preds, self.y_train)
        self.train_acc = tf.to_int32(self.train_acc)
        self.train_acc = tf.reduce_sum(self.train_acc)

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        self.num_vars = count_model_params(tf_variables)
        print("-" * 80)
        for var in tf_variables:
            print(var)

        self.global_step = tf.Variable(0,
                                       dtype=tf.int32,
                                       trainable=False,
                                       name="global_step")
        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.global_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)
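
Without the cosine options, the lr_init, lr_dec_start, lr_dec_every, and
lr_dec_rate arguments describe a staircase exponential decay. A sketch of the
implied schedule in plain Python (the real get_train_ops presumably builds
this with tf.train.exponential_decay or an equivalent graph op):

def step_decay_lr(global_step, lr_init, lr_dec_start, lr_dec_every, lr_dec_rate):
    # Hold lr_init until lr_dec_start, then multiply by lr_dec_rate once
    # every lr_dec_every steps.
    steps = max(global_step - lr_dec_start, 0)
    return lr_init * (lr_dec_rate ** (steps // lr_dec_every))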
Example 4
  def _model(self, images, is_training, reuse=False):
    """Compute the logits given the images."""

    if self.fixed_arc is None:
      # During architecture search (no fixed arc), always run in training mode.
      is_training = True

    with tf.variable_scope(self.name, reuse=reuse):
      # the first two inputs
      with tf.variable_scope("stem_conv"):
        w = create_weight("w", [3, 3, 3, self.out_filters * 3])
        x = tf.nn.conv2d(
          images, w, [1, 1, 1, 1], "SAME", data_format=self.data_format)
        x = batch_norm(x, is_training, data_format=self.data_format)
      if self.data_format == "NHCW":
        split_axis = 3
      elif self.data_format == "NCHW":
        split_axis = 1
      else:
        raise ValueError("Unknown data_format '{0}'".format(self.data_format))
      # Each layer takes the previous two layers as inputs; both start as the stem.
      layers = [x, x]

      # building layers in the micro space
      out_filters = self.out_filters
      # num_layers cells plus the two reduction cells inserted at pool_layers.
      for layer_id in range(self.num_layers + 2):
        with tf.variable_scope("layer_{0}".format(layer_id)):
          if layer_id not in self.pool_layers:
            if self.fixed_arc is None:
              x = self._enas_layer(
                layer_id, layers, self.normal_arc, out_filters)
            else:
              x = self._fixed_layer(
                layer_id, layers, self.normal_arc, out_filters, 1, is_training,
                normal_or_reduction_cell="normal")
          else:
            # Reduction layers halve the spatial size and double the filter count.
            out_filters *= 2
            if self.fixed_arc is None:
              x = self._factorized_reduction(x, out_filters, 2, is_training)
              layers = [layers[-1], x]
              x = self._enas_layer(
                layer_id, layers, self.reduce_arc, out_filters)
            else:
              x = self._fixed_layer(
                layer_id, layers, self.reduce_arc, out_filters, 2, is_training,
                normal_or_reduction_cell="reduction")
          print("Layer {0:>2d}: {1}".format(layer_id, x))
          layers = [layers[-1], x]

        # auxiliary heads
        self.num_aux_vars = 0
        if (self.use_aux_heads and
            layer_id in self.aux_head_indices and
            is_training):
          print("Using aux_head at layer {0}".format(layer_id))
          with tf.variable_scope("aux_head"):
            aux_logits = tf.nn.relu(x)
            aux_logits = tf.layers.average_pooling2d(
              aux_logits, [5, 5], [3, 3], "VALID",
              data_format=self.actual_data_format)
            with tf.variable_scope("proj"):
              inp_c = self._get_C(aux_logits)
              w = create_weight("w", [1, 1, inp_c, 128])
              aux_logits = tf.nn.conv2d(aux_logits, w, [1, 1, 1, 1], "SAME",
                                        data_format=self.data_format)
              aux_logits = batch_norm(aux_logits, is_training=True,
                                      data_format=self.data_format)
              aux_logits = tf.nn.relu(aux_logits)

            with tf.variable_scope("avg_pool"):
              inp_c = self._get_C(aux_logits)
              hw = self._get_HW(aux_logits)
              w = create_weight("w", [hw, hw, inp_c, 768])
              aux_logits = tf.nn.conv2d(aux_logits, w, [1, 1, 1, 1], "SAME",
                                        data_format=self.data_format)
              aux_logits = batch_norm(aux_logits, is_training=True,
                                      data_format=self.data_format)
              aux_logits = tf.nn.relu(aux_logits)

            with tf.variable_scope("fc"):
              aux_logits = global_avg_pool(aux_logits,
                                           data_format=self.data_format)
              inp_c = aux_logits.get_shape()[1].value
              w = create_weight("w", [inp_c, 10])
              aux_logits = tf.matmul(aux_logits, w)
              self.aux_logits = aux_logits

          aux_head_variables = [
            var for var in tf.trainable_variables() if (
              var.name.startswith(self.name) and "aux_head" in var.name)]
          self.num_aux_vars = count_model_params(aux_head_variables)
          print("Aux head uses {0} params".format(self.num_aux_vars))

      x = tf.nn.relu(x)
      x = global_avg_pool(x, data_format=self.data_format)
      if is_training and self.keep_prob is not None and self.keep_prob < 1.0:
        x = tf.nn.dropout(x, self.keep_prob)
      with tf.variable_scope("fc"):
        inp_c = self._get_C(x)
        w = create_weight("w", [inp_c, 10])
        x = tf.matmul(x, w)
    return x
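
The global_avg_pool helper used above collapses the spatial dimensions so the
final matmul sees a [batch, channels] tensor. A minimal sketch consistent with
how it is called here, assuming a plain mean over the spatial axes:

import tensorflow as tf

def global_avg_pool(x, data_format="NHWC"):
    """Mean over the spatial dimensions, leaving [batch, channels]."""
    if data_format == "NHWC":
        return tf.reduce_mean(x, axis=[1, 2])
    elif data_format == "NCHW":
        return tf.reduce_mean(x, axis=[2, 3])
    raise ValueError("Unknown data_format '{0}'".format(data_format))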