Code Example #1
    def build_trainer(self, child_model):
        child_model.build_valid_rl()
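        # valid_shuffle_acc counts correct predictions of the sampled child
        # architecture on a shuffled validation batch; dividing by the batch
        # size turns it into the accuracy used as the controller's reward.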
        self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) /
                          tf.to_float(child_model.batch_size))
        self.reward = self.valid_acc

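        # skip_rate: fraction of the num_layers * (num_layers - 1) / 2 possible
        # skip connections that the sampled architectures actually use.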
        normalize = tf.to_float(self.num_layers * (self.num_layers - 1) / 2)
        self.skip_rate = tf.to_float(self.skip_count) / normalize

        if self.entropy_weight is not None:
            self.reward += self.entropy_weight * self.sample_entropy

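        # REINFORCE with a moving-average baseline: the baseline moves toward
        # the current reward by a factor of (1 - bl_dec), reducing the variance
        # of the policy gradient.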
        self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)
        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        baseline_update = tf.assign_sub(self.baseline, (1 - self.bl_dec) *
                                        (self.baseline - self.reward))

        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)

        self.loss = self.sample_log_prob * (self.reward - self.baseline)
        if self.skip_weight is not None:
            self.loss += self.skip_weight * self.skip_penaltys

        self.train_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name="train_step")
        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        print("-" * 80)
        for var in tf_variables:
            print(var)

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)
Code Example #2
    def _build_train(self):
        print("Build train graph")
        all_h, self.train_reset = self._model(self.x_train, True, False)
        log_probs = self._get_log_probs(all_h,
                                        self.y_train,
                                        batch_size=self.batch_size,
                                        is_training=True)
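        # The loss sums the per-step terms and averages over the batch; the
        # training perplexity is the exponential of their mean.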
        self.loss = tf.reduce_sum(log_probs) / tf.to_float(self.batch_size)
        self.train_ppl = tf.exp(tf.reduce_mean(log_probs))

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        self.num_vars = count_model_params(tf_variables)
        print("-" * 80)
        print("Model has {} parameters".format(self.num_vars))

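        # Optional activation regularizers: an L2 penalty on the hidden states
        # and a slowness penalty (all_h_diff), both averaged over the batch.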
        loss = self.loss
        if self.rnn_l2_reg is not None:
            loss += (self.rnn_l2_reg * tf.reduce_sum(all_h**2) /
                     tf.to_float(self.batch_size))
        if self.rnn_slowness_reg is not None:
            loss += (self.rnn_slowness_reg * self.all_h_diff /
                     tf.to_float(self.batch_size))
        self.global_step = tf.Variable(0,
                                       dtype=tf.int32,
                                       trainable=False,
                                       name="global_step")
        (self.train_op, self.lr, self.grad_norm, self.optimizer,
         self.grad_norms) = get_train_ops(
             loss,
             tf_variables,
             self.global_step,
             clip_mode=self.clip_mode,
             grad_bound=self.grad_bound,
             l2_reg=self.l2_reg,
             lr_warmup_val=self.lr_warmup_val,
             lr_warmup_steps=self.lr_warmup_steps,
             lr_init=self.lr_init,
             lr_dec_start=self.lr_dec_start,
             lr_dec_every=self.lr_dec_every,
             lr_dec_rate=self.lr_dec_rate,
             lr_dec_min=self.lr_dec_min,
             optim_algo=self.optim_algo,
             moving_average=self.optim_moving_average,
             sync_replicas=self.sync_replicas,
             num_aggregate=self.num_aggregate,
             num_replicas=self.num_replicas,
             get_grad_norms=True,
         )
Code Example #3
  def _build_train(self):
    print("-" * 80)
    print("Build train graph")
    logits = self._model(self.x_train, is_training=True)
    log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=logits, labels=self.y_train)
    self.loss = tf.reduce_mean(log_probs)

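    # When an auxiliary head is used, its cross-entropy is added to the
    # training loss with a fixed weight of 0.4.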
    if self.use_aux_heads:
      log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.aux_logits, labels=self.y_train)
      self.aux_loss = tf.reduce_mean(log_probs)
      train_loss = self.loss + 0.4 * self.aux_loss
    else:
      train_loss = self.loss

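    # train_acc is the number of correct predictions in the batch (an integer
    # count, not a fraction).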
    self.train_preds = tf.argmax(logits, axis=1)
    self.train_preds = tf.to_int32(self.train_preds)
    self.train_acc = tf.equal(self.train_preds, self.y_train)
    self.train_acc = tf.to_int32(self.train_acc)
    self.train_acc = tf.reduce_sum(self.train_acc)

    tf_variables = [
      var for var in tf.trainable_variables() if (
        var.name.startswith(self.name) and "aux_head" not in var.name)]
    self.num_vars = count_model_params(tf_variables)
    print("Model has {0} params".format(self.num_vars))

    self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
      train_loss,
      tf_variables,
      self.global_step,
      clip_mode=self.clip_mode,
      grad_bound=self.grad_bound,
      l2_reg=self.l2_reg,
      lr_init=self.lr_init,
      lr_dec_start=self.lr_dec_start,
      lr_dec_every=self.lr_dec_every,
      lr_dec_rate=self.lr_dec_rate,
      lr_cosine=self.lr_cosine,
      lr_max=self.lr_max,
      lr_min=self.lr_min,
      lr_T_0=self.lr_T_0,
      lr_T_mul=self.lr_T_mul,
      num_train_batches=self.num_train_batches,
      optim_algo=self.optim_algo,
      sync_replicas=self.sync_replicas,
      num_aggregate=self.num_aggregate,
      num_replicas=self.num_replicas)
Code Example #4
File: ptb_enas_controller.py  Project: ahundt/renas
    def build_trainer(self, child_model):
        # actor
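        # The controller's reward is 80 divided by the validation perplexity of
        # the sampled child model; stop_gradient treats the child's RL loss as
        # a constant so only controller weights are updated here.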
        self.valid_loss = tf.to_float(child_model.rl_loss)
        self.valid_loss = tf.stop_gradient(self.valid_loss)
        self.valid_ppl = tf.exp(self.valid_loss)
        self.reward = 80.0 / self.valid_ppl

        if self.entropy_weight is not None:
            self.reward += self.entropy_weight * self.sample_entropy

        # or baseline
        self.sample_log_probs = tf.reduce_sum(self.sample_log_probs)
        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        baseline_update = tf.assign_sub(self.baseline, (1 - self.bl_dec) *
                                        (self.baseline - self.reward))

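        # Reading the reward through tf.identity under control_dependencies
        # forces the baseline update to run whenever the loss is evaluated.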
        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)
        self.loss = self.sample_log_probs * (self.reward - self.baseline)

        self.train_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name="train_step")
        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)
Code Example #5
File: models.py  Project: j-varun/enas
    def _build_train(self):
        print("Build train graph")
        logits = self._model(self.x_train, True)
        log_probs = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.y_train)
        self.loss = tf.reduce_mean(log_probs)

        self.train_preds = tf.argmax(logits, axis=1)
        self.train_preds = tf.to_int32(self.train_preds)
        self.train_acc = tf.equal(self.train_preds, self.y_train)
        self.train_acc = tf.to_int32(self.train_acc)
        self.train_acc = tf.reduce_sum(self.train_acc)

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        self.num_vars = count_model_params(tf_variables)
        print("-" * 80)
        for var in tf_variables:
            print(var)

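        # global_step is passed to get_train_ops, which uses it for the
        # step-based learning-rate decay (lr_dec_start / lr_dec_every /
        # lr_dec_rate).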
        self.global_step = tf.Variable(0,
                                       dtype=tf.int32,
                                       trainable=False,
                                       name="global_step")
        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.global_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)
Code Example #6
    def build_trainer(self, child_model):
        child_model.build_valid_rl()
        self.valid_acc = (tf.to_float(child_model.valid_shuffle_acc) /
                          tf.to_float(child_model.batch_size))
        if self.dataset == "stacking":
            # Rewards based on an MSE-style loss should grow quickly as the loss
            # approaches 0, since the possible improvement shrinks as the model
            # gets closer to the exact goal pose.
            # Use epsilon to avoid dividing by 0.
            epsilon = 1e-12
            self.reward = 1 / tf.maximum(
                tf.abs(child_model.valid_shuffle_loss), epsilon)
            # previous reward which sort of worked:
            # self.reward = self.max_loss-child_model.valid_shuffle_loss
            self.mse = child_model.valid_shuffle_loss
            self.mae = child_model.valid_shuffle_mae
            self.angle_error = child_model.valid_shuffle_angle_error
            self.cart_error = child_model.valid_shuffle_cart_error

        else:
            self.reward = self.valid_acc
            self.mse = tf.zeros([1])
            self.mae = tf.zeros([1])
            self.angle_error = tf.zeros([1])
            self.cart_error = tf.zeros([1])

        if self.entropy_weight is not None:
            self.reward += self.entropy_weight * self.sample_entropy

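        # Keep a handle to the unreduced per-decision log-probabilities before
        # collapsing them to a scalar for the policy-gradient loss.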
        self.sample_log_prob_ = self.sample_log_prob

        self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)
        self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
        baseline_update = tf.assign_sub(self.baseline, (1 - self.bl_dec) *
                                        (self.baseline - self.reward))

        with tf.control_dependencies([baseline_update]):
            self.reward = tf.identity(self.reward)

        self.loss = self.sample_log_prob * (self.reward - self.baseline)
        self.train_step = tf.Variable(0,
                                      dtype=tf.int32,
                                      trainable=False,
                                      name="train_step")

        tf_variables = [
            var for var in tf.trainable_variables()
            if var.name.startswith(self.name)
        ]
        print("-" * 80)
        for var in tf_variables:
            print(var)

        self.train_op, self.lr, self.grad_norm, self.optimizer = get_train_ops(
            self.loss,
            tf_variables,
            self.train_step,
            clip_mode=self.clip_mode,
            grad_bound=self.grad_bound,
            l2_reg=self.l2_reg,
            lr_init=self.lr_init,
            lr_dec_start=self.lr_dec_start,
            lr_dec_every=self.lr_dec_every,
            lr_dec_rate=self.lr_dec_rate,
            optim_algo=self.optim_algo,
            sync_replicas=self.sync_replicas,
            num_aggregate=self.num_aggregate,
            num_replicas=self.num_replicas)

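        # No skip-connection statistic is computed in this trainer, so
        # skip_rate is exposed as a constant 0.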
        self.skip_rate = tf.constant(0.0, dtype=tf.float32)