def build_trainer(self, child_model):
    """Build the controller's reward, baseline, loss and training op.

    Calls ``child_model.build_valid_rl()`` and then wires up, in order:
    the reward (``valid_shuffle_acc / batch_size`` of the child, plus an
    optional entropy bonus), the skip rate, a moving-average baseline,
    the loss ``sample_log_prob * (reward - baseline)`` (plus an optional
    skip penalty) and finally the optimizer ops from ``get_train_ops``.

    Args:
        child_model: object providing ``build_valid_rl()``,
            ``valid_shuffle_acc`` and ``batch_size``.

    Sets on ``self``: ``valid_acc``, ``reward``, ``skip_rate``,
    ``sample_log_prob`` (replaced by its sum), ``baseline``, ``loss``,
    ``train_step``, ``train_op``, ``lr``, ``grad_norm``, ``optimizer``.
    """
    child_model.build_valid_rl()

    # Reward starts out as the child's valid_shuffle_acc over its batch size.
    acc_numerator = tf.to_float(child_model.valid_shuffle_acc)
    acc_denominator = tf.to_float(child_model.batch_size)
    self.valid_acc = acc_numerator / acc_denominator
    self.reward = self.valid_acc

    # skip_count is normalised by n * (n - 1) / 2, the number of unordered
    # layer pairs for n = num_layers.
    normalize = tf.to_float(self.num_layers * (self.num_layers - 1) / 2)
    self.skip_rate = tf.to_float(self.skip_count) / normalize

    if self.entropy_weight is not None:
        entropy_bonus = self.entropy_weight * self.sample_entropy
        self.reward = self.reward + entropy_bonus

    self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)

    # baseline <- baseline - (1 - bl_dec) * (baseline - reward), i.e.
    # bl_dec * baseline + (1 - bl_dec) * reward.
    self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
    baseline_step = 1 - self.bl_dec
    baseline_update = tf.assign_sub(
        self.baseline, baseline_step * (self.baseline - self.reward))

    # Re-wrapping the reward under the control dependency makes anything
    # that consumes self.reward from here on also run the baseline update.
    with tf.control_dependencies([baseline_update]):
        self.reward = tf.identity(self.reward)

    advantage = self.reward - self.baseline
    self.loss = self.sample_log_prob * advantage
    if self.skip_weight is not None:
        self.loss = self.loss + self.skip_weight * self.skip_penaltys

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    # Only trainable variables whose name starts with self.name are optimised.
    tf_variables = []
    for candidate in tf.trainable_variables():
        if candidate.name.startswith(self.name):
            tf_variables.append(candidate)

    print("-" * 80)
    for variable in tf_variables:
        print(variable)

    optimizer_config = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    train_ops = get_train_ops(
        self.loss, tf_variables, self.train_step, **optimizer_config)
    self.train_op, self.lr, self.grad_norm, self.optimizer = train_ops
def _build_train(self):
    """Build the training loss, perplexity and optimizer ops.

    Runs ``self._model`` on ``self.x_train``, turns the values returned
    by ``self._get_log_probs`` into a per-batch loss and a perplexity,
    optionally adds two regularisation terms, and asks ``get_train_ops``
    for the training ops (including per-variable gradient norms).

    Sets on ``self``: ``train_reset``, ``loss``, ``train_ppl``,
    ``num_vars``, ``global_step``, ``train_op``, ``lr``, ``grad_norm``,
    ``optimizer``, ``grad_norms``.
    """
    print("Build train graph")

    all_h, self.train_reset = self._model(self.x_train, True, False)
    log_probs = self._get_log_probs(
        all_h, self.y_train, batch_size=self.batch_size, is_training=True)

    # Loss is the summed log_probs divided by the batch size; perplexity
    # is the exponential of their mean.
    summed_log_probs = tf.reduce_sum(log_probs)
    self.loss = summed_log_probs / tf.to_float(self.batch_size)
    self.train_ppl = tf.exp(tf.reduce_mean(log_probs))

    # Only trainable variables whose name starts with self.name are used.
    tf_variables = []
    for candidate in tf.trainable_variables():
        if candidate.name.startswith(self.name):
            tf_variables.append(candidate)
    self.num_vars = count_model_params(tf_variables)
    print("-" * 80)
    print("Model has {} parameters".format(self.num_vars))

    # self.loss stays unregularised; the penalties only go into the value
    # that is handed to the optimizer.
    loss = self.loss
    if self.rnn_l2_reg is not None:
        squared_h_sum = tf.reduce_sum(all_h ** 2)
        l2_penalty = (
            self.rnn_l2_reg * squared_h_sum / tf.to_float(self.batch_size))
        loss = loss + l2_penalty
    if self.rnn_slowness_reg is not None:
        # self.all_h_diff is not assigned in this method; presumably it is
        # set as a side effect of self._model -- TODO confirm.
        slowness_penalty = (
            self.rnn_slowness_reg * self.all_h_diff
            / tf.to_float(self.batch_size))
        loss = loss + slowness_penalty

    self.global_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="global_step")

    optimizer_config = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_warmup_val=self.lr_warmup_val,
        lr_warmup_steps=self.lr_warmup_steps,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_dec_min=self.lr_dec_min,
        optim_algo=self.optim_algo,
        moving_average=self.optim_moving_average,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
        get_grad_norms=True,
    )
    train_ops = get_train_ops(
        loss, tf_variables, self.global_step, **optimizer_config)
    # With get_grad_norms=True the helper is unpacked into five values.
    (self.train_op,
     self.lr,
     self.grad_norm,
     self.optimizer,
     self.grad_norms) = train_ops
def _build_train(self):
    """Build the training loss, accuracy counter and optimizer ops.

    Computes the mean sparse softmax cross-entropy of the model's logits
    against ``self.y_train``; when ``self.use_aux_heads`` is set, the
    same loss on ``self.aux_logits`` is added with weight 0.4 to form
    the optimised loss. Uses the existing ``self.global_step``.

    Sets on ``self``: ``loss``, ``aux_loss`` (only with aux heads),
    ``train_preds``, ``train_acc``, ``num_vars``, ``train_op``, ``lr``,
    ``grad_norm``, ``optimizer``.
    """
    print("-" * 80)
    print("Build train graph")

    logits = self._model(self.x_train, is_training=True)
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=self.y_train)
    self.loss = tf.reduce_mean(xent)

    train_loss = self.loss
    if self.use_aux_heads:
        aux_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.aux_logits, labels=self.y_train)
        self.aux_loss = tf.reduce_mean(aux_xent)
        train_loss = self.loss + 0.4 * self.aux_loss

    # train_acc is the number of matching predictions (a sum), not a ratio.
    self.train_preds = tf.to_int32(tf.argmax(logits, axis=1))
    is_correct = tf.equal(self.train_preds, self.y_train)
    self.train_acc = tf.reduce_sum(tf.to_int32(is_correct))

    # NOTE(review): variables with "aux_head" in their name are left out of
    # the list that is passed to get_train_ops, not only out of the
    # parameter count -- confirm this is intended when use_aux_heads is set.
    tf_variables = []
    for candidate in tf.trainable_variables():
        if not candidate.name.startswith(self.name):
            continue
        if "aux_head" in candidate.name:
            continue
        tf_variables.append(candidate)
    self.num_vars = count_model_params(tf_variables)
    print("Model has {0} params".format(self.num_vars))

    optimizer_config = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        lr_cosine=self.lr_cosine,
        lr_max=self.lr_max,
        lr_min=self.lr_min,
        lr_T_0=self.lr_T_0,
        lr_T_mul=self.lr_T_mul,
        num_train_batches=self.num_train_batches,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    train_ops = get_train_ops(
        train_loss, tf_variables, self.global_step, **optimizer_config)
    self.train_op, self.lr, self.grad_norm, self.optimizer = train_ops
def build_trainer(self, child_model):
    """Build the controller's reward, baseline, loss and training op.

    The reward is ``80.0 / exp(rl_loss)`` of the child model (with the
    gradient through ``rl_loss`` stopped), plus an optional entropy
    bonus. The loss is ``sample_log_probs * (reward - baseline)`` with a
    moving-average baseline.

    Args:
        child_model: object providing ``rl_loss``.

    Sets on ``self``: ``valid_loss``, ``valid_ppl``, ``reward``,
    ``sample_log_probs`` (replaced by its sum), ``baseline``, ``loss``,
    ``train_step``, ``train_op``, ``lr``, ``grad_norm``, ``optimizer``.
    """
    # Actor: reward derived from the child's rl_loss, treated as a constant.
    self.valid_loss = tf.stop_gradient(tf.to_float(child_model.rl_loss))
    self.valid_ppl = tf.exp(self.valid_loss)
    self.reward = 80.0 / self.valid_ppl

    if self.entropy_weight is not None:
        entropy_bonus = self.entropy_weight * self.sample_entropy
        self.reward = self.reward + entropy_bonus

    # Baseline: baseline <- baseline - (1 - bl_dec) * (baseline - reward),
    # i.e. bl_dec * baseline + (1 - bl_dec) * reward.
    self.sample_log_probs = tf.reduce_sum(self.sample_log_probs)
    self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
    baseline_step = 1 - self.bl_dec
    baseline_update = tf.assign_sub(
        self.baseline, baseline_step * (self.baseline - self.reward))

    # Re-wrapping the reward under the control dependency makes anything
    # that consumes self.reward from here on also run the baseline update.
    with tf.control_dependencies([baseline_update]):
        self.reward = tf.identity(self.reward)

    advantage = self.reward - self.baseline
    self.loss = self.sample_log_probs * advantage

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    # Only trainable variables whose name starts with self.name are optimised.
    tf_variables = []
    for candidate in tf.trainable_variables():
        if candidate.name.startswith(self.name):
            tf_variables.append(candidate)

    optimizer_config = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    train_ops = get_train_ops(
        self.loss, tf_variables, self.train_step, **optimizer_config)
    self.train_op, self.lr, self.grad_norm, self.optimizer = train_ops
def _build_train(self):
    """Build the training loss, accuracy counter and optimizer ops.

    Computes the mean sparse softmax cross-entropy of the model's logits
    against ``self.y_train``, counts matching predictions, creates a new
    ``global_step`` variable and obtains the training ops from
    ``get_train_ops``.

    Sets on ``self``: ``loss``, ``train_preds``, ``train_acc``,
    ``num_vars``, ``global_step``, ``train_op``, ``lr``, ``grad_norm``,
    ``optimizer``.
    """
    print("Build train graph")

    logits = self._model(self.x_train, True)
    xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=self.y_train)
    self.loss = tf.reduce_mean(xent)

    # train_acc is the number of matching predictions (a sum), not a ratio.
    self.train_preds = tf.to_int32(tf.argmax(logits, axis=1))
    is_correct = tf.equal(self.train_preds, self.y_train)
    self.train_acc = tf.reduce_sum(tf.to_int32(is_correct))

    # Only trainable variables whose name starts with self.name are optimised.
    tf_variables = []
    for candidate in tf.trainable_variables():
        if candidate.name.startswith(self.name):
            tf_variables.append(candidate)
    self.num_vars = count_model_params(tf_variables)

    print("-" * 80)
    for variable in tf_variables:
        print(variable)

    self.global_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="global_step")

    optimizer_config = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    train_ops = get_train_ops(
        self.loss, tf_variables, self.global_step, **optimizer_config)
    self.train_op, self.lr, self.grad_norm, self.optimizer = train_ops
def build_trainer(self, child_model):
    """Build the controller's reward, baseline, loss and training op.

    Calls ``child_model.build_valid_rl()``. For ``self.dataset ==
    "stacking"`` the reward is the reciprocal of the child's absolute
    ``valid_shuffle_loss`` and the child's error metrics are exposed on
    ``self``; otherwise the reward is the validation accuracy and the
    metrics are zero tensors. An optional entropy bonus is added, and
    the loss is ``sample_log_prob * (reward - baseline)`` with a
    moving-average baseline.

    Args:
        child_model: object providing ``build_valid_rl()``,
            ``valid_shuffle_acc``, ``batch_size`` and, for "stacking",
            ``valid_shuffle_loss``, ``valid_shuffle_mae``,
            ``valid_shuffle_angle_error``, ``valid_shuffle_cart_error``.

    Sets on ``self``: ``valid_acc``, ``reward``, ``mse``, ``mae``,
    ``angle_error``, ``cart_error``, ``sample_log_prob_`` (the value
    before summing), ``sample_log_prob`` (the sum), ``baseline``,
    ``loss``, ``train_step``, ``train_op``, ``lr``, ``grad_norm``,
    ``optimizer``, ``skip_rate``.
    """
    child_model.build_valid_rl()

    acc_numerator = tf.to_float(child_model.valid_shuffle_acc)
    acc_denominator = tf.to_float(child_model.batch_size)
    self.valid_acc = acc_numerator / acc_denominator

    if self.dataset == "stacking":
        # Rewards for an mse-like loss should grow fast as the distance
        # from 0 shrinks, since the possible improvement gets smaller the
        # closer you get to the exact goal pose. The epsilon floor avoids
        # dividing by 0.
        # An earlier reward, max_loss - valid_shuffle_loss, was noted as
        # having "sort of worked".
        epsilon = 1e-12
        loss_magnitude = tf.abs(child_model.valid_shuffle_loss)
        self.reward = 1 / tf.maximum(loss_magnitude, epsilon)
        self.mse = child_model.valid_shuffle_loss
        self.mae = child_model.valid_shuffle_mae
        self.angle_error = child_model.valid_shuffle_angle_error
        self.cart_error = child_model.valid_shuffle_cart_error
    else:
        # Other datasets use the accuracy as reward; the error metrics are
        # filled with zero tensors so the attributes always exist.
        self.reward = self.valid_acc
        self.mse = tf.zeros([1])
        self.mae = tf.zeros([1])
        self.angle_error = tf.zeros([1])
        self.cart_error = tf.zeros([1])

    if self.entropy_weight is not None:
        entropy_bonus = self.entropy_weight * self.sample_entropy
        self.reward = self.reward + entropy_bonus

    # Keep the un-summed value around before replacing it with its sum.
    self.sample_log_prob_ = self.sample_log_prob
    self.sample_log_prob = tf.reduce_sum(self.sample_log_prob)

    # baseline <- baseline - (1 - bl_dec) * (baseline - reward), i.e.
    # bl_dec * baseline + (1 - bl_dec) * reward.
    self.baseline = tf.Variable(0.0, dtype=tf.float32, trainable=False)
    baseline_step = 1 - self.bl_dec
    baseline_update = tf.assign_sub(
        self.baseline, baseline_step * (self.baseline - self.reward))

    # Re-wrapping the reward under the control dependency makes anything
    # that consumes self.reward from here on also run the baseline update.
    with tf.control_dependencies([baseline_update]):
        self.reward = tf.identity(self.reward)

    advantage = self.reward - self.baseline
    self.loss = self.sample_log_prob * advantage

    self.train_step = tf.Variable(
        0, dtype=tf.int32, trainable=False, name="train_step")

    # Only trainable variables whose name starts with self.name are optimised.
    tf_variables = []
    for candidate in tf.trainable_variables():
        if candidate.name.startswith(self.name):
            tf_variables.append(candidate)

    print("-" * 80)
    for variable in tf_variables:
        print(variable)

    optimizer_config = dict(
        clip_mode=self.clip_mode,
        grad_bound=self.grad_bound,
        l2_reg=self.l2_reg,
        lr_init=self.lr_init,
        lr_dec_start=self.lr_dec_start,
        lr_dec_every=self.lr_dec_every,
        lr_dec_rate=self.lr_dec_rate,
        optim_algo=self.optim_algo,
        sync_replicas=self.sync_replicas,
        num_aggregate=self.num_aggregate,
        num_replicas=self.num_replicas,
    )
    train_ops = get_train_ops(
        self.loss, tf_variables, self.train_step, **optimizer_config)
    self.train_op, self.lr, self.grad_norm, self.optimizer = train_ops

    # This trainer does not compute a skip rate; it is fixed at 0.0.
    self.skip_rate = tf.constant(0.0, dtype=tf.float32)