def _step(
        self,
        batch: DataBatch,
        optimizer: tf.optimizers.Optimizer,
        diff_eq_loss_weight: float,
        ic_loss_weight: float,
        bc_loss_weight: float) -> Loss:
    """
    Performs a forward pass on the batch, computes the batch loss, and
    updates the model parameters.

    :param batch: the batch to compute the losses over
    :param optimizer: the optimizer to use to update parameters of the model
    :param diff_eq_loss_weight: the weight of the differential equation part
        of the total physics-informed loss
    :param ic_loss_weight: the weight of the initial condition part of the
        total physics-informed loss
    :param bc_loss_weight: the weight of the boundary condition part of the
        total physics-informed loss
    :return: the various losses over the batch
    """
    with AutoDifferentiator() as auto_diff:
        loss = self._physics_informed_loss(
            batch, diff_eq_loss_weight, ic_loss_weight, bc_loss_weight)

    optimizer.minimize(
        loss.weighted_total_loss, self.trainable_variables, tape=auto_diff)

    return loss

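# A minimal, self-contained sketch (not part of the class above) of the same
# tape-based `Optimizer.minimize` pattern using only standard TensorFlow: when
# the loss is a Tensor computed under a tape, `minimize` accepts that tape via
# the `tape` argument and performs the gradient computation and update in one
# call. The model and data below are hypothetical placeholders.
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.optimizers.Adam(1e-3)
x = tf.random.normal([8, 4])
y = tf.random.normal([8, 1])

with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(x) - y))

optimizer.minimize(loss, model.trainable_variables, tape=tape)
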
def train_one_step(model: BaseTF2Model, train_x: np.ndarray, train_y: np.ndarray,
                   optimizer: tf.optimizers.Optimizer, loss_fn: tf.losses.Loss):
    """
    Perform a one-step gradient update.

    Later this function can be reused when training SoftmaxRegressionTF2
    or LogisticRegressionTF2.
    """
    with tf.GradientTape() as tape:
        # [TODO 1.12] Calculate model predictions and loss inside the tf.GradientTape context
        # HINT: Operations on trainable variables executed inside the tf.GradientTape context
        # are recorded automatically for autograd
        predictions = model(train_x)
        train_loss = loss_fn(train_y, predictions)

    # [TODO 1.13] Compute the gradient of the loss w.r.t. the model's parameters
    # HINT: since BaseTF2Classifier extends tf.Module, you can access all of the
    # model's parameters using `model.trainable_variables`
    gradients = tape.gradient(train_loss, model.trainable_variables)

    # [TODO 1.14] Perform one weight-update step using optimizer.apply_gradients
    # HINT: https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Optimizer
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    # `train_loss` and `predictions` are EagerTensors; you can access their
    # numpy data via `.numpy()`.
    return train_loss.numpy()

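# A hedged usage sketch for `train_one_step`: the fit loop, learning rate, and
# loss choice below are hypothetical placeholders, assuming `model` follows the
# BaseTF2Model interface (callable on a batch, exposes `trainable_variables`).
import tensorflow as tf

def fit(model, train_x, train_y, epochs: int = 100):
    optimizer = tf.optimizers.SGD(learning_rate=0.1)
    loss_fn = tf.losses.BinaryCrossentropy()
    for epoch in range(epochs):
        loss = train_one_step(model, train_x, train_y, optimizer, loss_fn)
        if (epoch + 1) % 10 == 0:
            print(f'epoch {epoch + 1}: loss = {loss:.4f}')
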
def train_step(model: TBH, batch_data, bbn_dim, cbn_dim, batch_size,
               actor_opt: tf.optimizers.Optimizer,
               critic_opt: tf.optimizers.Optimizer):
    random_binary = (tf.sign(tf.random.uniform([batch_size, bbn_dim]) - 0.5) + 1) / 2
    random_cont = tf.random.uniform([batch_size, cbn_dim])

    with tf.GradientTape() as actor_tape, tf.GradientTape() as critic_tape:
        model_input = [batch_data, random_binary, random_cont]
        model_output = model(model_input, training=True)

        actor_loss = reconstruction_loss(model_output[1], batch_data[1]) - \
            adv_loss(model_output[4], model_output[2]) - \
            adv_loss(model_output[5], model_output[3])
        critic_loss = adv_loss(model_output[4], model_output[2]) + \
            adv_loss(model_output[5], model_output[3])

        actor_scope = model.encoder.trainable_variables + \
            model.tbn.trainable_variables + \
            model.decoder.trainable_variables
        critic_scope = model.dis_1.trainable_variables + model.dis_2.trainable_variables

    actor_gradient = actor_tape.gradient(actor_loss, sources=actor_scope)
    critic_gradient = critic_tape.gradient(critic_loss, sources=critic_scope)

    actor_opt.apply_gradients(zip(actor_gradient, actor_scope))
    critic_opt.apply_gradients(zip(critic_gradient, critic_scope))

    return model_output[0].numpy(), actor_loss.numpy(), critic_loss.numpy()

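# A minimal, generic sketch of the two-tape pattern used above (one tape per
# optimizer), independent of the TBH model. `generator`, `discriminator`, and
# the losses are hypothetical stand-ins; each tape records the same forward
# pass, and each optimizer only updates its own variable scope.
import tensorflow as tf

def two_tape_step(generator: tf.keras.Model,
                  discriminator: tf.keras.Model,
                  real: tf.Tensor,
                  noise: tf.Tensor,
                  gen_opt: tf.optimizers.Optimizer,
                  dis_opt: tf.optimizers.Optimizer):
    bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    with tf.GradientTape() as gen_tape, tf.GradientTape() as dis_tape:
        fake = generator(noise, training=True)
        real_logits = discriminator(real, training=True)
        fake_logits = discriminator(fake, training=True)
        gen_loss = bce(tf.ones_like(fake_logits), fake_logits)
        dis_loss = bce(tf.ones_like(real_logits), real_logits) + \
            bce(tf.zeros_like(fake_logits), fake_logits)
    gen_grads = gen_tape.gradient(gen_loss, generator.trainable_variables)
    dis_grads = dis_tape.gradient(dis_loss, discriminator.trainable_variables)
    gen_opt.apply_gradients(zip(gen_grads, generator.trainable_variables))
    dis_opt.apply_gradients(zip(dis_grads, discriminator.trainable_variables))
    return gen_loss, dis_loss
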
def test_robust_optimizer_matches(
    base_optimizer: tf.optimizers.Optimizer,
    robust_optimizer: RobustOptimizer,
    grads,
    grads_with_nan,
    vars,
    other_vars,
):
    keys = list(vars.keys())

    vars_list = [vars[key] for key in keys]
    grads_list = [grads[key] for key in keys]
    for i in range(grads[keys[0]].shape[0]):
        base_optimizer.apply_gradients(
            zip([grad[i] for grad in grads_list], vars_list))

    other_vars_list = [other_vars[key] for key in keys]
    grads_with_nan_list = [grads_with_nan[key] for key in keys]
    for i in range(grads_with_nan[keys[0]].shape[0]):
        robust_optimizer.apply_gradients(
            zip([grad[i] for grad in grads_with_nan_list], other_vars_list))

    for key in keys:
        assert np.all(np.isfinite(other_vars[key]))
        assert_allclose(vars[key].numpy(), other_vars[key].numpy())

def step_train(model: Model, data, opt: tf.optimizers.Optimizer, t):
    o_x, o_y, t_x, t_y = next(data)
    with tf.GradientTape() as tape:
        mean, var, mvn = model([o_x, o_y], t_x)
        loss = model.obj(t_y, mvn)
    gradient = tape.gradient(loss, sources=model.trainable_variables)
    opt.apply_gradients(zip(gradient, model.trainable_variables))
    tf.summary.scalar('train/loss', loss.numpy(), step=t)
    return loss.numpy()

def train_step(model: JMLH, batch_data, opt: tf.optimizers.Optimizer):
    with tf.GradientTape() as tape:
        model_input = batch_data
        code, prob, cls_prob = model(model_input, training=True)
        loss = jmlh_loss(prob, cls_prob, label=batch_data[2])
    gradient = tape.gradient(loss, sources=model.trainable_variables)
    opt.apply_gradients(zip(gradient, model.trainable_variables))
    return code.numpy(), loss.numpy()

def train_op(self, x_batch: tf.Tensor, optimizer: tf.optimizers.Optimizer):
    with tf.GradientTape() as tape:
        loss = self.loss_op(x_batch)
    variables = tape.watched_variables()
    gradient = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradient, variables))
    return loss

def train_single_epoch(model: tf.keras.Model,
                       anchors: tf.Tensor,
                       dataset: tf.data.Dataset,
                       optimizer: tf.optimizers.Optimizer,
                       grad_accum_steps: int,
                       loss_fn: LossFn,
                       epoch: int,
                       num_classes: int,
                       print_every: int = 10):
    acc_gradients = []
    running_loss = tf.metrics.Mean()
    running_clf_loss = tf.metrics.Mean()
    running_reg_loss = tf.metrics.Mean()

    for i, (images, (labels, bbs)) in enumerate(dataset):
        target_reg, target_clf = utils.anchors.anchor_targets_bbox(
            anchors, images, bbs, labels, num_classes)

        reg_loss, clf_loss, grads = _train_step(model=model,
                                                optimizer=optimizer,
                                                loss_fn=loss_fn,
                                                images=images,
                                                regress_targets=target_reg,
                                                labels=target_clf)

        if len(acc_gradients) == 0:
            acc_gradients = grads
        else:
            acc_gradients = [g1 + g2 for g1, g2 in zip(acc_gradients, grads)]

        if (i + 1) % grad_accum_steps == 0:
            optimizer.apply_gradients(
                zip(acc_gradients, model.trainable_variables))
            acc_gradients = []

        running_loss(reg_loss + clf_loss)
        running_clf_loss(clf_loss)
        running_reg_loss(reg_loss)

        if (i + 1) % print_every == 0:
            lr = get_lr(optimizer)
            print(f'Epoch[{epoch}] '
                  f'loss: {running_loss.result():.6f} '
                  f'clf. loss: {running_clf_loss.result():.6f} '
                  f'reg. loss: {running_reg_loss.result():.6f} '
                  f'learning rate: {lr:.6f}')

    if len(acc_gradients) > 0:
        optimizer.apply_gradients(zip(acc_gradients, model.trainable_variables))

def execute(
    batch_of_experiences: List[Experience],
    target_net: Model,
    policy_net: Model,
    gamma: float,
    number_of_actions: int,
    optimizer: tf.optimizers.Optimizer,
) -> float:
    (
        states,
        actions,
        rewards,
        next_states,
        dones,
    ) = ExtractExperiencesService.execute(batch_of_experiences)

    formatted_next_states = np.atleast_2d(next_states).astype(np.float32)
    formatted_rewards = np.atleast_2d(rewards).astype(np.float32)

    # Double-DQN target: the policy net picks the next action, the target net
    # evaluates it.
    output_from_policy_net = policy_net(formatted_next_states)
    argmax_from_policy_net_output = np.argmax(output_from_policy_net, axis=1)

    output_from_target_net = target_net(formatted_next_states)
    actions_in_one_hot = tf.one_hot(
        np.squeeze(argmax_from_policy_net_output), number_of_actions)
    actions_gathered = tf.math.reduce_sum(
        output_from_target_net * actions_in_one_hot, axis=1, keepdims=True)

    q_s_a_prime = formatted_rewards + gamma * actions_gathered * (1 - dones)
    q_s_a_prime_tensor = tf.convert_to_tensor(q_s_a_prime, dtype=tf.float32)

    with tf.GradientTape() as tape:
        formatted_states = np.atleast_2d(states).astype(np.float32)
        # tf.one_hot requires integer indices
        formatted_actions = np.squeeze(actions).astype(np.int32)

        output_from_policy_net = policy_net(formatted_states)
        actions_in_one_hot = tf.one_hot(formatted_actions, number_of_actions)
        q_s_a = tf.math.reduce_sum(
            output_from_policy_net * actions_in_one_hot, axis=1, keepdims=True)

        loss = tf.math.reduce_mean(tf.square(q_s_a_prime_tensor - q_s_a))

    policy_net_variables = policy_net.trainable_variables
    gradients = tape.gradient(loss, policy_net_variables)
    optimizer.apply_gradients(zip(gradients, policy_net_variables))

    return loss.numpy()

def _train_step(model: tf.keras.Model,
                optimizer: tf.optimizers.Optimizer,
                loss_fn: LossFn,
                images: tf.Tensor,
                regress_targets: tf.Tensor,
                labels: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, list]:
    with tf.GradientTape() as tape:
        regressors, clf_probas = model(images)
        reg_loss, clf_loss = loss_fn(labels, clf_probas,
                                     regress_targets, regressors)
        loss = reg_loss + clf_loss

    # The gradients are returned to the caller, which accumulates them over
    # `grad_accum_steps` batches before calling `optimizer.apply_gradients`.
    grads = tape.gradient(loss, model.trainable_variables)
    return reg_loss, clf_loss, grads

def step_train(model: Model, data, opt: tf.optimizers.Optimizer, t):
    batch_data = next(data)
    x = tf.cast(batch_data['x'], tf.float32)
    summary_step = -1 if t % 50 > 0 else t
    with tf.GradientTape() as tape:
        encoded, vq_feat, context_ind, decoded, _ = model(x)
        loss = model.loss(x, encoded, context_ind, decoded, step=summary_step)
    gradient = tape.gradient(loss, sources=model.trainable_variables)
    opt.apply_gradients(zip(gradient, model.trainable_variables))
    if summary_step >= 0:
        gt = tf.reshape(x, [-1, 28, 28, 1])
        rec = tf.reshape(decoded, [-1, 28, 28, 1])
        tf.summary.image('train/gt', gt, summary_step, max_outputs=1)
        tf.summary.image('train/rec', rec, summary_step, max_outputs=1)
    return loss.numpy()

def train_step(model: TBH, batch_data, bbn_dim, cbn_dim, batch_size,
               actor_opt: tf.optimizers.Optimizer,
               critic_opt: tf.optimizers.Optimizer):
    random_binary = (tf.sign(tf.random.uniform([batch_size, bbn_dim]) - 0.5) + 1) / 2
    random_cont = tf.random.uniform([batch_size, cbn_dim])

    # tf.GradientTape records the forward pass so gradients can be computed later:
    # https://stackoverflow.com/questions/53953099/what-is-the-purpose-of-the-tensorflow-gradient-tape
    with tf.GradientTape() as actor_tape, tf.GradientTape() as critic_tape:
        model_input = [batch_data, random_binary, random_cont]
        model_output = model(model_input, training=True)

        # Original code:
        # actor_loss = reconstruction_loss(model_output[1], batch_data[1]) - \
        #     adv_loss(model_output[4], model_output[2]) - \
        #     adv_loss(model_output[5], model_output[3])
        # critic_loss = adv_loss(model_output[4], model_output[2]) + \
        #     adv_loss(model_output[5], model_output[3])

        # Testing code: non-saturating actor loss, critic_loss = -adv_loss
        actor_loss = reconstruction_loss(model_output[1], batch_data[1]) \
            - tf.reduce_mean(tf.keras.losses.binary_crossentropy(
                tf.ones_like(model_output[2]), model_output[2])) \
            - tf.reduce_mean(tf.keras.losses.binary_crossentropy(
                tf.ones_like(model_output[3]), model_output[3]))  # log(d(x'))
        # + adv_loss(model_output[4], model_output[2])
        # + adv_loss(model_output[5], model_output[3])  # adv_loss
        critic_loss = -adv_loss(model_output[4], model_output[2]) - \
            adv_loss(model_output[5], model_output[3])

        actor_scope = model.encoder.trainable_variables + \
            model.tbn.trainable_variables + \
            model.decoder.trainable_variables
        critic_scope = model.dis_1.trainable_variables + model.dis_2.trainable_variables

    actor_gradient = actor_tape.gradient(actor_loss, sources=actor_scope)
    critic_gradient = critic_tape.gradient(critic_loss, sources=critic_scope)

    actor_opt.apply_gradients(zip(actor_gradient, actor_scope))
    critic_opt.apply_gradients(zip(critic_gradient, critic_scope))

    return model_output[0].numpy(), actor_loss.numpy(), critic_loss.numpy()

def train_step(model: Model, batch_data, opt: tf.optimizers.Optimizer, step):
    feat = batch_data[1]
    label = batch_data[2]
    summary_step = -1 if step % 50 > 0 else step
    with tf.GradientTape() as tape:
        net_out = model(feat, training=True)
        loss = model.loss(feat, net_out, step=summary_step)
    gradient = tape.gradient(loss, sources=model.trainable_variables)
    opt.apply_gradients(zip(gradient, model.trainable_variables))
    code, prob, fc_cls = net_out['decoder']
    if summary_step >= 0:
        sim_gt = tf.expand_dims(tf.expand_dims(label_relevance(label), 0), -1)
        batch_map = eval_cls_map(code.numpy(), code.numpy(),
                                 label.numpy(), label.numpy())
        tf.summary.image('sim/gt', sim_gt, step=summary_step, max_outputs=1)
        tf.summary.scalar('map/train', batch_map, step=summary_step)
    return code, loss

def step_train(model: BasicMAB, data: BasicData, opt: tf.optimizers.Optimizer, t):
    x = data.next()
    step = t if t % 10 == 0 else -1
    with tf.GradientTape() as tape:
        cls, att = model(x)
        loss = model.obj(cls, x['label'])
    gradient = tape.gradient(loss, sources=model.trainable_variables)
    opt.apply_gradients(zip(gradient, model.trainable_variables))
    if t % 10 == 0:
        img = tf.squeeze(att, 2)[0, :, :100]
        tf.summary.scalar('train/loss', loss, step=t)
        tf.summary.image('train/att', img[tf.newaxis, :, :, tf.newaxis], step=t)
        step_test(model, data, t)
    return loss.numpy()

def optimizer_minimize(self, loss: tf.Tensor, tape: tf.GradientTape,
                       optimizer: tf.optimizers.Optimizer, model: k.Model):
    """Apply gradients for one training step.

    Args:
        loss (tf.Tensor): the loss computed under `tape`
        tape (tf.GradientTape): the tape that recorded the forward pass
        optimizer (tf.optimizers.Optimizer): the optimizer, optionally a
            LossScaleOptimizer when mixed precision is enabled
        model (k.Model): the model whose trainable variables are updated
    """
    with tape:
        scaled_loss = loss / self.strategy.num_replicas_in_sync
        if isinstance(optimizer,
                      tf.keras.mixed_precision.experimental.LossScaleOptimizer):
            # Scale the replica-averaged loss so small gradients do not
            # underflow in float16.
            scaled_loss = optimizer.get_scaled_loss(scaled_loss)
    grad = tape.gradient(scaled_loss, model.trainable_variables)
    if isinstance(optimizer,
                  tf.keras.mixed_precision.experimental.LossScaleOptimizer):
        grad = optimizer.get_unscaled_gradients(grad)
    optimizer.apply_gradients(zip(grad, model.trainable_variables))
    return scaled_loss

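# A hedged, stand-alone sketch of the loss-scaling pattern checked for above:
# wrap the base optimizer in a LossScaleOptimizer, scale the loss before
# computing gradients, and unscale the gradients before applying them. This is
# written against the non-experimental tf.keras.mixed_precision API (TF >= 2.4);
# the experimental path used above follows the same
# get_scaled_loss / get_unscaled_gradients protocol. `model`, `x`, and `y` are
# hypothetical placeholders.
import tensorflow as tf

base_opt = tf.optimizers.Adam(1e-3)
opt = tf.keras.mixed_precision.LossScaleOptimizer(base_opt)

def scaled_step(model: tf.keras.Model, x: tf.Tensor, y: tf.Tensor):
    with tf.GradientTape() as tape:
        loss = tf.reduce_mean(tf.square(model(x) - y))
        scaled_loss = opt.get_scaled_loss(loss)
    scaled_grads = tape.gradient(scaled_loss, model.trainable_variables)
    grads = opt.get_unscaled_gradients(scaled_grads)
    opt.apply_gradients(zip(grads, model.trainable_variables))
    return loss
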
def minimize_variables(this_loss, vars: List[tf.Variable],
                       optimizer: tf.optimizers.Optimizer,
                       tape: tf.GradientTape):
    grads = tape.gradient(this_loss, vars)
    optimizer.apply_gradients(zip(grads, vars))

def train_single_epoch(model: tf.keras.Model,
                       anchors: tf.Tensor,
                       dataset: tf.data.Dataset,
                       optimizer: tf.optimizers.Optimizer,
                       grad_accum_steps: int,
                       loss_fn: LossFn,
                       steps: int,
                       epoch: int,
                       num_classes: int,
                       print_every: int = 10):

    @tf.function(input_signature=[
        tf.TensorSpec(shape=[None, None, None, 3], dtype=tf.float32),
        tf.TensorSpec(shape=[None, None, 5], dtype=tf.float32),
        tf.TensorSpec(shape=[None, None, num_classes + 1], dtype=tf.float32)
    ])
    def train_step(images, r_targets, c_targets):
        return _train_step(model=model,
                           optimizer=optimizer,
                           loss_fn=loss_fn,
                           images=images,
                           regress_targets=r_targets,
                           labels=c_targets)

    acc_gradients = []
    running_loss = tf.metrics.Mean()
    running_clf_loss = tf.metrics.Mean()
    running_reg_loss = tf.metrics.Mean()

    for i, (images, (labels, bbs)) in enumerate(dataset):
        target_reg, target_clf = utils.anchors.anchor_targets_bbox(
            anchors, images, bbs, labels, num_classes)

        reg_loss, clf_loss, grads = train_step(images=images,
                                               r_targets=target_reg,
                                               c_targets=target_clf)

        if tf.math.is_nan(reg_loss) or tf.math.is_nan(clf_loss):
            print('Loss NaN, skipping training step')
            continue

        if len(acc_gradients) == 0:
            acc_gradients = grads
        else:
            acc_gradients = [g1 + g2 for g1, g2 in zip(acc_gradients, grads)]

        if (i + 1) % grad_accum_steps == 0:
            optimizer.apply_gradients(
                zip(acc_gradients, model.trainable_variables))
            acc_gradients = []

        running_loss(reg_loss + clf_loss)
        running_clf_loss(clf_loss)
        running_reg_loss(reg_loss)

        if (i + 1) % print_every == 0:
            lr = get_lr(optimizer)
            print(f'Epoch[{epoch}] [{i}/{steps}] '
                  f'loss: {running_loss.result():.6f} '
                  f'clf. loss: {running_clf_loss.result():.6f} '
                  f'reg. loss: {running_reg_loss.result():.6f} '
                  f'learning rate: {lr:.6f}')

    if len(acc_gradients) > 0:
        optimizer.apply_gradients(zip(acc_gradients, model.trainable_variables))

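# A minimal sketch isolating the gradient-accumulation pattern used above:
# gradients from `accum_steps` consecutive batches are summed and applied in a
# single optimizer step. All names below are hypothetical placeholders, and the
# model is assumed to be already built. Dividing the summed gradients by
# `accum_steps` before applying would turn the sum into an average, which is a
# common variant.
import tensorflow as tf

def accumulate_and_apply(model: tf.keras.Model,
                         optimizer: tf.optimizers.Optimizer,
                         batches,
                         accum_steps: int):
    acc_grads = [tf.zeros_like(v) for v in model.trainable_variables]
    for i, (x, y) in enumerate(batches):
        with tf.GradientTape() as tape:
            loss = tf.reduce_mean(tf.square(model(x) - y))
        grads = tape.gradient(loss, model.trainable_variables)
        acc_grads = [a + g for a, g in zip(acc_grads, grads)]
        if (i + 1) % accum_steps == 0:
            optimizer.apply_gradients(zip(acc_grads, model.trainable_variables))
            acc_grads = [tf.zeros_like(v) for v in model.trainable_variables]
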
def _train_step(model: VAE, x: tf.Tensor, optimizer: tf.optimizers.Optimizer):
    with tf.GradientTape() as tape:
        loss = compute_loss(model, x)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))