Beispiel #1
0
def train_one_step(model: BaseTF2Model, train_x: np.ndarray,
                   train_y: np.ndarray, optimizer: tf.optimizers.Optimizer,
                   loss_fn: tf.losses.Loss):
    """
    Run a single gradient update on `model` and report the loss.

    The forward pass and the loss are evaluated under a `tf.GradientTape`,
    the loss is differentiated with respect to `model.trainable_variables`,
    and `optimizer` applies the resulting gradients.

    The function is model-agnostic: it only needs `model` to be callable on
    `train_x` and to expose `trainable_variables`.

    Returns:
        The loss of this step, converted with `.numpy()`.
    """
    with tf.GradientTape() as grad_tape:
        # Prediction and loss must both be computed while the tape records,
        # otherwise the loss cannot be differentiated afterwards.
        step_loss = loss_fn(train_y, model(train_x))

    # Differentiate with respect to every trainable parameter of the model.
    params = model.trainable_variables
    param_grads = grad_tape.gradient(step_loss, params)

    # One optimizer update from (gradient, variable) pairs.
    optimizer.apply_gradients(zip(param_grads, params))

    # `step_loss` is an eager tensor; hand back its NumPy value.
    return step_loss.numpy()
Beispiel #2
0
def train_step(model: TBH, batch_data, bbn_dim, cbn_dim, batch_size,
               actor_opt: tf.optimizers.Optimizer,
               critic_opt: tf.optimizers.Optimizer):
    """
    Run one adversarial update: one actor step followed by one critic step.

    Args:
        model: network exposing the sub-modules `encoder`, `tbn`, `decoder`
            (actor side) and `dis_1`, `dis_2` (critic side).
        batch_data: input batch; `batch_data[1]` is compared against
            `model_output[1]` by `reconstruction_loss`.
        bbn_dim: width of the random binary sample fed to the model.
        cbn_dim: width of the random continuous sample fed to the model.
        batch_size: number of rows of both random samples.
        actor_opt: optimizer for the actor variables.
        critic_opt: optimizer for the critic variables.

    Returns:
        Tuple `(model_output[0], actor_loss, critic_loss)` as NumPy values.
    """
    # sign(u - 0.5) is -1/+1, so (sign + 1) / 2 yields random 0/1 entries.
    random_binary = (tf.sign(tf.random.uniform([batch_size, bbn_dim]) - 0.5) +
                     1) / 2
    random_cont = tf.random.uniform([batch_size, cbn_dim])

    # Only the forward pass and the losses need to be recorded.
    with tf.GradientTape() as actor_tape, tf.GradientTape() as critic_tape:
        model_input = [batch_data, random_binary, random_cont]
        model_output = model(model_input, training=True)

        actor_loss = reconstruction_loss(model_output[1], batch_data[1]) - \
                     adv_loss(model_output[4], model_output[2]) - \
                     adv_loss(model_output[5], model_output[3])

        critic_loss = adv_loss(model_output[4], model_output[2]) + adv_loss(
            model_output[5], model_output[3])

    actor_scope = model.encoder.trainable_variables + model.tbn.trainable_variables + \
                  model.decoder.trainable_variables
    critic_scope = model.dis_1.trainable_variables + model.dis_2.trainable_variables

    # Gradients are taken outside the tape context: inside it, the critic
    # tape would still be recording and would needlessly trace the actor's
    # backward pass.
    actor_gradient = actor_tape.gradient(actor_loss, sources=actor_scope)
    critic_gradient = critic_tape.gradient(critic_loss,
                                           sources=critic_scope)

    # Both gradients are computed before either optimizer changes a weight.
    actor_opt.apply_gradients(zip(actor_gradient, actor_scope))
    critic_opt.apply_gradients(zip(critic_gradient, critic_scope))

    return model_output[0].numpy(), actor_loss.numpy(), critic_loss.numpy()
Beispiel #3
0
def test_robust_optimizer_matches(
    base_optimizer: tf.optimizers.Optimizer,
    robust_optimizer: RobustOptimizer,
    grads,
    grads_with_nan,
    vars,
    other_vars,
):
    """Check that the robust optimizer ends up where the base optimizer does.

    `base_optimizer` replays `grads` onto `vars`, `robust_optimizer` replays
    `grads_with_nan` onto `other_vars`. Afterwards every entry of
    `other_vars` must be finite and close to the matching entry of `vars`.

    All four mappings are indexed with the keys of `vars`; each gradient
    entry is sliced along its first axis, one slice per optimizer step.
    """
    keys = [*vars.keys()]

    def replay(optimizer, grad_map, var_map):
        # Apply the gradient slices step by step, in key order.
        targets = [var_map[k] for k in keys]
        per_key_grads = [grad_map[k] for k in keys]
        n_steps = grad_map[keys[0]].shape[0]
        for step in range(n_steps):
            step_grads = [g[step] for g in per_key_grads]
            optimizer.apply_gradients(zip(step_grads, targets))

    replay(base_optimizer, grads, vars)
    replay(robust_optimizer, grads_with_nan, other_vars)

    for k in keys:
        assert np.all(np.isfinite(other_vars[k]))
        assert_allclose(vars[k].numpy(), other_vars[k].numpy())
Beispiel #4
0
def step_train(model: Model, data, opt: tf.optimizers.Optimizer, t):
    """
    Draw one batch from `data`, apply one optimizer step and log the loss.

    Args:
        model: callable as `model([o_x, o_y], t_x)` returning three values;
            the third one is passed to `model.obj` together with `t_y`.
        data: iterator yielding `(o_x, o_y, t_x, t_y)` tuples.
        opt: optimizer updating `model.trainable_variables`.
        t: step index used for the `train/loss` summary.

    Returns:
        The loss of this step as a NumPy value.
    """
    o_x, o_y, t_x, t_y = next(data)

    # Record only the forward pass and the objective.
    with tf.GradientTape() as tape:
        _, _, mvn = model([o_x, o_y], t_x)
        loss = model.obj(t_y, mvn)

    # Backward pass and update happen outside the tape context.
    variables = model.trainable_variables
    gradient = tape.gradient(loss, sources=variables)
    opt.apply_gradients(zip(gradient, variables))

    loss_value = loss.numpy()
    tf.summary.scalar('train/loss', loss_value, step=t)

    return loss_value
Beispiel #5
0
def train_step(model: JMLH, batch_data, opt: tf.optimizers.Optimizer):
    """
    Run one optimizer update of `model` on `batch_data`.

    Args:
        model: called as `model(batch_data, training=True)`; returns
            `(code, prob, cls_prob)`.
        batch_data: input batch; `batch_data[2]` is passed to `jmlh_loss`
            as `label`.
        opt: optimizer updating `model.trainable_variables`.

    Returns:
        Tuple `(code, loss)` as NumPy values.
    """
    # Record only the forward pass and the loss.
    with tf.GradientTape() as tape:
        code, prob, cls_prob = model(batch_data, training=True)
        loss = jmlh_loss(prob, cls_prob, label=batch_data[2])

    # Backward pass and update happen outside the tape context.
    variables = model.trainable_variables
    gradient = tape.gradient(loss, sources=variables)
    opt.apply_gradients(zip(gradient, variables))

    return code.numpy(), loss.numpy()
Beispiel #6
0
    def train_op(self, x_batch: tf.Tensor, optimizer: tf.optimizers.Optimizer):
        """Apply one optimizer update for `x_batch` and return the loss.

        The loss comes from `self.loss_op(x_batch)`. Gradients are taken
        with respect to every variable the tape watched while that loss was
        computed, so no explicit variable list is needed.
        """
        with tf.GradientTape() as grad_tape:
            batch_loss = self.loss_op(x_batch)

        watched = grad_tape.watched_variables()
        grads_and_vars = zip(grad_tape.gradient(batch_loss, watched), watched)
        optimizer.apply_gradients(grads_and_vars)

        return batch_loss
Beispiel #7
0
def train_single_epoch(model: tf.keras.Model,
                       anchors: tf.Tensor,
                       dataset: tf.data.Dataset,
                       optimizer: tf.optimizers.Optimizer,
                       grad_accum_steps: int,
                       loss_fn: LossFn,
                       epoch: int,
                       num_classes: int,
                       print_every: int = 10):
    """Train `model` for one pass over `dataset` with gradient accumulation.

    For each batch the anchor targets are built, `_train_step` returns the
    regression loss, the classification loss and the gradients, and the
    gradients are summed (not averaged) into an accumulator. Every
    `grad_accum_steps` batches the accumulated gradients are applied and the
    accumulator is reset; whatever is left after the last batch is applied
    once more at the end.

    Running means of the total, classification and regression loss are
    printed every `print_every` batches together with the learning rate.
    """
    pending_grads = []

    total_mean = tf.metrics.Mean()
    clf_mean = tf.metrics.Mean()
    reg_mean = tf.metrics.Mean()

    # `batch_no` is 1-based so the modulo checks read naturally.
    for batch_no, (images, (labels, bbs)) in enumerate(dataset, start=1):
        target_reg, target_clf = utils.anchors.anchor_targets_bbox(
            anchors, images, bbs, labels, num_classes)

        reg_loss, clf_loss, grads = _train_step(
            model=model,
            optimizer=optimizer,
            loss_fn=loss_fn,
            images=images,
            regress_targets=target_reg,
            labels=target_clf,
        )

        # Start a fresh accumulator or add to the running one.
        pending_grads = (grads if len(pending_grads) == 0 else
                         [acc + new for acc, new in zip(pending_grads, grads)])

        if batch_no % grad_accum_steps == 0:
            optimizer.apply_gradients(
                zip(pending_grads, model.trainable_variables))
            pending_grads = []

        total_mean(reg_loss + clf_loss)
        clf_mean(clf_loss)
        reg_mean(reg_loss)

        if batch_no % print_every == 0:
            lr = get_lr(optimizer)
            print(f'Epoch[{epoch}] '
                  f'loss: {total_mean.result():.6f} '
                  f'clf. loss: {clf_mean.result():.6f} '
                  f'reg. loss: {reg_mean.result():.6f} '
                  f'learning rate: {lr:.6f}')

    # Flush gradients of an incomplete accumulation window.
    if len(pending_grads) > 0:
        optimizer.apply_gradients(
            zip(pending_grads, model.trainable_variables))
    def execute(
        bath_of_experiences: List[Experience],
        target_net: Model,
        policy_net: Model,
        gamma: float,
        number_of_actions: int,
        optimizer: tf.optimizers.Optimizer,
    ) -> float:
        """Run one Q-learning update of `policy_net` on a batch of experiences.

        The bootstrap target is built outside the gradient tape: the policy
        network picks the greedy action for each next state, the target
        network supplies the value of that action, and the result is
        `reward + gamma * value * (1 - done)`. The policy network is then
        trained to match that target for the actions actually taken, using a
        mean squared error.

        Args:
            bath_of_experiences: experiences unpacked by
                `ExtractExperiencesService.execute` into states, actions,
                rewards, next states and done flags.
            target_net: network evaluating the selected next actions.
            policy_net: network being trained.
            gamma: discount factor applied to the bootstrap value.
            number_of_actions: depth of the one-hot action encoding.
            optimizer: optimizer updating `policy_net.trainable_variables`.

        Returns:
            The loss of this update as a NumPy value.
        """
        (
            states,
            actions,
            rewards,
            next_states,
            dones,
        ) = ExtractExperiencesService.execute(bath_of_experiences)
        formatted_next_states = np.atleast_2d(next_states).astype(np.float32)
        formatted_rewards = np.atleast_2d(rewards).astype(np.float32)

        # Action selection by the policy net, evaluation by the target net.
        output_from_policy_net = policy_net(formatted_next_states)
        argmax_from_policy_net_output = np.argmax(output_from_policy_net,
                                                  axis=1)
        output_from_target_net = target_net(formatted_next_states)
        actions_in_one_hot = tf.one_hot(
            np.squeeze(argmax_from_policy_net_output), number_of_actions)

        # The one-hot mask keeps only the value of the selected action.
        actions_gathered = tf.math.reduce_sum(output_from_target_net *
                                              actions_in_one_hot,
                                              axis=1,
                                              keepdims=True)

        # NOTE(review): `actions_gathered` is (batch, 1). If `rewards` or
        # `dones` come back 1-D this expression broadcasts to
        # (batch, batch) - confirm the shapes produced by
        # ExtractExperiencesService.
        q_s_a_prime = formatted_rewards + gamma * actions_gathered * (1 -
                                                                      dones)
        q_s_a_prime_tensor = tf.convert_to_tensor(q_s_a_prime,
                                                  dtype=tf.float32)
        with tf.GradientTape() as tape:
            formatted_states = np.atleast_2d(states).astype(np.float32)
            # tf.one_hot only accepts integer indices, so the action ids are
            # cast to int32 rather than float32.
            formatted_actions = np.squeeze(actions).astype(np.int32)
            output_from_policy_net = policy_net(formatted_states)
            actions_in_one_hot = tf.one_hot(formatted_actions,
                                            number_of_actions)
            q_s_a = tf.math.reduce_sum(output_from_policy_net *
                                       actions_in_one_hot,
                                       axis=1,
                                       keepdims=True)
            loss = tf.math.reduce_mean(tf.square(q_s_a_prime_tensor - q_s_a))

        policy_net_variables = policy_net.trainable_variables
        gradients = tape.gradient(loss, policy_net_variables)
        optimizer.apply_gradients(zip(gradients, policy_net_variables))
        return loss.numpy()
Beispiel #9
0
def _train_step(model: tf.keras.Model, optimizer: tf.optimizers.Optimizer,
                loss_fn: LossFn, images: tf.Tensor, regress_targets: tf.Tensor,
                labels: tf.Tensor) -> Tuple[float, float]:
    """Run one optimizer update on `images` and return both loss parts.

    `model(images)` yields the regressors and the class probabilities.
    `loss_fn` turns them, together with the targets, into a regression loss
    and a classification loss; their sum is what gets differentiated.

    Returns:
        `(reg_loss, clf_loss)` exactly as produced by `loss_fn`.
    """
    with tf.GradientTape() as grad_tape:
        regressors, clf_probas = model(images)
        reg_loss, clf_loss = loss_fn(labels, clf_probas, regress_targets,
                                     regressors)
        total_loss = reg_loss + clf_loss

    step_grads = grad_tape.gradient(total_loss, model.trainable_variables)
    grads_and_vars = zip(step_grads, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars)

    return reg_loss, clf_loss
Beispiel #10
0
def step_train(model: Model, data, opt: tf.optimizers.Optimizer, t):
    """
    Draw one batch from `data`, apply one optimizer step and log images.

    Args:
        model: called as `model(x)`, returning five values; the first,
            third and fourth (`encoded`, `context_ind`, `decoded`) are
            passed to `model.loss`.
        data: iterator yielding dict-like batches with an `'x'` entry.
        opt: optimizer updating `model.trainable_variables`.
        t: global step; image summaries are written when `t % 50 == 0`.

    Returns:
        The loss of this step as a NumPy value.
    """
    batch_data = next(data)
    x = tf.cast(batch_data['x'], tf.float32)
    # -1 marks steps without summaries; it is also forwarded to `model.loss`.
    summary_step = -1 if t % 50 > 0 else t

    # Record only the forward pass and the loss.
    with tf.GradientTape() as tape:
        encoded, _, context_ind, decoded, _ = model(x)
        loss = model.loss(x, encoded, context_ind, decoded, step=summary_step)

    # Backward pass and update happen outside the tape context.
    variables = model.trainable_variables
    gradient = tape.gradient(loss, sources=variables)
    opt.apply_gradients(zip(gradient, variables))

    if summary_step >= 0:
        # Input and reconstruction are logged as 28x28 single-channel images.
        gt = tf.reshape(x, [-1, 28, 28, 1])
        rec = tf.reshape(decoded, [-1, 28, 28, 1])
        tf.summary.image('train/gt', gt, summary_step, max_outputs=1)
        tf.summary.image('train/rec', rec, summary_step, max_outputs=1)

    return loss.numpy()
Beispiel #11
0
def train_step(model: TBH, batch_data, bbn_dim, cbn_dim, batch_size,
               actor_opt: tf.optimizers.Optimizer,
               critic_opt: tf.optimizers.Optimizer):
    """
    Run one adversarial update: one actor step followed by one critic step.

    This is an experimental variant of the loss formulation. The earlier
    version used `reconstruction - adv - adv` for the actor and `adv + adv`
    for the critic. Here the actor's adversarial terms are binary
    cross-entropies of `model_output[2]` / `model_output[3]` against
    all-ones targets, and the critic loss is the negated sum of the two
    `adv_loss` terms.

    Args:
        model: network exposing the sub-modules `encoder`, `tbn`, `decoder`
            (actor side) and `dis_1`, `dis_2` (critic side).
        batch_data: input batch; `batch_data[1]` is compared against
            `model_output[1]` by `reconstruction_loss`.
        bbn_dim: width of the random binary sample fed to the model.
        cbn_dim: width of the random continuous sample fed to the model.
        batch_size: number of rows of both random samples.
        actor_opt: optimizer for the actor variables.
        critic_opt: optimizer for the critic variables.

    Returns:
        Tuple `(model_output[0], actor_loss, critic_loss)` as NumPy values.
    """
    # sign(u - 0.5) is -1/+1, so (sign + 1) / 2 yields random 0/1 entries.
    random_binary = (tf.sign(tf.random.uniform([batch_size, bbn_dim]) - 0.5) +
                     1) / 2
    random_cont = tf.random.uniform([batch_size, cbn_dim])

    # Background on tf.GradientTape:
    # https://stackoverflow.com/questions/53953099/what-is-the-purpose-of-the-tensorflow-gradient-tape

    # Only the forward pass and the losses need to be recorded.
    with tf.GradientTape() as actor_tape, tf.GradientTape() as critic_tape:
        model_input = [batch_data, random_binary, random_cont]
        model_output = model(model_input, training=True)

        actor_loss = reconstruction_loss(model_output[1], batch_data[1]) \
                     - tf.reduce_mean(tf.keras.losses.binary_crossentropy(tf.ones_like(model_output[2]), model_output[2]))\
                     - tf.reduce_mean(tf.keras.losses.binary_crossentropy(tf.ones_like(model_output[3]), model_output[3]))

        critic_loss = -adv_loss(model_output[4], model_output[2]) - adv_loss(
            model_output[5], model_output[3])

    actor_scope = model.encoder.trainable_variables + model.tbn.trainable_variables + \
                  model.decoder.trainable_variables
    critic_scope = model.dis_1.trainable_variables + model.dis_2.trainable_variables

    # Gradients are taken outside the tape context: inside it, the critic
    # tape would still be recording and would needlessly trace the actor's
    # backward pass.
    actor_gradient = actor_tape.gradient(actor_loss, sources=actor_scope)
    critic_gradient = critic_tape.gradient(critic_loss,
                                           sources=critic_scope)

    # Both gradients are computed before either optimizer changes a weight.
    actor_opt.apply_gradients(zip(actor_gradient, actor_scope))
    critic_opt.apply_gradients(zip(critic_gradient, critic_scope))

    return model_output[0].numpy(), actor_loss.numpy(), critic_loss.numpy()
Beispiel #12
0
def train_step(model: Model, batch_data, opt: tf.optimizers.Optimizer, step):
    """
    Run one optimizer update of `model` and periodically write summaries.

    Args:
        model: called as `model(feat, training=True)`; its output is a
            mapping whose `'decoder'` entry unpacks into three values, the
            first being the code.
        batch_data: batch with features at index 1 and labels at index 2.
        opt: optimizer updating `model.trainable_variables`.
        step: global step; summaries are written when `step % 50 == 0`.

    Returns:
        Tuple `(code, loss)` as tensors (not converted to NumPy).
    """
    feat = batch_data[1]
    label = batch_data[2]
    # -1 marks steps without summaries; it is also forwarded to `model.loss`.
    summary_step = -1 if step % 50 > 0 else step

    # Record only the forward pass and the loss.
    with tf.GradientTape() as tape:
        net_out = model(feat, training=True)
        loss = model.loss(feat, net_out, step=summary_step)

    # Backward pass and update happen outside the tape context.
    variables = model.trainable_variables
    gradient = tape.gradient(loss, sources=variables)
    opt.apply_gradients(zip(gradient, variables))

    code, _, _ = net_out['decoder']
    if summary_step >= 0:
        # The relevance map gets a leading batch axis and a trailing
        # channel axis so it can be logged as an image.
        sim_gt = tf.expand_dims(tf.expand_dims(label_relevance(label), 0), -1)
        # The batch is evaluated against itself (same codes and labels are
        # passed for both argument pairs).
        batch_map = eval_cls_map(code.numpy(), code.numpy(), label.numpy(),
                                 label.numpy())
        tf.summary.image('sim/gt', sim_gt, step=summary_step, max_outputs=1)
        tf.summary.scalar('map/train', batch_map, step=summary_step)

    return code, loss
def step_train(model: BasicMAB, data: BasicData, opt: tf.optimizers.Optimizer,
               t):
    """
    Run one training step, log summaries every 10 steps, then run a test step.

    Args:
        model: called as `model(x)`, returning `(cls, att)`; `cls` is passed
            to `model.obj` together with `x['label']`.
        data: batch source; `data.next()` yields the training batch and the
            same object is handed to `step_test`.
        opt: optimizer updating `model.trainable_variables`.
        t: global step; summaries are written when `t % 10 == 0`.

    Returns:
        The training loss of this step as a NumPy value.
    """
    x = data.next()

    # Record only the forward pass and the objective.
    with tf.GradientTape() as tape:
        cls_out, att = model(x)
        loss = model.obj(cls_out, x['label'])

    # Backward pass and update happen outside the tape context.
    variables = model.trainable_variables
    gradient = tape.gradient(loss, sources=variables)
    opt.apply_gradients(zip(gradient, variables))

    if t % 10 == 0:
        # First sample only, axis 2 squeezed away, at most 100 columns;
        # batch and channel axes are re-added for the image summary.
        img = tf.squeeze(att, 2)[0, :, :100]
        tf.summary.scalar('train/loss', loss, step=t)
        tf.summary.image('train/att',
                         img[tf.newaxis, :, :, tf.newaxis],
                         step=t)

    # Runs on every step, not only on summary steps.
    step_test(model, data, t)

    return loss.numpy()
Beispiel #14
0
  def optimizer_minimize(self, loss: tf.Tensor, tape: tf.GradientTape,
                         optimizer: tf.optimizers.Optimizer, model: k.Model):
    """Scale the loss, back-propagate it and apply the gradients.

    The loss is first divided by `self.strategy.num_replicas_in_sync`. When
    `optimizer` is a `LossScaleOptimizer`, that per-replica loss is
    additionally multiplied by the optimizer's loss scale before the
    backward pass, and the gradients are unscaled again before being
    applied.

    Args:
        loss (tf.Tensor): unscaled loss recorded on `tape`.
        tape (tf.GradientTape): tape that recorded the forward pass; it is
            re-entered here so the scaling ops are recorded as well.
        optimizer (tf.optimizers.Optimizer): optimizer applying the update.
        model (k.Model): model whose `trainable_variables` are updated.

    Returns:
        tf.Tensor: the loss that was differentiated (per-replica scaled and,
        for a `LossScaleOptimizer`, also loss-scaled).
    """
    uses_loss_scaling = isinstance(
        optimizer, tf.keras.mixed_precision.experimental.LossScaleOptimizer)

    with tape:
      scaled_loss = loss / self.strategy.num_replicas_in_sync
      if uses_loss_scaling:
        # Scale the per-replica loss, not the raw `loss`; otherwise the
        # division by the replica count above would be thrown away.
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)

    grad = tape.gradient(scaled_loss, model.trainable_variables)
    if uses_loss_scaling:
      grad = optimizer.get_unscaled_gradients(grad)
    optimizer.apply_gradients(zip(grad, model.trainable_variables))
    return scaled_loss
def minimize_variables(this_loss, vars: tf.Tensor,
                       optimizer: tf.optimizers.Optimizer,
                       tape: tf.GradientTape):
    """Differentiate `this_loss` w.r.t. `vars` on `tape` and apply the update.

    `vars` is iterated alongside the gradients returned by the tape, so it
    is expected to be a sequence of variables.
    """
    grads_and_vars = zip(tape.gradient(this_loss, vars), vars)
    optimizer.apply_gradients(grads_and_vars)
Beispiel #16
0
def train_single_epoch(model: tf.keras.Model,
                       anchors: tf.Tensor,
                       dataset: tf.data.Dataset,
                       optimizer: tf.optimizers.Optimizer,
                       grad_accum_steps: int,
                       loss_fn: LossFn,
                       steps: int,
                       epoch: int,
                       num_classes: int,
                       print_every: int = 10):
    """Train `model` for one pass over `dataset` with gradient accumulation.

    For each batch the anchor targets are built and a `tf.function`-wrapped
    `_train_step` returns the regression loss, the classification loss and
    the gradients. Gradients are summed (not averaged) into an accumulator
    that is applied and reset every `grad_accum_steps` batches; a leftover
    accumulator is applied after the last batch.

    Batches whose loss is NaN are skipped: their gradients are not
    accumulated and their losses do not enter the running means.

    Args:
        model: network being trained.
        anchors: anchors passed to `utils.anchors.anchor_targets_bbox`.
        dataset: yields `(images, (labels, bbs))` batches.
        optimizer: optimizer applying the accumulated gradients.
        grad_accum_steps: number of batches per optimizer update.
        loss_fn: loss function forwarded to `_train_step`.
        steps: total step count, used only in the progress message.
        epoch: epoch index, used only in the progress message.
        num_classes: number of classes; the classification targets have
            `num_classes + 1` entries in their last axis.
        print_every: progress is printed every `print_every` batches.
    """
    # A fixed input signature avoids retracing when batch shapes vary.
    @tf.function(input_signature=[
        tf.TensorSpec(shape=[None, None, None, 3], dtype=tf.float32),
        tf.TensorSpec(shape=[None, None, 5], dtype=tf.float32),
        tf.TensorSpec(shape=[None, None, num_classes + 1], dtype=tf.float32)
    ])
    def train_step(images, r_targets, c_targets):
        return _train_step(model=model,
                           optimizer=optimizer,
                           loss_fn=loss_fn,
                           images=images,
                           regress_targets=r_targets,
                           labels=c_targets)

    acc_gradients = []

    running_loss = tf.metrics.Mean()
    running_clf_loss = tf.metrics.Mean()
    running_reg_loss = tf.metrics.Mean()

    for i, (images, (labels, bbs)) in enumerate(dataset):

        target_reg, target_clf = utils.anchors.anchor_targets_bbox(
            anchors, images, bbs, labels, num_classes)

        reg_loss, clf_loss, grads = train_step(images=images,
                                               r_targets=target_reg,
                                               c_targets=target_clf)

        if tf.math.is_nan(reg_loss) or tf.math.is_nan(clf_loss):
            # Really skip the batch: accumulating its gradients or losses
            # would turn the accumulator and the running means into NaN.
            print('Loss NaN, skipping training step')
        else:
            if not acc_gradients:
                acc_gradients = grads
            else:
                acc_gradients = [
                    g1 + g2 for g1, g2 in zip(acc_gradients, grads)
                ]

            running_loss(reg_loss + clf_loss)
            running_clf_loss(clf_loss)
            running_reg_loss(reg_loss)

        # The accumulator can be empty here if every batch of the window
        # was skipped; in that case there is nothing to apply.
        if (i + 1) % grad_accum_steps == 0 and acc_gradients:
            optimizer.apply_gradients(
                zip(acc_gradients, model.trainable_variables))
            acc_gradients = []

        if (i + 1) % print_every == 0:
            lr = get_lr(optimizer)
            print(f'Epoch[{epoch}] [{i}/{steps}] '
                  f'loss: {running_loss.result():.6f} '
                  f'clf. loss: {running_clf_loss.result():.6f} '
                  f'reg. loss: {running_reg_loss.result():.6f} '
                  f'learning rate: {lr:.6f}')

    # Flush gradients of an incomplete accumulation window.
    if acc_gradients:
        optimizer.apply_gradients(zip(acc_gradients,
                                      model.trainable_variables))
Beispiel #17
0
 def _train_step(model: VAE, x: tf.Tensor,
                 optimizer: tf.optimizers.Optimizer):
     """Apply one optimizer update to `model` for the batch `x`.

     The loss is `compute_loss(model, x)`, differentiated with respect to
     `model.trainable_variables`. Nothing is returned.
     """
     with tf.GradientTape() as grad_tape:
         batch_loss = compute_loss(model, x)
     step_grads = grad_tape.gradient(batch_loss, model.trainable_variables)
     grads_and_vars = zip(step_grads, model.trainable_variables)
     optimizer.apply_gradients(grads_and_vars)