Example #1
def train_policy_on_episode(optimizer: Optimizer, training_info: TrainingInfo,
                            episode_number: int):
    """ Trains both the actor and the critic using all transitions of the latest episode. The actor's loss is the MSE
     between V(state) and reward + gamma * V(next state), where V indicates the actor's value function.
     The actor / policy is trained by maximizing the log probability * td-error, and an entropy term is
     added to encourage exploration. The entropy is decayed at new each episode by the run_params.entropy_decay
     coefficient.
    """
    training_info.compute_discounted_rewards()

    # Compute the loss of the policy and the critic at each time step
    policy_losses = []  # Policy errors
    value_losses = []  # Critic errors
    for log_prob, discounted_reward, state_value, entropy in zip(
            training_info.log_probs, training_info.discounted_rewards,
            training_info.state_values, training_info.entropies):
        advantage = discounted_reward - state_value.item()
        policy_losses.append(-(log_prob + 0.99**episode_number * entropy) *
                             advantage)
        value_losses.append(
            F.smooth_l1_loss(state_value.squeeze(0),
                             torch.tensor([discounted_reward])))

    # Optimize the policy
    optimizer.zero_grad()
    total_policy_loss = torch.stack(policy_losses).sum() + torch.stack(
        value_losses).sum()
    total_policy_loss.backward()
    optimizer.step()

    # Reset the state of the episode
    training_info.reset()
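The call to training_info.compute_discounted_rewards() above produces the discounted_rewards consumed in the loop. A minimal sketch of that computation, assuming TrainingInfo stores per-step rewards and a discount factor (everything except the discounted-return definition is an assumption, not shown in the example):

from typing import List


def compute_discounted_rewards(rewards: List[float], gamma: float = 0.99) -> List[float]:
    # G_t = r_t + gamma * r_{t+1} + gamma^2 * r_{t+2} + ...
    discounted: List[float] = []
    running = 0.0
    for r in reversed(rewards):
        running = r + gamma * running
        discounted.insert(0, running)
    return discounted


# e.g. compute_discounted_rewards([1.0, 1.0, 1.0], gamma=0.9) -> [2.71, 1.9, 1.0]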
Example #2
def train_policy_on_step(critic: SimpleCritic, optimizer: Optimizer,
                         reward: float, state: np.ndarray,
                         next_state: np.ndarray, gamma: float, log_prob: float,
                         entropy: float, episode_number: int,
                         run_params: RunParams):
    """ Trains both the actor and the critic using the given transition. The actor's loss is the MSE
     between V(state) and reward + gamma * V(next state), where V indicates the actor's value function.
     The actor / policy is trained by maximizing the log probability * td-error, and an entropy term is
     added to encourage exploration. The entropy is decayed at new each episode by the run_params.entropy_decay
     coefficient.
     """
    # Inspired by https://gym.openai.com/evaluations/eval_gUhDnmlbTKG1qW0jS6HSg/

    state, next_state = prepare_state(state), prepare_state(next_state)

    state_value_target = reward + gamma * critic.forward(next_state)
    state_value_prediction = critic.forward(state)
    td_error = state_value_target - state_value_prediction

    # Update policy
    optimizer.zero_grad()
    loss = -(log_prob + run_params.entropy_coeff *
             run_params.entropy_decay**episode_number * entropy) * td_error
    loss += F.mse_loss(state_value_prediction, state_value_target)
    loss.backward()
    optimizer.step()
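The TD error above is the one-step bootstrap target reward + gamma * V(next_state) minus the current estimate V(state). A standalone sketch of the same quantities with the target detached, a common (though not shown above) way to keep the critic regression from backpropagating through V(next_state):

import torch.nn.functional as F


def td_quantities(critic, state, next_state, reward, gamma):
    # Detaching the target is an assumption; the example above keeps it attached.
    target = (reward + gamma * critic(next_state)).detach()
    prediction = critic(state)
    td_error = target - prediction
    value_loss = F.mse_loss(prediction, target)
    return td_error, value_loss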
Example #3
    def train_steps(
            self, optimizer: Optimizer,
            triplet_dataset: FederatedTripletsDataset) -> TrainStepResults:
        losses: List[float] = []
        local_step: int = 0

        triplet_loader = DataLoader(triplet_dataset,
                                    batch_size=self.settings.batch_size,
                                    shuffle=True)

        for triplets in triplet_loader:
            # Calculate triplet loss
            triplet_loss = self.loss_fn(
                anchor=self.model(triplets["anchor"].cuda()),
                positive=self.model(triplets["positive"].cuda()),
                negative=self.model(triplets["negative"].cuda()),
            ).cuda()

            # Backward pass
            optimizer.zero_grad()
            triplet_loss.backward()
            optimizer.step()

            self.global_step += 1
            local_step += 1
            losses.append(triplet_loss.item())

        loss_mean = sum(losses) / len(losses)
        return TrainStepResults(loss_mean, local_step)
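The loss_fn used above is whatever the surrounding trainer provides; a common concrete choice for this anchor/positive/negative signature (an assumption, not confirmed by the example) is torch.nn.TripletMarginLoss:

import torch.nn as nn

# Illustrative stand-in for self.loss_fn; the margin and norm are assumptions.
loss_fn = nn.TripletMarginLoss(margin=1.0, p=2)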
Example #4
    def _fit_epoch(self, g_optim: Optimizer,
                   d_optim: Optimizer) -> TensorTuple:
        r"""
        Trains a single entire epoch

        :param g_optim: Generator optimizer
        :param d_optim: Discriminator optimizer
        :return: Average training loss
        """
        tot_l_g = tot_l_d = 0
        num_batch = min(len(self._mal_data.train), len(self._ben_data.train))

        for (m, _), (b, _) in zip(self._mal_data.train, self._ben_data.train):
            if self._is_cuda: m, b = m.cuda(), b.cuda()
            m_prime, g_theta = self._gen.forward(m)
            l_g = self._calc_gen_loss(g_theta)
            g_optim.zero_grad()
            l_g.backward()
            # torch.nn.utils.clip_grad_value_(l_g, 1)
            g_optim.step()
            tot_l_g += l_g

            # Update the discriminator
            for x in [m_prime, b]:
                l_d = self._calc_discrim_loss(x)
                d_optim.zero_grad()
                l_d.backward()
                # torch.nn.utils.clip_grad_value_(l_d, 1)
                d_optim.step()
                tot_l_d += l_d
        # noinspection PyUnresolvedReferences
        return (tot_l_g / num_batch).item(), (tot_l_d / num_batch).item()
Example #5
def train(loader: DataLoader, network: nn.Module, optimizer: Optimizer,
          epoch: int, log_interval: int, state: TrainingState) -> None:

    network.train()

    for batch_idx, (data, target) in enumerate(loader):

        # manually set all gradients to zero
        optimizer.zero_grad()

        # produce the network's output (forward pass)
        output = network(data)

        # compute negative log-likelihood loss between
        # the output and the ground truth label
        loss = F.nll_loss(output, target)

        # collect a new set of gradients and
        # backpropagate to network parameters
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loader.dataset),
                100.0 * batch_idx / len(loader), loss.item()))
            count = (batch_idx * 64) + ((epoch - 1) * len(loader.dataset))
            state.update(network, optimizer, loss.item(), count)
Example #6
def _train_segmenter_epoch(
        model: torch.nn.Module, optimizer: Optimizer,
        train_dataloader: DataLoader,
        val_dataloader: DataLoader) -> Tuple[List[Any], List[Any]]:
    t_losses, v_losses = [], []
    model.train()
    for x, y in tqdm(train_dataloader):
        optimizer.zero_grad()
        preds = model(x)

        loss = F.binary_cross_entropy(preds, y.unsqueeze(1))
        loss.backward()
        optimizer.step()

        t_losses.append(loss.item())

    with torch.no_grad():
        model.eval()
        for val_x, val_y in tqdm(val_dataloader):
            val_preds = model(val_x)
            val_loss = F.binary_cross_entropy(val_preds, val_y.unsqueeze(1))
            v_losses.append(val_loss.item())
    print(f'Train loss: {np.mean(t_losses)}, Val loss: {np.mean(v_losses)}')

    return t_losses, v_losses
Example #7
    def _fit_epoch(self, generator_optimizer: Optimizer,
                   discriminator_optimizer: Optimizer) -> TensorTuple:
        total_loss_generator = total_loss_discriminator = 0
        num_batch = min(len(self._mal_data.train), len(self._ben_data.train))

        for (malware_data, _), (benign_data, _) in zip(self._mal_data.train,
                                                       self._ben_data.train):
            if self._is_cuda:
                malware_data, benign_data = malware_data.cuda(
                ), benign_data.cuda()
            m_prime, g_theta = self._gen.forward(malware_data)
            generator_loss = self._calc_gen_loss(g_theta)
            generator_optimizer.zero_grad()
            generator_loss.backward()

            generator_optimizer.step()
            total_loss_generator += generator_loss

            for x in [m_prime, benign_data]:
                discriminator_loss = self._calc_discrim_loss(x)
                discriminator_optimizer.zero_grad()
                discriminator_loss.backward()

                discriminator_optimizer.step()
                total_loss_discriminator += discriminator_loss
        # noinspection PyUnresolvedReferences
        return (total_loss_generator /
                num_batch).item(), (total_loss_discriminator /
                                    num_batch).item()
Example #8
    def optimizer_step(
        self,
        epoch: int,
        batch_idx: int,
        optimizer: Optimizer,
        optimizer_idx: int,
        optimizer_closure: Optional[Callable] = None,
        on_tpu: bool = False,
        using_native_amp: bool = False,
        using_lbfgs: bool = False,
    ) -> None:
        # warm-up + decay schedule placed here since LARSWrapper is not optimizer class
        # adjust LR of optim contained within LARSWrapper
        if self.lars_wrapper:
            for param_group in optimizer.optim.param_groups:
                param_group["lr"] = self.lr_schedule[self.trainer.global_step]
        else:
            for param_group in optimizer.param_groups:
                param_group["lr"] = self.lr_schedule[self.trainer.global_step]

        # log LR (LearningRateLogger callback doesn't work with LARSWrapper)
        self.logger.log_metrics(
            {"learning_rate": self.lr_schedule[self.trainer.global_step]}, self.current_epoch * batch_idx
        )

        # from lightning
        if self.trainer.amp_backend == AMPType.NATIVE:
            optimizer_closure()
            self.trainer.scaler.step(optimizer)
        elif self.trainer.amp_backend == AMPType.APEX:
            optimizer_closure()
            optimizer.step()
        else:
            optimizer.step(closure=optimizer_closure)
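self.lr_schedule above is indexed by the trainer's global step; a minimal sketch of how such a per-step schedule might be precomputed with linear warmup followed by cosine decay (the step counts and learning rates are illustrative assumptions):

import numpy as np


def build_lr_schedule(base_lr: float, total_steps: int, warmup_steps: int,
                      final_lr: float = 0.0) -> np.ndarray:
    steps = np.arange(total_steps)
    # Linear warmup to base_lr, then cosine decay towards final_lr.
    warmup = base_lr * steps / max(1, warmup_steps)
    progress = (steps - warmup_steps) / max(1, total_steps - warmup_steps)
    cosine = final_lr + 0.5 * (base_lr - final_lr) * (1 + np.cos(np.pi * progress))
    return np.where(steps < warmup_steps, warmup, cosine)


# e.g. lr_schedule = build_lr_schedule(base_lr=1e-3, total_steps=10000, warmup_steps=500)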
Example #9
def train_loop(data_loader: DataLoader, model: nn.Module, optimizer: Optimizer, device: torch.device) -> List[float]:
    """
    Train loop.
    Loop in model over input batches, compute loss and do back-propagation.

    :param data_loader: Pytorch DataLoader containing word2vec model inputs.
    :param model: Word2Vec pytorch model.
    :param optimizer: Pytorch Optimizer.
    :param device:
    :return: List of loss of each training step.
    """
    loss_values = list()
    model.train()
    for bi, d in enumerate(data_loader):
        center_id = d["center_id"]
        context_id = d["context_id"]

        center_id = center_id.to(device, dtype=torch.long)
        context_id = context_id.to(device, dtype=torch.long)

        optimizer.zero_grad()

        outputs = model(center_id=center_id)

        loss = loss_fn(outputs, context_id)
        loss_values.append(loss.item())

        loss.backward()

        optimizer.step()

    return loss_values
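loss_fn above is resolved from the enclosing scope. For a skip-gram style model whose forward returns vocabulary logits per center word, a plausible stand-in (an assumption, not the original) is:

import torch.nn as nn

loss_fn = nn.CrossEntropyLoss()  # expects outputs [batch, vocab_size] and context_id [batch]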
Example #10
    def optimizer_step(
        self,
        epoch: int,
        batch_idx: int,
        optimizer: Optimizer,
        optimizer_idx: int,
        optimizer_closure: typing.Optional[typing.Callable] = None,
        on_tpu: bool = False,
        using_native_amp: bool = False,
        using_lbfgs: bool = False,
    ) -> None:
        # warm-up + decay schedule placed here since LARSWrapper is not optimizer class
        # adjust LR of optim contained within LARSWrapper
        if self.lars_wrapper:
            for param_group in optimizer.optim.param_groups:
                param_group["lr"] = self.lr_schedule[self.trainer.global_step]
        else:
            for param_group in optimizer.param_groups:
                if param_group["name"] == "predictor":
                    param_group["lr"] = self.learning_rate
                else:
                    param_group["lr"] = self.lr_schedule[
                        self.trainer.global_step]
            #param_group[0]["lr"]

        # from lightning
        #if self.trainer.amp_backend == AMPType.NATIVE:
        #    optimizer_closure()
        #    self.trainer.scaler.step(optimizer)
        if ((batch_idx + 1) % self.accumulate_grad_batches_custom) == 0:
            if self.trainer.amp_backend == AMPType.APEX:
                optimizer_closure()
                optimizer.step()
            else:
                optimizer.step(closure=optimizer_closure)
Example #11
def train_epoch(model: nn.Module, loader: DataLoader, optimizer: Optimizer,
                epoch: int) -> Tuple[torch.Tensor, torch.Tensor]:
    log_interval = len(loader) // 10
    device = next(model.parameters()).device
    model.train()
    steps = []
    traces = []
    for batch_idx, (data, target) in enumerate(loader, start=1):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch,
                batch_idx * len(data),
                len(loader.dataset),
                100.0 * batch_idx / len(loader),
                loss.item(),
            ))
            steps.append(batch_idx)
            batch_traces = batch_data_matrix_trace(model, data)
            traces.append(batch_traces)
        optimizer.step()
    steps = torch.tensor(steps)
    traces = torch.stack(traces, dim=1)
    return steps, traces
Example #12
def train_batch(dsc_model: Discriminator, gen_model: Generator,
                dsc_loss_fn: Callable, gen_loss_fn: Callable,
                dsc_optimizer: Optimizer, gen_optimizer: Optimizer,
                x_data: DataLoader, y_data: DataLoader):
    """
    Trains a GAN over one batch, updating both the discriminator and
    generator.
    :return: The discriminator and generator losses.
    """

    fake, gen_labels = gen_model.sample(x_data.shape[0], with_grad=True)

    dsc_optimizer.zero_grad()
    # Discriminator on real data
    real_output, real_classes = dsc_model(x_data)  #.view(-1)
    # Discriminator on fake data
    fake_output, fake_classes = dsc_model(fake.detach())

    dsc_loss = dsc_loss_fn(real_output, real_classes, y_data, fake_output,
                           fake_classes, gen_labels)

    dsc_loss.backward()
    dsc_optimizer.step()
    # =======================

    # Train Generator
    gen_optimizer.zero_grad()
    fake_output, fake_classes = dsc_model(fake)
    gen_loss = gen_loss_fn(fake_output, fake_classes, gen_labels)
    #
    gen_loss.backward()
    gen_optimizer.step()
    # ========================

    return dsc_loss.item(), gen_loss.item()
Example #13
def train_policy(optimizer: Optimizer, training_info: TrainingInfo,
                 run_params: RunParams):
    """ Trains the policy using the policy gradient method, given the discounted rewards of the latest episode
    Entropy is also taken into account. Each new episode diminishes its importance by run_params.entropy_decay,
    such that the agent will explore at the beginning and tend to explore less and less over time. The agent is
    trained once on all the transitions of the episode (instead of training many times over mini-batches).
    """
    training_info.compute_discounted_rewards()

    # Compute the loss of the policy at each time step
    policy_losses = []
    for log_prob, discounted_reward, entropy in zip(
            training_info.log_probs, training_info.discounted_rewards,
            training_info.entropies):
        entropy_coeff = run_params.entropy_coeff * run_params.entropy_decay**training_info.episode_number
        policy_losses.append(-(log_prob + entropy_coeff * entropy) *
                             discounted_reward)

    # Optimize the policy
    optimizer.zero_grad()
    total_policy_loss = torch.cat(policy_losses).sum()
    total_policy_loss.backward()
    optimizer.step()

    # Reset the state of the episode
    training_info.reset()
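As a rough sense of the decay schedule above: with an illustrative entropy_coeff of 0.01 and entropy_decay of 0.99, the effective entropy weight is about 0.01 * 0.99**100 ≈ 0.0037 after 100 episodes and about 6.6e-05 after 500 episodes, so exploration pressure fades smoothly rather than being switched off.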
Example #14
def train_batch(policy: SimplePolicyContinuous, states: List[torch.Tensor],
                actions: List[torch.Tensor],
                discounted_rewards: List[torch.Tensor], optimizer: Optimizer,
                episode_number: int, run_params: RunParams):
    """ Trains the policy using the policy gradient method using a single mini-batch of transitions.
    Entropy is also taken into account. Each new episode diminishes its importance by run_params.entropy_decay,
    such that the agent will explore at the beginning and tend to explore less and less over time"""
    optimizer.zero_grad()

    policy_losses = []
    for (state, action, discounted_reward) in zip(states, actions,
                                                  discounted_rewards):
        state = state.float().unsqueeze(0)

        if run_params.continuous_actions:
            mu, sigma = policy.forward(state)
            n = Normal(mu, sigma)
        else:
            probs = policy.forward(state)
            n = Categorical(probs)
        policy_losses.append(
            -(n.log_prob(action) + 0.99**episode_number * n.entropy()) *
            discounted_reward)

    total_policy_loss = torch.cat(policy_losses).sum()
    total_policy_loss.backward()
    optimizer.step()
Example #15
 def optimizer_step(
     self,
     epoch: int,
     batch_idx: int,
     optimizer: Optimizer,
     optimizer_idx: int,
     optimizer_closure: typing.Optional[typing.Callable] = None,
     on_tpu: bool = False,
     using_native_amp: bool = False,
     using_lbfgs: bool = False,
 ) -> None:
     # warm-up + decay schedule placed here since LARSWrapper is not optimizer class
     # adjust LR of optim contained within LARSWrapper
     new_learning_rate = self._get_latest_lr()
     if self.lars_wrapper:
         for param_group in optimizer.optim.param_groups:
             param_group["lr"] = new_learning_rate
     else:
         for param_group in optimizer.param_groups:
             param_group["lr"] = new_learning_rate
     if self.trainer.amp_backend == AMPType.APEX:
         optimizer_closure()
         optimizer.step()
     else:
         optimizer.step(closure=optimizer_closure)
Example #16
 def train_step(self, x: torch.Tensor, y: torch.Tensor, support_set,
                optimizer: Optimizer):
     optimizer.zero_grad()
     loss, y_pred = self.compute_loss(x, y, support_set)
     loss.backward()
     optimizer.step()
     return loss, y_pred
Example #17
            def after_loss_fn_new(_input: torch.Tensor, _label: torch.Tensor, _output: torch.Tensor,
                                  loss: torch.Tensor, optimizer: Optimizer, loss_fn: Callable[..., torch.Tensor] = None,
                                  amp: bool = False, scaler: torch.cuda.amp.GradScaler = None, **kwargs):
                noise = torch.zeros_like(_input)
                adv_loss_fn = functools.partial(self.adv_loss, _label=_label)

                for m in range(self.pgd.iteration):
                    if amp:
                        scaler.step(optimizer)
                        scaler.update()
                    else:
                        optimizer.step()
                    self.eval()
                    adv_x, _ = self.pgd.optimize(_input=_input, noise=noise,
                                                 loss_fn=adv_loss_fn,
                                                 iteration=1, epsilon=adv_train_epsilon)
                    self.train()
                    loss = loss_fn(adv_x, _label)
                    if callable(after_loss_fn_old):
                        after_loss_fn_old(_input=_input, _label=_label, _output=_output,
                                          loss=loss, optimizer=optimizer, loss_fn=loss_fn,
                                          amp=amp, scaler=scaler, **kwargs)
                    if amp:
                        scaler.scale(loss).backward()
                    else:
                        loss.backward()
Example #18
def lr_find(model: UNet,
            data_loader,
            optimizer: Optimizer,
            criterion,
            use_gpu,
            min_lr=0.0001,
            max_lr=0.1):
    # Save model and optimizer states to revert
    model_state = model.state_dict()
    optimizer_state = optimizer.state_dict()

    losses = []
    lrs = []
    scheduler = CyclicExpLR(optimizer,
                            min_lr,
                            max_lr,
                            step_size_up=100,
                            mode='triangular',
                            cycle_momentum=True)
    model.train()
    for i, (data, target, class_ids) in enumerate(data_loader):
        data, target = data, target

        if use_gpu:
            data = data.cuda()
            target = target.cuda()

        optimizer.zero_grad()
        output_raw = model(data)
        # This step is specific for this project
        output = torch.zeros(output_raw.shape[0], 1, output_raw.shape[2],
                             output_raw.shape[3])

        if use_gpu:
            output = output.cuda()

        # This step is specific for this project
        for idx, (raw_o, class_id) in enumerate(zip(output_raw, class_ids)):
            output[idx] = raw_o[class_id - 1]

        loss = criterion(output, target)
        loss.backward()
        current_lr = optimizer.param_groups[0]['lr']
        # Stop if lr stopped increasing
        if len(lrs) > 0 and current_lr < lrs[-1]:
            break
        lrs.append(current_lr)
        losses.append(loss.item())
        optimizer.step()
        scheduler.step()

    # Plot in log scale
    plt.plot(lrs, losses)
    plt.xscale('log')

    plt.show()

    model.load_state_dict(model_state)
    optimizer.load_state_dict(optimizer_state)
Example #19
def train(args, model: SentimentAnalysisModel, train_loader: DataLoader,
          optimizer: Optimizer, epoch: int, device_: device):
    global eps_threshold_hit

    model = model.train().to(device_)
    criterion = nn.CrossEntropyLoss()
    losses = []
    accuracies = []

    virtual_batch_rate = VIRTUAL_BATCH_SIZE / BATCH_SIZE

    for idx, batch in enumerate(tqdm(train_loader)):
        ids = batch['input_ids'].to(device_, dtype=torch.long)
        mask = batch['attention_mask'].to(device_, dtype=torch.long)
        # token_type_ids = batch['token_type_ids'].to(device_, dtype = torch.long)
        targets = batch['label'].to(device_, dtype=torch.long)
        decoder_input_ids = batch['decoder_input_ids'].to(device_,
                                                          dtype=torch.long)

        optimizer.zero_grad()
        predictions = model(input_ids=ids,
                            attention_mask=mask,
                            decoder_input_ids=decoder_input_ids)
        loss = criterion(predictions, targets)
        acc = binary_accuracy(predictions, targets)

        loss.backward()

        if args.eps_threshold is not None:
            # do virtual stepping to improve performance
            if (idx + 1) % virtual_batch_rate == 0 or idx == len(train_loader) - 1:
                optimizer.step()
                optimizer.zero_grad()
            else:
                optimizer.virtual_step()
        else:
            optimizer.step()

        losses.append(loss.item())
        accuracies.append(acc.item())

    if args.eps_threshold is not None:
        epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent()
        print(f"Train Epoch: {epoch} \t"
              f"Train Loss: {np.mean(losses):.6f} "
              f"Train Accuracy: {np.mean(accuracies):.6f} "
              f"(ε = {epsilon:.2f}, δ = {1e-06}) for α = {best_alpha}")

        # stop training if eps >= eps_threshold
        eps_threshold_hit = epsilon >= args.eps_threshold

        if eps_threshold_hit:
            print('Hit epsilon threshold, stopping training.')

    else:
        print(
            f'Train epoch: {epoch} \t Avg Loss: {np.mean(losses)} \t Avg Accuracy: {np.mean(accuracies)}'
        )
Example #20
    def event_loop_across_minibatches(self, lm_dataloader: DataLoader,
                                      criterion: nn.Module,
                                      optimizer: Optimizer,
                                      transform_logger_object: Any) -> None:
        activations: Dict[int, Batch] = dict()
        num_microbatch = len(lm_dataloader)
        num_activations = 0
        num_gradients = 0

        ranks = get_pipeline_parallel_ranks()  # for warmup phase
        N = len(ranks)
        cur_rank = torch.distributed.get_rank()

        # warmup phase (forward passes)
        # cur_rank worker will do (max_rank - cur_rank) forward passes
        n_warmup = ranks[-1] - cur_rank
        for _ in range(n_warmup):
            if self.weight_prediction:
                optimizer.update_weight_using_future_predictions(
                    cur_rank, N, forward=True)  # type: ignore
            message = self.event_loop_trunk_forward_helper(activations)
            self.transport.send_message(message, sync=True)
            num_activations += 1

        # common loop for remaining items in the warmup phase and steady phase
        while num_activations < num_microbatch:
            # 1 Forward
            if self.weight_prediction:
                optimizer.update_weight_using_future_predictions(
                    cur_rank, N, forward=True)  # type: ignore
            message = self.event_loop_trunk_forward_helper(activations)

            num_activations += 1
            # 1 Backward
            if self.weight_prediction:
                optimizer.update_weight_using_future_predictions(
                    cur_rank, N, forward=False)  # type: ignore
            self.event_loop_trunk_backward_helper(activations)
            num_gradients += 1
            if self.perform_optimizer_step(optimizer, num_gradients):
                optimizer.step()
                optimizer.zero_grad()
                transform_logger_object.check_and_save_weights(num_gradients)

            self.transport.send_message(message, sync=True)

        # remaining backwards
        remaining = len(activations)
        for _ in range(remaining):
            if self.weight_prediction:
                optimizer.update_weight_using_future_predictions(
                    cur_rank, N, forward=False)  # type: ignore
            self.event_loop_trunk_backward_helper(activations)
            num_gradients += 1
            if self.perform_optimizer_step(optimizer, num_gradients):
                optimizer.step()
                optimizer.zero_grad()
                transform_logger_object.check_and_save_weights(num_gradients)
Example #21
def train_step(dataloader: DataLoader,
               netD: nn.Module,
               netG: nn.Module,
               optimizerD: Optimizer,
               optimizerG: Optimizer,
               generator_criterion_loss,
               idx_epoch,
               num_epochs,
               num_print: int = 5) -> dict:
    netG.train()
    netD.train()
    results = {'d_loss': 0, 'g_loss': 0, 'd_score': 0, 'g_score': 0}
    batch_sizes = 0
    num_samples = len(dataloader)
    step_ = int(math.ceil(num_samples / num_print))
    t1 = time.time()
    for idx_train, data_train in enumerate(dataloader):
        # (0) get lr/hr data
        data_lr, data_hr_target = data_train['lr'], data_train['hr']
        batch_size = data_lr.size(0)
        batch_sizes += batch_size
        # (1) Update D network: maximize D(x)-1-D(G(z))
        z = x_preprocess(data_lr, to_device=to_device)
        real_img = x_preprocess(data_hr_target, to_device=to_device)
        fake_img = netG(z)
        #
        netD.zero_grad()
        real_out = netD(real_img).mean()
        fake_out = netD(fake_img).mean()
        d_loss = 1 - real_out + fake_out
        d_loss.backward(retain_graph=True)
        optimizerD.step()
        # (2) Update G network: minimize 1-D(G(z)) + Perception Loss + Image Loss + TV Loss
        netG.zero_grad()
        g_loss = generator_criterion_loss(fake_out, fake_img, real_img)
        g_loss.backward()
        optimizerG.step()
        fake_img = netG(z)
        fake_out = netD(fake_img).mean()
        # (3)
        g_loss = generator_criterion_loss(fake_out, fake_img, real_img)
        results['g_loss'] += float(g_loss) * batch_size
        d_loss = 1 - real_out + fake_out
        results['d_loss'] += float(d_loss) * batch_size
        results['d_score'] += float(real_out) * batch_size
        results['g_score'] += float(fake_out) * batch_size
        if (idx_train % step_) == 0:
            str_desc = ' * Loss_D: {:0.4f} Loss_G: {:0.4f} D(x): {:0.4f} D(G(z)): {:0.4f}'\
                .format(results['d_loss'], results['g_loss'], results['d_score'], results['g_score'])
            print('(TRN) [{}/{}] [{}/{}] -> {}'.format(idx_epoch, num_epochs,
                                                       idx_train, num_samples,
                                                       str_desc))
    dt = time.time() - t1
    results = {k: v / batch_sizes for k, v in results.items()}
    tmp_ = ', '.join(['{}: {:0.2f}'.format(k, v) for k, v in results.items()])
    print(' (TRAIN) ({}/{}) dt ~{:0.2f} (s), {}'.format(
        idx_epoch, num_epochs, dt, tmp_))
    return results
Example #22
    def event_loop_tail_across_minibatches(
        self, lm_dataloader: DataLoader, criterion: nn.Module, optimizer: Optimizer, transform_logger_object: Any
    ) -> None:
        # handles one epoch

        cur_rank = self.group.rank()
        N = len(get_pipeline_parallel_ranks())
        num_batches = len(lm_dataloader)
        lm_iter = enumerate(lm_dataloader)
        # last partition -> one forward / one backward -> no warmup
        count = 0
        num_gradients = 0
        activations = dict()

        log_interval = 1
        word_counter = 0
        total_loss = 0

        while True:
            try:
                start_time = time.time()
                microbatch_index, cur_batch = next(lm_iter)
                reqd_target = transform_logger_object.transform_target(cur_batch).to(self.input_device)

                # one forward
                message = self.transport.recv_message_header(EVENT_LOOP_ACTIVATIONS_QUEUE)
                args: AsyncMessageBody = message.args
                assert args.microbatch_index == count
                batch = self.get_batch_from_message(message, EVENT_LOOP_GRADIENTS_QUEUE)

                if self.weight_prediction:
                    optimizer.update_weight_using_future_predictions(cur_rank, N, forward=True)
                task = create_task_without_skip_trackers(
                    self.checkpoint_stop, args.microbatch_index, self.group.rank(), batch, self.partitions[0].module,
                )
                output = task.compute()
                activations[args.microbatch_index] = output
                task.finalize(output)
                # one backward
                if self.weight_prediction:
                    optimizer.update_weight_using_future_predictions(cur_rank, N, forward=False)

                output_tensor = transform_logger_object.transform_output_before_loss(output.tensor)
                loss = criterion(output_tensor, reqd_target)
                loss.backward()
                count += 1
                num_gradients += 1

                if self.perform_optimizer_step(optimizer, num_gradients):
                    optimizer.step()
                    optimizer.zero_grad()
                    transform_logger_object.check_and_save_weights(num_gradients)

                transform_logger_object.log_loss(cur_batch, loss, count)
                del loss
                del activations[args.microbatch_index]
            except StopIteration:
                break
Example #23
def run_epoch(model: torch.nn.Module, loader: DataLoader, criterion: nn.modules.loss._Loss,
              gt_former: GroundTruthFormer, epoch: int, mode: str = 'train', writer: SummaryWriter = None,
              optimizer: Optimizer = None, n_dumps_per_epoch: int = 10, train_loader_size: int = None,
              device: Union[torch.device, str] = torch.device('cpu')) -> Optional[Tuple[float, float]]:
    """
    Run one epoch for model. Can be used for both training and validation.
    :param model: pytorch model to be trained or validated
    :param loader: data loader to run model on batches
    :param criterion: callable class to calculate loss
    :param gt_former: callable class to form ground truth data to compute loss
    :param epoch: number of current epoch
    :param mode: `train` or `val`; controls whether model parameters are updated
    :param writer: tensorboard writer
    :param optimizer: pytorch model parameters optimizer
    :param n_dumps_per_epoch: how many times per epoch to dump images to tensorboard (not implemented yet)
    :param train_loader_size: number of objects in the train loader, needed for plots scaling in val mode
    :param device: device to be used for model related computations
    :return: values for cumulative loss and score (only in 'val' mode)
    """
    if mode == 'train':
        model.train()
    elif mode == 'val':
        model.eval()
        cumulative_loss, cumulative_score = 0, 0
    else:
        raise ValueError(f'Unknown mode: {mode}')

    for i, (frames, bboxes) in enumerate(tqdm(loader, desc="Batch", leave=False)):
        frames = frames.to(device)
        bboxes = [bbox.to(device) for bbox in bboxes]
        preds = model(frames)
        gt_data = gt_former.form_gt(bboxes)
        loss = criterion(preds, gt_data)
        score = pr_auc(gt_data[0], preds[0])
        if mode == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if writer is not None:
                writer.add_scalar('Loss', loss.item(), epoch * len(loader) + i)
                writer.add_scalar('Score', score, epoch * len(loader) + i)
        else:
            cumulative_loss += loss.item()
            cumulative_score += score
    if mode == 'val':
        if train_loader_size is not None:
            # scales val data to train data on the plots
            iterations = epoch * train_loader_size + loader.batch_size
        else:
            iterations = epoch * len(loader) + loader.batch_size
        cumulative_loss /= len(loader)
        cumulative_score /= len(loader)
        if writer is not None:
            writer.add_scalar('Loss', cumulative_loss, iterations)
            writer.add_scalar('Score', cumulative_score, iterations)
        return cumulative_loss, cumulative_score
Example #24
 def step(self, optimizer: Optimizer):
     if self.is_distributed:
         self.average_gradients(self._model)
          # TODO: Maybe we don't need to average every step?
     if self.fp16:
         self._scaler.step(optimizer)
         self._scaler.update()
     else:
         optimizer.step()
     optimizer.zero_grad()
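A minimal sketch of the loop this step() helper assumes when fp16 is enabled: the loss is scaled before backward so that self._scaler.step(optimizer) can later unscale the gradients and update (names other than step() are illustrative assumptions):

import torch


def train_batch(trainer, model, loss_fn, optimizer, x, y):
    with torch.cuda.amp.autocast(enabled=trainer.fp16):
        loss = loss_fn(model(x), y)
    if trainer.fp16:
        trainer._scaler.scale(loss).backward()
    else:
        loss.backward()
    trainer.step(optimizer)  # averages gradients if distributed, then steps and zeroes grads
    return loss.item()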
Example #25
 def _update_params(self,
                    docs: Sequence[Doc],
                    optimizer: Optimizer,
                    verbose: bool = False):
     loss = get_loss_from_docs(docs)
     optimizer.zero_grad()
     loss.backward()
     optimizer.step()
     if verbose:
         logger.info(f"Loss: {loss.detach().item()}")
Example #26
def train_one_epoch(
    model: Module,
    optimizer: Optimizer,
    data_loader: DataLoader,
    device: device,
    epoch: int,
    print_freq: int,
) -> MetricLogger:
    """Trains Faster R-CNN for one epoch on the data loader.

    Parameters
    ----------
    model : Module
        Model to train.
    optimizer : Optimizer
        Selected optimizer which updates weights of the model
    data_loader : DataLoader
        Train data.
    device : device
        Device on which is the model.
    epoch : int
        The number of the training epoch.
    print_freq : int
        The printing frequency during the training.

    Returns
    -------
    MetricLogger:
        Statistics about the training epoch.
    """
    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    metric_logger.add_meter("lr",
                            SmoothedValue(window_size=1, fmt="{value:.6f}"))
    header = "Epoch: [{}]".format(epoch)

    for images, targets in metric_logger.log_every(data_loader, print_freq,
                                                   header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        loss_dict_reduced = reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger
Example #27
def train(train_loader: DataLoader,
          model: nn.Module,
          criterion: nn.Module,
          optimizer: Optimizer,
          epoch: int,
          world_size: int):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):

        # measure data loading time
        data_time.update(time.time() - end)

        # Create non_blocking tensors for distributed training
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # compute output
        logits = model(input)
        loss = criterion(logits, target)

        # compute gradients in a backward pass
        optimizer.zero_grad()
        loss.backward()

        # Call step of optimizer to update model params
        optimizer.step()

        # Measure accuracy
        prec1, prec5 = accuracy(logits.data, target.data, topk=(1, 5))

        # Average loss and accuracy across processes for logging
        reduced_loss = reduce_tensor(loss.data, world_size)
        prec1 = reduce_tensor(prec1, world_size)
        prec5 = reduce_tensor(prec5, world_size)

        # to_python_float incurs a host<->device sync
        batch_size = input[0].size(0)
        losses.update(to_python_float(reduced_loss), batch_size)
        top1.update(to_python_float(prec1), batch_size)
        top5.update(to_python_float(prec5), batch_size)

        torch.cuda.synchronize()
        batch_time.update((time.time() - end))
        end = time.time()

    return losses.avg, top1.avg, top5.avg, batch_time.sum
Example #28
def train_batch(dsc_model: Discriminator, gen_model: Generator,
                dsc_loss_fn: Callable, gen_loss_fn: Callable,
                dsc_optimizer: Optimizer, gen_optimizer: Optimizer,
                x_data: DataLoader):
    """
    Trains a GAN over one batch, updating both the discriminator and
    generator.
    :return: The discriminator and generator losses.
    """

    # TODO: Discriminator update
    #  1. Show the discriminator real and generated data
    #  2. Calculate discriminator loss
    #  3. Update discriminator parameters
    # ====== YOUR CODE: ======
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    dsc_model.to(device)

    y_pred = dsc_model(x_data)

    num_to_sample = y_pred.shape[0]
    samples = gen_model.sample(num_to_sample, False)
    generated = dsc_model(samples)

    dsc_loss = dsc_loss_fn(y_pred, generated)

    dsc_optimizer.zero_grad()
    dsc_loss.backward(retain_graph=True)
    dsc_optimizer.step()
    # ========================

    # TODO: Generator update
    #  1. Show the discriminator generated data
    #  2. Calculate generator loss
    #  3. Update generator parameters
    # ====== YOUR CODE: ======
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    gen_model.to(device)

    y_pred = gen_model(x_data)

    num_to_sample = y_pred.shape[0]
    samples = gen_model.sample(num_to_sample, True)
    generated = dsc_model(samples)

    gen_loss = gen_loss_fn(generated)

    gen_optimizer.zero_grad()
    gen_loss.backward(retain_graph=True)
    gen_optimizer.step()
    # ========================

    return dsc_loss.item(), gen_loss.item()
Example #29
def train_function(
    config: Any,
    engine: Engine,
    batch: Any,
    model: torch.nn.Module,
    loss_fn: torch.nn.Module,
    optimizer: Optimizer,
    device: torch.device,
):
    """Model training step.

    Parameters
    ----------
    config
        config object
    engine
        Engine instance
    batch
        batch in current iteration
    model
        nn.Module model
    loss_fn
        nn.Module loss
    optimizer
        torch optimizer
    device
        device to use for training

    Returns
    -------
    float
        Training loss value for the current batch.
    """

    model.train()

    samples = batch[0].to(device, non_blocking=True)
    targets = batch[1].to(device, non_blocking=True)

    with autocast(enabled=config.use_amp):
        outputs = model(samples)
        loss = loss_fn(outputs, targets)

    loss.backward()
    engine.state.backward_completed += 1
    engine.fire_event(TrainEvents.BACKWARD_COMPLETED)

    optimizer.step()
    engine.state.optim_step_completed += 1
    engine.fire_event(TrainEvents.OPTIM_STEP_COMPLETED)

    optimizer.zero_grad()

    loss_value = loss.item()
    engine.state.metrics = {"epoch": engine.state.epoch, "train_loss": loss_value}
    return loss_value
Example #30
    def optimize(opt: Optimizer, loss: torch.Tensor):
        """
        Optimize the parameters based on the loss and the optimizer.

        Args:
            opt: optimizer
            loss: loss, a scalar
        """
        opt.zero_grad()
        loss.backward()
        opt.step()
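A one-line usage sketch of this helper (model, x and y are placeholders for whatever produces the scalar loss):

import torch.nn.functional as F

# e.g. one gradient step on a regression objective
optimize(opt, F.mse_loss(model(x), y))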