def train_policy_on_episode(optimizer: Optimizer, training_info: TrainingInfo,
                            episode_number: int):
    """ Trains both the actor and the critic using all transitions of the latest episode.
    The critic's loss is the smooth L1 loss between V(state) and the discounted reward,
    where V is the critic's value function. The actor / policy is trained by maximizing
    log probability * advantage, and an entropy term is added to encourage exploration.
    The entropy term is decayed by a factor of 0.99 at each new episode. """
    training_info.compute_discounted_rewards()

    # Compute the loss of the policy and the critic at each time step
    policy_losses = []  # Policy errors
    value_losses = []  # Critic errors
    for log_prob, discounted_reward, state_value, entropy in zip(
            training_info.log_probs, training_info.discounted_rewards,
            training_info.state_values, training_info.entropies):
        advantage = discounted_reward - state_value.item()
        policy_losses.append(-(log_prob + 0.99 ** episode_number * entropy) * advantage)
        value_losses.append(
            F.smooth_l1_loss(state_value.squeeze(0), torch.tensor([discounted_reward])))

    # Optimize the policy and the critic jointly
    optimizer.zero_grad()
    total_loss = torch.stack(policy_losses).sum() + torch.stack(value_losses).sum()
    total_loss.backward()
    optimizer.step()

    # Reset the state of the episode
    training_info.reset()
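# TrainingInfo.compute_discounted_rewards() is not shown in this snippet. Below is a
# minimal sketch of what such a method typically computes (an assumption, not the
# source's implementation): the discounted return G_t = r_t + gamma * G_{t+1},
# accumulated backwards over the episode's rewards.
def compute_discounted_rewards_sketch(rewards: List[float], gamma: float) -> List[float]:
    discounted, running = [], 0.0
    for r in reversed(rewards):
        running = r + gamma * running
        discounted.append(running)
    return list(reversed(discounted))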
def train_policy_on_step(critic: SimpleCritic, optimizer: Optimizer, reward: float,
                         state: np.ndarray, next_state: np.ndarray, gamma: float,
                         log_prob: float, entropy: float, episode_number: int,
                         run_params: RunParams):
    """ Trains both the actor and the critic using the given transition.
    The critic's loss is the MSE between V(state) and reward + gamma * V(next state),
    where V is the critic's value function. The actor / policy is trained by maximizing
    log probability * td-error, and an entropy term is added to encourage exploration.
    The entropy term is decayed at each new episode by the run_params.entropy_decay
    coefficient. """
    # Inspired from https://gym.openai.com/evaluations/eval_gUhDnmlbTKG1qW0jS6HSg/
    state, next_state = prepare_state(state), prepare_state(next_state)

    state_value_target = reward + gamma * critic.forward(next_state)
    state_value_prediction = critic.forward(state)
    td_error = state_value_target - state_value_prediction

    # Update the policy and the critic. The td-error is detached in the policy term so
    # that the policy loss does not backpropagate into the critic's parameters, and the
    # bootstrap target is detached so the critic is trained with a semi-gradient update.
    optimizer.zero_grad()
    entropy_coeff = run_params.entropy_coeff * run_params.entropy_decay ** episode_number
    loss = -(log_prob + entropy_coeff * entropy) * td_error.detach()
    loss += F.mse_loss(state_value_prediction, state_value_target.detach())
    loss.backward()
    optimizer.step()
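# A minimal sketch (not from the source) of how train_policy_on_step could be driven
# from an environment loop. `env`, `policy`, and the `select_action` helper are
# hypothetical; `select_action` is assumed to return the sampled action together with
# its log probability and the distribution's entropy.
def run_episode_sketch(env, policy, critic, optimizer, gamma, run_params,
                       episode_number):
    state = env.reset()
    done = False
    while not done:
        action, log_prob, entropy = select_action(policy, state)  # hypothetical helper
        next_state, reward, done, _ = env.step(action)
        train_policy_on_step(critic, optimizer, reward, state, next_state, gamma,
                             log_prob, entropy, episode_number, run_params)
        state = next_state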
def train_steps(self, optimizer: Optimizer,
                triplet_dataset: FederatedTripletsDataset) -> TrainStepResults:
    losses: List[float] = []
    local_step: int = 0

    triplet_loader = DataLoader(triplet_dataset,
                                batch_size=self.settings.batch_size,
                                shuffle=True)
    for triplets in triplet_loader:
        # Calculate triplet loss (the inputs are already on the GPU, so the loss is too)
        triplet_loss = self.loss_fn(
            anchor=self.model(triplets["anchor"].cuda()),
            positive=self.model(triplets["positive"].cuda()),
            negative=self.model(triplets["negative"].cuda()),
        )

        # Backward pass
        optimizer.zero_grad()
        triplet_loss.backward()
        optimizer.step()

        self.global_step += 1
        local_step += 1
        losses.append(triplet_loss.item())

    loss_mean = sum(losses) / len(losses)
    return TrainStepResults(loss_mean, local_step)
def train_epoch(model: nn.Module, loader: DataLoader, optimizer: Optimizer,
                epoch: int) -> Tuple[torch.Tensor, torch.Tensor]:
    # Guard against short loaders, where len(loader) // 10 would be 0
    log_interval = max(1, len(loader) // 10)
    device = next(model.parameters()).device
    model.train()
    steps = []
    traces = []
    for batch_idx, (data, target) in enumerate(loader, start=1):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch,
                batch_idx * len(data),
                len(loader.dataset),
                100.0 * batch_idx / len(loader),
                loss.item(),
            ))
            steps.append(batch_idx)
            batch_traces = batch_data_matrix_trace(model, data)
            traces.append(batch_traces)
        optimizer.step()
    steps = torch.tensor(steps)
    traces = torch.stack(traces, dim=1)
    return steps, traces
def train(loader: DataLoader, network: nn.Module, optimizer: Optimizer, epoch: int,
          log_interval: int, state: TrainingState) -> None:
    network.train()
    for batch_idx, (data, target) in enumerate(loader):
        # manually set all gradients to zero
        optimizer.zero_grad()
        # produce the network's output (forward pass)
        output = network(data)
        # compute negative log-likelihood loss between
        # the output and the ground truth label
        loss = F.nll_loss(output, target)
        # compute a new set of gradients and
        # backpropagate them to the network parameters
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loader.dataset),
                100.0 * batch_idx / len(loader), loss.item()))
            # number of examples seen so far (loader.batch_size examples per batch)
            count = (batch_idx * loader.batch_size) + ((epoch - 1) * len(loader.dataset))
            state.update(network, optimizer, loss.item(), count)
def fnTrain(
    loader: DataLoader,
    device: str,
    model: nn.Module,
    optimizer: Optimizer,
    fnLoss,
    scaler: GradScaler,
) -> float:
    runningLoss = 0
    for idxBatch, (data, targets) in enumerate(loader):
        data = data.to(device=device)
        targets = targets.float().unsqueeze(1).to(device=device)

        # forward pass under mixed precision
        with torch.cuda.amp.autocast():
            predictions = model(data)
            loss = fnLoss(predictions, targets)

        optimizer.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # print(f"batch {idxBatch + 1} loss {loss.item()}")
        runningLoss += loss.item()

    return runningLoss / len(loader)
def train_loop(data_loader: DataLoader, model: nn.Module, optimizer: Optimizer,
               device: torch.device) -> List[float]:
    """
    Train loop. Loop the model over input batches, compute the loss and do
    back-propagation.

    :param data_loader: Pytorch DataLoader containing word2vec model inputs.
    :param model: Word2Vec pytorch model.
    :param optimizer: Pytorch Optimizer.
    :param device: Device on which to run the computations.
    :return: List of the loss of each training step.
    """
    loss_values = list()
    model.train()
    for bi, d in enumerate(data_loader):
        center_id = d["center_id"]
        context_id = d["context_id"]

        center_id = center_id.to(device, dtype=torch.long)
        context_id = context_id.to(device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = model(center_id=center_id)
        # loss_fn is assumed to be defined at module level (not shown in this snippet)
        loss = loss_fn(outputs, context_id)
        loss_values.append(loss.item())
        loss.backward()
        optimizer.step()
    return loss_values
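# loss_fn is referenced but not defined in the snippet above. A common choice for this
# kind of skip-gram setup (an assumption, not the source's definition) is cross-entropy
# over vocabulary scores:
def loss_fn(outputs: torch.Tensor, context_id: torch.Tensor) -> torch.Tensor:
    # outputs: (batch, vocab_size) scores; context_id: (batch,) target word indices
    return F.cross_entropy(outputs, context_id)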
def train_step(self, x: torch.Tensor, y: torch.Tensor, support_set,
               optimizer: Optimizer):
    optimizer.zero_grad()
    loss, y_pred = self.compute_loss(x, y, support_set)
    loss.backward()
    optimizer.step()
    return loss, y_pred
def _fit_epoch(self, g_optim: Optimizer, d_optim: Optimizer) -> TensorTuple:
    r"""
    Trains a single entire epoch

    :param g_optim: Generator optimizer
    :param d_optim: Discriminator optimizer
    :return: Average training loss
    """
    tot_l_g = tot_l_d = 0
    num_batch = min(len(self._mal_data.train), len(self._ben_data.train))
    for (m, _), (b, _) in zip(self._mal_data.train, self._ben_data.train):
        if self._is_cuda:
            m, b = m.cuda(), b.cuda()
        m_prime, g_theta = self._gen.forward(m)
        l_g = self._calc_gen_loss(g_theta)
        g_optim.zero_grad()
        l_g.backward()
        # torch.nn.utils.clip_grad_value_(l_g, 1)
        g_optim.step()
        # Detach so the running total does not keep the autograd graph alive
        tot_l_g += l_g.detach()

        # Update the discriminator. m_prime is detached so the discriminator update
        # does not backpropagate into the (already freed) generator graph.
        for x in [m_prime.detach(), b]:
            l_d = self._calc_discrim_loss(x)
            d_optim.zero_grad()
            l_d.backward()
            # torch.nn.utils.clip_grad_value_(l_d, 1)
            d_optim.step()
            tot_l_d += l_d.detach()
    # noinspection PyUnresolvedReferences
    return (tot_l_g / num_batch).item(), (tot_l_d / num_batch).item()
def train_batch(policy: SimplePolicyContinuous, states: List[torch.Tensor],
                actions: List[torch.Tensor], discounted_rewards: List[torch.Tensor],
                optimizer: Optimizer, episode_number: int, run_params: RunParams):
    """ Trains the policy using the policy gradient method on a single mini-batch of
    transitions. Entropy is also taken into account. Each new episode diminishes its
    importance by run_params.entropy_decay, such that the agent will explore at the
    beginning and tend to explore less and less over time """
    optimizer.zero_grad()
    policy_losses = []
    for (state, action, discounted_reward) in zip(states, actions, discounted_rewards):
        state = state.float().unsqueeze(0)
        if run_params.continuous_actions:
            mu, sigma = policy.forward(state)
            n = Normal(mu, sigma)
        else:
            probs = policy.forward(state)
            n = Categorical(probs)
        # The decay coefficient comes from run_params, as the docstring describes
        entropy_coeff = run_params.entropy_decay ** episode_number
        policy_losses.append(
            -(n.log_prob(action) + entropy_coeff * n.entropy()) * discounted_reward)
    total_policy_loss = torch.cat(policy_losses).sum()
    total_policy_loss.backward()
    optimizer.step()
def train_batch(dsc_model: Discriminator, gen_model: Generator, dsc_loss_fn: Callable,
                gen_loss_fn: Callable, dsc_optimizer: Optimizer,
                gen_optimizer: Optimizer, x_data: torch.Tensor, y_data: torch.Tensor):
    """
    Trains a GAN over one batch, updating both the discriminator and generator.
    :return: The discriminator and generator losses.
    """
    fake, gen_labels = gen_model.sample(x_data.shape[0], with_grad=True)

    # Train Discriminator
    dsc_optimizer.zero_grad()
    # Discriminator on real data
    real_output, real_classes = dsc_model(x_data)
    # Discriminator on fake data (detached so the generator is not updated here)
    fake_output, fake_classes = dsc_model(fake.detach())
    dsc_loss = dsc_loss_fn(real_output, real_classes, y_data, fake_output,
                           fake_classes, gen_labels)
    dsc_loss.backward()
    dsc_optimizer.step()
    # =======================

    # Train Generator
    gen_optimizer.zero_grad()
    fake_output, fake_classes = dsc_model(fake)
    gen_loss = gen_loss_fn(fake_output, fake_classes, gen_labels)
    gen_loss.backward()
    gen_optimizer.step()
    # ========================
    return dsc_loss.item(), gen_loss.item()
def NOLAGattack(self, X_nat, y, delta, optimizer: Optimizer):
    # Takes the natural examples and the perturbation delta for a batch as inputs
    if self.dataname in ['MNIST', 'FashionMNIST']:
        rand_i = torch.from_numpy(
            np.random.uniform(low=-self.eps, high=self.eps,
                              size=X_nat.size())).to(device)
        rand_i = rand_i.float()
        pert = X_nat + rand_i
        pert = pert + delta
        # Project back into the epsilon ball around the natural examples
        pert = torch.where(pert > X_nat + self.eps, X_nat + self.eps, pert)
        pert = torch.where(pert < X_nat - self.eps, X_nat - self.eps, pert)
    else:
        pert = X_nat + delta

    pert = torch.clamp(pert, 0, 1)
    optimizer.zero_grad()
    outputs = self.model(pert)
    loss = self.criterion(outputs, y)
    loss.backward()
    grad = delta.grad.data.clone().detach()

    # ########## Update Delta ############
    new_delta = (delta + self.stepsize * grad).detach().cpu()
    new_delta = torch.clamp(new_delta, -self.eps, self.eps)
    return pert.detach(), (pert - X_nat).detach(), new_delta
def train_policy(optimizer: Optimizer, training_info: TrainingInfo,
                 run_params: RunParams):
    """ Trains the policy using the policy gradient method, given the discounted
    rewards of the latest episode. Entropy is also taken into account. Each new episode
    diminishes its importance by run_params.entropy_decay, such that the agent will
    explore at the beginning and tend to explore less and less over time.
    The agent is trained once on all the transitions of the episode (instead of
    training many times over mini-batches).
    """
    training_info.compute_discounted_rewards()

    # Compute the loss of the policy at each time step
    policy_losses = []
    for log_prob, discounted_reward, entropy in zip(
            training_info.log_probs, training_info.discounted_rewards,
            training_info.entropies):
        entropy_coeff = (run_params.entropy_coeff *
                         run_params.entropy_decay ** training_info.episode_number)
        policy_losses.append(-(log_prob + entropy_coeff * entropy) * discounted_reward)

    # Optimize the policy
    optimizer.zero_grad()
    total_policy_loss = torch.cat(policy_losses).sum()
    total_policy_loss.backward()
    optimizer.step()

    # Reset the state of the episode
    training_info.reset()
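# A quick illustration of the entropy decay schedule used above, with hypothetical
# values entropy_coeff=0.01 and entropy_decay=0.99: the exploration bonus shrinks
# geometrically with the episode number.
for episode in (0, 10, 100, 500):
    print(f"episode {episode}: entropy coefficient = {0.01 * 0.99 ** episode:.6f}")
# episode 0: entropy coefficient = 0.010000
# episode 10: entropy coefficient = 0.009044
# episode 100: entropy coefficient = 0.003660
# episode 500: entropy coefficient = 0.000066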
def _fit_epoch(self, generator_optimizer: Optimizer,
               discriminator_optimizer: Optimizer) -> TensorTuple:
    total_loss_generator = total_loss_discriminator = 0
    num_batch = min(len(self._mal_data.train), len(self._ben_data.train))
    for (malware_data, _), (benign_data, _) in zip(self._mal_data.train,
                                                   self._ben_data.train):
        if self._is_cuda:
            malware_data, benign_data = malware_data.cuda(), benign_data.cuda()
        m_prime, g_theta = self._gen.forward(malware_data)
        generator_loss = self._calc_gen_loss(g_theta)
        generator_optimizer.zero_grad()
        generator_loss.backward()
        generator_optimizer.step()
        # Detach so the running total does not keep the autograd graph alive
        total_loss_generator += generator_loss.detach()

        # m_prime is detached so the discriminator update does not backpropagate
        # into the (already freed) generator graph
        for x in [m_prime.detach(), benign_data]:
            discriminator_loss = self._calc_discrim_loss(x)
            discriminator_optimizer.zero_grad()
            discriminator_loss.backward()
            discriminator_optimizer.step()
            total_loss_discriminator += discriminator_loss.detach()

    # noinspection PyUnresolvedReferences
    return (total_loss_generator / num_batch).item(), \
           (total_loss_discriminator / num_batch).item()
def _train_segmenter_epoch(
        model: torch.nn.Module, optimizer: Optimizer, train_dataloader: DataLoader,
        val_dataloader: DataLoader) -> Tuple[List[Any], List[Any]]:
    t_losses, v_losses = [], []
    model.train()
    for x, y in tqdm(train_dataloader):
        optimizer.zero_grad()
        preds = model(x)
        loss = F.binary_cross_entropy(preds, y.unsqueeze(1))
        loss.backward()
        optimizer.step()
        t_losses.append(loss.item())

    with torch.no_grad():
        model.eval()
        for val_x, val_y in tqdm(val_dataloader):
            val_preds = model(val_x)
            val_loss = F.binary_cross_entropy(val_preds, val_y.unsqueeze(1))
            v_losses.append(val_loss.item())

    print(f'Train loss: {np.mean(t_losses)}, Val loss: {np.mean(v_losses)}')
    return t_losses, v_losses
def train(args, model: SentimentAnalysisModel, train_loader: DataLoader,
          optimizer: Optimizer, epoch: int, device_: device):
    global eps_threshold_hit
    model = model.train().to(device_)
    criterion = nn.CrossEntropyLoss()
    losses = []
    accuracies = []
    virtual_batch_rate = VIRTUAL_BATCH_SIZE / BATCH_SIZE

    for idx, batch in enumerate(tqdm(train_loader)):
        ids = batch['input_ids'].to(device_, dtype=torch.long)
        mask = batch['attention_mask'].to(device_, dtype=torch.long)
        # token_type_ids = batch['token_type_ids'].to(device_, dtype=torch.long)
        targets = batch['label'].to(device_, dtype=torch.long)
        decoder_input_ids = batch['decoder_input_ids'].to(device_, dtype=torch.long)

        optimizer.zero_grad()
        predictions = model(input_ids=ids, attention_mask=mask,
                            decoder_input_ids=decoder_input_ids)
        loss = criterion(predictions, targets)
        acc = binary_accuracy(predictions, targets)
        loss.backward()

        if args.eps_threshold is not None:
            # take virtual steps to improve performance: accumulate per-sample
            # gradients up to the virtual batch size before a real optimizer step
            if (idx + 1) % virtual_batch_rate == 0 or idx == len(train_loader) - 1:
                optimizer.step()
                optimizer.zero_grad()
            else:
                optimizer.virtual_step()
        else:
            optimizer.step()

        losses.append(loss.item())
        accuracies.append(acc.item())

    if args.eps_threshold is not None:
        epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent()
        print(f"Train Epoch: {epoch} \t"
              f"Train Loss: {np.mean(losses):.6f} "
              f"Train Accuracy: {np.mean(accuracies):.6f} "
              f"(ε = {epsilon:.2f}, δ = {1e-06}) for α = {best_alpha}")
        # stop training if eps >= eps_threshold
        eps_threshold_hit = epsilon >= args.eps_threshold
        if eps_threshold_hit:
            print('Hit epsilon threshold, stopping training.')
    else:
        print(f'Train epoch: {epoch} \t Avg Loss: {np.mean(losses)} \t '
              f'Avg Accuracy: {np.mean(accuracies)}')
def lr_find(model: UNet, data_loader, optimizer: Optimizer, criterion, use_gpu,
            min_lr=0.0001, max_lr=0.1):
    # Save model and optimizer states so they can be reverted afterwards.
    # state_dict() returns references, so deep copies are needed to get a snapshot
    # that training will not mutate (requires `import copy`).
    model_state = copy.deepcopy(model.state_dict())
    optimizer_state = copy.deepcopy(optimizer.state_dict())

    losses = []
    lrs = []
    scheduler = CyclicExpLR(optimizer, min_lr, max_lr, step_size_up=100,
                            mode='triangular', cycle_momentum=True)
    model.train()
    for i, (data, target, class_ids) in enumerate(data_loader):
        if use_gpu:
            data = data.cuda()
            target = target.cuda()
        optimizer.zero_grad()
        output_raw = model(data)

        # This step is specific for this project
        output = torch.zeros(output_raw.shape[0], 1, output_raw.shape[2],
                             output_raw.shape[3])
        if use_gpu:
            output = output.cuda()

        # This step is specific for this project
        for idx, (raw_o, class_id) in enumerate(zip(output_raw, class_ids)):
            output[idx] = raw_o[class_id - 1]

        loss = criterion(output, target)
        loss.backward()
        current_lr = optimizer.param_groups[0]['lr']
        # Stop if the lr stopped increasing
        if len(lrs) > 0 and current_lr < lrs[-1]:
            break
        lrs.append(current_lr)
        losses.append(loss.item())
        optimizer.step()
        scheduler.step()

    # Plot in log scale
    plt.plot(lrs, losses)
    plt.xscale('log')
    plt.show()

    # Revert the model and optimizer to their pre-search states
    model.load_state_dict(model_state)
    optimizer.load_state_dict(optimizer_state)
def optimizer_zero_grad(self, epoch: int, batch_idx: int, optimizer: Optimizer,
                        optimizer_idx: int):
    last_batch = batch_idx == len(self.trainer.train_dataloader) - 1
    if last_batch:
        tensorboard = self.logger.experiment
        fig = plot_grad_flow(self.named_parameters())
        tensorboard.add_figure("Gradient Flow", fig, self.current_epoch)
    optimizer.zero_grad()
def event_loop_tail_across_minibatches(
    self, lm_dataloader: DataLoader, criterion: nn.Module, optimizer: Optimizer,
    transform_logger_object: Any
) -> None:
    # handles one epoch
    cur_rank = self.group.rank()
    N = len(get_pipeline_parallel_ranks())
    num_batches = len(lm_dataloader)
    lm_iter = enumerate(lm_dataloader)
    # last partition -> one forward / one backward -> no warmup
    count = 0
    num_gradients = 0
    activations = dict()
    log_interval = 1
    word_counter = 0
    total_loss = 0

    while True:
        try:
            start_time = time.time()
            microbatch_index, cur_batch = next(lm_iter)
            reqd_target = transform_logger_object.transform_target(cur_batch).to(
                self.input_device)

            # one forward
            message = self.transport.recv_message_header(EVENT_LOOP_ACTIVATIONS_QUEUE)
            args: AsyncMessageBody = message.args
            assert args.microbatch_index == count
            batch = self.get_batch_from_message(message, EVENT_LOOP_GRADIENTS_QUEUE)

            if self.weight_prediction:
                optimizer.update_weight_using_future_predictions(cur_rank, N,
                                                                 forward=True)
            task = create_task_without_skip_trackers(
                self.checkpoint_stop,
                args.microbatch_index,
                self.group.rank(),
                batch,
                self.partitions[0].module,
            )
            output = task.compute()
            activations[args.microbatch_index] = output
            task.finalize(output)

            # one backward
            if self.weight_prediction:
                optimizer.update_weight_using_future_predictions(cur_rank, N,
                                                                 forward=False)
            output_tensor = transform_logger_object.transform_output_before_loss(
                output.tensor)
            loss = criterion(output_tensor, reqd_target)
            loss.backward()
            count += 1
            num_gradients += 1

            if self.perform_optimizer_step(optimizer, num_gradients):
                optimizer.step()
                optimizer.zero_grad()
                transform_logger_object.check_and_save_weights(num_gradients)

            transform_logger_object.log_loss(cur_batch, loss, count)
            del loss
            del activations[args.microbatch_index]
        except StopIteration:
            break
def event_loop_across_minibatches(self, lm_dataloader: DataLoader,
                                  criterion: nn.Module, optimizer: Optimizer,
                                  transform_logger_object: Any) -> None:
    activations: Dict[int, Batch] = dict()
    num_microbatch = len(lm_dataloader)
    num_activations = 0
    num_gradients = 0

    ranks = get_pipeline_parallel_ranks()  # for warmup phase
    N = len(ranks)
    cur_rank = torch.distributed.get_rank()

    # warmup phase (forward passes)
    # the cur_rank worker will do (max_rank - cur_rank) forward passes
    n_warmup = ranks[-1] - cur_rank
    for _ in range(n_warmup):
        if self.weight_prediction:
            optimizer.update_weight_using_future_predictions(
                cur_rank, N, forward=True)  # type: ignore
        message = self.event_loop_trunk_forward_helper(activations)
        self.transport.send_message(message, sync=True)
        num_activations += 1

    # common loop for the remaining items in the warmup phase and the steady phase
    while num_activations < num_microbatch:
        # 1 Forward
        if self.weight_prediction:
            optimizer.update_weight_using_future_predictions(
                cur_rank, N, forward=True)  # type: ignore
        message = self.event_loop_trunk_forward_helper(activations)
        num_activations += 1

        # 1 Backward
        if self.weight_prediction:
            optimizer.update_weight_using_future_predictions(
                cur_rank, N, forward=False)  # type: ignore
        self.event_loop_trunk_backward_helper(activations)
        num_gradients += 1

        if self.perform_optimizer_step(optimizer, num_gradients):
            optimizer.step()
            optimizer.zero_grad()
            transform_logger_object.check_and_save_weights(num_gradients)

        self.transport.send_message(message, sync=True)

    # remaining backwards
    remaining = len(activations)
    for _ in range(remaining):
        if self.weight_prediction:
            optimizer.update_weight_using_future_predictions(
                cur_rank, N, forward=False)  # type: ignore
        self.event_loop_trunk_backward_helper(activations)
        num_gradients += 1
        if self.perform_optimizer_step(optimizer, num_gradients):
            optimizer.step()
            optimizer.zero_grad()
            transform_logger_object.check_and_save_weights(num_gradients)
def run_epoch(model: torch.nn.Module, loader: DataLoader,
              criterion: nn.modules.loss._Loss, gt_former: GroundTruthFormer,
              epoch: int, mode: str = 'train', writer: SummaryWriter = None,
              optimizer: Optimizer = None, n_dumps_per_epoch: int = 10,
              train_loader_size: int = None,
              device: Union[torch.device, str] = torch.device('cpu')
              ) -> Optional[Tuple[float, float]]:
    """
    Run one epoch for model. Can be used for both training and validation.
    :param model: pytorch model to be trained or validated
    :param loader: data loader to run model on batches
    :param criterion: callable class to calculate loss
    :param gt_former: callable class to form ground truth data to compute loss
    :param epoch: number of current epoch
    :param mode: `train` or `val`, controls whether model parameters are updated
    :param writer: tensorboard writer
    :param optimizer: pytorch model parameters optimizer
    :param n_dumps_per_epoch: how many times per epoch to dump images to tensorboard
    (not implemented yet)
    :param train_loader_size: number of objects in the train loader, needed for plots
    scaling in val mode
    :param device: device to be used for model related computations
    :return: values for cumulative loss and score (only in 'val' mode)
    """
    if mode == 'train':
        model.train()
    elif mode == 'val':
        model.eval()
        cumulative_loss, cumulative_score = 0, 0
    else:
        raise ValueError(f'Unknown mode: {mode}')

    for i, (frames, bboxes) in enumerate(tqdm(loader, desc="Batch", leave=False)):
        frames = frames.to(device)
        bboxes = [bbox.to(device) for bbox in bboxes]
        # only track gradients in 'train' mode
        with torch.set_grad_enabled(mode == 'train'):
            preds = model(frames)
            gt_data = gt_former.form_gt(bboxes)
            loss = criterion(preds, gt_data)
        score = pr_auc(gt_data[0], preds[0])
        if mode == 'train':
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if writer is not None:
                writer.add_scalar('Loss', loss.item(), epoch * len(loader) + i)
                writer.add_scalar('Score', score, epoch * len(loader) + i)
        else:
            cumulative_loss += loss.item()
            cumulative_score += score

    if mode == 'val':
        if train_loader_size is not None:
            # scales val data to train data on the plots
            iterations = epoch * train_loader_size + loader.batch_size
        else:
            iterations = epoch * len(loader) + loader.batch_size
        cumulative_loss /= len(loader)
        cumulative_score /= len(loader)
        if writer is not None:
            writer.add_scalar('Loss', cumulative_loss, iterations)
            writer.add_scalar('Score', cumulative_score, iterations)
        return cumulative_loss, cumulative_score
def step(self, optimizer: Optimizer):
    if self.is_distributed:
        # TODO: Maybe we don't need to average at every step?
        self.average_gradients(self._model)
    if self.fp16:
        self._scaler.step(optimizer)
        self._scaler.update()
    else:
        optimizer.step()
    optimizer.zero_grad()
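# A sketch of the caller side for the step() method above (assumptions: `trainer` is
# the object exposing step(), with trainer.fp16 set and trainer._scaler a
# torch.cuda.amp.GradScaler; model, loader, loss_fn, and optimizer are placeholders).
# The loss must be scaled before backward so that scaler.step() can later unscale
# the gradients, which is the standard GradScaler pattern.
for data, target in loader:
    with torch.cuda.amp.autocast(enabled=trainer.fp16):
        loss = loss_fn(model(data), target)
    if trainer.fp16:
        trainer._scaler.scale(loss).backward()
    else:
        loss.backward()
    trainer.step(optimizer)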
def _update_params(self, docs: Sequence[Doc], optimizer: Optimizer,
                   verbose: bool = False):
    loss = get_loss_from_docs(docs)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if verbose:
        logger.info(f"Loss: {loss.detach().item()}")
def train_function(
    config: Any,
    engine: Engine,
    batch: Any,
    model: torch.nn.Module,
    loss_fn: torch.nn.Module,
    optimizer: Optimizer,
    device: torch.device,
):
    """Model training step.

    Parameters
    ----------
    config
        config object
    engine
        Engine instance
    batch
        batch in current iteration
    model
        nn.Module model
    loss_fn
        nn.Module loss
    optimizer
        torch optimizer
    device
        device to use for training

    Returns
    -------
    the training loss value for the current iteration
    """
    model.train()
    samples = batch[0].to(device, non_blocking=True)
    targets = batch[1].to(device, non_blocking=True)
    with autocast(enabled=config.use_amp):
        outputs = model(samples)
        loss = loss_fn(outputs, targets)

    loss.backward()
    engine.state.backward_completed += 1
    engine.fire_event(TrainEvents.BACKWARD_COMPLETED)
    optimizer.step()
    engine.state.optim_step_completed += 1
    engine.fire_event(TrainEvents.OPTIM_STEP_COMPLETED)
    optimizer.zero_grad()

    loss_value = loss.item()
    engine.state.metrics = {"epoch": engine.state.epoch, "train_loss": loss_value}
    return loss_value
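# A sketch of how the custom events fired above might be consumed. It assumes
# TrainEvents is an ignite EventEnum that was registered on the engine with
# engine.register_events(*TrainEvents) (the standard ignite mechanism for custom
# events); the handler itself is illustrative.
@engine.on(TrainEvents.BACKWARD_COMPLETED)
def _log_backward(engine: Engine):
    print(f"backward passes completed: {engine.state.backward_completed}")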
def train(train_loader: DataLoader, model: nn.Module, criterion: nn.Module,
          optimizer: Optimizer, epoch: int, world_size: int):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # Create non_blocking tensors for distributed training
        input = input.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        # compute output
        logits = model(input)
        loss = criterion(logits, target)

        # compute gradients in a backward pass
        optimizer.zero_grad()
        loss.backward()

        # Call step of optimizer to update model params
        optimizer.step()

        # Measure accuracy
        prec1, prec5 = accuracy(logits.data, target.data, topk=(1, 5))

        # Average loss and accuracy across processes for logging
        reduced_loss = reduce_tensor(loss.data, world_size)
        prec1 = reduce_tensor(prec1, world_size)
        prec5 = reduce_tensor(prec5, world_size)

        # to_python_float incurs a host<->device sync.
        # input.size(0) is the number of examples in the batch.
        batch_size = input.size(0)
        losses.update(to_python_float(reduced_loss), batch_size)
        top1.update(to_python_float(prec1), batch_size)
        top5.update(to_python_float(prec5), batch_size)

        torch.cuda.synchronize()
        batch_time.update(time.time() - end)
        end = time.time()

    return losses.avg, top1.avg, top5.avg, batch_time.sum
def train_one_epoch(
    model: Module,
    optimizer: Optimizer,
    data_loader: DataLoader,
    device: device,
    epoch: int,
    print_freq: int,
) -> MetricLogger:
    """Trains Faster R-CNN for one epoch on the data loader.

    Parameters
    ----------
    model : Module
        Model to train.
    optimizer : Optimizer
        Selected optimizer which updates weights of the model
    data_loader : DataLoader
        Train data.
    device : device
        Device on which is the model.
    epoch : int
        The number of the training epoch.
    print_freq : int
        The printing frequency during the training.

    Returns
    -------
    MetricLogger:
        Statistics about the training epoch.
    """
    model.train()
    metric_logger = MetricLogger(delimiter=" ")
    metric_logger.add_meter("lr", SmoothedValue(window_size=1, fmt="{value:.6f}"))
    header = "Epoch: [{}]".format(epoch)

    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # reduce the loss dict across processes for logging purposes
        loss_dict_reduced = reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
    return metric_logger
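# A minimal usage sketch for train_one_epoch, assuming torchvision's detection API.
# `data_loader` is a placeholder and must yield (images, targets) in the torchvision
# detection format; num_classes and the optimizer settings are illustrative, not
# taken from the source.
import torch
import torchvision

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(num_classes=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
for epoch in range(10):
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)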
def train_batch(dsc_model: Discriminator, gen_model: Generator, dsc_loss_fn: Callable,
                gen_loss_fn: Callable, dsc_optimizer: Optimizer,
                gen_optimizer: Optimizer, x_data: torch.Tensor):
    """
    Trains a GAN over one batch, updating both the discriminator and generator.
    :return: The discriminator and generator losses.
    """

    # TODO: Discriminator update
    # 1. Show the discriminator real and generated data
    # 2. Calculate discriminator loss
    # 3. Update discriminator parameters
    # ====== YOUR CODE: ======
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dsc_model.to(device)
    y_pred = dsc_model(x_data)
    num_to_sample = y_pred.shape[0]
    # sample without gradients: the generator must not be updated here
    samples = gen_model.sample(num_to_sample, False)
    generated = dsc_model(samples)
    dsc_loss = dsc_loss_fn(y_pred, generated)
    dsc_optimizer.zero_grad()
    dsc_loss.backward(retain_graph=True)
    dsc_optimizer.step()
    # ========================

    # TODO: Generator update
    # 1. Show the discriminator generated data
    # 2. Calculate generator loss
    # 3. Update generator parameters
    # ====== YOUR CODE: ======
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    gen_model.to(device)
    # sample with gradients enabled; the batch size matches the real data
    # (the generator consumes noise internally, not the real batch)
    num_to_sample = x_data.shape[0]
    samples = gen_model.sample(num_to_sample, True)
    generated = dsc_model(samples)
    gen_loss = gen_loss_fn(generated)
    gen_optimizer.zero_grad()
    gen_loss.backward(retain_graph=True)
    gen_optimizer.step()
    # ========================

    return dsc_loss.item(), gen_loss.item()
def optimize(opt: Optimizer, loss: torch.Tensor):
    """
    Optimize the parameters based on the loss and the optimizer.

    Args:
        opt: optimizer
        loss: loss, a scalar
    """
    opt.zero_grad()
    loss.backward()
    opt.step()
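# Example usage of the optimize() helper (a sketch; `model`, `batch`, and `target`
# are assumed to exist):
opt = torch.optim.Adam(model.parameters(), lr=1e-3)
loss = F.mse_loss(model(batch), target)
optimize(opt, loss)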
def FGSMattack(self, X_nat, y, optimizer: Optimizer):
    # Ensure gradients can be taken w.r.t. the input
    # (X.grad would otherwise be None if X_nat does not require grad)
    X = X_nat.clone().detach().requires_grad_(True)
    optimizer.zero_grad()
    with torch.enable_grad():
        scores = self.model(X)
        loss = self.criterion(scores, y)
        loss.backward()
    # Single FGSM step: move in the direction of the gradient's sign,
    # then clip back to the valid pixel range
    grad = X.grad.data.sign()
    per = self.eps * grad
    X = Variable(X.data + per, requires_grad=True)
    X = Variable(torch.clamp(X, 0, 1.0), requires_grad=True)
    return X
def train_function(
    config: Any,
    engine: Engine,
    batch: Any,
    model: torch.nn.Module,
    loss_fn: torch.nn.Module,
    optimizer: Optimizer,
    device: torch.device,
) -> dict:
    """Model training step.

    Parameters
    ----------
    config
        config object
    engine
        Engine instance
    batch
        batch in current iteration
    model
        nn.Module model
    loss_fn
        nn.Module loss
    optimizer
        torch optimizer
    device
        device to use for training

    Returns
    -------
    training loss dict
    """
    model.train()
    samples = batch[0].to(device, non_blocking=True)
    targets = batch[1].to(device, non_blocking=True)
    with autocast(enabled=config.use_amp):
        outputs = model(samples)
        loss = loss_fn(outputs, targets)

    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    loss_value = loss.item()
    engine.state.metrics = {
        "epoch": engine.state.epoch,
        "train_loss": loss_value
    }
    return {"train_loss": loss_value}