def train(model: MoleculeModel, data_loader: MoleculeDataLoader, loss_func: Callable, optimizer: Optimizer, scheduler: _LRScheduler, args: TrainArgs, n_iter: int = 0, logger: logging.Logger = None, writer: SummaryWriter = None) -> int: """ Trains a model for an epoch. :param model: A :class:`~chemprop.models.model.MoleculeModel`. :param data_loader: A :class:`~chemprop.data.data.MoleculeDataLoader`. :param loss_func: Loss function. :param optimizer: An optimizer. :param scheduler: A learning rate scheduler. :param args: A :class:`~chemprop.args.TrainArgs` object containing arguments for training the model. :param n_iter: The number of iterations (training examples) trained on so far. :param logger: A logger for recording output. :param writer: A tensorboardX SummaryWriter. :return: The total number of iterations (training examples) trained on so far. """ debug = logger.debug if logger is not None else print model.train() loss_sum, iter_count = 0, 0 for batch in tqdm(data_loader, total=len(data_loader)): # Prepare batch batch: MoleculeDataset mol_batch, features_batch, target_batch = batch.batch_graph( ), batch.features(), batch.targets() mask = torch.Tensor([[x is not None for x in tb] for tb in target_batch]) targets = torch.Tensor([[0 if x is None else x for x in tb] for tb in target_batch]) # Run model model.zero_grad() preds = model(mol_batch, features_batch) # Move tensors to correct device mask = mask.to(preds.device) targets = targets.to(preds.device) class_weights = torch.ones(targets.shape, device=preds.device) if args.dataset_type == 'multiclass': targets = targets.long() loss = torch.cat([ loss_func(preds[:, target_index, :], targets[:, target_index]).unsqueeze(1) for target_index in range(preds.size(1)) ], dim=1) * class_weights * mask else: loss = loss_func(preds, targets) * class_weights * mask loss = loss.sum() / mask.sum() loss_sum += loss.item() iter_count += len(batch) loss.backward() if args.grad_clip: nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip) optimizer.step() if isinstance(scheduler, NoamLR): scheduler.step() n_iter += len(batch) # Log and/or add to tensorboard if (n_iter // args.batch_size) % args.log_frequency == 0: lrs = scheduler.get_lr() pnorm = compute_pnorm(model) gnorm = compute_gnorm(model) loss_avg = loss_sum / iter_count loss_sum, iter_count = 0, 0 lrs_str = ', '.join(f'lr_{i} = {lr:.4e}' for i, lr in enumerate(lrs)) debug( f'Loss = {loss_avg:.4e}, PNorm = {pnorm:.4f}, GNorm = {gnorm:.4f}, {lrs_str}' ) if writer is not None: writer.add_scalar('train_loss', loss_avg, n_iter) writer.add_scalar('param_norm', pnorm, n_iter) writer.add_scalar('gradient_norm', gnorm, n_iter) for i, lr in enumerate(lrs): writer.add_scalar(f'learning_rate_{i}', lr, n_iter) return n_iter
def train(model: MoleculeModel, data_loader: MoleculeDataLoader, loss_func: Callable, optimizer: Optimizer, scheduler: _LRScheduler, args: TrainArgs, n_iter: int = 0, logger: logging.Logger = None, writer: SummaryWriter = None) -> int: """ Trains a model for an epoch. :param model: A :class:`~chemprop.models.model.MoleculeModel`. :param data_loader: A :class:`~chemprop.data.data.MoleculeDataLoader`. :param loss_func: Loss function. :param optimizer: An optimizer. :param scheduler: A learning rate scheduler. :param args: A :class:`~chemprop.args.TrainArgs` object containing arguments for training the model. :param n_iter: The number of iterations (training examples) trained on so far. :param logger: A logger for recording output. :param writer: A tensorboardX SummaryWriter. :return: The total number of iterations (training examples) trained on so far. """ debug = logger.debug if logger is not None else print model.train() loss_sum = iter_count = 0 for batch in tqdm(data_loader, total=len(data_loader), leave=False): # Prepare batch batch: MoleculeDataset mol_batch, features_batch, target_batch, mask_batch, atom_descriptors_batch, atom_features_batch, bond_features_batch, data_weights_batch = \ batch.batch_graph(), batch.features(), batch.targets(), batch.mask(), batch.atom_descriptors(), \ batch.atom_features(), batch.bond_features(), batch.data_weights() mask = torch.tensor(mask_batch, dtype=torch.bool) # shape(batch, tasks) targets = torch.tensor([[0 if x is None else x for x in tb] for tb in target_batch]) # shape(batch, tasks) if args.target_weights is not None: target_weights = torch.tensor(args.target_weights).unsqueeze(0) # shape(1,tasks) else: target_weights = torch.ones(targets.shape[1]).unsqueeze(0) data_weights = torch.tensor(data_weights_batch).unsqueeze(1) # shape(batch,1) if args.loss_function == 'bounded_mse': lt_target_batch = batch.lt_targets() # shape(batch, tasks) gt_target_batch = batch.gt_targets() # shape(batch, tasks) lt_target_batch = torch.tensor(lt_target_batch) gt_target_batch = torch.tensor(gt_target_batch) # Run model model.zero_grad() preds = model(mol_batch, features_batch, atom_descriptors_batch, atom_features_batch, bond_features_batch) # Move tensors to correct device torch_device = preds.device mask = mask.to(torch_device) targets = targets.to(torch_device) target_weights = target_weights.to(torch_device) data_weights = data_weights.to(torch_device) if args.loss_function == 'bounded_mse': lt_target_batch = lt_target_batch.to(torch_device) gt_target_batch = gt_target_batch.to(torch_device) # Calculate losses if args.loss_function == 'mcc' and args.dataset_type == 'classification': loss = loss_func(preds, targets, data_weights, mask) *target_weights.squeeze(0) elif args.loss_function == 'mcc': # multiclass dataset type targets = targets.long() target_losses = [] for target_index in range(preds.size(1)): target_loss = loss_func(preds[:, target_index, :], targets[:, target_index], data_weights, mask[:, target_index]).unsqueeze(0) target_losses.append(target_loss) loss = torch.cat(target_losses).to(torch_device) * target_weights.squeeze(0) elif args.dataset_type == 'multiclass': targets = targets.long() if args.loss_function == 'dirichlet': loss = loss_func(preds, targets, args.evidential_regularization) * target_weights * data_weights * mask else: target_losses = [] for target_index in range(preds.size(1)): target_loss = loss_func(preds[:, target_index, :], targets[:, target_index]).unsqueeze(1) target_losses.append(target_loss) loss = torch.cat(target_losses, dim=1).to(torch_device) * target_weights * data_weights * mask elif args.dataset_type == 'spectra': loss = loss_func(preds, targets, mask) * target_weights * data_weights * mask elif args.loss_function == 'bounded_mse': loss = loss_func(preds, targets, lt_target_batch, gt_target_batch) * target_weights * data_weights * mask elif args.loss_function == 'evidential': loss = loss_func(preds, targets, args.evidential_regularization) * target_weights * data_weights * mask elif args.loss_function == 'dirichlet': # classification loss = loss_func(preds, targets, args.evidential_regularization) * target_weights * data_weights * mask else: loss = loss_func(preds, targets) * target_weights * data_weights * mask loss = loss.sum() / mask.sum() loss_sum += loss.item() iter_count += 1 loss.backward() if args.grad_clip: nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip) optimizer.step() if isinstance(scheduler, NoamLR): scheduler.step() n_iter += len(batch) # Log and/or add to tensorboard if (n_iter // args.batch_size) % args.log_frequency == 0: lrs = scheduler.get_lr() pnorm = compute_pnorm(model) gnorm = compute_gnorm(model) loss_avg = loss_sum / iter_count loss_sum = iter_count = 0 lrs_str = ', '.join(f'lr_{i} = {lr:.4e}' for i, lr in enumerate(lrs)) debug(f'Loss = {loss_avg:.4e}, PNorm = {pnorm:.4f}, GNorm = {gnorm:.4f}, {lrs_str}') if writer is not None: writer.add_scalar('train_loss', loss_avg, n_iter) writer.add_scalar('param_norm', pnorm, n_iter) writer.add_scalar('gradient_norm', gnorm, n_iter) for i, lr in enumerate(lrs): writer.add_scalar(f'learning_rate_{i}', lr, n_iter) return n_iter