def train_routine(cls, best_value, decay_lr, validation_interval, start_epoch,
                  epochs, checkpoint_all_epochs, current_log_folder, **kwargs):
    """
    Performs the training and validation routines

    Parameters
    ----------
    best_value : float
        Best value of the model so far. Non-zero only in case of --resume being used
    decay_lr : boolean
        Decay the lr flag
    validation_interval : int
        Run evaluation on validation set every N epochs
    start_epoch : int
        Int to initialize the starting epoch. Non-zero only in case of --resume being used
    epochs : int
        Number of epochs to train
    checkpoint_all_epochs : bool
        Save checkpoint at each epoch
    current_log_folder : string
        Path to where logs/checkpoints are saved
    kwargs : dict
        Any additional arguments.

    Returns
    -------
    train_value : ndarray[floats] of size (1, `epochs`)
        Accuracy values for train split
    val_value : ndarray[floats] of size (1, `epochs`+1)
        Accuracy values for validation split
    """
    logging.info('Begin training')
    # Arrays are sized relative to start_epoch; the extra slot in val_value
    # holds the pre-training validation result (stored at index -1).
    val_value = np.zeros((epochs + 1 - start_epoch))
    train_value = np.zeros((epochs - start_epoch))

    # Validate before training
    val_value[-1] = cls._validate(epoch=-1, **kwargs)

    for epoch in range(start_epoch, epochs):
        # Offset into the value arrays: indexing with the raw `epoch` would
        # over-run them whenever start_epoch > 0 (i.e. on --resume).
        idx = epoch - start_epoch

        # Train
        train_value[idx] = cls._train(epoch=epoch, **kwargs)

        # Validate
        if epoch % validation_interval == 0:
            val_value[idx] = cls._validate(epoch=epoch, **kwargs)
        if decay_lr is not None:
            adjust_learning_rate(epoch=epoch, decay_lr_epochs=decay_lr, **kwargs)

        # Checkpoint. NOTE(review): on epochs where validation is skipped,
        # val_value[idx] is still 0.0 — checkpoint() is then comparing the
        # best value against 0; confirm this is the intended behavior.
        best_value = checkpoint(epoch=epoch,
                                new_value=val_value[idx],
                                best_value=best_value,
                                log_dir=current_log_folder,
                                checkpoint_all_epochs=checkpoint_all_epochs,
                                **kwargs)

    logging.info('Training done')
    return train_value, val_value
def single_run(writer, current_log_folder, model_name, epochs, lr, decay_lr,
               validation_interval, **kwargs):
    """
    This is the main routine where train(), validate() and test() are called.

    Parameters
    ----------
    writer : Tensorboard.SummaryWriter
        Responsible for writing logs in Tensorboard compatible format.
    current_log_folder : string
        Path to where logs/checkpoints are saved
    model_name : string
        Name of the model
    epochs : int
        Number of epochs to train
    lr : float
        Value for learning rate
    decay_lr : boolean
        Decay the lr flag
    validation_interval : int
        Run evaluation on validation set every N epochs
    kwargs : dict
        Any additional arguments.

    Returns
    -------
    train_value : ndarray[floats]
        Accuracy values for train
    val_value : ndarray(1, `epochs`+1)
    test_value : float
        Precision values for train and validation splits. Single precision
        value for the test split.
    """
    # Get the selected model input size
    model_expected_input_size = models.__dict__[model_name]().expected_input_size
    RandomLabel._validate_model_input_size(model_expected_input_size, model_name)
    logging.info('Model {} expects input size of {}'.format(
        model_name, model_expected_input_size))

    # Setting up the dataloaders
    train_loader, val_loader, test_loader, num_classes = set_up_dataloaders(
        model_expected_input_size, **kwargs)

    # Remove the "shuffle=True" for the train loader so samples are seen in a
    # fixed order every epoch.
    # NOTE(review): re-assigning sampler/batch_sampler after DataLoader
    # construction is relied upon here; newer torch versions may forbid or
    # ignore this — verify against the project's torch version.
    train_loader.sampler = SequentialSampler(train_loader.dataset)
    train_loader.batch_sampler = BatchSampler(train_loader.sampler,
                                              train_loader.batch_size,
                                              train_loader.drop_last)

    # Setting up model, optimizer, criterion
    model, criterion, optimizer, best_value, start_epoch = set_up_model(
        num_classes=num_classes,
        model_name=model_name,
        lr=lr,
        train_loader=train_loader,
        **kwargs)

    # Create an observer
    observer, observer_criterion, observer_optimizer, _, _ = set_up_model(
        num_classes=num_classes,
        model_name='Observer',
        lr=lr,
        train_loader=train_loader,
        **kwargs)

    # Core routine
    logging.info('Begin training')
    val_value = np.zeros((epochs + 1 - start_epoch))
    train_value = np.zeros((epochs - start_epoch))

    # Validate before training (stored in the extra slot at index -1)
    val_value[-1] = RandomLabel._validate(val_loader, model, criterion,
                                          observer, observer_criterion,
                                          writer, -1, **kwargs)

    for epoch in range(start_epoch, epochs):
        # Arrays are sized relative to start_epoch; raw `epoch` would
        # over-run them when resuming from a checkpoint.
        idx = epoch - start_epoch

        # Train
        train_value[idx] = RandomLabel._train(train_loader, model, criterion,
                                              optimizer, observer,
                                              observer_criterion,
                                              observer_optimizer, writer,
                                              epoch, **kwargs)

        # Validate
        if epoch % validation_interval == 0:
            val_value[idx] = RandomLabel._validate(val_loader, model,
                                                   criterion, observer,
                                                   observer_criterion, writer,
                                                   epoch, **kwargs)
        if decay_lr is not None:
            adjust_learning_rate(lr=lr, optimizer=optimizer, epoch=epoch,
                                 decay_lr_epochs=decay_lr)
        best_value = checkpoint(epoch, val_value[idx], best_value, model,
                                optimizer, current_log_folder)

    # Test
    test_value = RandomLabel._test(test_loader, model, criterion, observer,
                                   observer_criterion, writer, epochs - 1,
                                   **kwargs)
    logging.info('Training completed')

    return train_value, val_value, test_value
def single_run(writer, current_log_folder, model_name, epochs, lr, decay_lr,
               validation_interval, checkpoint_all_epochs, **kwargs):
    """
    This is the main routine where train(), validate() and test() are called.

    Parameters
    ----------
    writer : Tensorboard.SummaryWriter
        Responsible for writing logs in Tensorboard compatible format.
    current_log_folder : string
        Path to where logs/checkpoints are saved
    model_name : string
        Name of the model
    epochs : int
        Number of epochs to train
    lr : float
        Value for learning rate
    decay_lr : boolean
        Decay the lr flag
    validation_interval : int
        Run evaluation on validation set every N epochs
    checkpoint_all_epochs : bool
        If enabled, save checkpoint after every epoch.
    kwargs : dict
        Any additional arguments.

    Returns
    -------
    train_value : ndarray[floats] of size (1, `epochs`)
        Accuracy values for train split
    val_value : ndarray[floats] of size (1, `epochs`+1)
        Accuracy values for validation split
    test_value : float
        Accuracy value for test split
    """
    # Get the selected model input size
    model_expected_input_size = models.__dict__[model_name]().expected_input_size
    ImageClassification._validate_model_input_size(model_expected_input_size,
                                                   model_name)
    logging.info('Model {} expects input size of {}'.format(
        model_name, model_expected_input_size))

    # Setting up the dataloaders
    train_loader, val_loader, test_loader, num_classes = set_up_dataloaders(
        model_expected_input_size, **kwargs)

    # Setting up model, optimizer, criterion
    model, criterion, optimizer, best_value, start_epoch = set_up_model(
        num_classes=num_classes,
        model_name=model_name,
        lr=lr,
        train_loader=train_loader,
        **kwargs)

    # Log parameter counts (total and trainable) for reference
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    logging.info('Total parameters: ' + str(pytorch_total_params))
    pytorch_total_params_trainable = sum(
        p.numel() for p in model.parameters() if p.requires_grad)
    logging.info('Total trainable parameters: ' +
                 str(pytorch_total_params_trainable))

    # Core routine
    logging.info('Begin training')
    val_value = np.zeros((epochs + 1 - start_epoch))
    train_value = np.zeros((epochs - start_epoch))

    # Validate before training (stored in the extra slot at index -1)
    val_value[-1] = ImageClassification._validate(val_loader, model, criterion,
                                                  writer, -1, **kwargs)

    for epoch in range(start_epoch, epochs):
        # Arrays are sized relative to start_epoch; raw `epoch` would
        # over-run them when resuming from a checkpoint.
        idx = epoch - start_epoch

        # Train
        train_value[idx] = ImageClassification._train(train_loader, model,
                                                      criterion, optimizer,
                                                      writer, epoch, **kwargs)

        # Validate
        if epoch % validation_interval == 0:
            val_value[idx] = ImageClassification._validate(val_loader, model,
                                                           criterion, writer,
                                                           epoch, **kwargs)
        if decay_lr is not None:
            adjust_learning_rate(lr=lr, optimizer=optimizer, epoch=epoch,
                                 decay_lr_epochs=decay_lr)
        best_value = checkpoint(epoch=epoch,
                                new_value=val_value[idx],
                                best_value=best_value,
                                model=model,
                                optimizer=optimizer,
                                log_dir=current_log_folder,
                                checkpoint_all_epochs=checkpoint_all_epochs)

    # Load the best model before evaluating on the test set.
    logging.info('Loading the best model before evaluating on the '
                 'test set.')
    kwargs["load_model"] = os.path.join(current_log_folder,
                                        'model_best.pth.tar')
    model, _, _, _, _ = set_up_model(num_classes=num_classes,
                                     model_name=model_name,
                                     lr=lr,
                                     train_loader=train_loader,
                                     **kwargs)

    # Test
    test_value = ImageClassification._test(test_loader, model, criterion,
                                           writer, epochs - 1, **kwargs)
    logging.info('Training completed')

    return train_value, val_value, test_value
tensorboard_writer.add_scalar('train/entropy', mean_entropy.cpu().detach().numpy(), iteration) iteration += 1 if iteration % 100 == 0: avg_loss_epoch = sum([l.item() for l in avg_losses_epoch ]) / len(avg_losses_epoch) tensorboard_writer.add_scalar('train/loss_avg', avg_loss_epoch, iteration) avg_loss_epoch = sum([l.item() for l in avg_losses_epoch]) / len(avg_losses_epoch) print("Avg epoch loss: ", avg_loss_epoch) tensorboard_writer.add_scalar('train/loss_epoch_avg', avg_loss_epoch, iteration) for mi in range(opt.instances): avg_per_model_loss_epoch = sum([ l for l in avg_per_model_losses_epoch[mi] ]) / len(avg_per_model_losses_epoch[mi]) tensorboard_writer.add_scalar('train/model_loss_avg_%d' % mi, avg_per_model_loss_epoch, iteration) if opt.calr: scheduler.step() adjust_learning_rate(optimizer, scheduler.get_lr()[0]) torch.save(model.state_dict(), '%s/weights_%06d.net' % (ckpt_dir, opt.epochs))
def single_run(writer, current_log_folder, model_name, epochs, lr, decay_lr,
               validation_interval, **kwargs):
    """
    This is the main routine where train(), validate() and test() are called.

    Parameters
    ----------
    writer : Tensorboard SummaryWriter
        Responsible for writing logs in Tensorboard compatible format.
    current_log_folder : string
        Path to where logs/checkpoints are saved
    model_name : string
        Name of the model
    epochs : int
        Number of epochs to train
    lr : float
        Value for learning rate
    decay_lr : boolean
        Decay the lr flag
    validation_interval : int
        Run evaluation on validation set every N epochs
    kwargs : dict
        Any additional arguments.

    Returns
    -------
    train_value, val_value, test_value
        Precision values for train and validation splits. Single precision
        value for the test split.
    """
    # Get the selected model
    model_expected_input_size = models.__dict__[model_name]().expected_input_size
    Bidimensional._validate_model_input_size(model_expected_input_size,
                                             model_name)
    logging.info('Model {} expects input size of {}'.format(
        model_name, model_expected_input_size))

    # Setting up the dataloaders
    train_loader, val_loader, test_loader, num_classes = set_up_dataloaders(
        model_expected_input_size, **kwargs)

    # Setting up model, optimizer, criterion
    model, criterion, optimizer, best_value, start_epoch = set_up_model(
        num_classes=num_classes,
        model_name=model_name,
        lr=lr,
        train_loader=train_loader,
        **kwargs)

    # Core routine
    logging.info('Begin training')
    val_value = np.zeros((epochs + 1 - start_epoch))
    train_value = np.zeros((epochs - start_epoch))

    # Make a uniform grid of points spanning the bounding box of the
    # validation samples, used to visualize the decision boundary.
    grid_resolution = 100
    mini_batches = np.array([input_mini_batch.numpy()
                             for input_mini_batch, _ in val_loader])
    val_coords = np.squeeze(np.array([sample
                                      for mini_batch in mini_batches
                                      for sample in mini_batch]))
    min_x, min_y = np.min(val_coords[:, 0]), np.min(val_coords[:, 1])
    max_x, max_y = np.max(val_coords[:, 0]), np.max(val_coords[:, 1])
    coords = np.array([[x, y]
                       for x in np.linspace(min_x, max_x, grid_resolution)
                       for y in np.linspace(min_y, max_y, grid_resolution)])
    # NOTE(review): torch.autograd.Variable is a no-op wrapper on modern
    # torch; kept for compatibility with the rest of the project.
    coords = torch.autograd.Variable(
        torch.from_numpy(coords).type(torch.FloatTensor))
    if not kwargs['no_cuda']:
        # `async=True` was removed when `async` became a reserved keyword in
        # Python 3.7; `non_blocking=True` is the documented replacement.
        coords = coords.cuda(non_blocking=True)

    # PLOT: decision boundary routine (before training)
    Bidimensional._evaluate_and_plot_decision_boundary(
        model=model, val_coords=val_coords, coords=coords,
        grid_resolution=grid_resolution, val_loader=val_loader,
        num_classes=num_classes, writer=writer, epoch=-1, epochs=epochs,
        **kwargs)

    # Validate before training (stored in the extra slot at index -1)
    val_value[-1] = Bidimensional._validate(val_loader, model, criterion,
                                            writer, -1, **kwargs)

    # Add model parameters to Tensorboard
    for name, param in model.named_parameters():
        writer.add_histogram(name + '_-1', param.clone().cpu().data.numpy(),
                             -1, bins='auto')

    for epoch in range(start_epoch, epochs):
        # Arrays are sized relative to start_epoch; raw `epoch` would
        # over-run them when resuming from a checkpoint.
        idx = epoch - start_epoch

        # Train
        train_value[idx] = Bidimensional._train(train_loader, model, criterion,
                                                optimizer, writer, epoch,
                                                **kwargs)

        # Validate
        if epoch % validation_interval == 0:
            val_value[idx] = Bidimensional._validate(val_loader, model,
                                                     criterion, writer, epoch,
                                                     **kwargs)
        if decay_lr is not None:
            adjust_learning_rate(lr, optimizer, epoch, decay_lr)
        best_value = checkpoint(epoch, val_value[idx], best_value, model,
                                optimizer, current_log_folder)

        # PLOT: decision boundary routine
        Bidimensional._evaluate_and_plot_decision_boundary(
            model=model, val_coords=val_coords, coords=coords,
            grid_resolution=grid_resolution, val_loader=val_loader,
            num_classes=num_classes, writer=writer, epoch=epoch, epochs=epochs,
            **kwargs)

        # Add model parameters to Tensorboard
        for name, param in model.named_parameters():
            writer.add_histogram(name + '_{}'.format(epoch),
                                 param.clone().cpu().data.numpy(), epoch,
                                 bins='auto')

    # Test
    # NOTE(review): sibling runners pass `epochs - 1` here; this one passes
    # `epochs` — kept as-is, but confirm which epoch tag is intended.
    test_value = Bidimensional._test(test_loader, model, criterion, writer,
                                     epochs, **kwargs)
    logging.info('Training completed')

    return train_value, val_value, test_value
def single_run(writer, current_log_folder, model_name, epochs, lr, decay_lr,
               validation_interval, checkpoint_all_epochs, input_patch_size,
               **kwargs):
    """
    This is the main routine where train(), validate() and test() are called.

    Parameters
    ----------
    writer : Tensorboard.SummaryWriter
        Responsible for writing logs in Tensorboard compatible format.
    current_log_folder : string
        Path to where logs/checkpoints are saved
    model_name : string
        Name of the model
    epochs : int
        Number of epochs to train
    lr : float
        Value for learning rate
    decay_lr : boolean
        Decay the lr flag
    validation_interval : int
        Run evaluation on validation set every N epochs
    checkpoint_all_epochs : bool
        If enabled, save checkpoint after every epoch.
    input_patch_size : int
        Size of the input patch, e.g. with 32 the input will be re-sized to 32x32
    kwargs : dict
        Any additional arguments.

    Returns
    -------
    train_value : ndarray[floats] of size (1, `epochs`)
        Accuracy values for train split
    val_value : ndarray[floats] of size (1, `epochs`+1)
        Accuracy values for validation split
    test_value : float
        Accuracy value for test split
    """
    # Setting up the dataloaders
    train_loader, val_loader, test_loader = set_up_dataloaders(
        input_patch_size, **kwargs)

    # Setting up model, optimizer, criterion
    model, _, optimizer, best_value, start_epoch = set_up_model(
        num_classes=3,  # In this case is the num dimension of the output
        model_name=model_name,
        lr=lr,
        train_loader=train_loader,
        **kwargs)
    # Regression-style objective: the criterion from set_up_model is
    # deliberately replaced with MSE.
    criterion = nn.MSELoss()

    # Core routine
    logging.info('Begin training')
    val_value = np.zeros((epochs + 1 - start_epoch))
    train_value = np.zeros((epochs - start_epoch))

    # Validate before training (stored in the extra slot at index -1)
    val_value[-1] = SemanticSegmentation._validate(val_loader, model,
                                                   criterion, writer, -1,
                                                   **kwargs)

    for epoch in range(start_epoch, epochs):
        # Arrays are sized relative to start_epoch; raw `epoch` would
        # over-run them when resuming from a checkpoint.
        idx = epoch - start_epoch

        # Train
        train_value[idx] = SemanticSegmentation._train(train_loader, model,
                                                       criterion, optimizer,
                                                       writer, epoch, **kwargs)

        # Validate
        if epoch % validation_interval == 0:
            val_value[idx] = SemanticSegmentation._validate(val_loader, model,
                                                            criterion, writer,
                                                            epoch, **kwargs)
        if decay_lr is not None:
            adjust_learning_rate(lr=lr, optimizer=optimizer, epoch=epoch,
                                 decay_lr_epochs=decay_lr)
        best_value = checkpoint(epoch=epoch,
                                new_value=val_value[idx],
                                best_value=best_value,
                                model=model,
                                optimizer=optimizer,
                                log_dir=current_log_folder,
                                checkpoint_all_epochs=checkpoint_all_epochs)

    # Load the best model before evaluating on the test set.
    logging.info('Loading the best model before evaluating on the test set.')
    kwargs["load_model"] = os.path.join(current_log_folder,
                                        'model_best.pth.tar')
    model, _, _, _, _ = set_up_model(num_classes=3,
                                     model_name=model_name,
                                     lr=lr,
                                     train_loader=train_loader,
                                     **kwargs)

    # Test
    test_value = SemanticSegmentation._test(test_loader, model, criterion,
                                            writer, epochs - 1, **kwargs)
    logging.info('Training completed')

    return train_value, val_value, test_value
def single_run(writer, current_log_folder, model_name, epochs, lr, decay_lr,
               margin, anchor_swap, validation_interval, regenerate_every,
               checkpoint_all_epochs, only_evaluate, **kwargs):
    """
    This is the main routine where train(), validate() and test() are called.

    Parameters
    ----------
    writer : Tensorboard SummaryWriter
        Responsible for writing logs in Tensorboard compatible format.
    current_log_folder : string
        Path to where logs/checkpoints are saved
    model_name : string
        Name of the model
    epochs : int
        Number of epochs to train
    lr : float
        Value for learning rate
    margin : float
        The margin value for the triplet loss function
    anchor_swap : boolean
        Turns on anchor swap
    decay_lr : boolean
        Decay the lr flag
    validation_interval : int
        Run evaluation on validation set every N epochs
    regenerate_every : int
        Re-generate triplets every N epochs
    checkpoint_all_epochs : bool
        If enabled, save checkpoint after every epoch.
    only_evaluate : boolean
        Flag : if True, only the test set is loaded.

    Returns
    -------
    train_value, val_value, test_value
        Mean Average Precision values for train and validation splits.
    """
    # Sanity check on parameters
    if kwargs["output_channels"] is None:
        logging.error(
            "Using triplet class but --output-channels is not specified.")
        sys.exit(-1)

    # Get the selected model input size
    model_expected_input_size = models.__dict__[model_name]().expected_input_size
    Triplet._validate_model_input_size(model_expected_input_size, model_name)
    logging.info('Model {} expects input size of {}'.format(
        model_name, model_expected_input_size))

    # Setting up the dataloaders (train/val are skipped in evaluate-only mode)
    if only_evaluate:
        _, _, test_loader = setup_dataloaders(
            model_expected_input_size=model_expected_input_size,
            only_evaluate=only_evaluate,
            **kwargs)
    else:
        train_loader, val_loader, test_loader = setup_dataloaders(
            model_expected_input_size=model_expected_input_size,
            **kwargs)

    # Setting up model, optimizer, criterion
    model, _, optimizer, best_value, start_epoch = set_up_model(
        model_name=model_name,
        lr=lr,
        # train_loader=train_loader,
        **kwargs)

    # Set the special criterion for triplets
    criterion = nn.TripletMarginLoss(margin=margin, swap=anchor_swap)

    train_value = np.zeros((epochs - start_epoch))
    val_value = np.zeros((epochs - start_epoch))

    if not only_evaluate:
        # Core routine
        logging.info('Begin training')
        # Pre-training validation; return value intentionally discarded
        Triplet._validate(val_loader, model, None, writer, -1, **kwargs)

        for epoch in range(start_epoch, epochs):
            # Arrays are sized relative to start_epoch; raw `epoch` would
            # over-run them when resuming from a checkpoint.
            idx = epoch - start_epoch

            # Train
            train_value[idx] = Triplet._train(train_loader=train_loader,
                                              model=model,
                                              criterion=criterion,
                                              optimizer=optimizer,
                                              writer=writer,
                                              epoch=epoch,
                                              **kwargs)

            # Validate
            if epoch % validation_interval == 0:
                val_value[idx] = Triplet._validate(val_loader=val_loader,
                                                   model=model,
                                                   criterion=criterion,
                                                   writer=writer,
                                                   epoch=epoch,
                                                   **kwargs)
            if decay_lr is not None:
                # Pass the decay schedule, not the total epoch count: the
                # original passed `epochs`, which means the lr would never
                # decay within the run, unlike every sibling runner which
                # passes `decay_lr` as decay_lr_epochs.
                adjust_learning_rate(lr, optimizer, epoch, decay_lr)
            best_value = checkpoint(epoch=epoch,
                                    new_value=val_value[idx],
                                    best_value=best_value,
                                    model=model,
                                    optimizer=optimizer,
                                    log_dir=current_log_folder,
                                    invert_best=True,
                                    checkpoint_all_epochs=checkpoint_all_epochs)

            # Generate new triplets every N epochs
            if epoch % regenerate_every == 0:
                train_loader.dataset.generate_triplets()

        logging.info('Training completed')

    # Test
    test_value = Triplet._test(test_loader=test_loader,
                               model=model,
                               criterion=criterion,
                               writer=writer,
                               epoch=(epochs - 1),
                               **kwargs)

    return train_value, val_value, test_value
def single_run(writer, current_log_folder, model_name, epochs, lr, decay_lr,
               validation_interval, checkpoint_all_epochs, **kwargs):
    """
    Trains a model (optionally) while capturing activation snapshots of a
    frozen subset of the training data through an ``Activation`` worker.

    Parameters
    ----------
    writer : Tensorboard.SummaryWriter
        Responsible for writing logs in Tensorboard compatible format.
    current_log_folder : string
        Path to where logs/checkpoints are saved
    model_name : string
        Name of the model
    epochs : int
        Number of epochs to train
    lr : float
        Value for learning rate
    decay_lr : boolean
        Decay the lr flag
    validation_interval : int
        Run evaluation on validation set every N epochs
    checkpoint_all_epochs : bool
        If enabled, save checkpoint after every epoch.
    kwargs : dict
        Any additional arguments. Must provide 'train', 'load_model',
        'process_size', 'process_every', 'save_images' and 'no_cuda'.

    Returns
    -------
    None
    """
    if not kwargs['train'] and kwargs['load_model'] is None:
        logging.error(
            'You have to provide load_model argument if model is not trained.'
        )
        sys.exit(-1)

    # Get the selected model input size
    model_expected_input_size = models.__dict__[model_name]().expected_input_size
    ProcessActivation._validate_model_input_size(model_expected_input_size,
                                                 model_name)
    logging.info('Model {} expects input size of {}'.format(
        model_name, model_expected_input_size))

    # Setting up the dataloaders
    train_loader, val_loader, test_loader, num_classes = set_up_dataloaders(
        model_expected_input_size, **kwargs)

    # Freezing the dataset used for processing activation: keep the first
    # `process_size` + 1 batches so the same samples are probed every time.
    activation_dataset = []
    for i, data in enumerate(train_loader):
        activation_dataset.append(data)
        if i >= kwargs['process_size']:
            break

    # Setting up model, optimizer, criterion
    model, criterion, optimizer, best_value, start_epoch = set_up_model(
        num_classes=num_classes,
        model_name=model_name,
        lr=lr,
        train_loader=train_loader,
        **kwargs)

    # Setting up activation_worker
    activation_worker = Activation(current_log_folder, model_name,
                                   activation_dataset, kwargs['process_size'],
                                   kwargs['save_images'], kwargs['no_cuda'])
    activation_worker.init(model)
    activation_worker.resolve_items()

    # With training part
    if kwargs['train']:
        logging.info('Begin training')
        val_value = np.zeros((epochs + 1 - start_epoch))
        train_value = np.zeros((epochs - start_epoch))

        # Pretraining validation step (stored in the extra slot at index -1)
        val_value[-1] = ProcessActivation._validate(val_loader, model,
                                                    criterion, writer, -1,
                                                    **kwargs)

        # Training
        for epoch in range(start_epoch, epochs):
            # Arrays are sized relative to start_epoch; raw `epoch` would
            # over-run them when resuming from a checkpoint.
            idx = epoch - start_epoch

            train_value[idx] = ProcessActivation._train(train_loader, model,
                                                        criterion, optimizer,
                                                        writer, epoch,
                                                        **kwargs)

            # Validate
            if epoch % validation_interval == 0:
                val_value[idx] = ProcessActivation._validate(val_loader,
                                                             model, criterion,
                                                             writer, epoch,
                                                             **kwargs)

            # Activation snapshot on first, last and every `process_every` epoch
            if (epoch == start_epoch) or \
                    (epoch % kwargs['process_every'] == 0) or \
                    epoch == (epochs - 1):
                activation_worker.add_epoch(epoch, val_value[idx], model)

            if decay_lr is not None:
                adjust_learning_rate(lr=lr, optimizer=optimizer, epoch=epoch,
                                     decay_lr_epochs=decay_lr)
            best_value = checkpoint(
                epoch=epoch,
                new_value=val_value[idx],
                best_value=best_value,
                model=model,
                optimizer=optimizer,
                log_dir=current_log_folder,
                checkpoint_all_epochs=checkpoint_all_epochs)

        # Load the best model before evaluating on the test set.
        logging.info(
            'Loading the best model before evaluating on the test set.')
        kwargs["load_model"] = os.path.join(current_log_folder,
                                            'model_best.pth.tar')
        model, _, _, _, _ = set_up_model(num_classes=num_classes,
                                         model_name=model_name,
                                         lr=lr,
                                         train_loader=train_loader,
                                         **kwargs)

        # Test (value only logged via the worker/writer; nothing is returned)
        test_value = ProcessActivation._test(test_loader, model, criterion,
                                             writer, epochs - 1, **kwargs)
        logging.info('Training completed')
    # Without training part
    else:
        activation_worker.add_epoch(0, 0, model)
        # NOTE(review): exiting with -1 on the (successful) evaluate-only path
        # looks like an error exit code — confirm this is intended.
        sys.exit(-1)
def single_run(writer, current_log_folder, model_name, epochs, lr, decay_lr,
               margin, anchor_swap, validation_interval, regenerate_every,
               **kwargs):
    """
    This is the main routine where train(), validate() and test() are called.

    Parameters
    ----------
    writer : Tensorboard SummaryWriter
        Responsible for writing logs in Tensorboard compatible format.
    current_log_folder : string
        Path to where logs/checkpoints are saved
    model_name : string
        Name of the model
    epochs : int
        Number of epochs to train
    lr : float
        Value for learning rate
    margin : float
        The margin value for the triplet loss function
    anchor_swap : boolean
        Turns on anchor swap
    decay_lr : boolean
        Decay the lr flag
    validation_interval : int
        Run evaluation on validation set every N epochs
    regenerate_every : int
        Re-generate triplets every N epochs
    kwargs : dict
        Any additional arguments.

    Returns
    -------
    train_value, val_value, test_value
        Precision values for train and validation splits. Single precision
        value for the test split.
    """
    # Get the selected model input size
    model_expected_input_size = models.__dict__[model_name]().expected_input_size
    Triplet._validate_model_input_size(model_expected_input_size, model_name)
    logging.info('Model {} expects input size of {}'.format(
        model_name, model_expected_input_size))

    # Setting up the dataloaders
    train_loader, val_loader, test_loader = setup_dataloaders(
        model_expected_input_size=model_expected_input_size, **kwargs)

    # Setting up model, optimizer, criterion
    model, _, optimizer, best_value, start_epoch = set_up_model(
        model_name=model_name,
        lr=lr,
        train_loader=train_loader,
        **kwargs)

    # Set the special criterion for triplets
    criterion = nn.TripletMarginLoss(margin=margin, swap=anchor_swap)

    # Core routine
    logging.info('Begin training')
    val_value = np.zeros((epochs - start_epoch))
    train_value = np.zeros((epochs - start_epoch))

    # Pre-training validation; return value intentionally discarded
    Triplet._validate(val_loader, model, None, writer, -1, **kwargs)

    for epoch in range(start_epoch, epochs):
        # Arrays are sized relative to start_epoch; raw `epoch` would
        # over-run them when resuming from a checkpoint.
        idx = epoch - start_epoch

        # Train
        train_value[idx] = Triplet._train(train_loader=train_loader,
                                          model=model,
                                          criterion=criterion,
                                          optimizer=optimizer,
                                          writer=writer,
                                          epoch=epoch,
                                          **kwargs)

        # Validate
        if epoch % validation_interval == 0:
            val_value[idx] = Triplet._validate(val_loader=val_loader,
                                               model=model,
                                               criterion=criterion,
                                               writer=writer,
                                               epoch=epoch,
                                               **kwargs)
        if decay_lr is not None:
            # Pass the decay schedule, not the total epoch count: the
            # original passed `epochs`, which means the lr would never decay
            # within the run, unlike the sibling runners which pass
            # `decay_lr` as decay_lr_epochs.
            adjust_learning_rate(lr, optimizer, epoch, decay_lr)
        best_value = checkpoint(epoch=epoch,
                                new_value=val_value[idx],
                                best_value=best_value,
                                model=model,
                                optimizer=optimizer,
                                log_dir=current_log_folder,
                                invert_best=True)

        # Generate new triplets every N epochs
        if epoch % regenerate_every == 0:
            train_loader.dataset.generate_triplets()

    # Test
    logging.info('Training completed')
    test_value = Triplet._test(test_loader=test_loader,
                               model=model,
                               criterion=criterion,
                               writer=writer,
                               epoch=(epochs - 1),
                               **kwargs)

    return train_value, val_value, test_value
def single_run(writer, current_log_folder, model_name, epochs, lr, decay_lr,
               validation_interval, checkpoint_all_epochs, **kwargs):
    """
    This is the main routine where train(), validate() and test() are called.

    Parameters
    ----------
    writer : Tensorboard.SummaryWriter
        Responsible for writing logs in Tensorboard compatible format.
    current_log_folder : string
        Path to where logs/checkpoints are saved
    model_name : string
        Name of the model
    epochs : int
        Number of epochs to train
    lr : float
        Value for learning rate
    decay_lr : boolean
        Decay the lr flag
    validation_interval : int
        Run evaluation on validation set every N epochs
    checkpoint_all_epochs : bool
        If enabled, save checkpoint after every epoch.
    kwargs : dict
        Any additional arguments. Must contain 'criterion_name'.

    Returns
    -------
    train_value : ndarray[floats] of size (1, `epochs`)
        Accuracy values for train split
    val_value : ndarray[floats] of size (1, `epochs`+1)
        Accuracy values for validation split
    test_value : float
        Accuracy value for test split
    """
    # Get the selected model input size
    model_expected_input_size = models.__dict__[model_name]().expected_input_size
    MultiLabelImageClassification._validate_model_input_size(
        model_expected_input_size, model_name)
    logging.info('Model {} expects input size of {}'.format(
        model_name, model_expected_input_size))

    # Setting up the dataloaders
    train_loader, val_loader, test_loader, num_classes = set_up_dataloaders(
        model_expected_input_size, **kwargs)

    # Check if the correct criterion has been applied. A plain `if` instead
    # of `assert`: asserts are stripped under `python -O`, which would have
    # silently skipped this validation.
    if kwargs['criterion_name'] != 'BCEWithLogitsLoss':
        logging.error('Inappropriate criterion for Multi-Label '
                      'classification! Please use an appropriate criterion '
                      'such as BCEWithLogitsLoss by specifying '
                      '--criterion-name BCEWithLogitsLoss')
        sys.exit(-1)

    # Setting up model, optimizer, criterion
    model, criterion, optimizer, best_value, start_epoch = set_up_model(
        num_classes=num_classes,
        model_name=model_name,
        lr=lr,
        train_loader=train_loader,
        **kwargs)

    # Core routine
    logging.info('Begin training')
    val_value = np.zeros((epochs + 1 - start_epoch))
    train_value = np.zeros((epochs - start_epoch))

    # Validate before training (stored in the extra slot at index -1)
    val_value[-1] = MultiLabelImageClassification._validate(
        val_loader, model, criterion, writer, -1, **kwargs)

    for epoch in range(start_epoch, epochs):
        # Arrays are sized relative to start_epoch; raw `epoch` would
        # over-run them when resuming from a checkpoint.
        idx = epoch - start_epoch

        # Train
        train_value[idx] = MultiLabelImageClassification._train(
            train_loader, model, criterion, optimizer, writer, epoch, **kwargs)

        # Validate
        if epoch % validation_interval == 0:
            val_value[idx] = MultiLabelImageClassification._validate(
                val_loader, model, criterion, writer, epoch, **kwargs)
        if decay_lr is not None:
            adjust_learning_rate(lr=lr, optimizer=optimizer, epoch=epoch,
                                 decay_lr_epochs=decay_lr)
        best_value = checkpoint(epoch=epoch,
                                new_value=val_value[idx],
                                best_value=best_value,
                                model=model,
                                optimizer=optimizer,
                                log_dir=current_log_folder,
                                checkpoint_all_epochs=checkpoint_all_epochs)

    # Load the best model before evaluating on the test set.
    logging.info('Loading the best model before evaluating on the '
                 'test set.')
    kwargs["load_model"] = os.path.join(current_log_folder,
                                        'model_best.pth.tar')
    model, _, _, _, _ = set_up_model(num_classes=num_classes,
                                     model_name=model_name,
                                     lr=lr,
                                     train_loader=train_loader,
                                     **kwargs)

    # Test
    test_value = MultiLabelImageClassification._test(test_loader, model,
                                                     criterion, writer,
                                                     epochs - 1, **kwargs)
    logging.info('Training completed')

    return train_value, val_value, test_value