def __init__(self, options, path, freeze=True, pre_model_path=None):
    """Prepare the network, criterion, solver, and data.

    Args:
        options, dict: Hyperparameters; 'base_lr', 'weight_decay' and
            'batch_size' are read here.
        path: Stored on the instance; not read in this constructor.
        freeze: If True, freeze the backbone and optimize only the head
            layers (conv2/bn2/conv/bn/fc).
        pre_model_path: Optional checkpoint file to load into the network.
    """
    print('Prepare the network and data.')
    self._options = options
    self._path = path

    # Network. DataParallel wrapping means state-dict keys carry the
    # 'module.' prefix, matching checkpoints saved by this project.
    self._net = torch.nn.DataParallel(BCNN()).cuda()
    if freeze:  # idiomatic truth test instead of `is True`
        self._net.module.freeze_layers()
    if pre_model_path is not None:
        self._net.load_state_dict(torch.load(pre_model_path))

    # Criterion.
    self._criterion = torch.nn.CrossEntropyLoss().cuda()

    # Solver. When the backbone is frozen, only the head parameters are
    # optimized; the SGD construction itself is shared by both branches.
    if freeze:
        params = (list(self._net.module.conv2.parameters())
                  + list(self._net.module.bn2.parameters())
                  + list(self._net.module.conv.parameters())
                  + list(self._net.module.bn.parameters())
                  + list(self._net.module.fc.parameters()))
    else:
        params = self._net.parameters()
    self._solver = torch.optim.SGD(
        params,
        lr=self._options['base_lr'],
        momentum=0.9,
        weight_decay=self._options['weight_decay'])
    # Reduce LR by 10x when the monitored metric ('max' mode) plateaus.
    self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        self._solver, mode='max', factor=0.1, patience=3,
        verbose=True, threshold=1e-4)

    # Data. NOTE(review): validation_size=0 makes the validation loader
    # effectively empty — confirm this is intended.
    self._train_loader, self._valid_loader = \
        dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=self._options['batch_size'],
            random_seed=96,
            validation_size=0,
            object_boxes_dict=None,
            show_sample=False,
            augment=True)
    self._test_loader = dataset.get_test_data_loader(
        resize_size=224, batch_size=32, object_boxes_dict=None)
def __init__(self, path):
    """Load a trained network from *path* and prepare the test data.

    Args:
        path: Filesystem path of the saved model state dict.
    """
    print('Prepare the network and data.')
    self._path = path

    # Network. The DataParallel wrapper reproduces the 'module.'-prefixed
    # keys used by checkpoints saved during training.
    net = torch.nn.DataParallel(BCNN()).cuda()
    net.module.freeze_layers()
    net.load_state_dict(torch.load(self._path))
    self._net = net

    # Test loader; images are resized to 448 here (training-side code in
    # this file uses 224 — presumably intentional, verify against caller).
    self._test_loader = dataset.get_test_data_loader(
        resize_size=448,
        batch_size=32,
        object_boxes_dict=None,
    )
if __name__ == "__main__": log_file_name_prefix = 'combined' logger = utils.get_logger(log_file_name_prefix) logger.info('start loading dataset') begin_time = time.time() train_loader_glb, valid_loader_glb = dataset.get_train_validation_data_loader( resize_size=224, # apply random crop for train set batch_size=train_batch_size, random_seed=random_seed, augment=True, validation_size=validation_size, object_boxes_dict=None, show_sample=False) test_loader_glb = dataset.get_test_data_loader( resize_size=224, # no any crop batch_size=test_batch_size, object_boxes_dict=None) bounding_boxes = utils.get_annotated_bounding_boxes() train_loader_obj, valid_loader_obj = dataset.get_train_validation_data_loader( resize_size=( 224, 224), # for object level model, we don't need cropping any more! batch_size=train_batch_size, random_seed=random_seed, augment=True, validation_size=validation_size, object_boxes_dict=bounding_boxes, show_sample=False) test_loader_obj = dataset.get_test_data_loader( resize_size=224,
def train_and_evaluate(logger=None,
                       model_name='resnet152',
                       pre_model=None,
                       use_pretrained_params=True,
                       fine_tune_all_layers=False,
                       data_loaders=None,
                       is_object_level=False,
                       num_epochs=4,
                       learning_rate=1e-3,
                       use_scheduler=False,
                       weight_decay=5e-4,
                       train_batch_size=32,
                       eval_epoch_step=4,
                       use_gpu=cuda.is_available(),
                       cuda_device_idx=0,
                       use_multiple_gpu=False,
                       save_model=True):
    """
    Train a model and evaluate it after training

    :param logger: the utils.LoggerS object to print logs onto file and console
    :param model_name: model's name, used to create the model and help provide more detailed log
    :param pre_model: if this is not None, we will train and evaluate on it instead of creating a new model
    :param use_pretrained_params: True if we initialize the model with pretrained parameters
    :param fine_tune_all_layers: True if we want to fine tune all layers of the model
    :param data_loaders: a list of data loaders for train, validation and test set. The order must be correct
    :param is_object_level: as its name
    :param num_epochs: the number of training iterations on whole train set
    :param learning_rate: as its name
    :param use_scheduler: True to reduce the learning rate when the running loss plateaus
    :param weight_decay: as its name
    :param train_batch_size: batch size of train set
    :param eval_epoch_step: evaluation step
    :param use_gpu: use GPU to train/evaluate or not
    :param cuda_device_idx: an int value that indicates which cuda device that we want to use for inputs and model
    :param use_multiple_gpu: use multiple GPU to train/evaluate or not; todo currently this flag is useless
    :param save_model: True if we want to save the model that has best validation accuracy when training
    :return: trained model, accuracies on train, validation and test set, and stored model path if
        :param save_model is set to True
    """
    import copy  # local import: used to snapshot the best model weights

    # Result-file-name prefix abbreviations:
    # obj -- object, glb -- global, prtrn -- pretrain, ep -- epoch, bt -- batch_size
    if is_object_level:
        res_file_name_prefix = 'obj'
    else:
        res_file_name_prefix = 'glb'
    res_file_name_prefix += '_' + model_name
    if use_pretrained_params:
        res_file_name_prefix += '_prtrn'
    if fine_tune_all_layers:
        res_file_name_prefix += 'All'
    res_file_name_prefix += '_ep' + str(num_epochs) + '_bt' + str(
        train_batch_size) + '_' + str(learning_rate)

    if logger is None:
        logger = utils.get_logger(res_file_name_prefix)

    # get train/valid/test_loader
    if data_loaders is None:
        logger.info('start loading dataset')
        begin_time = time.time()
        train_loader, valid_loader = dataset.get_train_validation_data_loader(
            resize_size=224,
            batch_size=train_batch_size,
            random_seed=96,
            validation_size=0.2,
            object_boxes_dict=None,
            show_sample=False)
        test_loader = dataset.get_test_data_loader(resize_size=224,
                                                   batch_size=32,
                                                   object_boxes_dict=None)
        logger.info('loading dataset costs ' + str(time.time() - begin_time))
    else:
        train_loader = data_loaders[0]
        valid_loader = data_loaders[1]
        test_loader = data_loaders[2]

    # Create nn model
    if pre_model is not None:
        model = pre_model  # pre_model should have been trained
        if not fine_tune_all_layers:
            for param in model.parameters():
                param.requires_grad = False
            replace_model_fc(model_name, model)
    else:
        model = get_model_by_name(model_name, use_pretrained_params)
        if use_pretrained_params and not fine_tune_all_layers:
            # only fine tune fully connected layer, which means we should not
            # upgrade network layers except for the last one
            for param in model.parameters():
                param.requires_grad = False
            replace_model_fc(model_name, model)

    has_multiple_gpu = cuda.device_count() > 1
    cuda_device = None  # declare this just in order to remove IDE warnings ...
    # BUG FIX: criterion used to be created only inside `if use_gpu:`,
    # which raised NameError on CPU-only runs.
    criterion = nn.CrossEntropyLoss()
    if use_gpu:
        if has_multiple_gpu and use_multiple_gpu:
            model = nn.DataParallel(model).cuda()
        else:
            model = model.cuda(cuda_device_idx)
            cuda_device = torch.device('cuda', cuda_device_idx)
        criterion = criterion.cuda()

    # Unwrap DataParallel so parameter selection sees the real module.
    if has_multiple_gpu and use_multiple_gpu:
        _model = model.module
    else:
        _model = model
    optimizer = optim.SGD(get_model_parameters(model_name, _model,
                                               use_pretrained_params,
                                               fine_tune_all_layers),
                          lr=learning_rate,
                          momentum=0.9,
                          weight_decay=weight_decay)

    # Reduce learning rate when a metric has stopped improving.
    if use_scheduler:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.1, patience=3, verbose=True,
            threshold=1e-4)

    logger.info('start training')
    train_cost_time = 0.0
    epochs_arr = []
    losses_arr = []
    epochs_step_arr = []
    train_acc_arr = []
    valid_acc_arr = []
    best_valid_acc = 0.0
    best_valid_acc_model_params = None
    for epoch in range(num_epochs):
        running_loss = 0.0
        batch_num = 0
        for i, (_, inputs, labels) in enumerate(train_loader, 0):
            begin_time = time.time()
            # get the inputs
            if use_gpu:
                if has_multiple_gpu and use_multiple_gpu:
                    # BUG FIX: `async=True` is a SyntaxError on Python >= 3.7;
                    # non_blocking is its replacement. The deprecated Variable
                    # wrapper is also dropped (tensors autograd directly).
                    inputs = inputs.cuda(non_blocking=True)
                    labels = labels.cuda(non_blocking=True)
                else:
                    inputs = inputs.cuda(cuda_device)
                    labels = labels.cuda(cuda_device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # print statistics
            logger.info('[%d, %5d] loss: %.6f' %
                        (epoch + 1, i + 1, loss.item()))
            cost_time_i = time.time() - begin_time
            train_cost_time += cost_time_i
            logger.info('cost time: %.4fs' % cost_time_i)
            # BUG FIX: was `batch_num = i` (last index, not count) — that
            # skewed the average and divided by zero for a 1-batch epoch.
            batch_num = i + 1
        if use_scheduler:
            scheduler.step(running_loss)
        epochs_arr.append(epoch + 1)
        # max() guards an empty train loader.
        losses_arr.append(running_loss / max(batch_num, 1))
        if epoch == 0 or (epoch + 1) % eval_epoch_step == 0:
            # compute classification accuracy on train and validation set
            epochs_step_arr.append(epoch + 1)
            logger.info('')
            train_acc = evaluate(logger=logger,
                                 models=[model],
                                 data_loaders=[train_loader],
                                 set_name='train set',
                                 cuda_device_idx=cuda_device_idx,
                                 use_multiple_gpu=use_multiple_gpu)
            train_acc_arr.append(train_acc)
            valid_acc = evaluate(logger=logger,
                                 models=[model],
                                 data_loaders=[valid_loader],
                                 set_name='validation set',
                                 cuda_device_idx=cuda_device_idx,
                                 use_multiple_gpu=use_multiple_gpu)
            valid_acc_arr.append(valid_acc)
            if valid_acc > best_valid_acc:
                best_valid_acc = valid_acc
                # BUG FIX: state_dict() returns live references that later
                # training steps mutate; deep-copy to keep a true snapshot.
                best_valid_acc_model_params = copy.deepcopy(model.state_dict())

    logger.info('')
    logger.info('Finished Training, cost time: %.4fs' % train_cost_time)
    logger.info('')
    test_acc = evaluate(logger=logger,
                        models=[model],
                        data_loaders=[test_loader],
                        set_name='test set',
                        cuda_device_idx=cuda_device_idx,
                        use_multiple_gpu=use_multiple_gpu)
    logger.info('')
    save_evaluation_result(res_file_name_prefix, epochs_arr, losses_arr,
                           epochs_step_arr, train_acc_arr, valid_acc_arr)

    saved_model_path = None
    if save_model:
        logger.info('')
        logger.info('saving model parameters')
        if is_object_level:
            model_file_name_prefix = 'obj_'
        else:
            model_file_name_prefix = 'glb_'
        model_file_name_prefix += model_name + ('_acc%.4f' % best_valid_acc)
        saved_model_path = save_model_parameters(best_valid_acc_model_params,
                                                 model_file_name_prefix)
        logger.info('parameters have been saved successfully to ' +
                    saved_model_path)
    logger.info('')
    return (model, train_acc_arr[-1], valid_acc_arr[-1], test_acc,
            saved_model_path)
# Script entry: build object-level data loaders (cropped to annotated
# bounding boxes) and train a model on them.
# NOTE(review): `get_logger` is unqualified here while other call sites use
# `utils.get_logger` — presumably imported directly; verify.
logger = get_logger(train_batch_size)

logger.info('start loading dataset')
load_start = time.time()
annotated_boxes = utils.get_annotated_bounding_boxes()
train_loader, valid_loader = dataset.get_train_validation_data_loader(
    resize_size=224,
    batch_size=train_batch_size,
    random_seed=random_seed,
    augment=True,
    validation_size=validation_size,
    object_boxes_dict=annotated_boxes,
    show_sample=False,
)
test_loader = dataset.get_test_data_loader(
    resize_size=224,
    batch_size=test_batch_size,
    object_boxes_dict=annotated_boxes,
)
logger.info('loading dataset costs %.4fs' % (time.time() - load_start))

# Train; loaders are passed in the required [train, valid, test] order.
get_trained_model_object(
    logger=logger,
    data_loaders=[train_loader, valid_loader, test_loader],
    train_batch_size=train_batch_size)

# Record the run configuration for this training session.
logger.info('model: ' + model_name)
logger.info('pretrained: ' + str(use_pretrained_params))
logger.info('fine tune all layers: ' + str(fine_tune_all_layers))
logger.info('epochs: ' + str(num_epochs))
logger.info('batch size: ' + str(train_batch_size))
logger.info('learning rate: ' + str(learning_rate))