def test(args):
    """Run model testing."""
    model_args = args.model_args
    data_args = args.data_args
    logger_args = args.logger_args

    # Get logger.
    logger = Logger(logger_args.log_path,
                    logger_args.save_dir,
                    logger_args.results_dir)

    # Get image paths corresponding to predictions for logging.
    paths = None

    if model_args.config_path is not None:
        # Instantiate the EnsemblePredictor class for obtaining
        # model predictions.
        predictor = EnsemblePredictor(config_path=model_args.config_path,
                                      model_args=model_args,
                                      data_args=data_args,
                                      gpu_ids=args.gpu_ids,
                                      device=args.device,
                                      logger=logger)
        # Obtain ensemble predictions.
        # Caches both individual and ensemble predictions.
        # We always turn off caching to ensure that we write the Path column.
        predictions, groundtruth, paths = predictor.predict(cache=False,
                                                            return_paths=True,
                                                            all_gt_tasks=True)
    else:
        # Load the model at ckpt_path.
        ckpt_path = model_args.ckpt_path
        ckpt_save_dir = Path(ckpt_path).parent
        model_uncertainty = model_args.model_uncertainty

        # Get model args from the checkpoint and add them to the
        # command-line specified model args.
        model_args, transform_args = ModelSaver.get_args(
            cl_model_args=model_args,
            dataset=data_args.dataset,
            ckpt_save_dir=ckpt_save_dir,
            model_uncertainty=model_uncertainty)

        # TODO JBY: in test, moco should never be true.
        model_args.moco = args.model_args.moco
        model, ckpt_info = ModelSaver.load_model(ckpt_path=ckpt_path,
                                                 gpu_ids=args.gpu_ids,
                                                 model_args=model_args,
                                                 is_training=False)

        # Instantiate the Predictor class for obtaining model predictions.
        predictor = Predictor(model=model, device=args.device)

        # Get phase loader object.
        return_info_dict = True
        loader = get_loader(phase=data_args.phase,
                            data_args=data_args,
                            transform_args=transform_args,
                            is_training=False,
                            return_info_dict=return_info_dict,
                            logger=logger)

        # Obtain model predictions.
        if return_info_dict:
            predictions, groundtruth, paths = predictor.predict(loader)
        else:
            predictions, groundtruth = predictor.predict(loader)

        if model_args.calibrate:
            # Open the JSON file which holds the saved calibration parameters.
            import json
            import math

            with open(CALIBRATION_FILE) as f:
                data = json.load(f)

            def sigmoid(x):
                return 1 / (1 + math.exp(-x))

            # Apply the saved per-task linear transform followed by a sigmoid
            # to every prediction in each column.
            for i, column in enumerate(predictions):
                predictions[column] = predictions[column].apply(
                    lambda x, w=data[i][0][0][0], b=data[i][1][0]:
                        sigmoid(x * w + b))

    # Log predictions and groundtruth to file in CSV format.
    logger.log_predictions_groundtruth(predictions, groundtruth, paths)

    if not args.inference_only:
        # Instantiate the evaluator class for evaluating models.
        evaluator = Evaluator(logger, operating_points_path=CHEXPERT_RAD_PATH)

        # Get model metrics and curves on the phase dataset.
        metrics, curves = evaluator.evaluate_tasks(groundtruth, predictions)

        # Log metrics to stdout and file.
        logger.log_stdout(f"Writing metrics to {logger.metrics_path}.")
        logger.log_metrics(metrics, save_csv=True)

    # TODO: make this work with ensemble.
    # TODO: investigate if the eval_loader can just be the normal loader here.
    if logger_args.save_cams:
        cams_dir = logger_args.save_dir / 'cams'
        print(f'Save cams to {cams_dir}')
        save_grad_cams(args, loader, model,
                       cams_dir,
                       only_competition=logger_args.only_competition_cams,
                       only_top_task=False)

    logger.log("=== Testing Complete ===")
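
# Illustration only (hedged sketch, not called by test() or train()): the
# calibration block in test() applies per-task Platt scaling, mapping a raw
# score x to sigmoid(weight * x + bias), where weight = data[i][0][0][0] and
# bias = data[i][1][0] follow the JSON layout that test() assumes for
# CALIBRATION_FILE.
def _platt_scale(x, weight, bias):
    """Map a raw score to a calibrated probability via sigmoid(w * x + b)."""
    import math  # local import keeps this sketch self-contained
    return 1.0 / (1.0 + math.exp(-(weight * x + bias)))
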
def train(args):
    """Run model training."""
    print("Start Training ...")

    # Get nested namespaces.
    model_args = args.model_args
    logger_args = args.logger_args
    optim_args = args.optim_args
    data_args = args.data_args
    transform_args = args.transform_args

    # Get logger.
    print('Getting logger... log to path: {}'.format(logger_args.log_path))
    logger = Logger(logger_args.log_path, logger_args.save_dir)

    # For conaug, point to the MoCo pretrained weights.
    if model_args.ckpt_path and model_args.ckpt_path != 'None':
        print("Pretrained checkpoint specified: {}".format(
            model_args.ckpt_path))
        # CL-specified args are used to load the model, rather than the
        # ones saved to args.json.
        model_args.pretrained = False
        ckpt_path = model_args.ckpt_path
        model, ckpt_info = ModelSaver.load_model(ckpt_path=ckpt_path,
                                                 gpu_ids=args.gpu_ids,
                                                 model_args=model_args,
                                                 is_training=True)
        if not model_args.moco:
            optim_args.start_epoch = ckpt_info['epoch'] + 1
        else:
            optim_args.start_epoch = 1
    else:
        print('Starting without pretrained training checkpoint, '
              'random initialization.')
        # If no ckpt_path is provided, instantiate a new randomly
        # initialized model.
        model_fn = models.__dict__[model_args.model]
        if data_args.custom_tasks is not None:
            tasks = NamedTasks[data_args.custom_tasks]
        else:
            tasks = model_args.__dict__[TASKS]  # TASKS = "tasks"
        print("Tasks: {}".format(tasks))
        model = model_fn(tasks, model_args)
        model = nn.DataParallel(model, args.gpu_ids)

    # Put model on gpu or cpu and put into training mode.
    model = model.to(args.device)
    model.train()

    print("========= MODEL ==========")
    print(model)

    # Get train and valid loader objects.
    train_loader = get_loader(phase="train",
                              data_args=data_args,
                              transform_args=transform_args,
                              is_training=True,
                              return_info_dict=False,
                              logger=logger)
    valid_loader = get_loader(phase="valid",
                              data_args=data_args,
                              transform_args=transform_args,
                              is_training=False,
                              return_info_dict=False,
                              logger=logger)

    # Instantiate the predictor class for obtaining model predictions.
    predictor = Predictor(model, args.device)

    # Instantiate the evaluator class for evaluating models.
    evaluator = Evaluator(logger)

    # Get the set of tasks which will be used for saving models
    # and annealing the learning rate.
    eval_tasks = EVAL_METRIC2TASKS[optim_args.metric_name]

    # Instantiate the saver class for saving model checkpoints.
    saver = ModelSaver(save_dir=logger_args.save_dir,
                       iters_per_save=logger_args.iters_per_save,
                       max_ckpts=logger_args.max_ckpts,
                       metric_name=optim_args.metric_name,
                       maximize_metric=optim_args.maximize_metric,
                       keep_topk=logger_args.keep_topk)

    # TODO: JBY: handle threshold for fine tuning.
    if model_args.fine_tuning == 'full':
        # Fine-tune all layers.
        pass
    else:
        # Freeze all layers except those listed for fine-tuning.
        models.PretrainedModel.set_require_grad_for_fine_tuning(
            model, model_args.fine_tuning.split(','))

    # Instantiate the optimizer class for guiding model training.
    optimizer = Optimizer(parameters=model.parameters(),
                          optim_args=optim_args,
                          batch_size=data_args.batch_size,
                          iters_per_print=logger_args.iters_per_print,
                          iters_per_visual=logger_args.iters_per_visual,
                          iters_per_eval=logger_args.iters_per_eval,
                          dataset_len=len(train_loader.dataset),
                          logger=logger)

    if model_args.ckpt_path and not model_args.moco:
        # Load the same optimizer state as used in the original training.
        optimizer.load_optimizer(ckpt_path=model_args.ckpt_path,
                                 gpu_ids=args.gpu_ids)

    model_uncertainty = model_args.model_uncertainty
    loss_fn = evaluator.get_loss_fn(
        loss_fn_name=optim_args.loss_fn,
        model_uncertainty=model_uncertainty,
        mask_uncertain=True,
        device=args.device)

    # Run training.
    while not optimizer.is_finished_training():
        optimizer.start_epoch()

        # TODO: JBY, HACK WARNING. What is the hack?
        metrics = None

        for inputs, targets in train_loader:
            optimizer.start_iter()

            # Evaluate every iters_per_eval steps, and also on the final,
            # possibly partial, batch of the epoch.
            if ((optimizer.global_step
                 and optimizer.global_step % optimizer.iters_per_eval == 0)
                    or len(train_loader.dataset) - optimizer.iter
                    < optimizer.batch_size):
                predictions, groundtruth = predictor.predict(valid_loader)
                metrics, curves = evaluator.evaluate_tasks(
                    groundtruth, predictions)

                # Log metrics to stdout.
                logger.log_metrics(metrics)

                # Log all validation metrics as scalars.
                logger.log_scalars(metrics, optimizer.global_step)

                # Get the metric used to save model checkpoints.
                average_metric = evaluator.evaluate_average_metric(
                    metrics, eval_tasks, optim_args.metric_name)

                if optimizer.global_step % logger_args.iters_per_save == 0:
                    # Only save every iters_per_save steps, directly
                    # after evaluation.
                    print("Save global step: {}".format(
                        optimizer.global_step))
                    saver.save(iteration=optimizer.global_step,
                               epoch=optimizer.epoch,
                               model=model,
                               optimizer=optimizer,
                               device=args.device,
                               metric_val=average_metric)

                # Step learning rate scheduler.
                optimizer.step_scheduler(average_metric)

            with torch.set_grad_enabled(True):
                logits, embedding = model(inputs.to(args.device))
                loss = loss_fn(logits, targets.to(args.device))
                optimizer.log_iter(inputs, logits, targets, loss)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            optimizer.end_iter()

        optimizer.end_epoch(metrics)

    logger.log('=== Training Complete ===')
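

# Usage note (assumption, not part of the original file): both train() and
# test() expect a parsed argument object with nested namespaces
# (args.model_args, args.data_args, args.optim_args, args.logger_args,
# args.transform_args) plus top-level fields such as args.gpu_ids,
# args.device, and args.inference_only. The repository's own argument parser
# is expected to build this object; no specific parser class is assumed here.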