ckpt_target_file_path = os.path.join(args.checkpoint_dir_target, best_ckpt_filename)
if os.path.exists(ckpt_target_file_path):
    model_target = utils.load_checkpoint(model_target,
                                         ckpt_dir=args.checkpoint_dir_target,
                                         filename=best_ckpt_filename,
                                         is_best=True)[0]
    print("=> loaded target model checkpoint from {}".format(ckpt_target_file_path))
else:
    print("=> Initializing target model from scratch")

# Freeze source and target networks; only the transfer module is trained.
for p in model_source.parameters():
    p.requires_grad = False
model_source.eval()

for p in model_target.parameters():
    p.requires_grad = False
model_target.eval()

opt = optim.AdamW(transfer.parameters(), lr=params_transfer.learning_rate)
lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
    opt,
    max_lr=params_transfer.learning_rate,
    steps_per_epoch=len(train_dl),
    epochs=params_transfer.num_epochs,
    div_factor=20)

# fetch loss function and metrics
loss_fn = get_loss_fn(params_transfer)  # num_classes+1 for background.
metrics = OrderedDict({})
for metric in params_transfer.metrics:
    metrics[metric] = get_metrics(metric, params_transfer)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params_transfer.num_epochs))
train_and_evaluate(model_source, model_target, transfer, train_dl,
                   val_dl_source, val_dl_target, opt, loss_fn, metrics,
                   params_transfer, lr_scheduler, args.checkpoint_dir_transfer,
                   ckpt_filename, log_dir, writer)
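# Since OneCycleLR is built with steps_per_epoch=len(train_dl), the schedule
# advances once per batch, not once per epoch. Below is a minimal sketch of
# the training step implied by the setup above: frozen source/target
# networks, trainable transfer module. The (inputs, targets) batch layout and
# the way features flow through the three networks are illustrative
# assumptions, not the repo's actual train_and_evaluate.
def transfer_train_step(batch, device):
    inputs, targets = batch
    inputs, targets = inputs.to(device), targets.to(device)
    with torch.no_grad():
        feats = model_source(inputs)       # frozen feature extractor
    preds = model_target(transfer(feats))  # adapt features, decode with frozen target
    loss = loss_fn(preds, targets)
    opt.zero_grad()
    loss.backward()                        # gradients reach only `transfer`
    opt.step()
    lr_scheduler.step()                    # per-batch step for OneCycleLR
    return loss.item()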
ckpt_target_file_path = os.path.join(args.checkpoint_dir_target, ckpt_filename)
if os.path.exists(ckpt_target_file_path):
    model_target, _, _, _, _ = utils.load_checkpoint(
        model_target, None, None,
        ckpt_dir=args.checkpoint_dir_target,
        filename=ckpt_filename)
    print("=> loaded target model checkpoint from {}".format(
        ckpt_target_file_path))
else:
    print("=> Initializing target model from scratch")

# fetch loss functions and metrics for the two tasks
loss_fn1 = get_loss_fn(params_source)
loss_fn2 = get_loss_fn(params_target)

metrics_depth = OrderedDict({})
for metric in params_source.metrics:
    metrics_depth[metric] = get_metrics(metric, params_source)

metrics_segmentation = OrderedDict({})
for metric in params_target.metrics:
    metrics_segmentation[metric] = get_metrics(metric, params_target)

# Train the model
logging.info("Starting training for {} epoch(s)".format(
    params_transfer.num_epochs))
train_and_evaluate(
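# utils.load_checkpoint is called in several ways across these scripts: with
# a positional optimizer/scheduler pair and a 5-tuple return, and with
# is_best/filename keywords while indexing [0] for the model. A sketch of a
# helper compatible with all of those call sites; the checkpoint dict keys,
# the default filenames, and the epoch/best_value return slots are
# assumptions, not confirmed by the source.
import os
import torch

def load_checkpoint(model, opt=None, lr_scheduler=None, ckpt_dir='.',
                    filename=None, is_best=False):
    """Restore model (and optionally optimizer/scheduler) state from disk."""
    if filename is None:
        filename = 'best.pth.tar' if is_best else 'last.pth.tar'  # assumed names
    ckpt = torch.load(os.path.join(ckpt_dir, filename), map_location='cpu')
    model.load_state_dict(ckpt['model_state'])
    if opt is not None and 'opt_state' in ckpt:
        opt.load_state_dict(ckpt['opt_state'])
    if lr_scheduler is not None and 'sched_state' in ckpt:
        lr_scheduler.load_state_dict(ckpt['sched_state'])
    return model, opt, lr_scheduler, ckpt.get('epoch', 0), ckpt.get('best_value')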
args = parser.parse_args()
json_path = os.path.join(args.model_dir, 'params.json')
assert os.path.isfile(json_path), \
    "No json configuration file found at {}".format(json_path)
params = utils.Params(json_path)

# use GPU if available
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Set the random seed for reproducible experiments
seed = 42
torch.manual_seed(seed)
random.seed(seed)
torch.backends.cudnn.deterministic = True
np.random.seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)

train_dl = data_loader.fetch_dataloader(args.data_dir, args.txt_train, 'train', params)

# Define the model and optimizer
model = get_network(params).to(device)
opt = optim.AdamW(model.parameters(), lr=params.learning_rate)
loss_fn = get_loss_fn(loss_name=params.loss_fn)

if args.checkpoint_dir:
    model = utils.load_checkpoint(model, is_best=False,
                                  ckpt_dir=args.checkpoint_dir)[0]

# Run the learning-rate range test and plot the loss curve
log_lrs, losses = find_lr(train_dl, opt, model, loss_fn, device)
plot_lr(log_lrs, losses)
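# find_lr and plot_lr are not defined in this snippet. A minimal sketch of
# the standard exponential learning-rate range test (Smith, 2017) that
# matches the call signature above; the (inputs, targets) batch layout, the
# LR bounds, and the smoothing/divergence constants are assumptions.
import math
import matplotlib.pyplot as plt

def find_lr(train_dl, opt, model, loss_fn, device,
            init_lr=1e-8, final_lr=10.0, beta=0.98):
    num = len(train_dl) - 1
    mult = (final_lr / init_lr) ** (1.0 / num)  # per-batch LR multiplier
    lr = init_lr
    opt.param_groups[0]['lr'] = lr
    avg_loss, best_loss = 0.0, float('inf')
    log_lrs, losses = [], []
    model.train()
    for i, (inputs, targets) in enumerate(train_dl, start=1):
        inputs, targets = inputs.to(device), targets.to(device)
        loss = loss_fn(model(inputs), targets)
        # Exponentially smoothed loss gives a less noisy curve
        avg_loss = beta * avg_loss + (1 - beta) * loss.item()
        smoothed = avg_loss / (1 - beta ** i)
        if i > 1 and smoothed > 4 * best_loss:
            break  # loss is diverging; stop the sweep
        best_loss = min(best_loss, smoothed)
        log_lrs.append(math.log10(lr))
        losses.append(smoothed)
        opt.zero_grad()
        loss.backward()
        opt.step()
        lr *= mult
        opt.param_groups[0]['lr'] = lr
    return log_lrs, losses

def plot_lr(log_lrs, losses):
    plt.plot(log_lrs, losses)
    plt.xlabel('log10(learning rate)')
    plt.ylabel('smoothed loss')
    plt.show()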
# fetch dataloaders
train_dl = dataloader.fetch_dataloader(args.data_dir, args.txt_train, "train", params)
val_dl = dataloader.fetch_dataloader(args.data_dir, args.txt_val, "val", params)
logging.info("- done.")

# Define the model and optimizer
model = get_network(params).to(params.device)
opt = optim.AdamW(model.parameters(), lr=params.learning_rate)
lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
    opt,
    max_lr=params.learning_rate,
    steps_per_epoch=len(train_dl),
    epochs=params.num_epochs,
    div_factor=20)

# fetch loss function and metrics
loss_fn = get_loss_fn(params)  # num_classes+1 for background.
metrics = OrderedDict({})
for metric in params.metrics:
    metrics[metric] = get_metrics(metric, params)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(model, train_dl, val_dl, opt, loss_fn, metrics, params,
                   lr_scheduler, args.checkpoint_dir, ckpt_filename, log_dir,
                   writer)
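# A sketch of the epoch loop train_and_evaluate is assumed to implement,
# given the arguments passed above. The validation criterion (mean loss),
# the checkpoint dict layout, and best-only saving are assumptions; the
# per-batch lr_scheduler.step() follows from OneCycleLR being configured
# with steps_per_epoch. Metrics and log_dir handling are omitted for brevity.
import os
import torch

def train_and_evaluate_sketch(model, train_dl, val_dl, opt, loss_fn, metrics,
                              params, lr_scheduler, checkpoint_dir,
                              ckpt_filename, log_dir, writer):
    best_val_loss = float('inf')
    for epoch in range(params.num_epochs):
        model.train()
        for inputs, targets in train_dl:
            inputs = inputs.to(params.device)
            targets = targets.to(params.device)
            loss = loss_fn(model(inputs), targets)
            opt.zero_grad()
            loss.backward()
            opt.step()
            lr_scheduler.step()  # once per batch, matching steps_per_epoch
        # Validation pass
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for inputs, targets in val_dl:
                inputs = inputs.to(params.device)
                targets = targets.to(params.device)
                val_loss += loss_fn(model(inputs), targets).item()
        val_loss /= len(val_dl)
        writer.add_scalar('val/loss', val_loss, epoch)
        if val_loss < best_val_loss:  # keep the best checkpoint so far
            best_val_loss = val_loss
            torch.save({'model_state': model.state_dict(),
                        'opt_state': opt.state_dict(),
                        'epoch': epoch},
                       os.path.join(checkpoint_dir, ckpt_filename))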