def run(config, num_checkpoints, cuda=False):
    train_joint_transform_list, train_img_transform, train_label_transform = get_transforms(
        config, mode="train")
    val_joint_transform_list, val_img_transform, val_label_transform = None, None, None
    train_dataset = DataSet(mode="train",
                            joint_transform_list=train_joint_transform_list,
                            img_transform=train_img_transform,
                            label_transform=train_label_transform)
    val_dataset = DataSet(mode="val",
                          joint_transform_list=val_joint_transform_list,
                          img_transform=val_img_transform,
                          label_transform=val_label_transform)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=config.batch_size,
                                   shuffle=True,
                                   num_workers=config.num_workers,
                                   drop_last=True)
    val_loader = data.DataLoader(val_dataset,
                                 batch_size=config.batch_size,
                                 shuffle=False,
                                 num_workers=config.num_workers)
    criterion, val_criterion = get_loss(config, cuda=cuda)
    model = get_net(config, criterion, cuda=cuda)

    checkpoints = get_checkpoints(config, num_checkpoints)
    print("[*] Checkpoints are as follows:")
    pprint.pprint(checkpoints)

    # Initialize the running average with the first checkpoint, then fold in
    # each remaining checkpoint with weight 1 / (i + 2), which keeps `model`
    # equal to the uniform average of all checkpoints seen so far.
    util_checkpoint.load_checkpoint(model, None, checkpoints[0])
    for i, checkpoint in enumerate(checkpoints[1:]):
        model2 = get_net(config, criterion, cuda=cuda)
        util_checkpoint.load_checkpoint(model2, None, checkpoint)
        swa.moving_average(model, model2, 1. / (i + 2))

    # Re-estimate the BatchNorm running statistics for the averaged weights.
    with torch.no_grad():
        swa.update_bn(train_loader, model, cuda=cuda)

    output_name = "model-swa.pth"
    checkpoint_dir = os.path.join(ROOT_DIR, LOG_DIR,
                                  os.path.basename(config.model_dir))
    util_checkpoint.save_checkpoint(checkpoint_dir, output_name, model)
    print(f"[*] SAVED to: {output_name}")

    # Evaluate the averaged model on the validation set.
    scores = validation(config, val_loader, model, val_criterion, "swa",
                        cuda=cuda, is_record=False)
    print(scores)
    with open(os.path.join(checkpoint_dir, "swa-scores.json"), "w") as f:
        json.dump(scores["FWIOU"], f)
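# --- Hedged sketch: the SWA helpers assumed by `run` above ------------------
# Inferred from the call sites, NOT the repo's actual `swa` module. With
# alpha = 1 / (i + 2) on the (i + 2)-th checkpoint, `moving_average` keeps
# `model` equal to the uniform average of all checkpoints folded in so far;
# `update_bn` then recomputes BatchNorm statistics under the averaged weights.
def moving_average_sketch(net1, net2, alpha):
    """In-place blend: p1 <- (1 - alpha) * p1 + alpha * p2."""
    for p1, p2 in zip(net1.parameters(), net2.parameters()):
        p1.data.mul_(1.0 - alpha)
        p1.data.add_(p2.data, alpha=alpha)


def update_bn_sketch(loader, model, cuda=False):
    """Reset BN stats, then one forward pass over the data to re-estimate them."""
    for m in model.modules():
        if isinstance(m, torch.nn.modules.batchnorm._BatchNorm):
            m.reset_running_stats()
            m.momentum = None  # cumulative average over the whole pass
    model.train()
    for inputs, _ in loader:  # assumes the loader yields (image, label) pairs
        if cuda:
            inputs = inputs.cuda()
        model(inputs)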
def main(args):
    train_dataset = DataSet(mode="train",
                            uniform_sampling=False,
                            filter_data=False,
                            joint_transform_list=None,
                            img_transform=None,
                            label_transform=None)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=1,
                                   shuffle=False,
                                   num_workers=8,
                                   drop_last=False)

    ce_criterion = CrossEntropyLoss2d()
    iou_criterion = MultiIouLoss(cfg.DATASET.NUM_CLASSES)
    criterions = {"ce_loss": ce_criterion, "iou_loss": iou_criterion}

    model = get_net(args, ce_criterion, cuda=args.cuda)
    if not args.load_path:
        raise ValueError("[*] Please provide `load_path`.")
    if not args.load_path.endswith(".pth"):
        raise ValueError("[*] The `load_path` should end with .pth.")
    checkpoint = torch.load(args.load_path)
    model.load_state_dict(checkpoint["state_dict"])

    # Record per-sample training losses (batch_size=1, no shuffling) so each
    # record maps back to a fixed dataset index.
    records = infer_train_info(model, train_loader, criterions, cuda=args.cuda)
    output_file = os.path.join(args.output_dir, "train_loss_overview.json")
    with open(output_file, "w") as f:
        json.dump(records, f)
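# --- Hedged sketch: one possible shape for `infer_train_info` ---------------
# Inferred from the call site (batch_size=1, shuffle=False, a dict of
# criterions): one JSON-serializable loss record per training sample. The
# record keys and the loader's (image, label) signature are assumptions.
def infer_train_info_sketch(model, loader, criterions, cuda=False):
    model.eval()
    records = []
    with torch.no_grad():
        for idx, (image, label) in enumerate(loader):
            if cuda:
                image, label = image.cuda(), label.cuda()
            logits = model(image)  # assumes the net returns raw logits here
            records.append({
                "index": idx,
                "ce_loss": criterions["ce_loss"](logits, label).item(),
                "iou_loss": criterions["iou_loss"](logits, label).item(),
            })
    return records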
def main(args):
    test_joint_transform_list, test_img_transform, test_label_transform = None, None, None
    test_dataset = DataSet(mode=args.mode,
                           joint_transform_list=test_joint_transform_list,
                           img_transform=test_img_transform,
                           label_transform=test_label_transform)
    test_loader = data.DataLoader(test_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers)
    logger.info("[*] Initialized the test loader.")

    model = get_net(args, criterion=None, cuda=args.cuda)
    if args.load_path:
        if not (os.path.isfile(args.load_path)
                and args.load_path.endswith(".pth")):
            raise ValueError(
                "[*] The `load_path` should exist and end with .pth.")
        state_dict = torch.load(args.load_path)
        model.load_state_dict(state_dict["state_dict"])
        logger.info(
            f"[*] LOADED checkpoint successfully from: {args.load_path}")
    else:
        raise ValueError("[*] The `load_path` should not be None.")

    # # Optional test-time augmentation; uncomment to enable.
    # transforms = tta.Compose([
    #     tta.HorizontalFlip(),
    #     tta.Rotate90(angles=[0, 90, 180, 270]),
    #     # tta.Multiply(factors=[0.9, 1, 1.1]),
    # ])
    # model = tta.SegmentationTTAWrapper(model, transforms)

    test(args, test_loader, model, cuda=args.cuda)
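# --- Note on the commented-out TTA block above -------------------------------
# `tta` is the `ttach` library: `SegmentationTTAWrapper` runs the model once
# per augmented view (horizontal flip x four rotations = 8 views here),
# inverts each augmentation on the predicted mask, and merges the results
# (mean by default). A minimal manual equivalent for the flip-only case,
# assuming `model(x)` returns per-pixel logits, would be:
def flip_tta_sketch(model, image):
    logits = model(image)
    flipped_logits = model(torch.flip(image, dims=[-1]))
    return 0.5 * (logits + torch.flip(flipped_logits, dims=[-1]))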
def main(args):
    train_joint_transform_list, train_img_transform, train_label_transform = get_transforms(
        args, mode="train")
    val_joint_transform_list, val_img_transform, val_label_transform = None, None, None
    train_dataset = DataSet(mode="train",
                            uniform_sampling=args.uniform_sampling,
                            filter_data=args.data_filter,
                            joint_transform_list=train_joint_transform_list,
                            img_transform=train_img_transform,
                            label_transform=train_label_transform)
    val_dataset = DataSet(mode="val",
                          uniform_sampling=False,
                          joint_transform_list=val_joint_transform_list,
                          img_transform=val_img_transform,
                          label_transform=val_label_transform)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   drop_last=True)
    val_loader = data.DataLoader(val_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers)
    logger.info("[*] Initialized the train and val loaders.")

    criterion, val_criterion = get_loss(args, cuda=args.cuda)
    model = get_net(args, criterion, cuda=args.cuda)
    logger.info("[*] Loaded the model.")
    optimizer, lr_scheduler = get_optimizer(args, model)

    # Best validation score seen so far (FWIOU is the save criterion).
    maxscore = 0
    if args.load_path:
        if not os.path.exists(args.load_path):
            raise ValueError(
                "[*] The `load_path` should exist and end with .pth.")
        checkpoint = load_model(args, save_criterion="FWIOU")
        model.load_state_dict(checkpoint["state_dict"], strict=False)
        if not args.retrain:
            # Resume from the saved epoch/step and optimizer state.
            start_epoch = checkpoint["epoch"]
            start_step = checkpoint["step"] + 1
            optimizer.load_state_dict(checkpoint["optimizer"])
        else:
            start_epoch, start_step = 0, 0
        logger.info(
            f"[*] LOADED checkpoint successfully from: {args.load_path}")
    else:
        start_epoch, start_step = 0, 0

    tb = TensorBoard(args.model_dir)
    step = start_step
    for epoch in range(start_epoch, args.num_epochs):
        logger.info(
            f"| model_name {args.model_name} | epoch {epoch} | lr {args.lr}")
        train_loss, step, maxscore = train(args, train_loader, val_loader,
                                           model, val_criterion, optimizer,
                                           lr_scheduler, epoch, step, tb,
                                           maxscore, cuda=args.cuda)
        # Rebuild the epoch's sample list when uniform sampling or data
        # filtering re-selects training samples each epoch.
        if args.uniform_sampling or args.data_filter:
            train_loader.dataset.build_epoch()
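# --- Hedged sketch: the checkpoint layout the resume logic above expects ----
# The reads of checkpoint["state_dict"], ["epoch"], ["step"], and
# ["optimizer"] imply a dict like the one below; the helper name and the
# exact saving code in this repo are assumptions.
def save_resume_checkpoint_sketch(path, model, optimizer, epoch, step):
    torch.save({
        "state_dict": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "epoch": epoch,
        "step": step,
    }, path)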