def main(args):
    """Export a trained checkpoint either to ONNX or to a plain ``.pth`` file.

    Optionally truncates the first convolution so the exported model accepts
    fewer input channels than the model was trained with.
    """
    config = load_config(args.config)

    if args.type == "onnx":
        # Workaround: PyTorch ONNX export clashes with DataParallel on GPU,
        # cf https://github.com/pytorch/pytorch/issues/5315
        os.environ["CUDA_VISIBLE_DEVICES"] = ""

    num_classes = len(config["classes"]["classes"])

    # Total number of input bands across all configured channels.
    num_channels = 0
    for channel in config["channels"]:
        num_channels += len(channel["bands"])

    export_channels = num_channels if not args.export_channels else args.export_channels
    assert num_channels >= export_channels, "Will be hard indeed, to export more channels than thoses dataset provide"

    def map_location(storage, _):
        # Force everything onto CPU regardless of where it was saved.
        return storage.cpu()

    net = UNet(num_classes, num_channels=num_channels).to("cpu")
    chkpt = torch.load(args.checkpoint, map_location=map_location)
    net = torch.nn.DataParallel(net)
    net.load_state_dict(chkpt["state_dict"])

    if export_channels < num_channels:
        # Keep only the first `export_channels` input slices of conv1's kernel.
        weights = net.module.resnet.conv1.weight.data[:, :export_channels, :, :].clone()
        # BUGFIX: the replacement conv must accept `export_channels` inputs —
        # it was previously built with `num_channels`, which is inconsistent
        # with the truncated weight tensor assigned just below.
        net.module.resnet.conv1 = nn.Conv2d(export_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        net.module.resnet.conv1.weight = nn.Parameter(weights)

    if args.type == "onnx":
        # torch.autograd.Variable is deprecated since PyTorch 0.4; a plain
        # tensor is an identical tracing input for the exporter.
        batch = torch.randn(1, export_channels, args.image_size, args.image_size)
        torch.onnx.export(net, batch, args.out)
    elif args.type == "pth":
        states = {"epoch": chkpt["epoch"], "state_dict": net.state_dict(), "optimizer": chkpt["optimizer"]}
        torch.save(states, args.out)
def main(args):
    """Train a UNet as configured by the model and dataset config files.

    Supports several loss functions, optional class weights, checkpoint
    resume, and logs per-epoch train/validation metrics.
    """
    model = load_config(args.model)
    dataset = load_config(args.dataset)

    device = torch.device("cuda" if model["common"]["cuda"] else "cpu")

    if model["common"]["cuda"] and not torch.cuda.is_available():
        sys.exit("Error: CUDA requested but not available")

    os.makedirs(model["common"]["checkpoint"], exist_ok=True)

    num_classes = len(dataset["common"]["classes"])
    net = UNet(num_classes)
    net = DataParallel(net)
    net = net.to(device)

    if model["common"]["cuda"]:
        torch.backends.cudnn.benchmark = True

    # Class weights are optional; only the weighted losses require them.
    # FIX: track the optional value explicitly with None instead of probing
    # `"weight" in locals()` later — locals() introspection is fragile and
    # breaks silently if the variable is ever renamed.
    weight = None
    try:
        weight = torch.Tensor(dataset["weights"]["values"])
    except KeyError:
        if model["opt"]["loss"] in ("CrossEntropy", "mIoU", "Focal"):
            sys.exit("Error: The loss function used, need dataset weights values")

    optimizer = Adam(net.parameters(), lr=model["opt"]["lr"], weight_decay=model["opt"]["decay"])

    resume = 0
    if args.checkpoint:

        def map_location(storage, _):
            return storage.cuda() if model["common"]["cuda"] else storage.cpu()

        # https://github.com/pytorch/pytorch/issues/7178
        chkpt = torch.load(args.checkpoint, map_location=map_location)
        net.load_state_dict(chkpt["state_dict"])

        if args.resume:
            optimizer.load_state_dict(chkpt["optimizer"])
            resume = chkpt["epoch"]

    if model["opt"]["loss"] == "CrossEntropy":
        criterion = CrossEntropyLoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "mIoU":
        criterion = mIoULoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "Focal":
        criterion = FocalLoss2d(weight=weight).to(device)
    elif model["opt"]["loss"] == "Lovasz":
        criterion = LovaszLoss2d().to(device)
    else:
        sys.exit("Error: Unknown [opt][loss] value !")

    train_loader, val_loader = get_dataset_loaders(model, dataset, args.workers)

    num_epochs = model["opt"]["epochs"]
    if resume >= num_epochs:
        sys.exit("Error: Epoch {} set in {} already reached by the checkpoint provided".format(num_epochs, args.model))

    history = collections.defaultdict(list)
    log = Log(os.path.join(model["common"]["checkpoint"], "log"))

    log.log("--- Hyper Parameters on Dataset: {} ---".format(dataset["common"]["dataset"]))
    log.log("Batch Size:\t {}".format(model["common"]["batch_size"]))
    log.log("Image Size:\t {}".format(model["common"]["image_size"]))
    log.log("Learning Rate:\t {}".format(model["opt"]["lr"]))
    log.log("Weight Decay:\t {}".format(model["opt"]["decay"]))
    log.log("Loss function:\t {}".format(model["opt"]["loss"]))
    if weight is not None:
        log.log("Weights :\t {}".format(dataset["weights"]["values"]))
    log.log("---")

    for epoch in range(resume, num_epochs):
        log.log("Epoch: {}/{}".format(epoch + 1, num_epochs))

        train_hist = train(train_loader, num_classes, device, net, optimizer, criterion)
        log.log(
            "Train loss: {:.4f}, mIoU: {:.3f}, {} IoU: {:.3f}, MCC: {:.3f}".format(
                train_hist["loss"],
                train_hist["miou"],
                dataset["common"]["classes"][1],
                train_hist["fg_iou"],
                train_hist["mcc"],
            ))
        for k, v in train_hist.items():
            history["train " + k].append(v)

        val_hist = validate(val_loader, num_classes, device, net, criterion)
        log.log(
            "Validate loss: {:.4f}, mIoU: {:.3f}, {} IoU: {:.3f}, MCC: {:.3f}".format(
                val_hist["loss"],
                val_hist["miou"],
                dataset["common"]["classes"][1],
                val_hist["fg_iou"],
                val_hist["mcc"],
            ))
        for k, v in val_hist.items():
            history["val " + k].append(v)

        # Persist a metrics plot and a full checkpoint every epoch.
        visual = "history-{:05d}-of-{:05d}.png".format(epoch + 1, num_epochs)
        plot(os.path.join(model["common"]["checkpoint"], visual), history)

        checkpoint = "checkpoint-{:05d}-of-{:05d}.pth".format(epoch + 1, num_epochs)
        states = {"epoch": epoch + 1, "state_dict": net.state_dict(), "optimizer": optimizer.state_dict()}
        torch.save(states, os.path.join(model["common"]["checkpoint"], checkpoint))
def main(args):
    """Train a UNet with Adam and weighted 2D cross-entropy.

    Saves a metrics plot and a raw ``state_dict`` checkpoint after every
    epoch into the configured checkpoint directory.
    """
    model = load_config(args.model)
    dataset = load_config(args.dataset)

    device = torch.device("cuda" if model["common"]["cuda"] else "cpu")
    if model["common"]["cuda"] and not torch.cuda.is_available():
        sys.exit("Error: CUDA requested but not available")

    os.makedirs(model["common"]["checkpoint"], exist_ok=True)

    num_classes = len(dataset["common"]["classes"])
    net = DataParallel(UNet(num_classes)).to(device)

    if model["common"]["cuda"]:
        torch.backends.cudnn.benchmark = True

    if args.checkpoint:

        def map_location(storage, _):
            return storage.cuda() if model["common"]["cuda"] else storage.cpu()

        # https://github.com/pytorch/pytorch/issues/7178
        net.load_state_dict(torch.load(args.checkpoint, map_location=map_location))

    optimizer = Adam(net.parameters(), lr=model["opt"]["lr"], weight_decay=model["opt"]["decay"])

    weight = torch.Tensor(dataset["weights"]["values"])
    criterion = CrossEntropyLoss2d(weight=weight).to(device)

    train_loader, val_loader = get_dataset_loaders(model, dataset)

    num_epochs = model["opt"]["epochs"]
    history = collections.defaultdict(list)

    for epoch in range(num_epochs):
        print("Epoch: {}/{}".format(epoch + 1, num_epochs))

        train_hist = train(train_loader, num_classes, device, net, optimizer, criterion)
        print("Train loss: {:.4f}, mean IoU: {:.4f}".format(train_hist["loss"], train_hist["iou"]))
        for key, value in train_hist.items():
            history["train " + key].append(value)

        val_hist = validate(val_loader, num_classes, device, net, criterion)
        print("Validate loss: {:.4f}, mean IoU: {:.4f}".format(val_hist["loss"], val_hist["iou"]))
        for key, value in val_hist.items():
            history["val " + key].append(value)

        plot_name = "history-{:05d}-of-{:05d}.png".format(epoch + 1, num_epochs)
        plot(os.path.join(model["common"]["checkpoint"], plot_name), history)

        checkpoint_name = "checkpoint-{:05d}-of-{:05d}.pth".format(epoch + 1, num_epochs)
        torch.save(net.state_dict(), os.path.join(model["common"]["checkpoint"], checkpoint_name))
def main(args):
    """Train a UNet with SGD + MultiStepLR, optionally resuming from a checkpoint."""
    model = load_config(args.model)
    dataset = load_config(args.dataset)

    device = torch.device('cuda' if model['common']['cuda'] else 'cpu')
    if model['common']['cuda'] and not torch.cuda.is_available():
        sys.exit('Error: CUDA requested but not available')

    os.makedirs(model['common']['checkpoint'], exist_ok=True)

    num_classes = len(dataset['common']['classes'])
    net = UNet(num_classes).to(device)

    if args.resume:
        path = os.path.join(model['common']['checkpoint'], args.resume)
        cuda = model['common']['cuda']

        def map_location(storage, _):
            return storage.cuda() if cuda else storage.cpu()

        net.load_state_dict(torch.load(path, map_location=map_location))
        # Checkpoints are named 'checkpoint-EEEEE-of-TTTTT.pth'; the epoch
        # count sits at a fixed offset inside that name.
        resume_at_epoch = int(args.resume[11:16])
    else:
        resume_at_epoch = 0

    if model['common']['cuda']:
        torch.backends.cudnn.benchmark = True
        net = DataParallel(net)

    optimizer = SGD(net.parameters(), lr=model['opt']['lr'], momentum=model['opt']['momentum'])
    scheduler = MultiStepLR(optimizer, milestones=model['opt']['milestones'], gamma=model['opt']['gamma'])

    weight = torch.Tensor(dataset['weights']['values'])

    # Fast-forward the scheduler to the resume point (pre-PyTorch-1.1 idiom).
    for _ in range(resume_at_epoch):
        scheduler.step()

    criterion = CrossEntropyLoss2d(weight=weight).to(device)

    train_loader, val_loader = get_dataset_loaders(model, dataset)

    num_epochs = model['opt']['epochs']
    history = collections.defaultdict(list)

    for epoch in range(resume_at_epoch, num_epochs):
        print('Epoch: {}/{}'.format(epoch + 1, num_epochs))

        train_hist = train(train_loader, num_classes, device, net, optimizer, scheduler, criterion)
        print('Train loss: {:.4f}, mean IoU: {:.4f}'.format(train_hist['loss'], train_hist['iou']))
        for key, value in train_hist.items():
            history['train ' + key].append(value)

        val_hist = validate(val_loader, num_classes, device, net, criterion)
        print('Validate loss: {:.4f}, mean IoU: {:.4f}'.format(val_hist['loss'], val_hist['iou']))
        for key, value in val_hist.items():
            history['val ' + key].append(value)

        visual = 'history-{:05d}-of-{:05d}.png'.format(epoch + 1, num_epochs)
        plot(os.path.join(model['common']['checkpoint'], visual), history)

        checkpoint = 'checkpoint-{:05d}-of-{:05d}.pth'.format(epoch + 1, num_epochs)
        torch.save(net.state_dict(), os.path.join(model['common']['checkpoint'], checkpoint))
def main(args):
    """Train a UNet via SGD with a multi-step LR schedule and checkpoint resume."""
    model = load_config(args.model)
    dataset = load_config(args.dataset)

    device = torch.device('cuda' if model['common']['cuda'] else 'cpu')
    if model['common']['cuda'] and not torch.cuda.is_available():
        sys.exit('Error: CUDA requested but not available')

    os.makedirs(model['common']['checkpoint'], exist_ok=True)

    num_classes = len(dataset['common']['classes'])
    net = UNet(num_classes).to(device)

    if args.resume:
        # Restore weights from a previous run's checkpoint file.
        path = os.path.join(model['common']['checkpoint'], args.resume)
        cuda = model['common']['cuda']

        def map_location(storage, _):
            return storage.cuda() if cuda else storage.cpu()

        chkpt = torch.load(path, map_location=map_location)
        net.load_state_dict(chkpt)
        # Epoch number parsed from the fixed-width checkpoint filename.
        resume_at_epoch = int(args.resume[11:16])
    else:
        resume_at_epoch = 0

    if model['common']['cuda']:
        torch.backends.cudnn.benchmark = True
        net = DataParallel(net)

    optimizer = SGD(net.parameters(), lr=model['opt']['lr'], momentum=model['opt']['momentum'])
    scheduler = MultiStepLR(optimizer, milestones=model['opt']['milestones'], gamma=model['opt']['gamma'])

    weight = torch.Tensor(dataset['weights']['values'])

    # Replay scheduler steps up to the resume epoch (pre-PyTorch-1.1 idiom).
    for _ in range(resume_at_epoch):
        scheduler.step()

    criterion = CrossEntropyLoss2d(weight=weight).to(device)

    train_loader, val_loader = get_dataset_loaders(model, dataset)

    num_epochs = model['opt']['epochs']
    history = collections.defaultdict(list)

    for epoch in range(resume_at_epoch, num_epochs):
        print('Epoch: {}/{}'.format(epoch + 1, num_epochs))

        metrics = train(train_loader, num_classes, device, net, optimizer, scheduler, criterion)
        print('Train loss: {:.4f}, mean IoU: {:.4f}'.format(metrics['loss'], metrics['iou']))
        for name, measurement in metrics.items():
            history['train ' + name].append(measurement)

        metrics = validate(val_loader, num_classes, device, net, criterion)
        print('Validate loss: {:.4f}, mean IoU: {:.4f}'.format(metrics['loss'], metrics['iou']))
        for name, measurement in metrics.items():
            history['val ' + name].append(measurement)

        visual = 'history-{:05d}-of-{:05d}.png'.format(epoch + 1, num_epochs)
        plot(os.path.join(model['common']['checkpoint'], visual), history)

        checkpoint = 'checkpoint-{:05d}-of-{:05d}.pth'.format(epoch + 1, num_epochs)
        torch.save(net.state_dict(), os.path.join(model['common']['checkpoint'], checkpoint))