Example #1
def init_rgbd_classifier(ckpt_root):
    # transform depth
    transform_d = torchvision.transforms.Compose([
        # augmentation.CenterCrop((224, 224)),
        augmentation.Numpy2Tensor(),
        augmentation.Clamp((0.15, 1.0)),
        torchvision.transforms.Normalize(mean=[0.575], std=[0.425])
    ])

    # transform rgb
    transform_rgb = torchvision.transforms.Compose([
        # augmentation.CenterCrop((224, 224)),
        augmentation.Numpy2Tensor(),
        torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.5, 0.5, 0.5])
    ])

    # read class names
    with open(os.path.join(ckpt_root, 'classes.json'), 'rt') as f:
        classes = json.load(f)

    model = RGBDNet({'num_classes': len(classes)})

    model.eval()

    # enable cuda if available
    if torch.cuda.is_available():
        model = model.cuda()

    utils.load_best(model, ckpt_root)

    return model, transform_rgb, transform_d, classes
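
# A minimal, hedged usage sketch for init_rgbd_classifier above. The checkpoint
# directory, the numpy inputs (HxWxC float arrays), and the assumption that
# RGBDNet's forward takes the RGB and depth tensors as two arguments are
# illustrative guesses, not taken from the source.
def classify_rgbd_sample(rgb_np, depth_np, ckpt_root='./checkpoints'):
    model, transform_rgb, transform_d, classes = init_rgbd_classifier(ckpt_root)
    rgb = transform_rgb(rgb_np).unsqueeze(0)    # add batch dimension
    depth = transform_d(depth_np).unsqueeze(0)
    if torch.cuda.is_available():
        rgb, depth = rgb.cuda(), depth.cuda()
    with torch.no_grad():
        logits = model(rgb, depth)              # assumed forward signature
    return classes[logits.argmax(dim=1).item()]
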
def __init__(self, mode='train'):
    self.mode = mode
    self.blur_rgb = augmentation.GaussianBlur(signma=1)
    self.drop_channel = augmentation.DropChannel([(0, 1, 2), 3], -1)
    # transform rgb
    self.transform_rgb = torchvision.transforms.Compose([
        augmentation.Brightness(minmax=(0, .2)),
        augmentation.GaussianNoise(),
        augmentation.Clamp((0.0, 1.0)),
        torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.5, 0.5, 0.5])
    ])
    # transform depth
    self.transform_d = torchvision.transforms.Compose([
        augmentation.Brightness(minmax=(0, .8)),
        augmentation.GaussianNoise(std=0.05),
        augmentation.Clamp((0.15, 1.0)),
        torchvision.transforms.Normalize(mean=[0.575], std=[0.425]),
        augmentation.DepthUniformNoise(p=0.01, minmax=-1),
    ])
    self.crop_resize = augmentation.CropAndResize((224, 224),
                                                  scale=(0.4, 1.0))
def train_rgb(cfg):
    imagenet_transform_train = torchvision.transforms.Compose([
        augmentation.GaussianBlur(r=1),
        torchvision.transforms.RandomResizedCrop(224, scale=(0.25, 1.0)),
        torchvision.transforms.ToTensor(),
        augmentation.GaussianNoise(),
        augmentation.Clamp((0.0, 1.0)),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    ])

    imagenet_transform_val = torchvision.transforms.Compose([
        augmentation.GaussianBlur(r=1),
        torchvision.transforms.CenterCrop(224),
        torchvision.transforms.ToTensor(),
        augmentation.GaussianNoise(),
        augmentation.Clamp((0.0, 1.0)),
        torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
    ])

    datasets = {
        "train":
        torchvision.datasets.DatasetFolder(cfg['data_root']['train'],
                                           loaders.rgb_from_image,
                                           extensions=('rgb.png',),
                                           transform=imagenet_transform_train),
        "val":
        torchvision.datasets.DatasetFolder(cfg['data_root']['val'],
                                           loaders.rgb_from_image,
                                           extensions=('rgb.png',),
                                           transform=imagenet_transform_val),
    }

    num_workers = cfg['worker']
    data_loader = {
        "train":
        torch.utils.data.DataLoader(datasets['train'],
                                    batch_size=cfg['batch_size'],
                                    num_workers=num_workers,
                                    shuffle=True,
                                    pin_memory=True,
                                    drop_last=True),
        "val":
        torch.utils.data.DataLoader(datasets['val'],
                                    batch_size=cfg['batch_size'],
                                    num_workers=num_workers,
                                    shuffle=True,
                                    pin_memory=True,
                                    drop_last=True),
    }

    assert len(datasets['train'].classes) == len(datasets['val'].classes)
    # save class names
    classes = datasets['train'].classes
    os.makedirs(cfg['ckpt_root'], exist_ok=True)
    with open(os.path.join(cfg['ckpt_root'], 'classes.json'), 'wt') as f:
        json.dump(classes, f)

    cfg_rgb = cfg['rgb']
    cfg_rgb['num_classes'] = len(classes)
    model = RGBNet(cfg_rgb)
    # enable cuda if available
    if torch.cuda.is_available():
        model = model.cuda()

    criterion = torch.nn.CrossEntropyLoss()
    # optimizer = torch.optim.SGD(model.parameters(), lr=cfg['lr'], momentum=cfg['momentum'], weight_decay=cfg['weight_decay'])
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg['lr'],
                                 weight_decay=cfg['weight_decay'])
    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg['milestones'], gamma=cfg['gamma'])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                10,
                                                gamma=cfg['gamma'])

    train_model(model,
                data_loader,
                criterion,
                optimizer,
                scheduler,
                cfg,
                resume=True)
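
# A hedged example of the cfg dictionary that train_rgb expects. Only the keys
# read above (data_root, worker, batch_size, ckpt_root, rgb, lr, weight_decay,
# gamma) come from the code; the values, the contents of the 'rgb' sub-dict,
# and any extra keys consumed by train_model are illustrative guesses.
example_cfg_rgb = {
    'data_root': {'train': 'data/train', 'val': 'data/val'},
    'worker': 4,
    'batch_size': 32,
    'ckpt_root': 'checkpoints/rgb',
    'rgb': {},              # RGBNet options; 'num_classes' is filled in above
    'lr': 1e-4,
    'weight_decay': 1e-5,
    'gamma': 0.1,           # decay factor for the StepLR scheduler above
}
# train_rgb(example_cfg_rgb)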
def train_depth(cfg):
    imagenet_transform_train = torchvision.transforms.Compose([
        augmentation.CropAndResize((224, 224), scale=(0.4, 1.0)),
        torchvision.transforms.ToTensor(),
        augmentation.DepthTranslate(minmax=(0, .8)),
        augmentation.GaussianNoise(std=0.005),
        augmentation.DepthUniformNoise(p=0.01, minmax=(0.15, 1.0)),
        augmentation.Clamp((0.15, 1.0)),
        torchvision.transforms.Normalize(mean=[0.575], std=[0.425])
    ])

    imagenet_transform_val = torchvision.transforms.Compose([
        augmentation.CenterCrop((224, 224)),
        torchvision.transforms.ToTensor(),
        augmentation.DepthTranslate(minmax=(0, .8)),
        augmentation.GaussianNoise(std=0.005),
        augmentation.DepthUniformNoise(p=0.01, minmax=(0.15, 1.0)),
        augmentation.Clamp((0.15, 1.0)),
        torchvision.transforms.Normalize(mean=[0.575], std=[0.425])
    ])

    datasets = {
        "train":
        torchvision.datasets.DatasetFolder(cfg['data_root']['train'],
                                           loaders.depth_from_exr,
                                           extensions=('depth.exr',),
                                           transform=imagenet_transform_train),
        "val":
        torchvision.datasets.DatasetFolder(cfg['data_root']['val'],
                                           loaders.depth_from_exr,
                                           extensions=('depth.exr',),
                                           transform=imagenet_transform_val),
    }

    num_workers = cfg['worker']
    data_loader = {
        "train":
        torch.utils.data.DataLoader(datasets['train'],
                                    batch_size=cfg['batch_size'],
                                    num_workers=num_workers,
                                    shuffle=True,
                                    pin_memory=True,
                                    drop_last=True),
        "val":
        torch.utils.data.DataLoader(datasets['val'],
                                    batch_size=cfg['batch_size'],
                                    num_workers=num_workers,
                                    shuffle=True,
                                    pin_memory=True,
                                    drop_last=True),
    }

    assert len(datasets['train'].classes) == len(datasets['val'].classes)
    # save class names
    classes = datasets['train'].classes
    os.makedirs(cfg['ckpt_root'], exist_ok=True)
    with open(os.path.join(cfg['ckpt_root'], 'classes.json'), 'wt') as f:
        json.dump(classes, f)

    cfg_depth = cfg['depth']
    cfg_depth['num_classes'] = len(classes)
    model = DepthNet(cfg_depth)
    # enable cuda if available
    if torch.cuda.is_available():
        model = model.cuda()

    criterion = torch.nn.CrossEntropyLoss()
    # optimizer = torch.optim.SGD(model.parameters(), lr=cfg['lr'], momentum=cfg['momentum'], weight_decay=cfg['weight_decay'])
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg['lr'],
                                 weight_decay=cfg['weight_decay'])
    # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=cfg['milestones'], gamma=cfg['gamma'])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                10,
                                                gamma=cfg['gamma'])

    train_model(model,
                data_loader,
                criterion,
                optimizer,
                scheduler,
                cfg,
                resume=True)
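
# train_depth mirrors train_rgb: it reads the same cfg keys, with a 'depth'
# sub-dict for DepthNet in place of 'rgb'. A hedged driver sketch follows; the
# config filename and command-line layout are assumptions, not from the source.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('modality', choices=['rgb', 'depth'])
    parser.add_argument('--config', default='config.json')
    args = parser.parse_args()
    with open(args.config, 'rt') as f:
        cfg = json.load(f)
    if args.modality == 'rgb':
        train_rgb(cfg)
    else:
        train_depth(cfg)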