Example #1
0
def main():
    """Train HandSegNet for one pass over the RHD hand-segmentation dataset.

    Paths and hyper-parameters are hard-coded. Relies on `transforms`,
    `get_loader`, `HandSegNet`, `optim`, `nn`, and a module-level `device`
    supplied by the enclosing file.
    """
    # path settings
    image_path = '../RHD_v1-1/RHD_published_v2/training/color/'
    mask_path = '../RHD_v1-1/RHD_published_v2/training/mask/'
    anno_path = '../RHD_v1-1/RHD_published_v2/training/anno_training.pickle'
    model_path = None  # NOTE(review): unused; kept for parity with sibling scripts

    # hyper-parameters
    print_freq = 100
    batch_size = 1
    num_workers = 0
    epoch = 100  # NOTE(review): only used as the printed epoch label; there is no outer epoch loop

    # data augmentation / normalization for the RGB input
    transform = transforms.Compose([
        transforms.RandomResizedCrop((256, 256)),
        transforms.ColorJitter(hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # masks only need tensor conversion, no color jitter / normalization
    mask_transform = transforms.Compose([transforms.ToTensor()])

    train_loader = get_loader(image_path,
                              mask_path,
                              anno_path,
                              transform,
                              mask_transform,
                              batch_size=batch_size,
                              num_workers=num_workers)
    # model load
    handseg = HandSegNet()
    handseg.to(device)

    optimizer = optim.Adam(handseg.parameters(), 0.00001)

    # BUG FIX: the loss class must be instantiated before .to(device);
    # the original `nn.CrossEntropyLoss.to(device)` raises a TypeError.
    loss = nn.CrossEntropyLoss().to(device)

    for i, (image, mask) in enumerate(train_loader):

        image = image.to(device)
        mask = mask.to(device)

        # hand segmentation forward pass; only the final output is needed here
        _, _, _, _, hand_seg = handseg(image)  # hand_seg output

        total_loss = loss(hand_seg, mask)

        # BUG FIX: gradients were never computed (no backward()) and
        # zero_grad() ran after step(); use the canonical update order.
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss:.4f} \t'.format(epoch, i, len(train_loader),
                                              loss=total_loss.item()))
Example #2
0
def main(args):
    """Train the 3D hand-pose head (HandPose) on top of the pretrained
    HandSegNet / PoseNet pipeline.

    Only `hand3d`'s parameters are optimized; the segmentation and 2D
    keypoint networks act as fixed feature extractors (their weights are
    loaded from `args.model_path` when `args.pretrained` is set).

    Relies on `os`, `torch`, `transforms`, `get_loader`, `HandSegNet`,
    `PoseNet`, `HandPose`, `optim`, `nn`, and a module-level `device`.
    """
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # data augmentation / normalization for the RGB input
    transform = transforms.Compose([
        transforms.RandomResizedCrop((args.crop_size, args.crop_size)),
        transforms.ColorJitter(hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    mask_transform = transforms.Compose([transforms.ToTensor()])

    train_loader = get_loader(args.image_path,
                              args.mask_path,
                              args.anno_path,
                              transform,
                              mask_transform,
                              batch_size=args.batch_size,
                              num_workers=args.num_works,  # NOTE(review): arg is spelled 'num_works' upstream
                              shuffle=True)
    # model load
    handseg = HandSegNet()
    posenet = PoseNet()
    hand3d = HandPose()

    handseg.to(device)
    posenet.to(device)
    hand3d.to(device)

    if args.pretrained:
        print("====HandsegNet, PoseNet model load====")
        handseg.load_state_dict(
            torch.load(os.path.join(args.model_path, 'HandSegnet.pth.tar')))
        posenet.load_state_dict(
            torch.load(os.path.join(args.model_path, 'PoseNet.pth.tar')))

    if args.resume:
        print("====3D Hand Pose model load====")
        # BUG FIX: `model_path` was undefined here (NameError on resume);
        # the checkpoint directory is args.model_path.
        hand3d.load_state_dict(
            torch.load(os.path.join(args.model_path, '3DhandposeNet.pth.tar')))

    # only the 3D pose head is trained
    optimizer = optim.Adam(hand3d.parameters(), 0.0001)

    loss = nn.MSELoss().to(device)
    for epoch in range(args.epochs):

        for i, (image, hand_sides, keypoint_gt,
                rot_mat_gt) in enumerate(train_loader):

            image = image.to(device)
            hand_sides = hand_sides.to(device)
            keypoint_gt = keypoint_gt.to(device)
            rot_mat_gt = rot_mat_gt.to(device)

            # hand segmentation + crop around the detected hand
            image_crop, scale_crop, center, hand_mask, hand_seg = handseg(
                image)  #hand_seg output

            # detect keypoints in 2D
            keypoint_scoremap = posenet(image_crop)

            # estimate 3d pose
            keypoint_coord3d, rot_matrix, _ = hand3d(keypoint_scoremap,
                                                     hand_sides)  # (b, 21, 3)

            # joint loss on 3D keypoints and the predicted rotation matrix
            total_loss = loss(keypoint_coord3d, keypoint_gt) + loss(
                rot_matrix, rot_mat_gt)

            # BUG FIX: gradients were never computed (no backward()) and
            # zero_grad() ran after step(); use the canonical update order.
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            if i % args.print_freq == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Loss {loss:.4f} \t'.format(epoch,
                                                  i,
                                                  len(train_loader),
                                                  loss=total_loss.item()))
Example #3
0
def main():
    """Evaluate a semantic-segmentation model on a validation split.

    Loads a checkpoint, runs the model over the val loader, accumulates
    pixel metrics via `runningScore`, prints per-class IoU, and saves a
    3x3 tile of qualitative results to 'viz_evaluate.png'.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model', type=str, default='deeplab-largefov')
    parser.add_argument(
        '--model_file',
        type=str,
        default=
        '/home/ecust/lx/Semantic-Segmentation-PyTorch/logs/deeplab-largefov_20190417_230357/model_best.pth.tar',
        help='Model path')
    parser.add_argument('--dataset_type',
                        type=str,
                        default='voc',
                        help='type of dataset')
    parser.add_argument(
        '--dataset',
        type=str,
        default='/home/ecust/Datasets/PASCAL VOC/VOCdevkit/VOC2012',
        help='path to dataset')
    parser.add_argument('--img_size',
                        type=tuple,
                        default=None,
                        help='resize images using bilinear interpolation')
    parser.add_argument('--crop_size',
                        type=tuple,
                        default=None,
                        help='crop images')
    parser.add_argument('--n_classes',
                        type=int,
                        default=21,
                        help='number of classes')
    parser.add_argument('--pretrained',
                        type=bool,
                        default=True,
                        help='should be set the same as train.py')
    args = parser.parse_args()

    model_file = args.model_file
    root = args.dataset
    n_classes = args.n_classes

    # no cropping at evaluation time (random crop left commented for reference)
    crop = None
    # crop = Compose([RandomCrop(args.crop_size)])
    loader = get_loader(args.dataset_type)
    # batch_size=1 so untransform/visualization can handle images one by one
    val_loader = DataLoader(loader(root,
                                   n_classes=n_classes,
                                   split='val',
                                   img_size=args.img_size,
                                   augmentations=crop,
                                   pretrained=args.pretrained),
                            batch_size=1,
                            shuffle=False,
                            num_workers=4)

    model, _, _ = Models.model_loader(args.model, n_classes, resume=None)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    print('==> Loading {} model file: {}'.format(model.__class__.__name__,
                                                 model_file))

    model_data = torch.load(model_file)

    # checkpoint may be either a bare state_dict or a dict wrapping one
    try:
        model.load_state_dict(model_data)
    except Exception:
        model.load_state_dict(model_data['model_state_dict'])
    model.eval()

    print('==> Evaluating with {} dataset'.format(args.dataset_type))
    visualizations = []
    metrics = runningScore(n_classes)

    for data, target in tqdm.tqdm(val_loader,
                                  total=len(val_loader),
                                  ncols=80,
                                  leave=False):
        data, target = data.to(device), target.to(device)
        score = model(data)

        imgs = data.data.cpu()
        # argmax over the class channel -> per-pixel predicted labels
        lbl_pred = score.data.max(1)[1].cpu().numpy()
        lbl_true = target.data.cpu()
        for img, lt, lp in zip(imgs, lbl_true, lbl_pred):
            # undo normalization/resizing so the image is viewable
            img, lt = val_loader.dataset.untransform(img, lt)
            metrics.update(lt, lp)
            # keep at most 9 samples for the 3x3 result tile
            if len(visualizations) < 9:
                viz = visualize_segmentation(lbl_pred=lp,
                                             lbl_true=lt,
                                             img=img,
                                             n_classes=n_classes,
                                             dataloader=val_loader)
                visualizations.append(viz)
    acc, acc_cls, mean_iu, fwavacc, cls_iu = metrics.get_scores()
    print('''
Accuracy:       {0:.2f}
Accuracy Class: {1:.2f}
Mean IoU:       {2:.2f}
FWAV Accuracy:  {3:.2f}'''.format(acc * 100, acc_cls * 100, mean_iu *
                                  100, fwavacc * 100) + '\n')

    # per-class IoU table, padded to a 20-char name column
    class_name = val_loader.dataset.class_names
    if class_name is not None:
        for index, value in enumerate(cls_iu.values()):
            offset = 20 - len(class_name[index])
            print(class_name[index] + ' ' * offset + f'{value * 100:>.2f}')
    else:
        print("\nyou don't specify class_names, use number instead")
        for key, value in cls_iu.items():
            print(key, f'{value * 100:>.2f}')

    viz = get_tile_image(visualizations)
    # img = Image.fromarray(viz)
    # img.save('viz_evaluate.png')
    # NOTE(review): scipy.misc.imsave was removed in SciPy >= 1.2 — this
    # only runs on an older SciPy; confirm the pinned version or switch
    # to the commented PIL path above.
    scipy.misc.imsave('viz_evaluate.png', viz)
def main():
    """Visualize feature maps of a multimodal (RGB + IR) segmentation model.

    Loads a checkpoint, then for the FIRST validation batch only (note the
    `break`) logs to TensorBoard: the input image grid, the model graph,
    parameter histograms, and per-layer feature maps for layers whose name
    contains 'feature', 'fc', or 'score_fr'.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model', type=str, default='fcn8s')
    parser.add_argument(
        '--model_file',
        type=str,
        default=
        '/home/ecust/lx/Multimodal/logs/fcn8s_VS_B_0.001/model_best.pth.tar',
        help='Model path')
    parser.add_argument('--dataset_type',
                        type=str,
                        default='b',
                        help='type of dataset')
    parser.add_argument('--dataset',
                        type=str,
                        default='/home/ecust/Datasets/数据库B(541)',
                        help='path to dataset')
    parser.add_argument('--img_size',
                        type=tuple,
                        default=(320, 416),
                        help='resize images using bilinear interpolation')
    parser.add_argument('--crop_size',
                        type=tuple,
                        default=None,
                        help='crop images')
    parser.add_argument('--n_classes',
                        type=int,
                        default=13,
                        help='number of classes')
    parser.add_argument('--pretrained',
                        type=bool,
                        default=True,
                        help='should be set the same as train.py')
    args = parser.parse_args()

    model_file = args.model_file
    root = args.dataset
    n_classes = args.n_classes
    writer = SummaryWriter()

    # no cropping at evaluation time
    crop = None
    # crop = Compose([RandomCrop(args.crop_size)])
    loader = get_loader(args.dataset_type)
    val_loader = DataLoader(loader(root,
                                   n_classes=n_classes,
                                   split='val',
                                   img_size=args.img_size,
                                   augmentations=crop,
                                   pretrained=args.pretrained),
                            batch_size=1,
                            shuffle=False,
                            num_workers=4)

    model, _, _ = Models.model_loader(args.model, n_classes, resume=None)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    print('==> Loading {} model file: {}'.format(model.__class__.__name__,
                                                 model_file))

    model_data = torch.load(model_file)

    # checkpoint may be either a bare state_dict or a dict wrapping one
    try:
        model.load_state_dict(model_data)
    except Exception:
        model.load_state_dict(model_data['model_state_dict'])
    model.eval()

    print('==> Evaluating with {} dataset'.format(args.dataset_type))

    for rgb, ir, target in tqdm.tqdm(val_loader,
                                     total=len(val_loader),
                                     ncols=80,
                                     leave=False):
        rgb, ir, target = rgb.to(device), ir.to(device), target.to(device)
        # x carries the activations as we manually step through the layers below
        x = rgb

        grid = torchvision.utils.make_grid(x, normalize=True)
        writer.add_image('images', grid, 0)
        # NOTE(review): `(ir)` is just `ir`, not a tuple — presumably intended
        # as the single example input for graph tracing; confirm the model's
        # forward accepts the IR tensor alone.
        writer.add_graph(model, (ir))
        # score = model(rgb, ir)
        # score = model(ir)
        for i, (name, param) in enumerate(model.named_parameters()):
            writer.add_histogram(name, param, 0)

        # manually forward x through selected top-level submodules and log
        # each intermediate activation as an image grid
        for name, layer in model._modules.items():

            # if 'ir' in name and 'feature' in name:
            if 'feature' in name or 'fc' in name or 'score_fr' in name:
                x = layer(x)

                # move channels to the batch dim so each channel is one tile
                x1 = x.transpose(0, 1)
                img_grid = torchvision.utils.make_grid(
                    x1, normalize=True, scale_each=True)  # normalize each map to [0, 1]
                writer.add_image(f'{name}_feature_maps',
                                 img_grid,
                                 global_step=0)
        # only the first batch is visualized
        break
def main():
    """Evaluate a multimodal (RGB + IR) segmentation model on the val split.

    Loads a checkpoint, runs the model over the val loader, accumulates
    pixel metrics via `runningScore`, prints per-class IoU, and saves a
    tile of every-5th qualitative result to 'viz_evaluate.png'.
    """
    # parser = argparse.ArgumentParser(
    #     formatter_class=argparse.ArgumentDefaultsHelpFormatter
    # )
    # parser.add_argument('--model', type=str, default='multi-gnn1')
    # parser.add_argument('--model_file', type=str, default='/home/ecust/lx/Multimodal/logs/multi-gnn1_FS/model_best.pth.tar',help='Model path')
    # parser.add_argument('--dataset_type', type=str, default='b',help='type of dataset')
    # parser.add_argument('--dataset', type=str, default='/home/ecust/Datasets/数据库B(541)',help='path to dataset')
    # parser.add_argument('--base_size', type=tuple, default=(300, 300), help='resize images using bilinear interpolation')
    # parser.add_argument('--crop_size', type=tuple, default=None, help='crop images')
    # parser.add_argument('--n_classes', type=int, default=13, help='number of classes')
    # parser.add_argument('--pretrained', type=bool, default=True, help='should be set the same as train.py')
    # args = parser.parse_args()
    args = argparser()

    # checkpoint path is hard-coded here (overrides anything in args)
    model_file = '/home/ecust/lx/Multimodal/logs/resnet_20190916_093026/model_best.pth.tar'
    root = args.dataset_root

    # no cropping at evaluation time
    crop=None
    # crop = Compose([RandomCrop(args.crop_size)])
    loader = get_loader(args.dataset)
    val_loader = DataLoader(
        loader(root, split='val', base_size=args.base_size, augmentations=crop),
        batch_size=1, shuffle=False, num_workers=4)
    # class count comes from the dataset class, not the CLI
    args.n_classes = loader.NUM_CLASS

    model = Models.model_loader(args.model, args.n_classes,
                                backbone=args.backbone, norm_layer=nn.BatchNorm2d,
                                multi_grid=args.multi_grid,
                                multi_dilation=args.multi_dilation)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    print('==> Loading {} model file: {}'.format(model.__class__.__name__, model_file))

    model_data = torch.load(model_file)

    # checkpoint may be either a bare state_dict or a dict wrapping one
    try:
        model.load_state_dict(model_data)
    except Exception:
        model.load_state_dict(model_data['model_state_dict'])
    model.eval()

    print('==> Evaluating with {} dataset'.format(args.dataset))
    visualizations = []
    metrics = runningScore(args.n_classes)

    i = 0
    for rgb, ir, target in tqdm.tqdm(val_loader, total=len(val_loader), ncols=80, leave=False):
        rgb, ir, target = rgb.to(device), ir.to(device), target.to(device)
        score = model(rgb, ir)
        # score = model(ir)

        rgbs = rgb.data.cpu()
        irs = ir.data.cpu()
        # model appears to return a tuple; score[0] holds the class scores —
        # argmax over the class channel gives per-pixel predictions
        lbl_pred = score[0].data.max(1)[1].cpu().numpy()
        lbl_true = target.data.cpu()
        for rgb, ir, lt, lp in zip(rgbs, irs, lbl_true, lbl_pred):
            # undo normalization/resizing so images are viewable
            rgb, ir, lt = val_loader.dataset.untransform(rgb, ir, lt)
            metrics.update(lt, lp)

            # sample every 5th image, up to 9 tiles, for the result mosaic
            i += 1
            if i % 5 == 0:
                if len(visualizations) < 9:
                    viz = visualize_segmentation(
                        lbl_pred=lp, lbl_true=lt, img=rgb, ir=ir,
                        n_classes=args.n_classes, dataloader=val_loader)
                    visualizations.append(viz)

    acc, acc_cls, mean_iu, fwavacc, cls_iu = metrics.get_scores()
    print('''
Accuracy:       {0:.2f}
Accuracy Class: {1:.2f}
Mean IoU:       {2:.2f}
FWAV Accuracy:  {3:.2f}'''.format(acc * 100,
                                  acc_cls * 100,
                                  mean_iu * 100,
                                  fwavacc * 100) + '\n')

    # per-class IoU table, padded to a 20-char name column
    class_name = val_loader.dataset.class_names
    if class_name is not None:
        for index, value in enumerate(cls_iu.values()):
            offset = 20 - len(class_name[index])
            print(class_name[index] + ' ' * offset + f'{value * 100:>.2f}')
    else:
        print("\nyou don't specify class_names, use number instead")
        for key, value in cls_iu.items():
            print(key, f'{value * 100:>.2f}')

    viz = get_tile_image(visualizations)
    # img = Image.fromarray(viz)
    # img.save('viz_evaluate.png')
    # NOTE(review): scipy.misc.imsave was removed in SciPy >= 1.2 — this
    # only runs on an older SciPy; confirm the pinned version or switch
    # to the commented PIL path above.
    scipy.misc.imsave('viz_evaluate.png', viz)
def main():
    """Seeded training entry point driven by `argparser()` and `Trainer`.

    Fixes all RNG seeds for reproducibility, builds train/val loaders,
    constructs the model and SGD optimizer (optionally resuming both from
    a checkpoint), and hands off to `Trainer.train()`. The run config is
    dumped to '<out>/config.yaml'.
    """
    # reproducibility: fix every RNG and force deterministic cuDNN kernels
    random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)
    np.random.seed(1)
    torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    torch.cuda.empty_cache()

    args = argparser()

    # timestamped output directory: logs/<model>_<YYYYmmdd_HHMMSS>
    now = datetime.datetime.now()
    args.out = osp.join(here, 'logs', args.model + '_' + now.strftime('%Y%m%d_%H%M%S'))

    if not osp.exists(args.out):
        os.makedirs(args.out)
    # persist the full run configuration next to the logs
    with open(osp.join(args.out, 'config.yaml'), 'w') as f:
        yaml.safe_dump(args.__dict__, f, default_flow_style=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f'Start training {args.model} using {device.type}\n')

    # 1. dataset

    root = args.dataset_root
    loader = get_loader(args.dataset)

    augmentations = get_augmentations(args)

    train_loader = DataLoader(
        loader(root, split='train', base_size=args.base_size, augmentations=augmentations),
        batch_size=args.batch_size, shuffle=True, num_workers=args.workers)
    # validation uses batch_size=1 and no augmentation
    val_loader = DataLoader(
        loader(root, split='val', base_size=args.base_size),
        batch_size=1, shuffle=False, num_workers=args.workers)
    # class count comes from the dataset class, not the CLI
    args.n_classes = loader.NUM_CLASS

    # 2. model
    model = model_loader(args.model, args.n_classes,
                         backbone=args.backbone, norm_layer=nn.BatchNorm2d,
                         multi_grid=args.multi_grid,
                         multi_dilation=args.multi_dilation)
    model = model.to(device)
    print(model)
    start_epoch = 1
    if args.resume:
        # resume model weights and the epoch counter from the checkpoint
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        start_epoch = checkpoint['epoch']
    else:
        checkpoint = None

    # 3. optimizer
    optim = torch.optim.SGD(
        model.parameters(),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )
    # optim = torch.optim.SGD(
    #     [{'params': model.get_parameters(key='1x'), 'lr': args.lr},
    #      {'params': model.get_parameters(key='10x'), 'lr': args.lr * 10}],
    #      momentum=args.momentum,
    #      weight_decay=args.weight_decay
    # )
    if args.resume:
        # also restore optimizer state (momentum buffers etc.)
        optim.load_state_dict(checkpoint['optim_state_dict'])

    scheduler = get_scheduler(optim, args)

    # 4. train
    trainer = Trainer(
        device=device,
        model=model,
        optimizer=optim,
        scheduler=scheduler,
        train_loader=train_loader,
        val_loader=val_loader,
        out=args.out,
        epochs=args.epochs,
        n_classes=args.n_classes,
        val_epoch=args.val_epoch,
    )
    trainer.epoch = start_epoch
    trainer.train()
def main():
    """Argparse-driven training entry point for semantic segmentation.

    Parses all hyper-parameters from the CLI, builds VOC train/val loaders,
    constructs model/optimizer/scheduler (optionally resuming from a
    checkpoint), and hands off to `Trainer.train()`. The run config is
    dumped to '<out>/config.yaml'.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
    parser.add_argument('--model',
                        type=str,
                        default='deeplab-largefov',
                        help='model to train for')
    parser.add_argument('--epochs', type=int, default=50, help='total epochs')
    parser.add_argument('--val_epoch',
                        type=int,
                        default=10,
                        help='validation interval')
    parser.add_argument('--batch_size',
                        type=int,
                        default=16,
                        help='number of batch size')
    parser.add_argument('--img_size',
                        type=tuple,
                        default=None,
                        help='resize images to proper size')
    parser.add_argument('--dataset_type',
                        type=str,
                        default='voc',
                        help='choose which dataset to use')
    parser.add_argument('--dataset_root',
                        type=str,
                        default='/home/ecust/Datasets/PASCAL VOC/VOC_Aug',
                        help='path to dataset')
    parser.add_argument('--n_classes',
                        type=int,
                        default=21,
                        help='number of classes')
    parser.add_argument('--resume', default=None, help='path to checkpoint')
    parser.add_argument('--optim', type=str, default='sgd', help='optimizer')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='learning rate')
    parser.add_argument('--lr_policy',
                        type=str,
                        default='poly',
                        help='learning rate policy')
    parser.add_argument('--weight-decay',
                        type=float,
                        default=0.0005,
                        help='weight decay')
    parser.add_argument('--beta1',
                        type=float,
                        default=0.9,
                        help='momentum for sgd, beta1 for adam')
    parser.add_argument('--lr_decay_step',
                        type=float,
                        default=10,
                        help='step size for step learning policy')
    parser.add_argument('--lr_power',
                        type=int,
                        default=0.9,
                        help='power parameter for poly learning policy')
    parser.add_argument('--pretrained',
                        type=bool,
                        default=True,
                        help='whether to use pretrained models')
    parser.add_argument('--iter_size',
                        type=int,
                        default=10,
                        help='iters to accumulate gradients')

    # augmentation options consumed by get_augmentations(args)
    parser.add_argument('--crop_size',
                        type=tuple,
                        default=(321, 321),
                        help='crop sizes of images')
    parser.add_argument('--flip',
                        type=bool,
                        default=True,
                        help='whether to use horizontal flip')

    args = parser.parse_args()

    # timestamped output directory: logs/<model>_<YYYYmmdd_HHMMSS>
    now = datetime.datetime.now()
    args.out = osp.join(here, 'logs',
                        args.model + '_' + now.strftime('%Y%m%d_%H%M%S'))

    if not osp.exists(args.out):
        os.makedirs(args.out)
    # persist the full run configuration next to the logs
    with open(osp.join(args.out, 'config.yaml'), 'w') as f:
        yaml.safe_dump(args.__dict__, f, default_flow_style=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f'Start training {args.model} using {device.type}\n')

    # fix RNG seeds for reproducibility
    random.seed(1337)
    torch.manual_seed(1337)
    torch.cuda.manual_seed(1337)

    # 1. dataset

    root = args.dataset_root
    loader = get_loader(args.dataset_type)

    augmentations = get_augmentations(args)

    train_loader = DataLoader(loader(root,
                                     n_classes=args.n_classes,
                                     split='train_aug',
                                     img_size=args.img_size,
                                     augmentations=augmentations,
                                     pretrained=args.pretrained),
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=4)
    # validation uses batch_size=1 and no augmentation
    val_loader = DataLoader(loader(root,
                                   n_classes=args.n_classes,
                                   split='val_id',
                                   img_size=args.img_size,
                                   pretrained=args.pretrained),
                            batch_size=1,
                            shuffle=False,
                            num_workers=4)

    # 2. model (model_loader also returns resume epoch + checkpoint dict)
    model, start_epoch, ckpt = model_loader(args.model, args.n_classes,
                                            args.resume)
    model = model.to(device)

    # 3. optimizer
    optim = get_optimizer(args, model)
    if args.resume:
        # restore optimizer state (momentum buffers etc.)
        optim.load_state_dict(ckpt['optim_state_dict'])

    scheduler = get_scheduler(optim, args)

    # 4. train
    trainer = Trainer(device=device,
                      model=model,
                      optimizer=optim,
                      scheduler=scheduler,
                      train_loader=train_loader,
                      val_loader=val_loader,
                      out=args.out,
                      epochs=args.epochs,
                      n_classes=args.n_classes,
                      val_epoch=args.val_epoch,
                      iter_size=args.iter_size)
    trainer.epoch = start_epoch
    trainer.train()