def main():
    """Train HandSegNet on the RHD hand-segmentation dataset (single pass).

    Paths and hyper-parameters are hard-coded below. NOTE(review): `epoch`
    is only used in the log line — there is no outer epoch loop, so the
    data is traversed once; confirm whether multiple epochs were intended.
    """
    # path settings
    image_path = '../RHD_v1-1/RHD_published_v2/training/color/'
    mask_path = '../RHD_v1-1/RHD_published_v2/training/mask/'
    anno_path = '../RHD_v1-1/RHD_published_v2/training/anno_training.pickle'
    model_path = None  # placeholder; no checkpointing is done in this script

    # hyper-parameters
    print_freq = 100
    batch_size = 1
    num_workers = 0
    epoch = 100  # only reported in the log line, see docstring

    # data loading: random crop + color jitter augmentation, normalize to [-1, 1]
    transform = transforms.Compose([
        transforms.RandomResizedCrop((256, 256)),
        transforms.ColorJitter(hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    mask_transform = transforms.Compose([transforms.ToTensor()])
    train_loader = get_loader(image_path, mask_path, anno_path, transform,
                              mask_transform, batch_size=batch_size,
                              num_workers=num_workers)

    # model and optimizer
    handseg = HandSegNet()
    handseg.to(device)
    optimizer = optim.Adam(handseg.parameters(), 0.00001)
    # BUG FIX: original did `nn.CrossEntropyLoss.to(device)`, calling `.to`
    # on the *class* rather than an instance. Instantiate the loss first.
    loss = nn.CrossEntropyLoss().to(device)

    for i, (image, mask) in enumerate(train_loader):
        image = image.to(device)
        mask = mask.to(device)

        # hand segmentation forward pass; last output is the segmentation logits
        _, _, _, _, hand_seg = handseg(image)
        total_loss = loss(hand_seg, mask)

        # BUG FIX: original never called backward(), so gradients were never
        # computed and optimizer.step() was a no-op. Canonical order:
        # zero_grad -> backward -> step.
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss:.4f} \t'.format(epoch, i, len(train_loader),
                                              loss=total_loss.item()))
def main(args):
    """Train the 3D hand-pose network on top of pretrained HandSegNet/PoseNet.

    Pipeline per batch: image -> HandSegNet (crop) -> PoseNet (2D score maps)
    -> HandPose (3D keypoints + rotation matrix); MSE loss against ground
    truth on both outputs. Only `hand3d` parameters are optimized.
    """
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # data loading: augment, normalize to [-1, 1]
    transform = transforms.Compose([
        transforms.RandomResizedCrop((args.crop_size, args.crop_size)),
        transforms.ColorJitter(hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    mask_transform = transforms.Compose([transforms.ToTensor()])
    # NOTE(review): `args.num_works` looks like a typo for `num_workers`, but
    # it must match the attribute produced by the external arg parser — confirm.
    train_loader = get_loader(args.image_path, args.mask_path, args.anno_path,
                              transform, mask_transform,
                              batch_size=args.batch_size,
                              num_workers=args.num_works, shuffle=True)

    # models
    handseg = HandSegNet()
    posenet = PoseNet()
    hand3d = HandPose()
    handseg.to(device)
    posenet.to(device)
    hand3d.to(device)

    if args.pretrained:
        print("====HandsegNet, PoseNet model load====")
        handseg.load_state_dict(
            torch.load(os.path.join(args.model_path, 'HandSegnet.pth.tar')))
        posenet.load_state_dict(
            torch.load(os.path.join(args.model_path, 'PoseNet.pth.tar')))
    if args.resume:
        print("====3D Hand Pose model load====")
        # BUG FIX: original referenced the undefined name `model_path`
        # (NameError at runtime); use args.model_path.
        hand3d.load_state_dict(
            torch.load(os.path.join(args.model_path, '3DhandposeNet.pth.tar')))

    optimizer = optim.Adam(hand3d.parameters(), 0.0001)
    loss = nn.MSELoss().to(device)

    for epoch in range(args.epochs):
        for i, (image, hand_sides, keypoint_gt, rot_mat_gt) in enumerate(train_loader):
            image = image.to(device)
            hand_sides = hand_sides.to(device)
            keypoint_gt = keypoint_gt.to(device)
            rot_mat_gt = rot_mat_gt.to(device)

            # hand segmentation / crop stage
            image_crop, scale_crop, center, hand_mask, hand_seg = handseg(image)
            # 2D keypoint detection on the cropped hand
            keypoint_scoremap = posenet(image_crop)
            # 3D pose lifting: (b, 21, 3) coordinates plus a rotation matrix
            keypoint_coord3d, rot_matrix, _ = hand3d(keypoint_scoremap, hand_sides)

            total_loss = loss(keypoint_coord3d, keypoint_gt) + loss(
                rot_matrix, rot_mat_gt)

            # BUG FIX: original never called backward(), so optimizer.step()
            # updated nothing. Canonical order: zero_grad -> backward -> step.
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            if i % args.print_freq == 0:
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Loss {loss:.4f} \t'.format(epoch, i, len(train_loader),
                                                  loss=total_loss.item()))
def main():
    """Evaluate a semantic-segmentation model on the VOC validation split.

    Loads a checkpoint, runs inference image-by-image, accumulates running
    scores (accuracy / mIoU / FWAV), prints per-class IoU, and saves a tiled
    visualization of up to nine predictions.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model', type=str, default='deeplab-largefov')
    parser.add_argument(
        '--model_file', type=str,
        default='/home/ecust/lx/Semantic-Segmentation-PyTorch/logs/deeplab-largefov_20190417_230357/model_best.pth.tar',
        help='Model path')
    parser.add_argument('--dataset_type', type=str, default='voc',
                        help='type of dataset')
    parser.add_argument(
        '--dataset', type=str,
        default='/home/ecust/Datasets/PASCAL VOC/VOCdevkit/VOC2012',
        help='path to dataset')
    # NOTE(review): argparse `type=tuple` turns a CLI string into a tuple of
    # characters; these two options only behave as intended via their defaults.
    parser.add_argument('--img_size', type=tuple, default=None,
                        help='resize images using bilinear interpolation')
    parser.add_argument('--crop_size', type=tuple, default=None,
                        help='crop images')
    parser.add_argument('--n_classes', type=int, default=21,
                        help='number of classes')
    parser.add_argument('--pretrained', type=bool, default=True,
                        help='should be set the same as train.py')
    args = parser.parse_args()

    model_file = args.model_file
    root = args.dataset
    n_classes = args.n_classes

    crop = None
    # crop = Compose([RandomCrop(args.crop_size)])
    loader = get_loader(args.dataset_type)
    val_loader = DataLoader(loader(root, n_classes=n_classes, split='val',
                                   img_size=args.img_size, augmentations=crop,
                                   pretrained=args.pretrained),
                            batch_size=1, shuffle=False, num_workers=4)

    model, _, _ = Models.model_loader(args.model, n_classes, resume=None)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    print('==> Loading {} model file: {}'.format(model.__class__.__name__,
                                                 model_file))
    model_data = torch.load(model_file)
    try:
        model.load_state_dict(model_data)
    except Exception:
        # some checkpoints wrap the weights under 'model_state_dict'
        model.load_state_dict(model_data['model_state_dict'])
    model.eval()

    print('==> Evaluating with {} dataset'.format(args.dataset_type))
    visualizations = []
    metrics = runningScore(n_classes)
    # BUG FIX: evaluation previously ran with autograd enabled, wasting time
    # and memory (and risking OOM); wrap inference in torch.no_grad().
    with torch.no_grad():
        for data, target in tqdm.tqdm(val_loader, total=len(val_loader),
                                      ncols=80, leave=False):
            data, target = data.to(device), target.to(device)
            score = model(data)

            imgs = data.data.cpu()
            lbl_pred = score.data.max(1)[1].cpu().numpy()
            lbl_true = target.data.cpu()
            for img, lt, lp in zip(imgs, lbl_true, lbl_pred):
                img, lt = val_loader.dataset.untransform(img, lt)
                metrics.update(lt, lp)
                # keep at most nine examples for the tiled visualization
                if len(visualizations) < 9:
                    viz = visualize_segmentation(lbl_pred=lp, lbl_true=lt,
                                                 img=img, n_classes=n_classes,
                                                 dataloader=val_loader)
                    visualizations.append(viz)

    acc, acc_cls, mean_iu, fwavacc, cls_iu = metrics.get_scores()
    print('''
Accuracy:       {0:.2f}
Accuracy Class: {1:.2f}
Mean IoU:       {2:.2f}
FWAV Accuracy:  {3:.2f}'''.format(acc * 100, acc_cls * 100, mean_iu * 100,
                                  fwavacc * 100) + '\n')

    class_name = val_loader.dataset.class_names
    if class_name is not None:
        for index, value in enumerate(cls_iu.values()):
            # pad class names to a 20-character column before the IoU value
            offset = 20 - len(class_name[index])
            print(class_name[index] + ' ' * offset + f'{value * 100:>.2f}')
    else:
        print("\nyou don't specify class_names, use number instead")
        for key, value in cls_iu.items():
            print(key, f'{value * 100:>.2f}')

    viz = get_tile_image(visualizations)
    # img = Image.fromarray(viz)
    # img.save('viz_evaluate.png')
    # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2 — on modern
    # SciPy this call fails; migrate to PIL (commented lines above) or imageio.
    scipy.misc.imsave('viz_evaluate.png', viz)
def main():
    # Inspect a trained multimodal segmentation model with TensorBoard: logs
    # an input-image grid, the model graph, parameter histograms and
    # intermediate feature maps, then stops after the first validation batch.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model', type=str, default='fcn8s')
    parser.add_argument(
        '--model_file', type=str,
        default=
        '/home/ecust/lx/Multimodal/logs/fcn8s_VS_B_0.001/model_best.pth.tar',
        help='Model path')
    parser.add_argument('--dataset_type', type=str, default='b',
                        help='type of dataset')
    parser.add_argument('--dataset', type=str,
                        default='/home/ecust/Datasets/数据库B(541)',
                        help='path to dataset')
    parser.add_argument('--img_size', type=tuple, default=(320, 416),
                        help='resize images using bilinear interpolation')
    parser.add_argument('--crop_size', type=tuple, default=None,
                        help='crop images')
    parser.add_argument('--n_classes', type=int, default=13,
                        help='number of classes')
    parser.add_argument('--pretrained', type=bool, default=True,
                        help='should be set the same as train.py')
    args = parser.parse_args()

    model_file = args.model_file
    root = args.dataset
    n_classes = args.n_classes

    # TensorBoard event writer (default ./runs/<timestamp> directory)
    writer = SummaryWriter()

    crop = None
    # crop = Compose([RandomCrop(args.crop_size)])
    loader = get_loader(args.dataset_type)
    val_loader = DataLoader(loader(root, n_classes=n_classes, split='val',
                                   img_size=args.img_size, augmentations=crop,
                                   pretrained=args.pretrained),
                            batch_size=1, shuffle=False, num_workers=4)

    model, _, _ = Models.model_loader(args.model, n_classes, resume=None)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    print('==> Loading {} model file: {}'.format(model.__class__.__name__,
                                                 model_file))
    model_data = torch.load(model_file)
    try:
        model.load_state_dict(model_data)
    except Exception:
        # some checkpoints wrap the weights under 'model_state_dict'
        model.load_state_dict(model_data['model_state_dict'])
    model.eval()

    print('==> Evaluating with {} dataset'.format(args.dataset_type))
    for rgb, ir, target in tqdm.tqdm(val_loader, total=len(val_loader),
                                     ncols=80, leave=False):
        rgb, ir, target = rgb.to(device), ir.to(device), target.to(device)
        x = rgb
        grid = torchvision.utils.make_grid(x, normalize=True)
        writer.add_image('images', grid, 0)
        # NOTE(review): graph is traced with the IR input only — presumably
        # because the logged model variant takes a single modality; confirm.
        writer.add_graph(model, (ir))
        # score = model(rgb, ir)
        # score = model(ir)
        for i, (name, param) in enumerate(model.named_parameters()):
            writer.add_histogram(name, param, 0)
        for name, layer in model._modules.items():
            # if 'ir' in name and 'feature' in name:
            if 'feature' in name or 'fc' in name or 'score_fr' in name:
                # chain x through the matching sub-modules so each logged grid
                # shows the feature maps produced at that depth
                x = layer(x)
                # (1, C, H, W) -> (C, 1, H, W): one tile per channel
                x1 = x.transpose(0, 1)
                img_grid = torchvision.utils.make_grid(
                    x1, normalize=True, scale_each=True)  # normalize each map for display
                writer.add_image(f'{name}_feature_maps', img_grid,
                                 global_step=0)
        # NOTE(review): placement of this break (end of the outer batch loop,
        # i.e. visualize only the first batch) is assumed from the mangled
        # source — confirm against the original file.
        break
def main():
    """Evaluate a multimodal (RGB + IR) segmentation model on the val split.

    Loads a hard-coded checkpoint, runs inference, accumulates running scores
    (accuracy / mIoU / FWAV), prints per-class IoU, and saves a tiled
    visualization built from every fifth sample (at most nine tiles).
    """
    args = argparser()
    model_file = '/home/ecust/lx/Multimodal/logs/resnet_20190916_093026/model_best.pth.tar'
    root = args.dataset_root

    crop = None
    # crop = Compose([RandomCrop(args.crop_size)])
    loader = get_loader(args.dataset)
    val_loader = DataLoader(
        loader(root, split='val', base_size=args.base_size, augmentations=crop),
        batch_size=1, shuffle=False, num_workers=4)
    args.n_classes = loader.NUM_CLASS

    model = Models.model_loader(args.model, args.n_classes,
                                backbone=args.backbone,
                                norm_layer=nn.BatchNorm2d,
                                multi_grid=args.multi_grid,
                                multi_dilation=args.multi_dilation)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    print('==> Loading {} model file: {}'.format(model.__class__.__name__,
                                                 model_file))
    model_data = torch.load(model_file)
    try:
        model.load_state_dict(model_data)
    except Exception:
        # some checkpoints wrap the weights under 'model_state_dict'
        model.load_state_dict(model_data['model_state_dict'])
    model.eval()

    print('==> Evaluating with {} dataset'.format(args.dataset))
    visualizations = []
    metrics = runningScore(args.n_classes)
    i = 0
    # BUG FIX: inference previously ran with autograd enabled; no_grad saves
    # memory and time during evaluation.
    with torch.no_grad():
        for rgb, ir, target in tqdm.tqdm(val_loader, total=len(val_loader),
                                         ncols=80, leave=False):
            rgb, ir, target = rgb.to(device), ir.to(device), target.to(device)
            score = model(rgb, ir)
            # score = model(ir)

            rgbs = rgb.data.cpu()
            irs = ir.data.cpu()
            # model output is a tuple; score[0] holds the segmentation logits
            lbl_pred = score[0].data.max(1)[1].cpu().numpy()
            lbl_true = target.data.cpu()
            for rgb, ir, lt, lp in zip(rgbs, irs, lbl_true, lbl_pred):
                rgb, ir, lt = val_loader.dataset.untransform(rgb, ir, lt)
                metrics.update(lt, lp)
                i += 1
                # sample every fifth image, keep at most nine visualizations
                if i % 5 == 0:
                    if len(visualizations) < 9:
                        viz = visualize_segmentation(
                            lbl_pred=lp, lbl_true=lt, img=rgb, ir=ir,
                            n_classes=args.n_classes, dataloader=val_loader)
                        visualizations.append(viz)

    acc, acc_cls, mean_iu, fwavacc, cls_iu = metrics.get_scores()
    print('''
Accuracy:       {0:.2f}
Accuracy Class: {1:.2f}
Mean IoU:       {2:.2f}
FWAV Accuracy:  {3:.2f}'''.format(acc * 100, acc_cls * 100, mean_iu * 100,
                                  fwavacc * 100) + '\n')

    class_name = val_loader.dataset.class_names
    if class_name is not None:
        for index, value in enumerate(cls_iu.values()):
            # pad class names to a 20-character column before the IoU value
            offset = 20 - len(class_name[index])
            print(class_name[index] + ' ' * offset + f'{value * 100:>.2f}')
    else:
        print("\nyou don't specify class_names, use number instead")
        for key, value in cls_iu.items():
            print(key, f'{value * 100:>.2f}')

    viz = get_tile_image(visualizations)
    # img = Image.fromarray(viz)
    # img.save('viz_evaluate.png')
    # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2 — on modern
    # SciPy this call fails; migrate to PIL (commented lines above) or imageio.
    scipy.misc.imsave('viz_evaluate.png', viz)
def main():
    """Seed RNGs, build loaders/model/optimizer/scheduler, then run training.

    Configuration comes from the project-level `argparser()`; a timestamped
    output directory is created under logs/ and the config is dumped to YAML.
    """
    # reproducibility: seed every RNG source and make cuDNN deterministic
    random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)
    np.random.seed(1)
    torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False
    torch.cuda.empty_cache()

    args = argparser()

    now = datetime.datetime.now()
    args.out = osp.join(here, 'logs',
                        args.model + '_' + now.strftime('%Y%m%d_%H%M%S'))
    # IDIOM FIX: exist_ok avoids the exists()/makedirs() check-then-act race
    os.makedirs(args.out, exist_ok=True)
    with open(osp.join(args.out, 'config.yaml'), 'w') as f:
        yaml.safe_dump(args.__dict__, f, default_flow_style=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f'Start training {args.model} using {device.type}\n')

    # 1. dataset
    root = args.dataset_root
    loader = get_loader(args.dataset)
    augmentations = get_augmentations(args)
    train_loader = DataLoader(
        loader(root, split='train', base_size=args.base_size,
               augmentations=augmentations),
        batch_size=args.batch_size, shuffle=True, num_workers=args.workers)
    val_loader = DataLoader(
        loader(root, split='val', base_size=args.base_size),
        batch_size=1, shuffle=False, num_workers=args.workers)
    args.n_classes = loader.NUM_CLASS

    # 2. model
    model = model_loader(args.model, args.n_classes, backbone=args.backbone,
                         norm_layer=nn.BatchNorm2d,
                         multi_grid=args.multi_grid,
                         multi_dilation=args.multi_dilation)
    model = model.to(device)
    print(model)

    # optionally resume model weights and the starting epoch from a checkpoint
    start_epoch = 1
    if args.resume:
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['model_state_dict'])
        start_epoch = checkpoint['epoch']
    else:
        checkpoint = None

    # 3. optimizer (plain SGD over all parameters; the commented variant used
    # per-group learning rates for backbone vs. head)
    optim = torch.optim.SGD(
        model.parameters(),
        lr=args.lr,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )
    # optim = torch.optim.SGD(
    #     [{'params': model.get_parameters(key='1x'), 'lr': args.lr},
    #      {'params': model.get_parameters(key='10x'), 'lr': args.lr * 10}],
    #     momentum=args.momentum,
    #     weight_decay=args.weight_decay
    # )
    if args.resume:
        optim.load_state_dict(checkpoint['optim_state_dict'])
    scheduler = get_scheduler(optim, args)

    # 4. train
    trainer = Trainer(
        device=device,
        model=model,
        optimizer=optim,
        scheduler=scheduler,
        train_loader=train_loader,
        val_loader=val_loader,
        out=args.out,
        epochs=args.epochs,
        n_classes=args.n_classes,
        val_epoch=args.val_epoch,
    )
    trainer.epoch = start_epoch
    trainer.train()
def main():
    """Parse CLI options and train a segmentation model (deeplab-largefov).

    Creates a timestamped output directory under logs/, dumps the config to
    YAML, seeds the RNGs, builds loaders/model/optimizer/scheduler, and hands
    off to the project Trainer.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument('--model', type=str, default='deeplab-largefov',
                        help='model to train for')
    parser.add_argument('--epochs', type=int, default=50, help='total epochs')
    parser.add_argument('--val_epoch', type=int, default=10,
                        help='validation interval')
    parser.add_argument('--batch_size', type=int, default=16,
                        help='number of batch size')
    # NOTE(review): argparse `type=tuple` splits a CLI string into characters
    # and `type=bool` is True for any non-empty string; these options only
    # behave as intended through their defaults.
    parser.add_argument('--img_size', type=tuple, default=None,
                        help='resize images to proper size')
    parser.add_argument('--dataset_type', type=str, default='voc',
                        help='choose which dataset to use')
    parser.add_argument('--dataset_root', type=str,
                        default='/home/ecust/Datasets/PASCAL VOC/VOC_Aug',
                        help='path to dataset')
    parser.add_argument('--n_classes', type=int, default=21,
                        help='number of classes')
    parser.add_argument('--resume', default=None, help='path to checkpoint')
    parser.add_argument('--optim', type=str, default='sgd', help='optimizer')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate')
    parser.add_argument('--lr_policy', type=str, default='poly',
                        help='learning rate policy')
    parser.add_argument('--weight-decay', type=float, default=0.0005,
                        help='weight decay')
    parser.add_argument('--beta1', type=float, default=0.9,
                        help='momentum for sgd, beta1 for adam')
    parser.add_argument('--lr_decay_step', type=float, default=10,
                        help='step size for step learning policy')
    # BUG FIX: was `type=int` with fractional default 0.9 — any value passed
    # on the command line raised ValueError (int('0.9')). The poly-policy
    # power is a float.
    parser.add_argument('--lr_power', type=float, default=0.9,
                        help='power parameter for poly learning policy')
    parser.add_argument('--pretrained', type=bool, default=True,
                        help='whether to use pretrained models')
    parser.add_argument('--iter_size', type=int, default=10,
                        help='iters to accumulate gradients')
    parser.add_argument('--crop_size', type=tuple, default=(321, 321),
                        help='crop sizes of images')
    parser.add_argument('--flip', type=bool, default=True,
                        help='whether to use horizontal flip')
    args = parser.parse_args()

    now = datetime.datetime.now()
    args.out = osp.join(here, 'logs',
                        args.model + '_' + now.strftime('%Y%m%d_%H%M%S'))
    # IDIOM FIX: exist_ok avoids the exists()/makedirs() check-then-act race
    os.makedirs(args.out, exist_ok=True)
    with open(osp.join(args.out, 'config.yaml'), 'w') as f:
        yaml.safe_dump(args.__dict__, f, default_flow_style=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f'Start training {args.model} using {device.type}\n')

    # reproducibility seeds
    random.seed(1337)
    torch.manual_seed(1337)
    torch.cuda.manual_seed(1337)

    # 1. dataset
    root = args.dataset_root
    loader = get_loader(args.dataset_type)
    augmentations = get_augmentations(args)
    train_loader = DataLoader(loader(root, n_classes=args.n_classes,
                                     split='train_aug',
                                     img_size=args.img_size,
                                     augmentations=augmentations,
                                     pretrained=args.pretrained),
                              batch_size=args.batch_size, shuffle=True,
                              num_workers=4)
    val_loader = DataLoader(loader(root, n_classes=args.n_classes,
                                   split='val_id', img_size=args.img_size,
                                   pretrained=args.pretrained),
                            batch_size=1, shuffle=False, num_workers=4)

    # 2. model (model_loader also restores start_epoch/checkpoint on resume)
    model, start_epoch, ckpt = model_loader(args.model, args.n_classes,
                                            args.resume)
    model = model.to(device)

    # 3. optimizer
    optim = get_optimizer(args, model)
    if args.resume:
        optim.load_state_dict(ckpt['optim_state_dict'])
    scheduler = get_scheduler(optim, args)

    # 4. train
    trainer = Trainer(device=device,
                      model=model,
                      optimizer=optim,
                      scheduler=scheduler,
                      train_loader=train_loader,
                      val_loader=val_loader,
                      out=args.out,
                      epochs=args.epochs,
                      n_classes=args.n_classes,
                      val_epoch=args.val_epoch,
                      iter_size=args.iter_size)
    trainer.epoch = start_epoch
    trainer.train()