Example #1
import torch
from torch.utils.data import DataLoader, Subset

# PennFudanDataset, get_transform, get_model_instance_segmentation,
# train_one_epoch and tools come from the surrounding project.
def train(data_root):

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # background and person
    num_classes = 2
    dataset = PennFudanDataset(data_root, get_transform(train=True))
    dataset_test = PennFudanDataset(data_root, get_transform(train=False))

    # split the dataset
    indices = torch.randperm(len(dataset)).tolist()
    dataset = Subset(dataset, indices[:-50])
    dataset_test = Subset(dataset_test, indices[-50:])

    # define data loaders
    data_loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=4,
                             collate_fn=tools.collate_fn)
    data_loader_test = DataLoader(dataset_test, batch_size=1, shuffle=False, num_workers=4,
                                  collate_fn=tools.collate_fn)

    # get model
    model = get_model_instance_segmentation(num_classes)
    model.to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3)

    num_epochs = 10
    for epoch in range(num_epochs):
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        lr_scheduler.step()
        # evaluate(model,data_loader_test,device=device)
    torch.save(model.state_dict(), "masknet.pth")
    print("OK!")
Example #2
def compute_unary_term(heatmap, grid, bbox2D, cam, imgSize):
    """
    Args:
        heatmap: tensor of size (n * k * h * w)
                -n: views,      -k: joints
                -h: height,     -w: width

        grid: list of k ndarrays of size (nbins * 3)
                -k: joints; length 1 when the grid is shared in PSM
                -nbins: bins in the grid

        bbox2D: bounding box on which the heatmap is computed

        cam: camera parameters for each of the n views

        imgSize: size of the network input image the heatmaps correspond to

    Returns:
        unary_of_all_joints: a list of ndarray of size nbins
    """
    device = heatmap.device
    share_grid = len(grid) == 1

    n, k = heatmap.shape[0], heatmap.shape[1]
    h, w = heatmap.shape[2], heatmap.shape[3]

    all_unary = {}
    for v in range(n):
        center = bbox2D[v]['center']
        scale = bbox2D[v]['scale']
        trans = torch.as_tensor(get_transform(center, scale, 0, imgSize),
                                dtype=torch.float,
                                device=device)

        for j in range(k):
            grid_id = 0 if len(grid) == 1 else j
            nbins = grid[grid_id].shape[0]

            if (share_grid and j == 0) or not share_grid:
                xy = cameras.project_pose(grid[grid_id], cam[v])
                xy = do_transform(xy, trans) * torch.tensor(
                    [w, h], dtype=torch.float, device=device) / torch.tensor(
                        imgSize, dtype=torch.float, device=device)

                # normalize (x, y) pixel coordinates to [-1, 1] for grid_sample:
                # x is divided by (w - 1) and y by (h - 1)
                sample_grid = xy / torch.tensor([w - 1, h - 1],
                                                dtype=torch.float,
                                                device=device) * 2.0 - 1.0
                sample_grid = sample_grid.view(1, 1, nbins, 2)

            # align_corners=True matches the (size - 1) normalization above;
            # omit it on PyTorch < 1.3.0
            unary_per_view_joint = F.grid_sample(
                heatmap[v:v + 1, j:j + 1, :, :], sample_grid,
                align_corners=True)

            if j in all_unary:
                all_unary[j] += unary_per_view_joint
            else:
                all_unary[j] = unary_per_view_joint

    all_unary_list = []
    for j in range(k):
        all_unary_list.append(all_unary[j].view(1, -1))
    return all_unary_list
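
The key step above is mapping projected (x, y) pixel coordinates into the [-1, 1] range that F.grid_sample expects. A self-contained sketch of that normalization, using a toy heatmap and no project-specific code:

import torch
import torch.nn.functional as F

h, w = 4, 6
heatmap = torch.arange(h * w, dtype=torch.float).view(1, 1, h, w)

# Two pixel locations: the top-left and bottom-right corners of the heatmap.
xy = torch.tensor([[0.0, 0.0], [w - 1.0, h - 1.0]])

# Normalize x by (w - 1) and y by (h - 1), then rescale to [-1, 1].
sample_grid = (xy / torch.tensor([w - 1.0, h - 1.0]) * 2.0 - 1.0).view(1, 1, -1, 2)

values = F.grid_sample(heatmap, sample_grid, align_corners=True)
print(values.view(-1))  # tensor([ 0., 23.]) -- the two corner values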
Example #3
    save_dir='./car_SE_trainset_prediction/',
    dataset={
        'name': 'mots_cars_val',
        'kwargs': {
            'root_dir': kittiRoot,
            'mode': 'train',
            #'mode': 'val',
            # 'size': 1000,
            'transform': my_transforms.get_transform([
                {
                    'name': 'LU_Pad',
                    'opts': {
                        'keys': ('mot_image', 'mot_instance', 'mot_label'),
                        'size': (384, 1248),
                    }
                },
                {
                    'name': 'ToTensor',
                    'opts': {
                        'keys': ('mot_image', 'mot_instance', 'mot_label'),
                        'type': (torch.FloatTensor, torch.LongTensor, torch.ByteTensor),
                    }
                },
            ]),
        },
        'batch_size': 1,
        'workers': 32
    },

    max_disparity=192.0,
    with_uv=True
)
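
In these configuration snippets, my_transforms.get_transform receives a list of {'name': ..., 'opts': ...} dicts. A factory of that shape is typically implemented along the following lines; this is a sketch under the assumption that each 'name' refers to a transform class (e.g. LU_Pad, ToTensor, RandomCrop) defined in the same module:

from torchvision.transforms import Compose

def get_transform(transforms):
    # Look up each transform class by name, instantiate it with its options,
    # and chain the instances into a single callable.
    transform_list = []
    for tr in transforms:
        name = tr['name']
        opts = tr.get('opts', {})
        transform_list.append(globals()[name](**opts))
    return Compose(transform_list)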
Example #4
         'root_dir': CITYSCAPES_DIR,
         'type': 'crops',
         'size': 3000,
         'transform': my_transforms.get_transform([
             {
                 'name': 'RandomCrop',
                 'opts': {
                     'keys': ('image', 'instance', 'label'),
                     'size': (512, 512),
                 }
             },
             {
                 'name': 'ToTensor',
                 'opts': {
                     'keys': ('image', 'instance', 'label'),
                     'type': (torch.FloatTensor, torch.ByteTensor,
                              torch.ByteTensor),
                 }
             },
         ]),
     },
     'batch_size': 8,
     'workers': 8
 },
 val_dataset={
     'name': 'cityscapes',
     'kwargs': {
Example #5
 train_dataset = {
     'name': 'mots_cars',
     'kwargs': {
         'root_dir': kittiRoot,
         'type': 'crop',
         'size': 7000,
         'transform': my_transforms.get_transform([
             {
                 'name': 'AdjustBrightness',
                 'opts': {}
             },
             {
                 'name': 'ToTensor',
                 'opts': {
                     'keys': ('image', 'instance', 'label'),
                     'type': (torch.FloatTensor, torch.LongTensor, torch.ByteTensor),
                 }
             },
             {
                 'name': 'Flip',
                 'opts': {
                     'keys': ('image', 'instance', 'label'),
                 }
             },
         ]),
     },
     'batch_size': 4,
     'workers': 1,
     # 'batch_size': 64,
     # 'workers': 32
 },
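
A dict like train_dataset above is normally turned into a DataLoader by instantiating the named dataset with its kwargs. A hedged sketch of that wiring; the get_dataset helper is a hypothetical stand-in for the project's dataset factory:

from torch.utils.data import DataLoader

def make_loader(cfg, get_dataset, shuffle=True):
    # cfg is a dict of the shape shown above; get_dataset maps a dataset
    # name plus kwargs to a torch Dataset instance (hypothetical helper).
    dataset = get_dataset(cfg['name'], **cfg['kwargs'])
    return DataLoader(dataset,
                      batch_size=cfg['batch_size'],
                      num_workers=cfg['workers'],
                      shuffle=shuffle)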
Example #6
    def get_voxel(self, heatmaps, meta, grid_size, grid_center, cube_size):
        device = heatmaps[0].device
        batch_size = heatmaps[0].shape[0]
        num_joints = heatmaps[0].shape[1]
        nbins = cube_size[0] * cube_size[1] * cube_size[2]
        n = len(heatmaps)
        cubes = torch.zeros(batch_size, num_joints, 1, nbins, n, device=device)
        # h, w = heatmaps[0].shape[2], heatmaps[0].shape[3]
        w, h = self.heatmap_size
        grids = torch.zeros(batch_size, nbins, 3, device=device)
        bounding = torch.zeros(batch_size, 1, 1, nbins, n, device=device)
        for i in range(batch_size):
            if len(grid_center[0]) == 3 or grid_center[i][3] >= 0:
                # This part of the code can be optimized because the projection operation is time-consuming.
                # If the camera locations stay the same across frames, the grids and sample_grids are
                # identical for every frame and only need to be computed once.
                if len(grid_center) == 1:
                    grid = self.compute_grid(grid_size,
                                             grid_center[0],
                                             cube_size,
                                             device=device)
                else:
                    grid = self.compute_grid(grid_size,
                                             grid_center[i],
                                             cube_size,
                                             device=device)
                grids[i:i + 1] = grid
                for c in range(n):
                    center = meta[c]['center'][i]
                    scale = meta[c]['scale'][i]

                    width, height = center * 2
                    trans = torch.as_tensor(get_transform(
                        center, scale, 0, self.img_size),
                                            dtype=torch.float,
                                            device=device)
                    cam = {}
                    for k, v in meta[c]['camera'].items():
                        cam[k] = v[i]
                    xy = cameras.project_pose(grid, cam)

                    bounding[i, 0, 0, :,
                             c] = (xy[:, 0] >= 0) & (xy[:, 1] >= 0) & (
                                 xy[:, 0] < width) & (xy[:, 1] < height)
                    xy = torch.clamp(xy, -1.0, max(width, height))
                    xy = do_transform(xy, trans)
                    xy = xy * torch.tensor(
                        [w, h], dtype=torch.float,
                        device=device) / torch.tensor(
                            self.img_size, dtype=torch.float, device=device)
                    sample_grid = xy / torch.tensor([w - 1, h - 1],
                                                    dtype=torch.float,
                                                    device=device) * 2.0 - 1.0
                    sample_grid = torch.clamp(sample_grid.view(1, 1, nbins, 2),
                                              -1.1, 1.1)

                    # if pytorch version < 1.3.0, align_corners=True should be omitted.
                    cubes[i:i + 1, :, :, :,
                          c] += F.grid_sample(heatmaps[c][i:i + 1, :, :, :],
                                              sample_grid,
                                              align_corners=True)

        # cubes = cubes.mean(dim=-1)
        cubes = torch.sum(torch.mul(cubes, bounding),
                          dim=-1) / (torch.sum(bounding, dim=-1) + 1e-6)
        cubes[cubes != cubes] = 0.0
        cubes = cubes.clamp(0.0, 1.0)

        cubes = cubes.view(batch_size, num_joints, cube_size[0], cube_size[1],
                           cube_size[2])
        return cubes, grids
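
The line `cubes[cubes != cubes] = 0.0` zeroes out NaN entries, relying on the fact that NaN is the only floating-point value not equal to itself; a quick standalone check:

import torch

x = torch.tensor([0.3, float('nan'), 1.2])
x[x != x] = 0.0   # NaN != NaN, so this zeroes exactly the NaN entries
print(x)          # tensor([0.3000, 0.0000, 1.2000])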
Example #7
                './pretrained_models/erfnet_encoder_pretrained.pth.tar',
            },
            dataset={
                'name': 'cityscapes',
                'kwargs': {
                    'root_dir': CITYSCAPES_DIR,
                    'type': 'val',
                    'transform': my_transforms.get_transform([
                        {
                            'name': 'ToTensor',
                            'opts': {
                                'keys': ('image', 'instance', 'label',
                                         'semantic_label'),
                                'type': (torch.FloatTensor, torch.ByteTensor,
                                         torch.ByteTensor, torch.FloatTensor),
                            }
                        },
                    ]),
                }
            },
            model={
                'name': 'branched_erfnet',
                'kwargs': {
                    'num_classes': [13, 3],
                }
            })