Code example #1
File: refnet.py Project: yuhaonankaka/adl
    def forward(self, data_dict, args):
        """ Forward pass of the network

        Args:
            data_dict: dict
                {
                    point_clouds, 
                    lang_feat
                }

                point_clouds: Variable(torch.cuda.FloatTensor)
                    (B, N, 3 + input_channels) tensor
                    Point cloud to run predictions on
                    Each point in the point cloud MUST
                    be formatted as (x, y, z, features...)
        Returns:
            data_dict: dict
        """

        # =======================================
        # Get 3d <-> 2D Projection Mapping and 2D feature map
        # =======================================
        batch_size = len(data_dict['scan_name'])
        new_features = torch.zeros((batch_size, args.num_points, 32)).cuda()
        for idx, scene_id in enumerate(data_dict['scan_name']):
            intrinsics = get_intrinsics(scene_id, args)
            projection = ProjectionHelper(intrinsics, args.depth_min,
                                          args.depth_max, proj_image_dims)
            features_2d = scannet_projection(
                data_dict['point_clouds'][idx].cpu().numpy(), intrinsics,
                projection, scene_id, args, None, None, self.maskrcnn_model)
            new_features[idx, :] = features_2d[:]
        data_dict['new_features'] = new_features
        pcl_enriched = torch.cat(
            (data_dict['point_clouds'], data_dict['new_features']), dim=2)
        data_dict['point_clouds'] = pcl_enriched

        data_dict = self.backbone_net(data_dict)

        # --------- HOUGH VOTING ---------
        xyz = data_dict["fp2_xyz"]
        features = data_dict["fp2_features"]
        data_dict["seed_inds"] = data_dict["fp2_inds"]
        data_dict["seed_xyz"] = xyz
        data_dict["seed_features"] = features

        xyz, features = self.vgen(xyz, features)
        features_norm = torch.norm(features, p=2, dim=1)
        features = features.div(features_norm.unsqueeze(1))
        data_dict["vote_xyz"] = xyz
        data_dict["vote_features"] = features

        data_dict = self.rfnet(xyz, features, data_dict)

        return data_dict
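A minimal standalone sketch of the vote-feature normalization above: torch.norm with dim=1 takes the L2 norm over the channel dimension, and the division rescales each vote feature to unit length. The shapes here are illustrative, not taken from the model.

import torch

features = torch.randn(2, 256, 1024)         # (batch, channels, num_votes)
norm = torch.norm(features, p=2, dim=1)      # (batch, num_votes)
features = features.div(norm.unsqueeze(1))   # broadcast divide over channels
print(torch.norm(features, p=2, dim=1))      # all ones, up to float error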
Code example #2
File: train.py Project: sophiajw/adl4cv
num_classes = opt.num_classes
model2d_fixed, model2d_trainable, model2d_classifier = create_enet_for_3d(
    ENET_TYPES[opt.model2d_type], opt.model2d_path, num_classes)
model = BeachNet(num_classes,
                 num_images,
                 input_channels,
                 intrinsic,
                 proj_image_dims,
                 opt.depth_min,
                 opt.depth_max,
                 opt.accuracy,
                 fusion=True,
                 fuseAtPosition=2,
                 fuse_no_ft_pn=False,
                 pointnet_pointnet=False)
projection = ProjectionHelper(intrinsic, opt.depth_min, opt.depth_max,
                              proj_image_dims, opt.accuracy)

# create loss
criterion = util.WeightedCrossEntropyLoss()
criterion2d = torch.nn.CrossEntropyLoss().cuda()

# move to gpu
model2d_fixed = model2d_fixed.cuda()
model2d_trainable = model2d_trainable.cuda()
model2d_classifier = model2d_classifier.cuda()
model = model.cuda()
criterion = criterion.cuda()

# initialize optimizer
optimizer = torch.optim.Adam(model.parameters(),
                             lr=opt.lr_pointnet,
Code example #3
def findCorrespondingImages(chunksPath, posesPath, outPath, numImgs=3):
    """
    for each scene chunk, finds the 3 (or more) images with the highest number of points in their frustum
    and stores the result in a .hdf5 file
    :param chunksPath: Path to where the precomputed scene chunks are stored
    :param posesPath: Path to where the precomputed poses are stored
    :param outPath: Path to where scene chunks with corresponding images should be stored
    :param numImgs: number of images to be used (default is 3)
    """
    print ("Finding image correspondences")

    # Initialize Projection
    projection = ProjectionHelper(intrinsic, opt.depth_min, opt.depth_max, proj_image_dims, opt.voxel_size)

    # Find all scene chunks that have been precomputed
    fileList = list()
    for file in os.listdir(chunksPath):
        if file.endswith(".npy"):
            scene = file[:-4]
            fileList.append(scene)
    count = 1

    # Iterate through all scene chunks and compute their corresponding images
    for scene in fileList:
        if(os.path.isfile(os.path.join(outPath, scene + ".hdf5"))):
            print(scene + " was already processed.")
            continue
        poseDict = {}
        count += 1
        if(count % 50 == 0):
            print(count, "/", len(fileList))

        # Find the name of the scene (we need this to find the corresponding camera poses)
        countLiterals = 0
        for i in range(len(scene)):
            if(scene[i]=="0"):
                countLiterals = i
                break
        scene_nr = int(scene[countLiterals:countLiterals+4])
        scene_version = int(scene[countLiterals+5:countLiterals+7])

        # Load data
        data = np.load(os.path.join(chunksPath,  scene + ".npy"))
        scene_points = data[:, :3]
        semantic_labels = data[:, 3]
        npoints = scene_points.shape[0]

        # Find the full scene name (always starts with '0'); reuse the index of
        # the first '0' found above instead of searching for it again
        large_scene = scene[countLiterals:countLiterals+7] # name of whole scene, e.g. 0000_01
        posesPathScene = os.path.join(posesPath, "scene"+large_scene, "pose")

        # Check if there are image poses for this scene (a couple of scenes caused problems when extracting the poses from sensor data)
        if(not os.path.isdir(posesPathScene)):
            print("Did not find any according Image Poses")
            continue

        # Iterate through all poses
        # For each pose, compute the number of points that lie in the frustum that corresponds to the camera pose
        # Keep the 3 image IDs corresponding to the poses with the highest numbers of points in the frustum
        for poseFile in os.listdir(posesPathScene):
            pose = load_pose(os.path.join(posesPathScene,poseFile))
            corners = projection.compute_frustum_corners(pose)[:, :3, 0] # Corners of Frustum
            normals = projection.compute_frustum_normals(corners) # Normals of frustum
            num_valid_points = projection.points_in_frustum(corners.double().cuda(), normals.double().cuda(), torch.DoubleTensor(scene_points).cuda()) # Checks for each point if it lies on the correct side of the normals of the frustum
            poseDict[poseFile[:-4]] = num_valid_points
        if(len(poseDict) == 0): # If there was something wrong, skip
            continue
        poseList = list()
        poseList.append(scene_nr)
        poseList.append(scene_version)
        for i in range(numImgs): # find maxima
            maximum = max(poseDict, key=poseDict.get)
            poseList.append(int(maximum))
            del poseDict[maximum]

        # Write to file
        h5file = h5py.File(os.path.join(outPath, scene + ".hdf5"), "w")
        dset = h5file.create_dataset("points", (npoints, 3), data=scene_points)
        dset = h5file.create_dataset("labels", (npoints,), data=semantic_labels)
        dset = h5file.create_dataset("corresponding_images", (numImgs+2,), data=poseList)
        h5file.close()
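The points_in_frustum call above is described as checking, for each point, whether it lies on the correct side of every frustum plane. A hedged NumPy sketch of that half-space test follows; the real ProjectionHelper implementation may differ, and plane_points/normals are assumed inputs (one reference point and one inward-facing normal per plane).

import numpy as np

def count_points_in_frustum(plane_points, normals, points):
    # plane_points, normals: (6, 3); points: (N, 3)
    diff = points[None, :, :] - plane_points[:, None, :]  # (6, N, 3)
    signed = np.einsum('pj,pnj->pn', normals, diff)       # signed plane distances
    return int((signed >= 0).all(axis=0).sum())           # inside all six planes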
Code example #4
File: test_my_model.py Project: sophiajw/adl4cv
import argparse

import numpy as np
import torch
from scipy import misc

from utils import util
from data.data_util import resize_crop_image
from model import BeachNet
from utils.projection import ProjectionHelper

# initialize model and Projection Helper
proj_image_dims = [41, 32]
intrinsic = util.make_intrinsic(
    577.870605, 577.870605, 319.5,
    239.5)  # affine transformation from image plane to pixel coords
intrinsic = util.adjust_intrinsic(intrinsic, [640, 480], proj_image_dims)
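# Hedged aside, not in the original file: adjust_intrinsic presumably rescales
# the intrinsic for the new resolution, roughly fx' = fx * 41 / 640 and
# fy' = fy * 32 / 480 with a matching shift of the principal point; the exact
# convention is defined in utils.util.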

projection = ProjectionHelper(intrinsic, 0.4, 4.0, proj_image_dims, 0.05)
model = BeachNet(42, 3, 128, intrinsic, proj_image_dims, 0.4, 4.0, 0.05)
model = model.cuda()

# get point cloud
input = torch.Tensor(
    np.load(
        '/media/lorenzlamm/My Book/pointnet2/scannet/preprocessing/scannet_scenes/scene0000_00.npy'
    )).cuda()
point_cloud = input[:, :3]
num_points = point_cloud.shape[0]

batch_size = 2
num_images = 3
num_points_sample = 8192
point_batch = point_cloud.new(batch_size * num_images, num_points_sample,
Code example #5
def main(args):
    os.environ[
        "CUDA_VISIBLE_DEVICES"] = args.gpu if args.multi_gpu is None else '0,1,2,3'
    '''CREATE DIR'''
    experiment_dir = Path('./experiment/')
    experiment_dir.mkdir(exist_ok=True)
    file_dir = Path(
        str(experiment_dir) + '/%sScanNetSemSeg-' % args.model_name +
        str(datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')))
    file_dir.mkdir(exist_ok=True)
    checkpoints_dir = file_dir.joinpath('checkpoints/')
    checkpoints_dir.mkdir(exist_ok=True)
    log_dir = file_dir.joinpath('logs/')
    log_dir.mkdir(exist_ok=True)
    '''LOG'''
    args = parse_args()
    logger = logging.getLogger(args.model_name)
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    file_handler = logging.FileHandler(
        str(log_dir) + '/train_%s_semseg.txt' % args.model_name)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.info(
        '---------------------------------------------------TRAINING---------------------------------------------------'
    )
    logger.info('PARAMETER ...')
    logger.info(args)
    print('Load data...')

    dataset = ScannetDatasetRGBImg(root='./data',
                                   split='train',
                                   npoints=8192,
                                   num_images=3)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batchsize,
                                             collate_fn=dataset.collate_fn,
                                             shuffle=True,
                                             num_workers=int(args.workers))
    test_dataset = ScannetDatasetRGBImg(root='./data',
                                        split='test',
                                        npoints=8192,
                                        num_images=3)
    testdataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batchsize,
        collate_fn=test_dataset.collate_fn,
        shuffle=True,
        num_workers=int(args.workers))

    num_classes = 21
    model = PointNet2Multiview2(num_classes)
    loss_function = torch.nn.CrossEntropyLoss(ignore_index=0, reduction='none')
    #loss_function = torch.nn.CrossEntropyLoss(reduction='none')

    if args.pretrain is not None:
        model.load_state_dict(torch.load(args.pretrain))
        print('load model %s' % args.pretrain)
        logger.info('load model %s' % args.pretrain)
    else:
        print('Training from scratch')
        logger.info('Training from scratch')
    pretrain = args.pretrain
    init_epoch = int(pretrain[-14:-11]) if args.pretrain is not None else 0

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    elif args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.learning_rate,
                                     betas=(0.9, 0.999),
                                     eps=1e-08,
                                     weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=50,
                                                gamma=0.5)
    LEARNING_RATE_CLIP = 1e-5
    '''GPU selection and multi-GPU'''
    if args.multi_gpu is not None:
        device_ids = [int(x) for x in args.multi_gpu.split(',')]
        torch.backends.cudnn.benchmark = True
        model.cuda(device_ids[0])
        model = torch.nn.DataParallel(model, device_ids=device_ids)
    else:
        model.cuda()

    intrinsic = [[37.01983, 0, 20, 0], [0, 38.52470, 15.5, 0], [0, 0, 1, 0],
                 [0, 0, 0, 1]]
    projection = ProjectionHelper(intrinsic, 0.1, 4.0, [41, 32], 0.05)

    history = defaultdict(lambda: list())
    best_acc = 0
    best_acc_epoch = 0
    best_mIoU = 0
    best_mIoU_epoch = 0

    for epoch in range(init_epoch, args.epoch):
        scheduler.step()
        # clip the decayed learning rate from below; it is written back to the
        # optimizer in the loop just below
        lr = max(optimizer.param_groups[0]['lr'], LEARNING_RATE_CLIP)
        print('Learning rate:%f' % lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        train_loss_sum = 0.0
        train_acc_sum = 0.0
        invalid_count = 0
        for i, data in enumerate(dataloader):
            points, target, sample_weights, image, depth, pose = data
            batch_size = points.shape[0]
            num_points = points.shape[1]
            num_images = image[0].shape[0]
            points, target = points.float(), target.long()
            points = points.transpose(2, 1)
            points, target, sample_weights = points.cuda(), target.cuda(
            ), sample_weights.cuda()
            depth = [d.cuda() for d in depth]
            pose = [p.cuda() for p in pose]
            # Compute projection mapping
            points_projection = torch.repeat_interleave(
                points.transpose(2, 1)[:, :, 0:3], num_images, dim=0
            )  # For each scene chunk, we have num_images images. We repeat each point cloud num_images times to compute the projection
            proj_mapping = [[
                projection.compute_projection(p, d, c, num_points)
                for p, d, c in zip(
                    points_projection[k * num_images:(k + 1) *
                                      num_images], depth[k], pose[k])
            ] for k in range(batch_size)]
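            # proj_mapping is a list (length batch_size) of per-image results;
            # each entry from compute_projection is either None (no valid
            # projection) or a pair of index tensors (3D indices, 2D indices),
            # unpacked below into proj_ind_3d / proj_ind_2d.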
            jump_flag = False
            for k in range(batch_size):
                if None in proj_mapping[k]:  #invalid sample
                    print('invalid sample')
                    invalid_count = invalid_count + 1
                    jump_flag = True
                    break
            if jump_flag:
                continue
            proj_ind_3d = []
            proj_ind_2d = []
            for k in range(batch_size):
                proj_mapping0, proj_mapping1 = zip(*proj_mapping[k])
                proj_ind_3d.append(torch.stack(proj_mapping0))
                proj_ind_2d.append(torch.stack(proj_mapping1))

            optimizer.zero_grad()
            model = model.train()
            model.enet_fixed = model.enet_fixed.eval()
            model.enet_trainable = model.enet_trainable.eval()
            for param in model.enet_trainable.parameters():
                param.requires_grad = False

            pred = model(points[:, :3, :], image, proj_ind_3d, proj_ind_2d)
            #pred = model(points[:,:3,:], points[:,3:6,:], image, proj_ind_3d, proj_ind_2d)

            pred = pred.contiguous().view(-1, num_classes)
            target = target.view(pred.size(0))
            weights = sample_weights.view(pred.size(0))
            loss = loss_function(pred, target)
            loss = loss * weights
            loss = torch.mean(loss)
            history['loss'].append(loss.item())
            train_loss_sum += loss.item()
            loss.backward()
            optimizer.step()
            # Train acc
            pred_val = torch.argmax(pred, 1)
            correct = torch.sum(
                ((pred_val == target) & (target > 0) & (weights > 0)).float())
            seen = torch.sum(((target > 0) & (weights > 0)).float()) + 1e-08
            train_acc = correct / seen if seen != 0 else correct
            train_acc_sum += train_acc.item()
            if (i + 1) % 5 == 0:
                print(
                    '[Epoch %d/%d] [Iteration %d/%d] TRAIN acc/loss: %f/%f ' %
                    (epoch + 1, args.epoch, i + 1, len(dataloader),
                     train_acc.item(), loss.item()))
                logger.info(
                    '[Epoch %d/%d] [Iteration %d/%d] TRAIN acc/loss: %f/%f ' %
                    (epoch + 1, args.epoch, i + 1, len(dataloader),
                     train_acc.item(), loss.item()))
        train_loss_avg = train_loss_sum / (len(dataloader) - invalid_count)
        train_acc_avg = train_acc_sum / (len(dataloader) - invalid_count)
        history['train_acc'].append(train_acc_avg)
        print('[Epoch %d/%d] TRAIN acc/loss: %f/%f ' %
              (epoch + 1, args.epoch, train_acc_avg, train_loss_avg))
        logger.info('[Epoch %d/%d] TRAIN acc/loss: %f/%f ' %
                    (epoch + 1, args.epoch, train_acc_avg, train_loss_avg))

        #Test acc
        test_losses = []
        total_correct = 0
        total_seen = 0
        total_correct_class = [0 for _ in range(num_classes)]
        total_seen_class = [0 for _ in range(num_classes)]
        total_intersection_class = [0 for _ in range(num_classes)]
        total_union_class = [0 for _ in range(num_classes)]

        total_correct_vox = 0
        total_seen_vox = 0
        total_seen_class_vox = [0 for _ in range(num_classes)]
        total_correct_class_vox = [0 for _ in range(num_classes)]
        total_intersection_class_vox = [0 for _ in range(num_classes)]
        total_union_class_vox = [0 for _ in range(num_classes)]

        labelweights = np.zeros(num_classes)
        labelweights_vox = np.zeros(num_classes)

        for j, data in enumerate(testdataloader):
            with torch.no_grad():
                points, target, sample_weights, image, depth, pose = data
                batch_size = points.shape[0]
                num_points = points.shape[1]
                num_images = image[0].shape[0]
                points, target, sample_weights = points.float(), target.long(
                ), sample_weights.float()
                points = points.transpose(2, 1)
                points, target, sample_weights = points.cuda(), target.cuda(
                ), sample_weights.cuda()
                depth = [d.cuda() for d in depth]
                pose = [p.cuda() for p in pose]
                # Compute projection mapping
                points_projection = torch.repeat_interleave(
                    points.transpose(2, 1)[:, :, 0:3], num_images, dim=0
                )  # For each scene chunk, we have num_images images. We repeat each point cloud num_images times to compute the projection
                proj_mapping = [[
                    projection.compute_projection(p, d, c, num_points)
                    for p, d, c in zip(
                        points_projection[k * num_images:(k + 1) *
                                          num_images], depth[k], pose[k])
                ] for k in range(batch_size)]
                jump_flag = False
                for k in range(batch_size):
                    if None in proj_mapping[k]:  #invalid sample
                        print('invalid sample')
                        jump_flag = True
                        break
                if jump_flag:
                    continue
                proj_ind_3d = []
                proj_ind_2d = []
                for k in range(batch_size):
                    proj_mapping0, proj_mapping1 = zip(*proj_mapping[k])
                    proj_ind_3d.append(torch.stack(proj_mapping0))
                    proj_ind_2d.append(torch.stack(proj_mapping1))
                model = model.eval()
                pred = model(points[:, :3, :], image, proj_ind_3d, proj_ind_2d)
                #pred = model(points[:,:3,:], points[:,3:6,:], image, proj_ind_3d, proj_ind_2d)
                pred_2d = pred.contiguous().view(-1, num_classes)
                target_1d = target.view(pred_2d.size(0))
                weights_1d = sample_weights.view(pred_2d.size(0))
                loss = loss_function(pred_2d, target_1d)
                loss = loss * weights_1d
                loss = torch.mean(loss)
                test_losses.append(loss.item())
            #first convert torch tensor to numpy array
            pred_np = pred.cpu().numpy()  #[B,N,C]
            target_np = target.cpu().numpy()  #[B,N]
            weights_np = sample_weights.cpu().numpy()  #[B,N]
            points_np = points.transpose(2, 1).cpu().numpy()  #[B,N,3]
            # point wise acc
            pred_val = np.argmax(pred_np, 2)  #[B,N]
            correct = np.sum((pred_val == target_np) & (target_np > 0)
                             & (weights_np > 0))
            total_correct += correct
            total_seen += np.sum((target_np > 0) & (weights_np > 0))

            tmp, _ = np.histogram(target_np, range(num_classes + 1))
            labelweights += tmp

            # point wise acc and IoU per class
            for l in range(num_classes):
                total_seen_class[l] += np.sum((target_np == l)
                                              & (weights_np > 0))
                total_correct_class[l] += np.sum((pred_val == l)
                                                 & (target_np == l)
                                                 & (weights_np > 0))
                total_intersection_class[l] += np.sum((pred_val == l)
                                                      & (target_np == l)
                                                      & (weights_np > 0))
                total_union_class[l] += np.sum((
                    (pred_val == l) | (target_np == l)) & (weights_np > 0))

            # voxel wise acc
            for b in range(target_np.shape[0]):
                _, uvlabel, _ = point_cloud_label_to_surface_voxel_label_fast(
                    points_np[b, weights_np[b, :] > 0, :],
                    np.concatenate(
                        (np.expand_dims(target_np[b, weights_np[b, :] > 0], 1),
                         np.expand_dims(pred_val[b, weights_np[b, :] > 0], 1)),
                        axis=1),
                    res=0.02)
                total_correct_vox += np.sum((uvlabel[:, 0] == uvlabel[:, 1])
                                            & (uvlabel[:, 0] > 0))
                total_seen_vox += np.sum(uvlabel[:, 0] > 0)
                tmp, _ = np.histogram(uvlabel[:, 0], range(num_classes + 1))
                labelweights_vox += tmp
                # voxel wise acc and IoU per class
                for l in range(num_classes):
                    total_seen_class_vox[l] += np.sum(uvlabel[:, 0] == l)
                    total_correct_class_vox[l] += np.sum((uvlabel[:, 0] == l)
                                                         & (uvlabel[:,
                                                                    1] == l))
                    total_intersection_class_vox[l] += np.sum(
                        (uvlabel[:, 0] == l) & (uvlabel[:, 1] == l))
                    total_union_class_vox[l] += np.sum((uvlabel[:, 0] == l)
                                                       | (uvlabel[:, 1] == l))

        test_loss = np.mean(test_losses)
        test_point_acc = total_correct / float(total_seen)
        history['test_point_acc'].append(test_point_acc)
        test_voxel_acc = total_correct_vox / float(total_seen_vox)
        history['test_voxel_acc'].append(test_voxel_acc)
        test_avg_class_point_acc = np.mean(
            np.array(total_correct_class[1:]) /
            (np.array(total_seen_class[1:], dtype=np.float64) + 1e-6))
        history['test_avg_class_point_acc'].append(test_avg_class_point_acc)
        test_avg_class_voxel_acc = np.mean(
            np.array(total_correct_class_vox[1:]) /
            (np.array(total_seen_class_vox[1:], dtype=np.float64) + 1e-6))
        history['test_avg_class_voxel_acc'].append(test_avg_class_voxel_acc)
        test_avg_class_point_IoU = np.mean(
            np.array(total_intersection_class[1:]) /
            (np.array(total_union_class[1:], dtype=np.float64) + 1e-6))
        history['test_avg_class_point_IoU'].append(test_avg_class_point_IoU)
        test_avg_class_voxel_IoU = np.mean(
            np.array(total_intersection_class_vox[1:]) /
            (np.array(total_union_class_vox[1:], dtype=np.float64) + 1e-6))
        history['test_avg_class_voxel_IoU'].append(test_avg_class_voxel_IoU)
        labelweights = labelweights[1:].astype(np.float32) / np.sum(
            labelweights[1:].astype(np.float32))
        labelweights_vox = labelweights_vox[1:].astype(np.float32) / np.sum(
            labelweights_vox[1:].astype(np.float32))
        #caliweights = np.array([0.388,0.357,0.038,0.033,0.017,0.02,0.016,0.025,0.002,0.002,0.002,0.007,0.006,0.022,0.004,0.0004,0.003,0.002,0.024,0.029])
        #test_cali_voxel_acc = np.average(np.array(total_correct_class_vox[1:])/(np.array(total_seen_class_vox[1:],dtype=np.float)+1e-6),weights=caliweights)
        #history['test_cali_voxel_acc'].append(test_cali_voxel_acc)
        #test_cali_point_acc = np.average(np.array(total_correct_class[1:])/(np.array(total_seen_class[1:],dtype=np.float)+1e-6),weights=caliweights)
        #history['test_cali_point_acc'].append(test_cali_point_acc)

        print('[Epoch %d/%d] TEST acc/loss: %f/%f ' %
              (epoch + 1, args.epoch, test_voxel_acc, test_loss))
        logger.info('[Epoch %d/%d] TEST acc/loss: %f/%f ' %
                    (epoch + 1, args.epoch, test_voxel_acc, test_loss))
        print('Whole scene point wise accuracy: %f' % (test_point_acc))
        logger.info('Whole scene point wise accuracy: %f' % (test_point_acc))
        print('Whole scene voxel wise accuracy: %f' % (test_voxel_acc))
        logger.info('Whole scene voxel wise accuracy: %f' % (test_voxel_acc))
        print('Whole scene class averaged point wise accuracy: %f' %
              (test_avg_class_point_acc))
        logger.info('Whole scene class averaged point wise accuracy: %f' %
                    (test_avg_class_point_acc))
        print('Whole scene class averaged voxel wise accuracy: %f' %
              (test_avg_class_voxel_acc))
        logger.info('Whole scene class averaged voxel wise accuracy: %f' %
                    (test_avg_class_voxel_acc))
        #print('Whole scene calibrated point wise accuracy: %f' % (test_cali_point_acc))
        #logger.info('Whole scene calibrated point wise accuracy: %f' % (test_cali_point_acc))
        #print('Whole scene calibrated voxel wise accuracy: %f' % (test_cali_voxel_acc))
        #logger.info('Whole scene calibrated voxel wise accuracy: %f' % (test_cali_voxel_acc))
        print('Whole scene class averaged point wise IoU: %f' %
              (test_avg_class_point_IoU))
        logger.info('Whole scene class averaged point wise IoU: %f' %
                    (test_avg_class_point_IoU))
        print('Whole scene class averaged voxel wise IoU: %f' %
              (test_avg_class_voxel_IoU))
        logger.info('Whole scene class averaged voxel wise IoU: %f' %
                    (test_avg_class_voxel_IoU))

        per_class_voxel_str = 'voxel based --------\n'
        for l in range(1, num_classes):
            per_class_voxel_str += 'class %d weight: %f, acc: %f, IoU: %f;\n' % (
                l, labelweights_vox[l - 1], total_correct_class_vox[l] / float(
                    total_seen_class_vox[l]), total_intersection_class_vox[l] /
                (float(total_union_class_vox[l]) + 1e-6))
        logger.info(per_class_voxel_str)

        per_class_point_str = 'point based --------\n'
        for l in range(1, num_classes):
            per_class_point_str += 'class %d weight: %f, acc: %f, IoU: %f;\n' % (
                l, labelweights[l - 1], total_correct_class[l] /
                float(total_seen_class[l]), total_intersection_class[l] /
                (float(total_union_class[l]) + 1e-6))
        logger.info(per_class_point_str)

        if (epoch + 1) % 5 == 0:
            torch.save(
                model.state_dict(), '%s/%s_%.3d.pth' %
                (checkpoints_dir, args.model_name, epoch + 1))
            logger.info('Save model..')
            print('Save model..')
        if test_voxel_acc > best_acc:
            best_acc = test_voxel_acc
            best_acc_epoch = epoch + 1
            torch.save(
                model.state_dict(), '%s/%s_%.3d_%.4f_bestacc.pth' %
                (checkpoints_dir, args.model_name, epoch + 1, best_acc))
            logger.info('Save best acc model..')
            print('Save best acc model..')
        if test_avg_class_voxel_IoU > best_mIoU:
            best_mIoU = test_avg_class_voxel_IoU
            best_mIoU_epoch = epoch + 1
            torch.save(
                model.state_dict(), '%s/%s_%.3d_%.4f_bestmIoU.pth' %
                (checkpoints_dir, args.model_name, epoch + 1, best_mIoU))
            logger.info('Save best mIoU model..')
            print('Save best mIoU model..')
    print('Best voxel wise accuracy is %f at epoch %d.' %
          (best_acc, best_acc_epoch))
    logger.info('Best voxel wise accuracy is %f at epoch %d.' %
                (best_acc, best_acc_epoch))
    print('Best class averaged voxel wise IoU is %f at epoch %d.' %
          (best_mIoU, best_mIoU_epoch))
    logger.info('Best class averaged voxel wise IoU is %f at epoch %d.' %
                (best_mIoU, best_mIoU_epoch))
    plot_loss_curve(history['loss'], str(log_dir))
    plot_acc_curve(history['train_acc'], history['test_voxel_acc'],
                   str(log_dir))
    plot_acc_curve(history['train_acc'], history['test_avg_class_voxel_IoU'],
                   str(log_dir))
    print('FINISH.')
    logger.info('FINISH')
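The per-class accumulators in the test loop above implement the standard mean-IoU bookkeeping, with class 0 treated as unannotated and excluded from the mean. A compact, self-contained version of the same computation (names are illustrative, not from the training script):

import numpy as np

def mean_iou(pred, target, num_classes):
    # pred, target: integer label arrays of the same shape; class 0 is ignored
    inter = np.array([np.sum((pred == l) & (target == l))
                      for l in range(num_classes)], dtype=np.float64)
    union = np.array([np.sum((pred == l) | (target == l))
                      for l in range(num_classes)], dtype=np.float64)
    return float(np.mean(inter[1:] / (union[1:] + 1e-6)))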
Code example #6
File: eval.py Project: yuhaonankaka/adl
def evaluate(args):
    # init training dataset
    print("preparing data...")
    scanrefer, scene_list = get_scanrefer(args)

    # dataloader
    _, dataloader = get_dataloader(args, scanrefer, scene_list, "val", DC)

    # model
    model = get_model(args)

    # config
    POST_DICT = {
        'remove_empty_box': True,
        'use_3d_nms': True,
        'nms_iou': 0.25,
        'use_old_type_nms': False,
        'cls_nms': True,
        'per_class_proposal': True,
        'conf_thresh': 0.05,
        'dataset_config': DC
    } if not args.no_nms else None

    # evaluate
    print("evaluating...")
    ref_acc = []
    objectness_precisions, objectness_recalls, objectness_f1s = [], [], []
    ious = []
    masks = []
    maskrcnn_model = resnet_fpn_backbone('resnet18', True).fpn.cuda()
    for data in tqdm(dataloader):
        for key in data:
            if key != "scan_name":
                data[key] = data[key].cuda()

        batch_size = len(data['scan_name'])
        new_features = torch.zeros((batch_size, 40000, 32)).cuda()
        print("start to project")
        for idx, scene_id in enumerate(data['scan_name']):
            intrinsics = get_intrinsics(scene_id, args)
            projection = ProjectionHelper(intrinsics, args.depth_min,
                                          args.depth_max, proj_image_dims)
            features_2d = scannet_projection(
                data['point_clouds'][idx].cpu().numpy(), intrinsics,
                projection, scene_id, args, None, None, maskrcnn_model)
            new_features[idx, :] = features_2d[:]
        data['new_features'] = new_features
        print("finish projection")

        # feed
        data = model(data)
        _, data = get_loss(data, DC, True, True, POST_DICT)

        ref_acc += data["ref_acc"]
        objectness_precisions += data["objectness_precision"]
        objectness_recalls += data["objectness_recall"]
        objectness_f1s += data["objectness_f1"]
        ious += data["ref_iou"]
        masks += data["ref_multiple_mask"]

    # aggregate scores
    ref_acc = np.array(ref_acc)
    objectness_precisions, objectness_recalls, objectness_f1s = np.array(
        objectness_precisions), np.array(objectness_recalls), np.array(
            objectness_f1s)
    ious = np.array(ious)
    masks = np.array(masks)

    stats = {
        "unique": np.sum(masks == 0),
        "multiple": np.sum(masks == 1),
        "overall": masks.shape[0]
    }
    scores = {"unique": {}, "multiple": {}, "overall": {}}
    scores["unique"]["ref_acc"] = np.mean(
        ref_acc[masks == 0]) if np.sum(masks == 0) > 0 else 0
    scores["unique"]["objn_prec"] = np.mean(
        objectness_precisions[masks == 0]) if np.sum(masks == 0) > 0 else 0
    scores["unique"]["objn_recall"] = np.mean(
        objectness_recalls[masks == 0]) if np.sum(masks == 0) > 0 else 0
    scores["unique"]["objn_f1"] = np.mean(
        objectness_f1s[masks == 0]) if np.sum(masks == 0) > 0 else 0
    scores["unique"]["iou_rate_0.25"] = ious[masks == 0][ious[
        masks == 0] >= 0.25].shape[0] / ious[masks == 0].shape[0] if np.sum(
            masks == 0) > 0 else 0
    scores["unique"]["iou_rate_0.5"] = ious[masks == 0][ious[
        masks == 0] >= 0.5].shape[0] / ious[masks == 0].shape[0] if np.sum(
            masks == 0) > 0 else 0
    scores["multiple"]["ref_acc"] = np.mean(
        ref_acc[masks == 1]) if np.sum(masks == 1) > 0 else 0
    scores["multiple"]["objn_prec"] = np.mean(
        objectness_precisions[masks == 1]) if np.sum(masks == 1) > 0 else 0
    scores["multiple"]["objn_recall"] = np.mean(
        objectness_recalls[masks == 1]) if np.sum(masks == 1) > 0 else 0
    scores["multiple"]["objn_f1"] = np.mean(
        objectness_f1s[masks == 1]) if np.sum(masks == 1) > 0 else 0
    scores["multiple"]["iou_rate_0.25"] = ious[masks == 1][ious[
        masks == 1] >= 0.25].shape[0] / ious[masks == 1].shape[0] if np.sum(
            masks == 1) > 0 else 0
    scores["multiple"]["iou_rate_0.5"] = ious[masks == 1][ious[
        masks == 1] >= 0.5].shape[0] / ious[masks == 1].shape[0] if np.sum(
            masks == 1) > 0 else 0
    scores["overall"]["ref_acc"] = np.mean(ref_acc)
    scores["overall"]["objn_prec"] = np.mean(objectness_precisions)
    scores["overall"]["objn_recall"] = np.mean(objectness_recalls)
    scores["overall"]["objn_f1"] = np.mean(objectness_f1s)
    scores["overall"]["iou_rate_0.25"] = ious[
        ious >= 0.25].shape[0] / ious.shape[0]
    scores["overall"]["iou_rate_0.5"] = ious[
        ious >= 0.5].shape[0] / ious.shape[0]

    print("done!")

    return stats, scores
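Each of the iou_rate_0.25 / iou_rate_0.5 expressions above computes the same statistic: the fraction of samples whose IoU clears a threshold. A small equivalent helper (a sketch; the script keeps these inline):

import numpy as np

def iou_rate(ious, thresh):
    ious = np.asarray(ious)
    return float(np.mean(ious >= thresh)) if ious.size > 0 else 0.0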
Code example #7
    def _feed(self, dataloader, phase, epoch_id):
        # switch mode
        self._set_phase(phase)

        # change dataloader
        dataloader = dataloader if phase == "train" else tqdm(dataloader)

        for data_dict in dataloader:
            # move to cuda
            for key in data_dict:
                if key != 'scan_name':
                    data_dict[key] = data_dict[key].cuda()

            # =======================================
            # Get 3d <-> 2D Projection Mapping and 2D feature map
            # =======================================
            batch_size = len(data_dict['scan_name'])
            new_features = np.zeros((batch_size, self.args.num_points, 256))
            for idx, scene_id in enumerate(data_dict['scan_name']):
                intrinsics = get_intrinsics(scene_id, self.args)
                projection = ProjectionHelper(intrinsics, self.args.depth_min,
                                              self.args.depth_max,
                                              proj_image_dims)

                features_2d = scannet_projection(
                    data_dict['point_clouds'][idx].cpu().numpy(), intrinsics,
                    projection, scene_id, self.args, None, None,
                    self.maskrcnn_model)
                new_features[idx, :] = features_2d[:]
            data_dict['new_features'] = torch.tensor(
                new_features, dtype=torch.float32, requires_grad=True).cuda()

            # initialize the running loss
            self._running_log = {
                # loss
                "loss": 0,
                "ref_loss": 0,
                "lang_loss": 0,
                "objectness_loss": 0,
                "vote_loss": 0,
                "box_loss": 0,
                # acc
                "lang_acc": 0,
                "ref_acc": 0,
                "obj_acc": 0,
                "pos_ratio": 0,
                "neg_ratio": 0,
                "iou_rate_0.25": 0,
                "iou_rate_0.5": 0
            }

            # load
            self.log[phase]["fetch"].append(
                data_dict["load_time"].sum().item())

            with torch.autograd.set_detect_anomaly(False):
                # forward
                start = time.time()
                data_dict = self._forward(data_dict)
                self._compute_loss(data_dict)
                self.log[phase]["forward"].append(time.time() - start)

                # backward
                if phase == "train":
                    start = time.time()
                    self._backward()
                    self.log[phase]["backward"].append(time.time() - start)

            # eval
            start = time.time()
            self._eval(data_dict)
            self.log[phase]["eval"].append(time.time() - start)

            # record log
            self.log[phase]["loss"].append(self._running_log["loss"].item())
            self.log[phase]["ref_loss"].append(
                self._running_log["ref_loss"].item())
            self.log[phase]["lang_loss"].append(
                self._running_log["lang_loss"].item())
            self.log[phase]["objectness_loss"].append(
                self._running_log["objectness_loss"].item())
            self.log[phase]["vote_loss"].append(
                self._running_log["vote_loss"].item())
            self.log[phase]["box_loss"].append(
                self._running_log["box_loss"].item())

            self.log[phase]["lang_acc"].append(self._running_log["lang_acc"])
            self.log[phase]["ref_acc"].append(self._running_log["ref_acc"])
            self.log[phase]["obj_acc"].append(self._running_log["obj_acc"])
            self.log[phase]["pos_ratio"].append(self._running_log["pos_ratio"])
            self.log[phase]["neg_ratio"].append(self._running_log["neg_ratio"])
            self.log[phase]["iou_rate_0.25"].append(
                self._running_log["iou_rate_0.25"])
            self.log[phase]["iou_rate_0.5"].append(
                self._running_log["iou_rate_0.5"])

            # report
            if phase == "train":
                iter_time = self.log[phase]["fetch"][-1]
                iter_time += self.log[phase]["forward"][-1]
                iter_time += self.log[phase]["backward"][-1]
                iter_time += self.log[phase]["eval"][-1]
                self.log[phase]["iter_time"].append(iter_time)
                if (self._global_iter_id + 1) % self.verbose == 0:
                    self._train_report(epoch_id)

                # evaluation
                if self._global_iter_id != 0 and self._global_iter_id % self.val_step == 0:
                    print("evaluating...")
                    # val
                    self._feed(self.dataloader["val"], "val", epoch_id)
                    self._dump_log("val")
                    self._set_phase("train")
                    self._epoch_report(epoch_id)

                # dump log
                self._dump_log("train")
                self._global_iter_id += 1

        # check best
        if phase == "val":
            cur_criterion = "iou_rate_0.5"
            cur_best = np.mean(self.log[phase][cur_criterion])
            if cur_best > self.best[cur_criterion]:
                self._log("best {} achieved: {}".format(
                    cur_criterion, cur_best))
                self._log("current train_loss: {}".format(
                    np.mean(self.log["train"]["loss"])))
                self._log("current val_loss: {}".format(
                    np.mean(self.log["val"]["loss"])))
                self.best["epoch"] = epoch_id + 1
                self.best["loss"] = np.mean(self.log[phase]["loss"])
                self.best["ref_loss"] = np.mean(self.log[phase]["ref_loss"])
                self.best["lang_loss"] = np.mean(self.log[phase]["lang_loss"])
                self.best["objectness_loss"] = np.mean(
                    self.log[phase]["objectness_loss"])
                self.best["vote_loss"] = np.mean(self.log[phase]["vote_loss"])
                self.best["box_loss"] = np.mean(self.log[phase]["box_loss"])
                self.best["lang_acc"] = np.mean(self.log[phase]["lang_acc"])
                self.best["ref_acc"] = np.mean(self.log[phase]["ref_acc"])
                self.best["obj_acc"] = np.mean(self.log[phase]["obj_acc"])
                self.best["pos_ratio"] = np.mean(self.log[phase]["pos_ratio"])
                self.best["neg_ratio"] = np.mean(self.log[phase]["neg_ratio"])
                self.best["iou_rate_0.25"] = np.mean(
                    self.log[phase]["iou_rate_0.25"])
                self.best["iou_rate_0.5"] = np.mean(
                    self.log[phase]["iou_rate_0.5"])

                # save model
                self._log("saving best models...\n")
                model_root = os.path.join(CONF.PATH.OUTPUT, self.stamp)
                torch.save(self.model.state_dict(),
                           os.path.join(model_root, "model.pth"))
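The "check best" block above follows the usual keep-the-best-checkpoint pattern: average the epoch's validation criterion, compare it to the stored best, and save the weights on improvement. A minimal sketch under assumed names (log_val, best, and out_dir are illustrative, not the trainer's attributes):

import os
import numpy as np
import torch

def maybe_save_best(log_val, best, model, out_dir, criterion="iou_rate_0.5"):
    cur = float(np.mean(log_val[criterion]))   # epoch mean of the criterion
    if cur > best.get(criterion, 0.0):
        best[criterion] = cur
        torch.save(model.state_dict(), os.path.join(out_dir, "model.pth"))
    return best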
Code example #8
import os
import pickle
import torch.utils.data
from torch.utils.data import Dataset
import numpy as np
import utils.pc_util as pc_util
import utils.scene_util as scene_util
import random
import math
from scipy import misc
from PIL import Image
import torchvision.transforms as transforms
from utils.projection import ProjectionHelper
from model.pointnet_util import pc_normalize

intrinsic = [[37.01983, 0, 20, 0], [0, 38.52470, 15.5, 0], [0, 0, 1, 0],
             [0, 0, 0, 1]]
projection = ProjectionHelper(intrinsic, 0.1, 4.0, [41, 32], 0.05)


class ScannetDatasetRGBImg(Dataset):
    def __init__(self, root, npoints=8192, split='train', num_images=5):
        self.npoints = npoints
        self.root = root
        self.split = split
        self.num_images = num_images
        data_list = os.path.join(self.root, 'scannetv2_%s.txt' % (split))
        datalist = open(data_list, 'r')
        self.scenes = [x.strip() for x in datalist.readlines()]
        self.data_filename = os.path.join(self.root,
                                          'scannetv2_%s.pickle' % (split))
        with open(self.data_filename, 'rb') as fp:
            self.scene_points_list = pickle.load(fp, encoding='bytes')