コード例 #1
0
ファイル: test.py プロジェクト: chicleee/Pix2Vox
def test_net(cfg,
             epoch_idx=-1,
             output_dir=None,
             test_data_loader=None,
             test_writer=None,
             encoder=None,
             decoder=None,
             merger=None,
             refiner=None):
    """Evaluate the reconstruction networks and report per-taxonomy IoU.

    Args:
        cfg: Configuration object; its DATASET(S), CONST, TEST, NETWORK and
            TRAIN sections are read here.
        epoch_idx: Epoch the evaluated weights belong to. It is used as the
            logging step and is compared against the ``EPOCH_START_USE_*``
            settings, so with the default ``-1`` the merger/refiner stages
            only run if those settings are ``<= -1``.
        output_dir: Path template with one ``%s`` placeholder (formatted with
            ``'images'``). When set, views of the first sample are rendered.
        test_data_loader: Loader yielding ``(rendering_images,
            ground_truth_volume)`` pairs. When ``None`` a test-split loader is
            built and the run is labelled ``'test'``; otherwise ``'val'``.
        test_writer: Optional log writer exposing ``add_image`` and
            ``add_scalar``.
        encoder: Encoder network, or ``None`` to build one and load weights.
        decoder: Decoder network, or ``None`` to build one and load weights.
        merger: Merger network. Replaced by a freshly built one whenever
            ``encoder`` or ``decoder`` is ``None``.
        refiner: Optional refiner network. It is applied to the merged volume
            only when it is supplied, ``cfg.NETWORK.USE_REFINER`` is set and
            ``epoch_idx >= cfg.TRAIN.EPOCH_START_USE_REFINER``.

    Returns:
        The largest value, over ``cfg.TEST.VOXEL_THRESH``, of the mean IoU
        weighted by the number of samples per taxonomy.
    """
    # Load taxonomies of dataset (used for the names/baselines in the report)
    taxonomy_file_path = cfg.DATASETS[cfg.DATASET.TEST_DATASET.upper()].TAXONOMY_FILE_PATH
    with open(taxonomy_file_path, encoding='utf-8') as file:
        taxonomies = json.load(file)
    taxonomies = {t['taxonomy_id']: t for t in taxonomies}

    # Set up data loader
    if test_data_loader is None:
        # Set up data augmentation
        IMG_SIZE = cfg.CONST.IMG_H, cfg.CONST.IMG_W
        CROP_SIZE = cfg.CONST.CROP_IMG_H, cfg.CONST.CROP_IMG_W
        test_transforms = utils.data_transforms.Compose([
            utils.data_transforms.CenterCrop(IMG_SIZE, CROP_SIZE),
            utils.data_transforms.RandomBackground(cfg.TEST.RANDOM_BG_COLOR_RANGE),
            utils.data_transforms.Normalize(mean=cfg.DATASET.MEAN, std=cfg.DATASET.STD),
            utils.data_transforms.ToTensor(),
        ])

        dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[cfg.DATASET.TEST_DATASET](cfg)
        test_data_loader = paddle.io.DataLoader(
            dataset=dataset_loader.get_dataset(utils.data_loaders.DatasetType.TEST,
                                               cfg.CONST.N_VIEWS_RENDERING, test_transforms),
            batch_size=1,
            shuffle=False)
        mode = 'test'
    else:
        mode = 'val'

    # The data loader only yields tensors (paddle.io.Dataset does not support
    # 'str' items), so the taxonomy id and sample name of every sample are
    # recovered by position from the taxonomy file instead.
    # NOTE(review): the path is hard-coded and the list is assumed to be in the
    # same order, and of the same length, as the data loader - confirm.
    with open('./datasets/ShapeNet.json', encoding='utf-8') as file:
        dataset_taxonomy = json.load(file)

    all_test_taxonomy_id_and_sample_name = [
        [taxonomy['taxonomy_id'], sample]
        for taxonomy in dataset_taxonomy
        for sample in taxonomy[mode]
    ]
    print('[INFO] Collected files of %s set' % mode)

    # Set up networks
    if decoder is None or encoder is None:
        encoder = Encoder(cfg)
        decoder = Decoder(cfg)
        merger = Merger(cfg)

        print('[INFO] %s Loading weights from %s ...' % (dt.now(), cfg.CONST.WEIGHTS))
        encoder_state_dict = paddle.load(os.path.join(cfg.CONST.WEIGHTS, "encoder.pdparams"))
        encoder.set_state_dict(encoder_state_dict)
        decoder_state_dict = paddle.load(os.path.join(cfg.CONST.WEIGHTS, "decoder.pdparams"))
        decoder.set_state_dict(decoder_state_dict)

        if cfg.NETWORK.USE_MERGER:
            merger_state_dict = paddle.load(os.path.join(cfg.CONST.WEIGHTS, "merger.pdparams"))
            merger.set_state_dict(merger_state_dict)

    # Set up loss functions
    bce_loss = paddle.nn.BCELoss()

    # Testing loop
    n_samples = len(test_data_loader)
    test_iou = dict()
    encoder_losses = utils.network_utils.AverageMeter()

    # Switch models to evaluation mode. The merger and refiner are optional
    # when the caller supplies its own networks, so guard against None.
    encoder.eval()
    decoder.eval()
    if merger is not None:
        merger.eval()
    if refiner is not None:
        refiner.eval()

    for sample_idx, (rendering_images, ground_truth_volume) in enumerate(test_data_loader):
        taxonomy_id, sample_name = all_test_taxonomy_id_and_sample_name[sample_idx][:2]

        with paddle.no_grad():
            # Test the encoder, decoder and merger
            image_features = encoder(rendering_images)
            raw_features, generated_volume = decoder(image_features)

            if cfg.NETWORK.USE_MERGER and epoch_idx >= cfg.TRAIN.EPOCH_START_USE_MERGER:
                generated_volume = merger(raw_features, generated_volume)
            else:
                # Without the merger the per-view volumes are simply averaged.
                generated_volume = paddle.mean(generated_volume, axis=1)

            encoder_loss = bce_loss(generated_volume, ground_truth_volume) * 10

            # Same activation condition as the training loop uses for the
            # refiner; skipped entirely when no refiner was supplied.
            if (refiner is not None and cfg.NETWORK.USE_REFINER
                    and epoch_idx >= cfg.TRAIN.EPOCH_START_USE_REFINER):
                generated_volume = refiner(generated_volume)

            # Append loss and accuracy to average metrics
            encoder_losses.update(encoder_loss)

            # IoU per sample, one value per voxel threshold
            sample_iou = []
            for th in cfg.TEST.VOXEL_THRESH:
                _volume = paddle.greater_equal(generated_volume, paddle.to_tensor(th)).astype("float32")
                intersection = paddle.sum(paddle.multiply(_volume, ground_truth_volume))
                # A voxel belongs to the union when prediction + ground truth >= 1.
                union = paddle.sum(
                    paddle.greater_equal(
                        paddle.add(_volume, ground_truth_volume).astype("float32"),
                        paddle.to_tensor(1., dtype='float32')).astype("float32"))
                sample_iou.append((intersection / union))

            # IoU per taxonomy
            if taxonomy_id not in test_iou:
                test_iou[taxonomy_id] = {'n_samples': 0, 'iou': []}
            test_iou[taxonomy_id]['n_samples'] += 1
            test_iou[taxonomy_id]['iou'].append(sample_iou)

            # Render the first sample; log the views only if a writer exists
            if output_dir and sample_idx < 1:
                img_dir = output_dir % 'images'
                # Volume Visualization
                gv = generated_volume.cpu().numpy()
                rendering_views = utils.binvox_visualization.get_volume_views(
                    gv, os.path.join(img_dir, 'Reconstructed'), epoch_idx)
                if test_writer is not None:
                    test_writer.add_image(tag='Reconstructed', img=rendering_views, step=epoch_idx)
                gtv = ground_truth_volume.cpu().numpy()
                rendering_views = utils.binvox_visualization.get_volume_views(
                    gtv, os.path.join(img_dir, 'GroundTruth'), epoch_idx)
                if test_writer is not None:
                    test_writer.add_image(tag='GroundTruth', img=rendering_views, step=epoch_idx)

            # Print sample loss and IoU
            print('[INFO] %s Test[%d/%d] Taxonomy = %s Sample = %s EDLoss = %.4f IoU = %s' %
                  (dt.now(), sample_idx + 1, n_samples, taxonomy_id, sample_name, encoder_loss,
                   ['%.4f' % si for si in sample_iou]))

    # Output testing results: per-taxonomy mean, then the sample-weighted mean
    mean_iou = []
    for taxonomy_id in test_iou:
        test_iou[taxonomy_id]['iou'] = np.mean(test_iou[taxonomy_id]['iou'], axis=0)
        mean_iou.append(test_iou[taxonomy_id]['iou'] * test_iou[taxonomy_id]['n_samples'])
    mean_iou = np.sum(mean_iou, axis=0) / n_samples

    # Print header
    print('============================ TEST RESULTS ============================')
    print('Taxonomy', end='\t')
    print('#Sample', end='\t')
    print('Baseline', end='\t')
    for th in cfg.TEST.VOXEL_THRESH:
        print('t=%.2f' % th, end='\t')
    print()
    # Print body
    for taxonomy_id in test_iou:
        print('%s' % taxonomies[taxonomy_id]['taxonomy_name'].ljust(8), end='\t')
        print('%d' % test_iou[taxonomy_id]['n_samples'], end='\t')
        if 'baseline' in taxonomies[taxonomy_id]:
            print('%.4f' % taxonomies[taxonomy_id]['baseline']['%d-view' % cfg.CONST.N_VIEWS_RENDERING], end='\t\t')
        else:
            print('N/a', end='\t\t')

        for ti in test_iou[taxonomy_id]['iou']:
            print('%.4f' % ti, end='\t')
        print()
    # Print mean IoU for each threshold
    print('Overall ', end='\t\t\t\t')
    for mi in mean_iou:
        print('%.4f' % mi, end='\t')
    print('\n')

    # Add testing results to the log writer
    max_iou = np.max(mean_iou)
    if test_writer is not None:
        test_writer.add_scalar(tag='EncoderDecoder/EpochLoss', value=encoder_losses.avg, step=epoch_idx)
        test_writer.add_scalar(tag='EncoderDecoder/IoU', value=max_iou, step=epoch_idx)

    return max_iou
コード例 #2
0
def train_net(cfg):
    """Train the encoder, decoder, merger and refiner networks.

    Builds the train/validation data loaders, networks, learning-rate
    schedulers and Adam solvers from ``cfg``, optionally restores weights and
    solver states from ``cfg.CONST.WEIGHTS``, then runs
    ``cfg.TRAIN.NUM_EPOCHES`` epochs. After every epoch the models are
    validated with ``test_net``; checkpoints are written every
    ``cfg.TRAIN.SAVE_FREQ`` epochs and whenever the validation IoU improves.

    Args:
        cfg: Configuration object; its CONST, DATASET, DIR, NETWORK, TEST and
            TRAIN sections are read here.
    """
    # Set up data augmentation
    IMG_SIZE = cfg.CONST.IMG_H, cfg.CONST.IMG_W
    CROP_SIZE = cfg.CONST.CROP_IMG_H, cfg.CONST.CROP_IMG_W
    train_transforms = utils.data_transforms.Compose([
        utils.data_transforms.RandomCrop(IMG_SIZE, CROP_SIZE),
        utils.data_transforms.RandomBackground(
            cfg.TRAIN.RANDOM_BG_COLOR_RANGE),
        utils.data_transforms.ColorJitter(cfg.TRAIN.BRIGHTNESS,
                                          cfg.TRAIN.CONTRAST,
                                          cfg.TRAIN.SATURATION),
        utils.data_transforms.RandomNoise(cfg.TRAIN.NOISE_STD),
        utils.data_transforms.Normalize(mean=cfg.DATASET.MEAN,
                                        std=cfg.DATASET.STD),
        utils.data_transforms.RandomFlip(),
        utils.data_transforms.RandomPermuteRGB(),
        utils.data_transforms.ToTensor(),
    ])
    val_transforms = utils.data_transforms.Compose([
        utils.data_transforms.CenterCrop(IMG_SIZE, CROP_SIZE),
        utils.data_transforms.RandomBackground(cfg.TEST.RANDOM_BG_COLOR_RANGE),
        utils.data_transforms.Normalize(mean=cfg.DATASET.MEAN,
                                        std=cfg.DATASET.STD),
        utils.data_transforms.ToTensor(),
    ])

    # Set up data loader
    train_dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[
        cfg.DATASET.TRAIN_DATASET](cfg)
    val_dataset_loader = utils.data_loaders.DATASET_LOADER_MAPPING[
        cfg.DATASET.TEST_DATASET](cfg)
    # num_workers is left at its default: using cfg.TRAIN.NUM_WORKER > 0 was
    # reported to fail because /dev/shm is too small, see
    # https://blog.csdn.net/ctypyb2002/article/details/107914643
    train_data_loader = paddle.io.DataLoader(
        dataset=train_dataset_loader.get_dataset(
            utils.data_loaders.DatasetType.TRAIN, cfg.CONST.N_VIEWS_RENDERING,
            train_transforms),
        batch_size=cfg.CONST.BATCH_SIZE,
        use_shared_memory=False,
        shuffle=True,
        drop_last=True)
    val_data_loader = paddle.io.DataLoader(
        dataset=val_dataset_loader.get_dataset(
            utils.data_loaders.DatasetType.VAL, cfg.CONST.N_VIEWS_RENDERING,
            val_transforms),
        batch_size=1,
        shuffle=False)

    # Set up networks
    encoder = Encoder(cfg)
    decoder = Decoder(cfg)
    merger = Merger(cfg)
    refiner = Refiner(cfg)
    print('[DEBUG] %s Parameters in Encoder: %d.' %
          (dt.now(), utils.network_utils.count_parameters(encoder)))
    print('[DEBUG] %s Parameters in Decoder: %d.' %
          (dt.now(), utils.network_utils.count_parameters(decoder)))
    print('[DEBUG] %s Parameters in Merger: %d.' %
          (dt.now(), utils.network_utils.count_parameters(merger)))
    print('[DEBUG] %s Parameters in Refiner: %d.' %
          (dt.now(), utils.network_utils.count_parameters(refiner)))

    # No explicit weight initialization: Paddle initializes parameters
    # differently from the original implementation (see the Paddle API).

    # Set up learning rate scheduler to decay learning rates dynamically
    encoder_lr_scheduler = paddle.optimizer.lr.MultiStepDecay(
        learning_rate=cfg.TRAIN.ENCODER_LEARNING_RATE,
        milestones=cfg.TRAIN.ENCODER_LR_MILESTONES,
        gamma=cfg.TRAIN.GAMMA,
        verbose=True)
    decoder_lr_scheduler = paddle.optimizer.lr.MultiStepDecay(
        learning_rate=cfg.TRAIN.DECODER_LEARNING_RATE,
        milestones=cfg.TRAIN.DECODER_LR_MILESTONES,
        gamma=cfg.TRAIN.GAMMA,
        verbose=True)
    merger_lr_scheduler = paddle.optimizer.lr.MultiStepDecay(
        learning_rate=cfg.TRAIN.MERGER_LEARNING_RATE,
        milestones=cfg.TRAIN.MERGER_LR_MILESTONES,
        gamma=cfg.TRAIN.GAMMA,
        verbose=True)
    refiner_lr_scheduler = paddle.optimizer.lr.MultiStepDecay(
        learning_rate=cfg.TRAIN.REFINER_LEARNING_RATE,
        milestones=cfg.TRAIN.REFINER_LR_MILESTONES,
        gamma=cfg.TRAIN.GAMMA,
        verbose=True)

    # Set up solvers (Adam is used regardless of cfg.TRAIN.POLICY)
    encoder_solver = paddle.optimizer.Adam(learning_rate=encoder_lr_scheduler,
                                           parameters=encoder.parameters())
    decoder_solver = paddle.optimizer.Adam(learning_rate=decoder_lr_scheduler,
                                           parameters=decoder.parameters())
    merger_solver = paddle.optimizer.Adam(learning_rate=merger_lr_scheduler,
                                          parameters=merger.parameters())
    refiner_solver = paddle.optimizer.Adam(learning_rate=refiner_lr_scheduler,
                                           parameters=refiner.parameters())

    # Set up loss functions
    bce_loss = paddle.nn.BCELoss()

    # Load pretrained model if exists
    # NOTE(review): only weights and solver states are restored; init_epoch,
    # best_iou and best_epoch keep their initial values after a resume.
    init_epoch = 0
    best_iou = -1
    best_epoch = -1
    if 'WEIGHTS' in cfg.CONST and cfg.TRAIN.RESUME_TRAIN:
        print('[INFO] %s Recovering from %s ...' %
              (dt.now(), cfg.CONST.WEIGHTS))
        encoder_state_dict = paddle.load(
            os.path.join(cfg.CONST.WEIGHTS, "encoder.pdparams"))
        encoder_solver_state_dict = paddle.load(
            os.path.join(cfg.CONST.WEIGHTS, "encoder_solver.pdopt"))
        encoder.set_state_dict(encoder_state_dict)
        encoder_solver.set_state_dict(encoder_solver_state_dict)
        decoder_state_dict = paddle.load(
            os.path.join(cfg.CONST.WEIGHTS, "decoder.pdparams"))
        decoder_solver_state_dict = paddle.load(
            os.path.join(cfg.CONST.WEIGHTS, "decoder_solver.pdopt"))
        decoder.set_state_dict(decoder_state_dict)
        decoder_solver.set_state_dict(decoder_solver_state_dict)

        if cfg.NETWORK.USE_MERGER:
            merger_state_dict = paddle.load(
                os.path.join(cfg.CONST.WEIGHTS, "merger.pdparams"))
            merger_solver_state_dict = paddle.load(
                os.path.join(cfg.CONST.WEIGHTS, "merger_solver.pdopt"))
            merger.set_state_dict(merger_state_dict)
            merger_solver.set_state_dict(merger_solver_state_dict)

        if cfg.NETWORK.USE_REFINER:
            refiner_state_dict = paddle.load(
                os.path.join(cfg.CONST.WEIGHTS, "refiner.pdparams"))
            refiner_solver_state_dict = paddle.load(
                os.path.join(cfg.CONST.WEIGHTS, "refiner_solver.pdopt"))
            refiner.set_state_dict(refiner_state_dict)
            refiner_solver.set_state_dict(refiner_solver_state_dict)

        print(
            '[INFO] %s Recover complete. Current epoch #%d, Best IoU = %.4f at epoch #%d.'
            % (dt.now(), init_epoch, best_iou, best_epoch))

    # Log writers; output_dir keeps a '%s' placeholder for the sub-directory
    output_dir = os.path.join(cfg.DIR.OUT_PATH, '%s', dt.now().isoformat())
    log_dir = output_dir % 'logs'
    ckpt_dir = output_dir % 'checkpoints'
    train_writer = LogWriter(os.path.join(log_dir, 'train'))
    val_writer = LogWriter(os.path.join(log_dir, 'val'))

    # Training loop
    for epoch_idx in range(init_epoch, cfg.TRAIN.NUM_EPOCHES):
        # Tick / tock
        epoch_start_time = time()

        # Batch average metrics
        batch_time = utils.network_utils.AverageMeter()
        data_time = utils.network_utils.AverageMeter()
        encoder_losses = utils.network_utils.AverageMeter()
        refiner_losses = utils.network_utils.AverageMeter()

        # Switch models to training mode
        encoder.train()
        decoder.train()
        merger.train()
        refiner.train()

        # Whether the optional stages are active is constant within an epoch.
        use_merger = (cfg.NETWORK.USE_MERGER
                      and epoch_idx >= cfg.TRAIN.EPOCH_START_USE_MERGER)
        use_refiner = (cfg.NETWORK.USE_REFINER
                       and epoch_idx >= cfg.TRAIN.EPOCH_START_USE_REFINER)

        batch_end_time = time()
        n_batches = len(train_data_loader)

        for batch_idx, (rendering_images, ground_truth_volumes) in enumerate(
                train_data_loader()):
            # Measure data time
            data_time.update(time() - batch_end_time)

            # Get data from data loader
            rendering_images = utils.network_utils.var_or_cuda(
                rendering_images)
            ground_truth_volumes = utils.network_utils.var_or_cuda(
                ground_truth_volumes)

            # Train the encoder, decoder, and merger
            image_features = encoder(rendering_images)
            raw_features, generated_volumes = decoder(image_features)

            if use_merger:
                generated_volumes = merger(raw_features, generated_volumes)
            else:
                # Without the merger the per-view volumes are averaged, as the
                # evaluation and demo code do, before comparing with the
                # ground truth.
                generated_volumes = paddle.mean(generated_volumes, axis=1)

            encoder_loss = bce_loss(generated_volumes,
                                    ground_truth_volumes) * 10

            if use_refiner:
                generated_volumes = refiner(generated_volumes)
                refiner_loss = bce_loss(generated_volumes,
                                        ground_truth_volumes) * 10
            else:
                # Keeps refiner_loss defined for the logging below.
                refiner_loss = encoder_loss

            # Gradient descent
            encoder_solver.clear_grad()
            decoder_solver.clear_grad()
            merger_solver.clear_grad()
            refiner_solver.clear_grad()

            if use_refiner:
                # The graph is shared by both losses, so keep it alive for the
                # second backward pass.
                encoder_loss.backward(retain_graph=True)
                refiner_loss.backward()
            else:
                encoder_loss.backward()

            encoder_solver.step()
            decoder_solver.step()
            merger_solver.step()
            refiner_solver.step()

            # Append loss to average metrics
            encoder_losses.update(encoder_loss.numpy())
            refiner_losses.update(refiner_loss.numpy())

            # Append loss to the training log
            n_itr = epoch_idx * n_batches + batch_idx
            train_writer.add_scalar(tag='EncoderDecoder/BatchLoss',
                                    step=n_itr,
                                    value=encoder_loss.numpy())
            train_writer.add_scalar('Refiner/BatchLoss',
                                    value=refiner_loss.numpy(),
                                    step=n_itr)

            # Tick / tock
            batch_time.update(time() - batch_end_time)
            batch_end_time = time()
            if (batch_idx % int(cfg.CONST.INFO_BATCH)) == 0:
                print(
                    '[INFO] %s [Epoch %d/%d][Batch %d/%d] BatchTime = %.3f (s) DataTime = %.3f (s) EDLoss = %.4f RLoss = %.4f'
                    % (dt.now(), epoch_idx + 1, cfg.TRAIN.NUM_EPOCHES,
                       batch_idx + 1, n_batches, batch_time.val, data_time.val,
                       encoder_loss.numpy(), refiner_loss.numpy()))

        # Append epoch loss to the training log
        train_writer.add_scalar(tag='EncoderDecoder/EpochLoss',
                                step=epoch_idx + 1,
                                value=encoder_losses.avg)
        train_writer.add_scalar('Refiner/EpochLoss',
                                value=refiner_losses.avg,
                                step=epoch_idx + 1)

        # Advance the learning-rate schedulers once per epoch
        encoder_lr_scheduler.step()
        decoder_lr_scheduler.step()
        merger_lr_scheduler.step()
        refiner_lr_scheduler.step()

        # Tick / tock
        epoch_end_time = time()
        print(
            '[INFO] %s Epoch [%d/%d] EpochTime = %.3f (s) EDLoss = %.4f RLoss = %.4f'
            % (dt.now(), epoch_idx + 1, cfg.TRAIN.NUM_EPOCHES, epoch_end_time -
               epoch_start_time, encoder_losses.avg, refiner_losses.avg))

        # Update Rendering Views
        if cfg.TRAIN.UPDATE_N_VIEWS_RENDERING:
            n_views_rendering = random.randint(1, cfg.CONST.N_VIEWS_RENDERING)
            train_data_loader.dataset.set_n_views_rendering(n_views_rendering)
            print('[INFO] %s Epoch [%d/%d] Update #RenderingViews to %d' %
                  (dt.now(), epoch_idx + 2, cfg.TRAIN.NUM_EPOCHES,
                   n_views_rendering))

        # Validate the training models
        # NOTE(review): `refiner` is passed as the ninth positional argument -
        # confirm that the imported test_net accepts it.
        iou = test_net(cfg, epoch_idx + 1, output_dir, val_data_loader,
                       val_writer, encoder, decoder, merger, refiner)

        # Save weights to file
        if (epoch_idx + 1) % cfg.TRAIN.SAVE_FREQ == 0:
            os.makedirs(ckpt_dir, exist_ok=True)

            utils.network_utils.save_checkpoints(
                cfg, os.path.join(ckpt_dir,
                                  'ckpt-epoch-%04d' % (epoch_idx + 1)),
                epoch_idx + 1, encoder, encoder_solver, decoder,
                decoder_solver, merger, merger_solver, refiner, refiner_solver,
                best_iou, best_epoch)
        if iou > best_iou:
            os.makedirs(ckpt_dir, exist_ok=True)

            best_iou = iou
            best_epoch = epoch_idx + 1
            utils.network_utils.save_checkpoints(
                cfg, os.path.join(ckpt_dir, 'best-ckpt'), epoch_idx + 1,
                encoder, encoder_solver, decoder, decoder_solver, merger,
                merger_solver, refiner, refiner_solver, best_iou, best_epoch)
コード例 #3
0
ファイル: demo.py プロジェクト: chicleee/Pix2Vox
def _build_demo_transforms(cfg):
    """Build the image preprocessing pipeline used by ``demo_net``."""
    IMG_SIZE = cfg.CONST.IMG_H, cfg.CONST.IMG_W
    CROP_SIZE = cfg.CONST.CROP_IMG_H, cfg.CONST.CROP_IMG_W
    return utils.data_transforms.Compose([
        utils.data_transforms.CenterCrop(IMG_SIZE, CROP_SIZE),
        utils.data_transforms.RandomBackground(
            cfg.TEST.RANDOM_BG_COLOR_RANGE),
        utils.data_transforms.Normalize(mean=cfg.DATASET.MEAN,
                                        std=cfg.DATASET.STD),
        utils.data_transforms.ToTensor(),
    ])


def _reconstruct_to_obj(cfg, networks, transforms, image_path, obj_subpath):
    """Reconstruct one image and save the thresholded volume as an .obj file.

    ``networks`` is the ``(encoder, decoder, merger)`` triple and
    ``obj_subpath`` is joined onto ``cfg.DIR.OUT_PATH``. Nothing is written
    when ``cfg.DIR.OUT_PATH`` is empty.
    """
    encoder, decoder, merger = networks

    rendering_image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
    if rendering_image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("cannot read image: %s" % image_path)
    rendering_image = rendering_image.astype(np.float32) / 255.
    # Add a leading axis so the single image is handled as one view.
    rendering_image = np.asarray(rendering_image)[np.newaxis, :, :, :]

    rendering_image = transforms(rendering_image)
    # NOTE(review): the input size is hard-coded to 224x224 RGB - confirm it
    # matches cfg.CONST.IMG_H / cfg.CONST.IMG_W.
    rendering_image = paddle.reshape(rendering_image, [1, 1, 3, 224, 224])

    with paddle.no_grad():
        rendering_image = utils.network_utils.var_or_cuda(rendering_image)

        # Run the encoder, decoder and merger
        image_features = encoder(rendering_image)
        raw_features, generated_volume = decoder(image_features)

        if cfg.NETWORK.USE_MERGER:
            generated_volume = merger(raw_features, generated_volume)
        else:
            generated_volume = paddle.mean(generated_volume, axis=1)

        # Every threshold writes to the same file name, so the last entry of
        # cfg.TEST.DEMO_VOXEL_THRESH determines the saved result.
        for th in cfg.TEST.DEMO_VOXEL_THRESH:
            _volume = paddle.greater_equal(
                generated_volume, paddle.to_tensor(th)).astype("float32")
            _volume = paddle.reshape(_volume, [32, 32, 32])
            if cfg.DIR.OUT_PATH:
                pred_file_name = os.path.join(cfg.DIR.OUT_PATH, obj_subpath)
                os.makedirs(os.path.dirname(pred_file_name) or '.',
                            exist_ok=True)
                print("save ", pred_file_name)
                utils.voxel.voxel2obj(pred_file_name,
                                      _volume.cpu().numpy())


def demo_net(cfg, imgs_path):
    """Reconstruct voxel models from a single image or a directory of PNGs.

    Weights are loaded from ``cfg.CONST.WEIGHTS``. For a file, the result is
    saved as ``<cfg.DIR.OUT_PATH>/<name>.obj``; for a directory, every file
    ending in ``.png`` is saved as
    ``<cfg.DIR.OUT_PATH>/<imgs_path>/<name>.obj``. ``<name>`` is the file
    name up to its first dot.

    Args:
        cfg: Configuration object; its CONST, DATASET, DIR, NETWORK and TEST
            sections are read here.
        imgs_path: Path of an image file or of a directory of images.

    Raises:
        Exception: If ``imgs_path`` is neither a file nor a directory.
        IOError: If an image cannot be read.
    """
    encoder = Encoder(cfg)
    decoder = Decoder(cfg)
    merger = Merger(cfg)

    print('[INFO] %s Loading weights from %s ...' %
          (dt.now(), cfg.CONST.WEIGHTS))
    encoder_state_dict = paddle.load(
        os.path.join(cfg.CONST.WEIGHTS, "encoder.pdparams"))
    encoder.set_state_dict(encoder_state_dict)
    decoder_state_dict = paddle.load(
        os.path.join(cfg.CONST.WEIGHTS, "decoder.pdparams"))
    decoder.set_state_dict(decoder_state_dict)

    if cfg.NETWORK.USE_MERGER:
        merger_state_dict = paddle.load(
            os.path.join(cfg.CONST.WEIGHTS, "merger.pdparams"))
        merger.set_state_dict(merger_state_dict)

    # Switch models to evaluation mode
    encoder.eval()
    decoder.eval()
    merger.eval()
    networks = (encoder, decoder, merger)

    if os.path.isfile(imgs_path):
        print("demo img")
        test_transforms = _build_demo_transforms(cfg)
        obj_name = os.path.basename(imgs_path).split('.')[0] + '.obj'
        _reconstruct_to_obj(cfg, networks, test_transforms, imgs_path,
                            obj_name)
    elif os.path.isdir(imgs_path):
        print("demo dir")
        test_transforms = _build_demo_transforms(cfg)
        for rendering_file_path in os.listdir(imgs_path):
            if not rendering_file_path.endswith('.png'):
                continue
            image_path = os.path.join(imgs_path, rendering_file_path)
            print(image_path)
            obj_name = rendering_file_path.split('.')[0] + '.obj'
            _reconstruct_to_obj(cfg, networks, test_transforms, image_path,
                                os.path.join(imgs_path, obj_name))
    else:
        raise Exception("error input path")