Beispiel #1
0
def main(args):
    """Classify a single image with PoseNet on the GPU and print the result.

    Builds the network, loads the weights stored at ``args.model_path``,
    runs one forward pass on the image at ``args.image_path`` and prints the
    highest probability together with its class name. Wall-clock timings
    for construction, weight loading and inference are printed as well.

    Args:
        args: Parsed options; only ``model_path`` and ``image_path`` are
            read here.
    """
    flow.env.init()
    flow.enable_eager_execution()

    start_t = time.time()
    posenet_module = PoseNet()
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    start_t = time.time()
    pretrain_models = flow.load(args.model_path)
    posenet_module.load_state_dict(pretrain_models)
    end_t = time.time()
    print("load params time : {}".format(end_t - start_t))

    posenet_module.eval()
    posenet_module.to("cuda")

    # The measured inference time deliberately includes image loading and
    # the host -> device copy, as in the original measurement.
    start_t = time.time()
    image = load_image(args.image_path)
    image = flow.Tensor(image, device=flow.device("cuda"))
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with flow.no_grad():
        logits = posenet_module(image)
        predictions = logits.softmax()
    predictions = predictions.numpy()
    end_t = time.time()
    print("infer time : {}".format(end_t - start_t))

    # argmax/max operate on the flattened array; this assumes a single
    # image was fed through the network -- TODO confirm load_image's shape.
    clsidx = np.argmax(predictions)
    print("predict prob: %f, class name: %s" %
          (np.max(predictions), clsidx_2_labels[clsidx]))
Beispiel #2
0
def _report_pose_errors(model, dataloader, poses_mean, poses_std, device,
                        stereo):
    """Predict poses for ``dataloader`` and print translation/rotation errors.

    Returns:
        The ``(pred_poses, gt_poses)`` pair produced by
        ``model_results_pred_gt`` so callers can reuse it.
    """
    pred_poses, gt_poses = model_results_pred_gt(model,
                                                 dataloader,
                                                 poses_mean,
                                                 poses_std,
                                                 device=device,
                                                 stereo=stereo)

    print('gt_poses = {}'.format(gt_poses.shape))
    print('pred_poses = {}'.format(pred_poses.shape))

    # The first three columns are compared with a Euclidean norm, the
    # remaining ones with the quaternion angular error.
    t_loss = np.asarray([
        np.linalg.norm(p - t)
        for p, t in zip(pred_poses[:, :3], gt_poses[:, :3])
    ])
    q_loss = np.asarray([
        quaternion_angular_error(p, t)
        for p, t in zip(pred_poses[:, 3:], gt_poses[:, 3:])
    ])

    print('poses_std = {:.3f}'.format(np.linalg.norm(poses_std)))
    print('T: median = {:.3f}, mean = {:.3f}'.format(np.median(t_loss),
                                                     np.mean(t_loss)))
    print('R: median = {:.3f}, mean = {:.3f}'.format(np.median(q_loss),
                                                     np.mean(q_loss)))
    return pred_poses, gt_poses


def main():
    """Train a PoseNet model on Apolloscape and report its pose errors.

    Reads the run configuration from ``get_args()``, builds the train and
    validation datasets/loaders, creates the model, criterion and Adam
    optimizer, optionally restores them from ``args.checkpoint``, then
    trains for ``args.epochs`` epochs with periodic validation, figure
    generation and checkpointing. Afterwards the translation and rotation
    errors on both splits are printed and a final checkpoint is written.

    Raises:
        ValueError: If ``args.feature_net`` is not one of the supported
            ResNet backbones.
    """
    args = get_args()

    print('----- Params for debug: ----------------')
    print(args)

    print('data = {}'.format(args.data))
    print('road = {}'.format(args.road))

    print('Train model ...')

    # Imagenet normalization in case of pre-trained network
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Resize data before using
    transform = transforms.Compose([
        transforms.Resize(260),
        transforms.CenterCrop(250),
        transforms.ToTensor(), normalize
    ])

    train_record = None  # 'Record001'
    train_dataset = Apolloscape(root=args.data,
                                road=args.road,
                                transform=transform,
                                record=train_record,
                                normalize_poses=True,
                                pose_format='quat',
                                train=True,
                                cache_transform=not args.no_cache_transform,
                                stereo=args.stereo)

    val_record = None  # 'Record011'
    val_dataset = Apolloscape(root=args.data,
                              road=args.road,
                              transform=transform,
                              record=val_record,
                              normalize_poses=True,
                              pose_format='quat',
                              train=False,
                              cache_transform=not args.no_cache_transform,
                              stereo=args.stereo)

    # Show datasets
    print(train_dataset)
    print(val_dataset)

    shuffle_data = True

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=shuffle_data)  # batch_size = 75
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=shuffle_data)  # batch_size = 75

    # Get mean and std from dataset
    # NOTE(review): figures and the final evaluation use the *validation*
    # statistics, while train() below receives the *training* statistics --
    # confirm both datasets expose the same values.
    poses_mean = val_dataset.poses_mean
    poses_std = val_dataset.poses_std

    # Select active device
    if torch.cuda.is_available() and args.device == 'cuda':
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    print('device = {}'.format(device))

    # Used as prefix for filenames
    time_str = datetime.now().strftime('%Y%m%d_%H%M%S')

    # Create pretrained feature extractor; fail early on an unknown name
    # instead of hitting an unbound variable further down.
    backbones = {
        'resnet18': models.resnet18,
        'resnet34': models.resnet34,
        'resnet50': models.resnet50,
    }
    if args.feature_net not in backbones:
        raise ValueError('Unsupported feature_net {!r}; expected one of {}'
                         .format(args.feature_net, sorted(backbones)))
    feature_extractor = backbones[args.feature_net](
        pretrained=args.pretrained)

    # Num features for the last layer before pose regressor
    num_features = args.feature_net_features  # 2048

    experiment_name = get_experiment_name(args)

    # Create model
    model = PoseNet(feature_extractor, num_features=num_features)
    model = model.to(device)

    # Criterion
    criterion = PoseNetCriterion(stereo=args.stereo,
                                 beta=args.beta,
                                 learn_beta=args.learn_beta)
    criterion.to(device)

    # Add all params for optimization
    param_list = [{'params': model.parameters()}]
    if criterion.learn_beta:
        param_list.append({'params': criterion.parameters()})

    # Create optimizer
    optimizer = optim.Adam(params=param_list, lr=args.lr, weight_decay=0.0005)

    start_epoch = 0

    # Restore from checkpoint if present
    if args.checkpoint is not None:
        checkpoint_file = args.checkpoint

        if os.path.isfile(checkpoint_file):
            print('\nLoading from checkpoint: {}'.format(checkpoint_file))
            # map_location lets a checkpoint saved on GPU be restored on a
            # CPU-only run (and vice versa).
            checkpoint = torch.load(checkpoint_file, map_location=device)
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optim_state_dict'])
            start_epoch = checkpoint['epoch']
            if 'criterion_state_dict' in checkpoint:
                criterion.load_state_dict(checkpoint['criterion_state_dict'])
                print('Loaded criterion params too.')
        else:
            # Keep the best-effort behaviour (train from scratch) but make
            # the situation visible instead of ignoring it silently.
            print('\nWARNING: checkpoint not found: {}; training from '
                  'scratch.'.format(checkpoint_file))

    n_epochs = start_epoch + args.epochs

    print('\nTraining ...')
    val_freq = args.val_freq
    for e in range(start_epoch, n_epochs):

        # Train for one epoch
        train(train_dataloader,
              model,
              criterion,
              optimizer,
              e,
              n_epochs,
              log_freq=args.log_freq,
              poses_mean=train_dataset.poses_mean,
              poses_std=train_dataset.poses_std,
              device=device,
              stereo=args.stereo)

        # Run validation loop (a non-positive frequency disables it, which
        # also avoids a modulo-by-zero)
        if e > 0 and val_freq > 0 and e % val_freq == 0:
            validate(val_dataloader,
                     model,
                     criterion,
                     e,
                     log_freq=args.log_freq,
                     device=device,
                     stereo=args.stereo)

        # Make figure
        if e > 0 and args.fig_save > 0 and e % args.fig_save == 0:
            exp_name = '{}_{}'.format(time_str, experiment_name)
            make_figure(model,
                        train_dataloader,
                        poses_mean=poses_mean,
                        poses_std=poses_std,
                        epoch=e,
                        experiment_name=exp_name,
                        device=device,
                        stereo=args.stereo)

        # Make checkpoint (same "0 disables" convention as fig_save)
        if (e > 0 and args.checkpoint_save > 0
                and e % args.checkpoint_save == 0):
            make_checkpoint(model,
                            optimizer,
                            criterion,
                            epoch=e,
                            time_str=time_str,
                            args=args)

    print('\nn_epochs = {}'.format(n_epochs))

    print('\n=== Test Training Dataset ======')
    _report_pose_errors(model, train_dataloader, poses_mean, poses_std,
                        device=device, stereo=args.stereo)

    print('\n=== Test Validation Dataset ======')
    _report_pose_errors(model, val_dataloader, poses_mean, poses_std,
                        device=device, stereo=args.stereo)

    # Save checkpoint
    print('\nSaving model params ....')
    make_checkpoint(model,
                    optimizer,
                    criterion,
                    epoch=n_epochs,
                    time_str=time_str,
                    args=args)
Beispiel #3
0
def main(args):
    """Train PoseNet with OneFlow, validating and checkpointing each epoch.

    For every epoch the model is trained over the whole training loader
    with SGD and cross-entropy loss, then evaluated on the validation
    loader. The top-1 accuracy is printed and the model state is saved under
    ``args.save_checkpoint_path``. The training losses sampled every
    ``print_interval`` iterations are written to ``of_losses.txt`` at the
    end.

    Args:
        args: Parsed options. Attributes read here: ``ofrecord_path``,
            ``train_batch_size``, ``val_batch_size``, ``load_checkpoint``,
            ``learning_rate``, ``mom``, ``epochs`` and
            ``save_checkpoint_path``.
    """
    flow.enable_eager_execution()

    train_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="train",
        # NOTE(Liang Depeng): needs to explicitly set the dataset size
        dataset_size=7459,
        batch_size=args.train_batch_size,
    )

    val_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="val",
        dataset_size=1990,
        batch_size=args.val_batch_size,
    )

    # oneflow init
    start_t = time.time()
    posenet_module = PoseNet()

    if args.load_checkpoint != "":
        posenet_module.load_state_dict(flow.load(args.load_checkpoint))

    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    of_cross_entropy = flow.nn.CrossEntropyLoss()

    posenet_module.to("cuda")
    of_cross_entropy.to("cuda")

    of_sgd = flow.optim.SGD(posenet_module.parameters(),
                            lr=args.learning_rate,
                            momentum=args.mom)

    of_losses = []
    all_samples = len(val_data_loader) * args.val_batch_size
    print_interval = 100

    for epoch in range(args.epochs):
        posenet_module.train()

        for b in range(len(train_data_loader)):
            image, label = train_data_loader.get_batch()

            # oneflow train
            start_t = time.time()
            image = image.to("cuda")
            label = label.to("cuda")
            logits = posenet_module(image)
            loss = of_cross_entropy(logits, label)

            loss.backward()
            of_sgd.step()
            of_sgd.zero_grad()
            end_t = time.time()
            if b % print_interval == 0:
                loss_value = loss.numpy()
                of_losses.append(loss_value)
                print(
                    "epoch {} train iter {} oneflow loss {}, train time : {}".
                    format(epoch, b, loss_value, end_t - start_t))

        print("epoch %d train done, start validation" % epoch)
        posenet_module.eval()
        correct_of = 0
        for b in range(len(val_data_loader)):
            image, label = val_data_loader.get_batch()

            image = image.to("cuda")
            with flow.no_grad():
                logits = posenet_module(image)
                predictions = logits.softmax()
            clsidxs = np.argmax(predictions.numpy(), axis=1)

            # Count matches in one vectorized comparison. Flattening makes
            # (batch,) and (batch, 1) label layouts compare alike.
            # Assumes every batch holds exactly args.val_batch_size samples,
            # as the per-index loop this replaces did -- TODO confirm.
            label_nd = label.numpy().reshape(-1)
            correct_of += int(np.count_nonzero(clsidxs == label_nd))

        # Guard against an empty validation set instead of dividing by zero.
        val_acc = correct_of / all_samples if all_samples else 0.0
        print("epoch %d, oneflow top1 val acc: %f" % (epoch, val_acc))
        flow.save(
            posenet_module.state_dict(),
            os.path.join(
                args.save_checkpoint_path,
                "epoch_%d_val_acc_%f" % (epoch, val_acc),
            ),
        )

    # The context manager closes the file even if a write fails.
    with open("of_losses.txt", "w") as writer:
        for o in of_losses:
            writer.write("%f\n" % o)
Beispiel #4
0
# Build the network and its SGD optimizer from the run configuration.
posenet = PoseNet(opt.nstack, opt.hourglass_inp_dim, config.num_layers,
                  bn=False)
optimizer = optim.SGD(
    posenet.parameters(),
    lr=opt.learning_rate,
    momentum=0.9,
    weight_decay=1e-4,
)

if args.resume:
    print('\nResuming from checkpoint ...... ')
    # Deserialize onto the CPU so that loading the checkpoint itself does
    # not consume GPU memory.
    cpu_device = torch.device('cpu')
    checkpoint = torch.load(opt.ckpt_path, map_location=cpu_device)
    posenet.load_state_dict(checkpoint['weights'])
    print('\nNetwork weights have been resumed from checkpoint...')

    optimizer.load_state_dict(checkpoint['optimizer_weight'])
    # Because the checkpoint was mapped to the CPU above, the restored
    # optimizer state tensors are moved to the GPU here. Per the original
    # author's note, training runs on the GPU and needs CUDA copies of them.
    for param_state in optimizer.state.values():
        tensor_names = [
            name for name, value in param_state.items()
            if torch.is_tensor(value)
        ]
        for name in tensor_names:
            param_state[name] = param_state[name].cuda()
    print('\nOptimizer has been resumed from checkpoint...')

    # Carry over the training loss stored in the checkpoint.
    best_loss = checkpoint['train_loss']