def evaluate(model, dataset, hand_param, debug):
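    """Evaluate `model` on `dataset`: report the mean per-frame joint distance
    and save a threshold curve to plot.png. With `debug` set, only the first
    10 examples are evaluated."""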
    transformed_dataset = TransformDataset(dataset, model.encode)
    avg_distances = []
    max_distances = []
    length = len(transformed_dataset) if not debug else 10

    for idx in tqdm.tqdm(range(length)):
        image, gt_2dj, gt_3dj = transformed_dataset.get_example(idx)
        example = dataset.get_example(idx)
        pred_j = model.predict(np.array([image], dtype=np.float32))
        with chainer.using_config('train', False):
            loss = model.forward(
                np.expand_dims(image, axis=0),
                np.expand_dims(gt_3dj, axis=0),
                np.expand_dims(gt_2dj, axis=0),
            )
        pred_j = pred_j.array.reshape(hand_param["n_joints"], -1)
        dim = pred_j.shape[-1]
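        # five columns per joint: the first three are the 3D prediction,
        # the last two the normalized 2D prediction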
        if dim == 5:
            pred_3d = pred_j[:, :3]
            pred_2d = pred_j[:, 3:]
        else:
            pred_3d = pred_j

        logger.debug("> {}".format(pred_j))
        logger.debug("> loss {}".format(loss))
        logger.debug("> visualize pred_joint")

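        # undo the normalization: predictions are relative to half the cube
        # edge length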
        z_half = hand_param["cube"][0] / 2
        pred_3d = z_half * pred_3d
        gt_3dj = (example["rgb_joint"]
                  if hand_param["use_rgb"] else example["depth_joint"])
        gt_3dj = gt_3dj - calc_com(gt_3dj)
        dist = np.sqrt(np.sum(np.square(pred_3d - gt_3dj), axis=1))
        avg_dist = np.mean(dist)
        max_dist = np.max(dist)
        avg_distances.append(avg_dist)
        max_distances.append(max_dist)

    print("mean distance over frames: {}".format(np.array(avg_distances).mean()))
    max_distances = np.array(max_distances)
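    # threshold curve: percentage of frames whose maximum joint error stays
    # below each threshold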
    ps = []
    max_threshold = 80
    for threshold in range(3, max_threshold):
        oks = np.sum(max_distances <= threshold)
        percent = 100 * (oks / len(max_distances))
        ps.append(percent)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_xlabel("Distance threshold / mm")
    ax.set_ylabel("Fraction of frames iwth mean below distance / %")
    ax.set_ylim(0, 100)
    ax.set_xlim(0, max_threshold)
    ax.plot(range(3, max_threshold), ps)  # x values are the thresholds themselves
    ax.grid(True, linestyle="--")
    plt.savefig("plot.png")
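
# A self-contained sketch of the curve computation above, factored into a
# hypothetical helper (not part of the original code; assumes numpy as np and
# per-frame maximum joint errors in millimetres):
def threshold_curve(max_distances, thresholds):
    max_distances = np.asarray(max_distances)
    # np.mean over a boolean mask gives the fraction of frames within threshold
    return [100 * np.mean(max_distances <= t) for t in thresholds]
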
Example #2
def visualize_dataset(config):
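    """Visualize one random training example: ground-truth boxes on the top
    axis, the SSD-transformed version of the same example on the bottom;
    saves the figure to vis.png."""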
    from matplotlib import pyplot as plt
    dataset = select_dataset(config, return_data=["train_set"])
    hand_class = config.get('model_param', 'hand_class').split(",")
    hand_class = [k.strip() for k in hand_class]
    class_converter, flip_converter = create_converter(hand_class)
    logger.info("hand_class = {}".format(hand_class))
    logger.info("done get dataset")

    idx = random.randint(0, len(dataset) - 1)
    logger.info("get example")
    rgb, rgb_bbox, rgb_class = dataset.get_example(idx)
    logger.info("Done get example")
    fig = plt.figure(figsize=(5, 10))
    ax1 = fig.add_subplot(211)
    ax2 = fig.add_subplot(212)

    label = rgb_class
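    # invert the converter so class ids map back to the keys of COLOR_MAP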
    class_converter = {v: k for k, v in class_converter.items()}
    color = [COLOR_MAP[class_converter[c]] for c in label]
    print(label)
    vis_bbox(
        rgb,
        rgb_bbox,
        instance_colors=color,
        label=label,
        label_names=hand_class,
        ax=ax1,
    )

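    # second panel: the same example after the SSD training-time transform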
    model = create_ssd_model()
    transform_dataset = TransformDataset(
        dataset, Transform(model.coder, model.insize, model.mean, train=True))

    img, mb_loc, mb_label = transform_dataset.get_example(idx)
    mb_color = [COLOR_MAP[class_converter[c]] for c in mb_label]
    vis_bbox(
        img,
        mb_loc,
        instance_colors=mb_color,
        label=mb_label,
        label_names=hand_class,
        ax=ax2,
    )
    plt.savefig("vis.png")
    plt.show()
# # visualize transformed dataset

# +
from collections import defaultdict

from chainer.datasets import TransformDataset
from pose.models.selector import select_model
from pose.hand_dataset import common_dataset

config = defaultdict(dict)
config["model"]["name"] = "ganerated"
hand_param["inH"] = 224
hand_param["inW"] = 224
hand_param["inC"] = 3
hand_param["n_joints"] = common_dataset.NUM_KEYPOINTS
hand_param["edges"] = common_dataset.EDGES
model = select_model(config, hand_param)
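# `dataset` is assumed to be defined in an earlier notebook cell.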
transform_dataset = TransformDataset(dataset, model.encode)

# +
print(current_idx)

rgb, hm, intermediate3d, rgb_joint = transform_dataset.get_example(current_idx)
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(121)
ax.imshow(np.max(hm, axis=0))
ax2 = fig.add_subplot(122, projection="3d")
ax2.scatter(*rgb_joint[:, ::-1].transpose())
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help="path to train json file")
    parser.add_argument('test_dataset', help="path to test dataset json file")
    parser.add_argument(
        '--dataset-root',
        help="path to dataset root if the dataset file is not already in the "
        "root folder of the dataset")
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd512')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, nargs='*', default=[])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help="default learning rate")
    parser.add_argument('--port',
                        type=int,
                        default=1337,
                        help="port for bbox sending")
    parser.add_argument('--ip',
                        default='127.0.0.1',
                        help="destination ip for bbox sending")
    parser.add_argument(
        '--test-image',
        help="path to test image that shall be displayed in bbox vis")
    args = parser.parse_args()

    if args.dataset_root is None:
        args.dataset_root = os.path.dirname(args.dataset)

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=1, pretrained_model='imagenet')
        image_size = (300, 300)
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=1, pretrained_model='imagenet')
        image_size = (512, 512)
    else:
        raise NotImplementedError("The model you want to train does not exist")

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    train = TransformDataset(
        SheepDataset(args.dataset_root, args.dataset, image_size=image_size),
        Transform(model.coder, model.insize, model.mean))

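    # MultiprocessParallelUpdater needs one equally sized dataset split per
    # GPU, so trim the last split when the random split comes out uneven.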
    if len(args.gpu) > 1:
        gpu_datasets = split_dataset_n_random(train, len(args.gpu))
        if len(gpu_datasets[0]) != len(gpu_datasets[-1]):
            adapted_last_split = split_dataset(gpu_datasets[-1],
                                               len(gpu_datasets[0]))[0]
            gpu_datasets[-1] = adapted_last_split
    else:
        gpu_datasets = [train]

    train_iter = [
        ThreadIterator(gpu_dataset, args.batchsize)
        for gpu_dataset in gpu_datasets
    ]

    test = SheepDataset(args.dataset_root,
                        args.test_dataset,
                        image_size=image_size)
    test_iter = chainer.iterators.MultithreadIterator(test,
                                                      args.batchsize,
                                                      repeat=False,
                                                      shuffle=False,
                                                      n_threads=2)

    # Adam optimizer; the initial learning rate is taken from --lr
    optimizer = chainer.optimizers.Adam(alpha=args.lr)
    optimizer.setup(train_chain)
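    # per-parameter rules: scale bias gradients up by 2, weight-decay the rest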
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    if len(args.gpu) <= 1:
        updater = training.updaters.StandardUpdater(
            train_iter[0],
            optimizer,
            device=args.gpu[0] if len(args.gpu) > 0 else -1,
        )
    else:
        updater = training.updaters.MultiprocessParallelUpdater(
            train_iter, optimizer, devices=args.gpu)
        updater.setup_workers()

    if len(args.gpu) > 0 and args.gpu[0] >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu[0]).use()
        model.to_gpu()

    trainer = training.Trainer(updater, (200, 'epoch'), args.out)

    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(1000, 'iteration'))

    # build logger
    # make sure to log all data necessary for prediction
    log_interval = 100, 'iteration'
    data_to_log = {
        'image_size': image_size,
        'model_type': args.model,
    }

    # add all command line arguments
    for argument in filter(lambda x: not x.startswith('_'), dir(args)):
        data_to_log[argument] = getattr(args, argument)

    # create callback that logs all auxiliary data the first time things get logged
    def backup_train_config(stats_cpu):
        if stats_cpu['iteration'] == log_interval[0]:
            stats_cpu.update(data_to_log)

    trainer.extend(
        extensions.LogReport(trigger=log_interval,
                             postprocess=backup_train_config))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(5000, 'iteration'))

    if args.test_image is not None:
        plot_image = train._dataset.load_image(args.test_image,
                                               resize_to=image_size)
    else:
        plot_image, _, _ = train.get_example(0)
        plot_image += train._transform.mean

    bbox_plotter = BBOXPlotter(
        plot_image,
        os.path.join(args.out, 'bboxes'),
        send_bboxes=True,
        upstream_port=args.port,
        upstream_ip=args.ip,
    )
    trainer.extend(bbox_plotter, trigger=(10, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def predict_sample(model, dataset, hand_param):
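    """Predict joints for one random example and plot the 2D and 3D results
    next to the ground truth."""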
    transformed_dataset = TransformDataset(dataset, model.encode)
    idx = np.random.randint(0, len(transformed_dataset))
    image, gt_2dj, gt_3dj = transformed_dataset.get_example(idx)
    example = dataset.get_example(idx)

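    # gt_2dj is normalized to [0, 1]; scale to pixel coordinates for plotting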
    vis_vu = gt_2dj * np.array([[hand_param["inH"], hand_param["inW"]]])
    pred_j = model.predict(np.array([image], dtype=np.float32))
    with chainer.using_config('train', False):
        loss = model.forward(
            np.expand_dims(image, axis=0),
            np.expand_dims(gt_3dj, axis=0),
            np.expand_dims(gt_2dj, axis=0),
        )
    pred_j = pred_j.array.reshape(hand_param["n_joints"], -1)
    dim = pred_j.shape[-1]
    if dim == 5:
        pred_3d = pred_j[:, :3]
        pred_2d = pred_j[:, 3:]
        pred_2d = pred_2d * np.array([[hand_param["inH"], hand_param["inW"]]])
    else:
        pred_3d = pred_j
    logger.info("> {}".format(pred_j))
    logger.info("> loss {}".format(loss))
    logger.info("> visualize pred_joint")
    plot_direction = "horizontal"
    if plot_direction == "horizontal":
        space = (1, 2)
        figsize = (10, 5)
    else:
        space = (2, 1)
        figsize = (5, 10)
    z_half = hand_param["cube"][0] / 2
    pred_3d = z_half * pred_3d
    gt_3dj = (example["rgb_joint"]
              if hand_param["use_rgb"] else example["depth_joint"])
    gt_3dj = gt_3dj - calc_com(gt_3dj)
    distance = np.sqrt(np.sum(np.square(pred_3d - gt_3dj), axis=1)).mean()
    logger.info("> mean distance {:0.2f}".format(distance))
    fig = plt.figure(figsize=figsize)
    fig.suptitle("mean distance = {:0.2f}".format(distance))
    ax1 = fig.add_subplot(*space, 1)
    ax1.set_title("result 2D")
    ax2 = fig.add_subplot(*space, 2, projection="3d")
    ax2.set_title("result 3D")
    color_map = hand_param["color_map"]
    keypoint_names = hand_param["keypoint_names"]
    edges = hand_param["edges"]
    color = [color_map[k] for k in keypoint_names]
    pred_color = [color_map[s, t] for s, t in edges]
    gt2_color = [[255, 255, 255] for k in keypoint_names]
    gt3_color = [[50, 50, 50] for k in keypoint_names]
    if hand_param["use_rgb"]:
        image = denormalize_rgb(image)
        chainercv.visualizations.vis_image(image, ax=ax1)
    else:
        image = image.squeeze()
        ax1.imshow(image, cmap="gray")
    vis_pose(vis_vu, edges, point_color=color, edge_color=gt2_color, ax=ax1)
    if dim == 5:
        vis_pose(pred_2d, edges, point_color=color,
                 edge_color=pred_color, ax=ax1)
    vis_pose(gt_3dj, edges, point_color=color, edge_color=gt3_color, ax=ax2)
    if dim != 2:
        vis_pose(pred_3d,
                 edges,
                 point_color=color,
                 edge_color=pred_color,
                 ax=ax2)
    # set layout
    for ax in [ax2]:
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_zlabel("z")
        ax.view_init(-65, -90)
    # show
    plt.show()