Example 1
def sample():
    """Get samples from a model and visualize them"""
    path = '{}/samples_sh'.format(FLAGS.train_dir)
    if not os.path.exists(path):
        os.makedirs(path)
    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)
    n_joints = 17 if not (FLAGS.predict_14) else 14

    # Load 3d data and load (or create) 2d projections
    train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    if FLAGS.use_sh:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions(
            actions, FLAGS.data_dir)
    else:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d, _ = data_utils.create_2d_data(
            actions, FLAGS.data_dir, rcams)

    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count)) as sess:
        # === Create the model ===

        batch_size = 128
        model = create_model(sess, actions, batch_size)
        print("Model loaded")

        for key2d in test_set_2d.keys():

            (subj, b, fname) = key2d

            # choose SittingDown action to visualize
            if b == 'SittingDown':
                print("Subject: {}, action: {}, fname: {}".format(
                    subj, b, fname))

                # keys should be the same if 3d is in camera coordinates
                key3d = key2d if FLAGS.camera_frame else (
                    subj, b, '{0}.h5'.format(fname.split('.')[0]))
                key3d = (subj, b, fname[:-3]) if (
                    fname.endswith('-sh')) and FLAGS.camera_frame else key3d

                enc_in = test_set_2d[key2d]
                n2d, _ = enc_in.shape
                dec_out = test_set_3d[key3d]
                n3d, _ = dec_out.shape
                assert n2d == n3d

                # Split into about-same-size batches

                enc_in = np.array_split(enc_in, n2d // batch_size)
                dec_out = np.array_split(dec_out, n3d // batch_size)

                # store all pose hypotheses, one list per mixture component
                pose_3d_mdm = [[] for _ in range(model.num_models)]

                for bidx in range(len(enc_in)):

                    # Keep probability 1.0 (i.e. no dropout) when sampling
                    dp = 1.0
                    loss, _, out_all_components = model.step(sess,
                                                             enc_in[bidx],
                                                             dec_out[bidx],
                                                             dp,
                                                             isTraining=False)

                    # denormalize the input 2d pose, ground truth 3d pose as well as 3d pose hypotheses from mdm
                    out_all_components = np.reshape(
                        out_all_components,
                        [-1, model.HUMAN_3D_SIZE + 2, model.num_models])
                    out_mean = out_all_components[:, :model.HUMAN_3D_SIZE, :]

                    enc_in[bidx] = data_utils.unNormalizeData(
                        enc_in[bidx], data_mean_2d, data_std_2d,
                        dim_to_ignore_2d)
                    dec_out[bidx] = data_utils.unNormalizeData(
                        dec_out[bidx], data_mean_3d, data_std_3d,
                        dim_to_ignore_3d)
                    poses3d = np.zeros(
                        (out_mean.shape[0], 96, out_mean.shape[-1]))
                    for j in range(out_mean.shape[-1]):
                        poses3d[:, :, j] = data_utils.unNormalizeData(
                            out_mean[:, :, j], data_mean_3d, data_std_3d,
                            dim_to_ignore_3d)

                    # extract the 17 joints
                    dtu3d = np.hstack(
                        (np.arange(3), dim_to_use_3d
                         )) if not (FLAGS.predict_14) else dim_to_use_3d
                    dec_out_17 = dec_out[bidx][:, dtu3d]
                    pose_3d_17 = poses3d[:, dtu3d, :]
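                    # Per-joint Euclidean error of every hypothesis against the
                    # ground truth (computed here for inspection; unused below)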
                    sqerr = (pose_3d_17 -
                             np.expand_dims(dec_out_17, axis=2))**2
                    dists = np.zeros(
                        (sqerr.shape[0], n_joints, sqerr.shape[2]))
                    for m in range(dists.shape[-1]):
                        dist_idx = 0
                        for k in np.arange(0, n_joints * 3, 3):
                            dists[:, dist_idx, m] = np.sqrt(
                                np.sum(sqerr[:, k:k + 3, m], axis=1))
                            dist_idx = dist_idx + 1

                    for i in range(poses3d.shape[-1]):
                        pose_3d_mdm[i].append(poses3d[:, :, i])

                # Put all the poses together
                enc_in, dec_out = map(np.vstack, [enc_in, dec_out])
                for i in range(poses3d.shape[-1]):
                    pose_3d_mdm[i] = np.vstack(pose_3d_mdm[i])

                # Convert back to world coordinates
                scam_idx = -1  # overwritten below when poses are in the camera frame
                if FLAGS.camera_frame:
                    N_CAMERAS = 4
                    N_JOINTS_H36M = 32

                    # Add global position back
                    dec_out = dec_out + np.tile(test_root_positions[key3d],
                                                [1, N_JOINTS_H36M])
                    for i in range(poses3d.shape[-1]):
                        pose_3d_mdm[i] = pose_3d_mdm[i] + np.tile(
                            test_root_positions[key3d], [1, N_JOINTS_H36M])

                    # Load the appropriate camera
                    subj, action, sname = key3d

                    cname = sname.split('.')[1]  # <-- camera name
                    scams = {(subj, c + 1): rcams[(subj, c + 1)]
                             for c in range(N_CAMERAS)}  # cams of this subject
                    scam_idx = [
                        scams[(subj, c + 1)][-1] for c in range(N_CAMERAS)
                    ].index(cname)  # index of camera used
                    the_cam = scams[(subj,
                                     scam_idx + 1)]  # <-- the camera used
                    R, T, f, c, k, p, name = the_cam
                    assert name == cname

                    def cam2world_centered(data_3d_camframe):
                        data_3d_worldframe = cameras.camera_to_world_frame(
                            data_3d_camframe.reshape((-1, 3)), R, T)
                        data_3d_worldframe = data_3d_worldframe.reshape(
                            (-1, N_JOINTS_H36M * 3))
                        # subtract root translation
                        return data_3d_worldframe - np.tile(
                            data_3d_worldframe[:, :3], (1, N_JOINTS_H36M))

                    # Apply inverse rotation and translation
                    dec_out = cam2world_centered(dec_out)
                    for i in range(poses3d.shape[-1]):
                        pose_3d_mdm[i] = cam2world_centered(pose_3d_mdm[i])

                # sample some results to visualize
                np.random.seed(42)
                idx = np.random.permutation(enc_in.shape[0])
                enc_in, dec_out = enc_in[idx, :], dec_out[idx, :]
                for i in range(poses3d.shape[-1]):
                    pose_3d_mdm[i] = pose_3d_mdm[i][idx, :]

                exidx = 1
                nsamples = 20

                for _ in np.arange(nsamples):
                    fig = plt.figure(figsize=(20, 5))

                    subplot_idx = 1
                    gs1 = gridspec.GridSpec(1, 7)  # 1 row, 7 columns
                    gs1.update(wspace=-0.00,
                               hspace=0.05)  # set the spacing between axes.
                    plt.axis('off')

                    # Plot 2d pose
                    ax1 = plt.subplot(gs1[subplot_idx - 1])
                    p2d = enc_in[exidx, :]
                    viz.show2Dpose(p2d, ax1)
                    ax1.invert_yaxis()

                    # Plot 3d gt
                    ax2 = plt.subplot(gs1[subplot_idx], projection='3d')
                    p3d = dec_out[exidx, :]
                    viz.show3Dpose(p3d, ax2)

                    # Plot 3d pose hypotheses

                    for i in range(poses3d.shape[-1]):
                        ax3 = plt.subplot(gs1[subplot_idx + i + 1],
                                          projection='3d')
                        p3d = pose_3d_mdm[i][exidx]
                        viz.show3Dpose(p3d,
                                       ax3,
                                       lcolor="#9b59b6",
                                       rcolor="#2ecc71")
                    # plt.show()
                    plt.savefig('{}/sample_{}_{}_{}_{}.png'.format(
                        path, subj, b, scam_idx, exidx))
                    plt.close(fig)
                    exidx = exidx + 1
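Every example below funnels its outputs through data_utils.unNormalizeData. A minimal sketch of what that helper is assumed to do, based on how it is called here (the real implementation lives in data_utils): scatter the used columns back into the full 96-dim (32 joints x 3) layout, then invert the z-score normalization.

import numpy as np

def unNormalizeData(normalized_data, data_mean, data_std, dim_to_ignore):
    """Sketch: invert (x - mean) / std and re-insert the ignored dimensions."""
    T = normalized_data.shape[0]                  # number of frames
    D = data_mean.shape[0]                        # full dimensionality, e.g. 96
    dim_to_use = np.setdiff1d(np.arange(D), dim_to_ignore)

    orig = np.zeros((T, D), dtype=np.float32)
    orig[:, dim_to_use] = normalized_data         # scatter used dims back in place
    return orig * data_std + data_mean            # undo the z-score normalization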
Example 2
    def load_data(self, path, load_metrics):
        filename, _ = os.path.splitext(os.path.basename(path))

        indices_to_select_2d = [0, 6, 7, 8, 1, 2, 3, 12, 13, 15, 25, 26, 27, 17, 18, 19]
        indices_to_select_3d = [1, 2, 3, 6, 7, 8, 12, 13, 14, 15, 17, 18, 19, 25, 26, 27]

        self.cameras = cameras.load_cameras(os.path.join(path, "cameras.h5"))

        TRAIN_SUBJECTS = [1, 5, 6, 7, 8]
        TEST_SUBJECTS  = [9, 11]

        actions = ["Directions","Discussion","Eating","Greeting",
           "Phoning","Photo","Posing","Purchases",
           "Sitting","SittingDown","Smoking","Waiting",
           "WalkDog","Walking","WalkTogether"]

        trainset = self.load_3d_data(path, TRAIN_SUBJECTS, actions)
        testset = self.load_3d_data(path, TEST_SUBJECTS, actions)

        d2d_train, d3d_train = self.project_to_cameras(trainset, augment_count=self.augment_count)
        d2d_valid, d3d_valid = self.project_to_cameras(testset)

        if self.center_2d:
            self._data_train['2d'] = self.root_center(np.array(d2d_train))[:, indices_to_select_2d, :]
        else:
            self._data_train['2d'] = np.array(d2d_train)[:, indices_to_select_2d, :]
        
        self._data_train['3d'] = self.root_center(np.array(d3d_train))[:, indices_to_select_2d, :]
        
        if self.center_2d:
            self._data_valid['2d'] = self.root_center(np.array(d2d_valid))[:, indices_to_select_2d, :]
        else:
            self._data_valid['2d'] = np.array(d2d_valid)[:, indices_to_select_2d, :]

        self._data_valid['3d'] = self.root_center(np.array(d3d_valid))[:, indices_to_select_2d, :]

        self.plot_random()

        if not load_metrics:
            self.mean_3d = np.mean(self._data_train['3d'], axis=0)
            self.std_3d = np.std(self._data_train['3d'], axis=0)
            self.mean_2d = np.mean(self._data_train['2d'], axis=0)
            self.std_2d = np.std(self._data_train['2d'], axis=0)

            if not os.path.exists(os.path.join("metrics/", filename + "_metrics.npz")):
                np.savez_compressed(
                    os.path.join("metrics/", filename + "_metrics"),
                    mean_2d=self.mean_2d, std_2d=self.std_2d,
                    mean_3d=self.mean_3d, std_3d=self.std_3d)
        else:
            data = np.load(load_metrics)
            self.mean_2d = data['mean_2d']
            self.std_2d = data['std_2d']
            self.mean_3d = data['mean_3d']
            self.std_3d = data['std_3d']

        self._data_train['3d'] = normalize_data(self._data_train['3d'], self.mean_3d, self.std_3d, skip_root=True)
        self._data_train['2d'] = normalize_data(self._data_train['2d'], self.mean_2d, self.std_2d, skip_root=self.center_2d)

        self._data_valid['3d'] = normalize_data(self._data_valid['3d'], self.mean_3d, self.std_3d, skip_root=True)
        self._data_valid['2d'] = normalize_data(self._data_valid['2d'], self.mean_2d, self.std_2d, skip_root=self.center_2d)
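Example 2 leans on two small helpers that are not shown. A plausible sketch, assuming root_center subtracts the root joint (index 0) from every pose and normalize_data applies per-dimension z-scoring with an optional pass-through for the root:

import numpy as np

def root_center(poses):
    """Sketch: center each (N, n_joints, dim) pose array on its root joint."""
    return poses - poses[:, :1, :]

def normalize_data(data, mean, std, skip_root=False):
    """Sketch: z-score every dimension; optionally leave the root joint as-is."""
    normalized = (data - mean) / (std + 1e-8)  # epsilon guards near-constant dims
    if skip_root:
        normalized[:, 0, :] = data[:, 0, :]
    return normalized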
Example 3
def test():

    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use groundtruth 2D projections
    if FLAGS.use_sh:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions(
            actions, FLAGS.data_dir)
    else:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.create_2d_data(
            actions, FLAGS.data_dir, rcams)

    # Avoid using the GPU if requested
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:

        # === Create the model ===
        print("Creating %d bi-layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)

        current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1

        if FLAGS.evaluateActionWise:

            logger.info("{0:=^12} {1:=^6}".format(
                "Action", "mm"))  # line of 30 equal signs

            cum_err = 0  # accumulate per-action error to report the average
            for action in actions:

                # Get 2d and 3d testing data for this action
                action_test_set_2d = get_action_subset(test_set_2d, action)
                action_test_set_3d = get_action_subset(test_set_3d, action)
                encoder_inputs, decoder_outputs, repro_info = model.get_all_batches(
                    action_test_set_2d,
                    action_test_set_3d,
                    FLAGS.camera_frame,
                    training=False)

                act_err, step_time, loss = evaluate_batches(
                    sess, model, data_mean_3d, data_std_3d, dim_to_use_3d,
                    dim_to_ignore_3d, data_mean_2d, data_std_2d, dim_to_use_2d,
                    dim_to_ignore_2d, current_step, encoder_inputs,
                    decoder_outputs)

                cum_err = cum_err + act_err
                logger.info('{0:<12} {1:>6.2f}'.format(action, act_err))

            summaries = sess.run(
                model.err_mm_summary,
                {model.err_mm: float(cum_err / float(len(actions)))})
            model.test_writer.add_summary(summaries, current_step)

            logger.info('{0:<12} {1:>6.2f}'.format(
                "Average", cum_err / float(len(actions))))

            logger.info('{0:=^19}'.format(''))
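test() filters the evaluation dictionaries with get_action_subset. A minimal sketch of that filter, assuming the keys are (subject, action, filename) tuples as in Example 1:

def get_action_subset(poses_set, action):
    """Sketch: keep only the sequences whose key names the given action."""
    return {k: v for k, v in poses_set.items() if k[1] == action}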
Example 4
def train():
    """Train a linear model for 3d pose estimation"""

    actions = data_utils.define_actions(FLAGS.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(FLAGS.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data(
        actions, FLAGS.data_dir, FLAGS.camera_frame, rcams, FLAGS.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use groundtruth 2D projections
    if FLAGS.use_sh:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions(
            actions, FLAGS.data_dir)
    else:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.create_2d_data(
            actions, FLAGS.data_dir, rcams)

    # Avoid using the GPU if requested
    device_count = {"GPU": 0} if FLAGS.use_cpu else {"GPU": 1}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:

        # === Create the model ===
        print("Creating %d bi-layers of %d units." %
              (FLAGS.num_layers, FLAGS.linear_size))
        model = create_model(sess, actions, FLAGS.batch_size)
        model.train_writer.add_graph(sess.graph)

        # === This is the training loop ===
        step_time, loss, val_loss = 0.0, 0.0, 0.0
        current_step = 0 if FLAGS.load <= 0 else FLAGS.load + 1

        current_epoch = 0
        log_every_n_batches = 100
        best_so_far = 90.00  # checkpoint only when the average error (mm) beats this

        for epoch in range(FLAGS.epochs):
            current_epoch = current_epoch + 1

            # === Load training batches for one epoch ===
            encoder_inputs, decoder_outputs, _ = model.get_all_batches(
                train_set_2d, train_set_3d, FLAGS.camera_frame, training=True)
            nbatches = len(encoder_inputs)
            start_time, loss = time.time(), 0.

            # === Loop through all the training batches ===
            for i in range(nbatches):

                if (i + 1) % log_every_n_batches == 0:
                    # Print progress every log_every_n_batches batches
                    print("Working on epoch {0}, batch {1} / {2}... ".format(
                        current_epoch, i + 1, nbatches),
                          end="")

                enc_in, dec_out = encoder_inputs[i], decoder_outputs[i]
                # enc_in = data_utils.generage_missing_data(enc_in, FLAGS.miss_num)
                step_loss, loss_summary, lr_summary, comp = model.step(
                    sess, enc_in, dec_out, FLAGS.dropout, isTraining=True)

                if (i + 1) % log_every_n_batches == 0:

                    # Log and print progress every log_every_n_batches batches

                    model.train_writer.add_summary(loss_summary, current_step)
                    model.train_writer.add_summary(lr_summary, current_step)
                    step_time = (time.time() - start_time)
                    start_time = time.time()
                    print("done in {0:.2f} ms".format(1000 * step_time /
                                                      log_every_n_batches))

                loss += step_loss
                current_step += 1
                # === end looping through training batches ===

            loss = loss / nbatches

            logger.info("=============================\n"
                        "Epoch:               %d\n"
                        "Global step:         %d\n"
                        "Learning rate:       %.2e\n"
                        "Train loss avg:      %.4f\n"
                        "=============================" %
                        (epoch, model.global_step.eval(),
                         model.learning_rate.eval(), loss))
            # === End training for an epoch ===

            # === Testing after this epoch ===

            if FLAGS.evaluateActionWise:

                logger.info("{0:=^12} {1:=^6}".format(
                    "Action", "mm"))  # line of 30 equal signs

                cum_err = 0  # select the mixture model which has mininum error
                for action in actions:

                    # Get 2d and 3d testing data for this action
                    action_test_set_2d = get_action_subset(test_set_2d, action)
                    action_test_set_3d = get_action_subset(test_set_3d, action)
                    encoder_inputs, decoder_outputs, repro_info = model.get_all_batches(
                        action_test_set_2d,
                        action_test_set_3d,
                        FLAGS.camera_frame,
                        training=False)

                    act_err, step_time, loss = evaluate_batches(
                        sess, model, data_mean_3d, data_std_3d, dim_to_use_3d,
                        dim_to_ignore_3d, data_mean_2d, data_std_2d,
                        dim_to_use_2d, dim_to_ignore_2d, current_step,
                        encoder_inputs, decoder_outputs)

                    cum_err = cum_err + act_err
                    logger.info('{0:<12} {1:>6.2f}'.format(action, act_err))

                summaries = sess.run(
                    model.err_mm_summary,
                    {model.err_mm: float(cum_err / float(len(actions)))})
                model.test_writer.add_summary(summaries, current_step)

                logger.info('{0:<12} {1:>6.2f}'.format(
                    "Average", cum_err / float(len(actions))))

                logger.info('{0:=^19}'.format(''))

            # Save the model whenever the action-wise average error improves
            print("Saving the model... ", end="")
            start_time = time.time()
            if FLAGS.evaluateActionWise and cum_err / float(
                    len(actions)) < best_so_far:
                print("Criteria passed...", end="")
                model.saver.save(sess,
                                 os.path.join(train_dir, 'checkpoint'),
                                 global_step=current_step)
                best_so_far = cum_err / float(len(actions))

            print("done in {0:.2f} ms".format(1000 *
                                              (time.time() - start_time)))

            # Reset global time and loss
            step_time, loss = 0, 0

            sys.stdout.flush()
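Every entry point starts from data_utils.define_actions(FLAGS.action). A plausible sketch, assuming it expands "All" to the full Human3.6M action list (the list spelled out in Example 2) and validates a single action name otherwise:

def define_actions(action):
    """Sketch: map an action flag to the list of actions to load."""
    actions = ["Directions", "Discussion", "Eating", "Greeting",
               "Phoning", "Photo", "Posing", "Purchases",
               "Sitting", "SittingDown", "Smoking", "Waiting",
               "WalkDog", "Walking", "WalkTogether"]
    if action in ("All", "all"):
        return actions
    if action not in actions:
        raise ValueError("Unrecognized action: {}".format(action))
    return [action]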
Example 5
def main(opt):
    err_best = 1000
    glob_step = 0
    lr_now = opt.lr
    lr_decay = opt.lr_decay
    lr_init = opt.lr
    lr_gamma = opt.lr_gamma
    start_epoch = 0

    file_path = os.path.join(opt.ckpt, 'opt.json')
    with open(file_path, 'w') as f:
        f.write(json.dumps(vars(opt), sort_keys=True, indent=4))

    # create model
    print(">>> creating model")
    model = LinearModel(opt.batch_size, opt.predict_14)
    # = refine_2d_model(opt.batch_size,opt.predict_14)
    model = model.cuda()
    model.apply(weight_init)

    #refine_2d_model = refine_2d_model.cuda()
    #refine_2d_model.apply(weight_init)
    print(">>> total params: {:.2f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0
    ))  #+ sum(p.numel() for p in refine_2d_model.parameters()) / 1000000.0))
    criterion = nn.MSELoss(reduction='mean').cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    #refine_2d_model_optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)

    # Load checkpoint if resuming
    if opt.resume:
        print(">>> loading ckpt from '{}'".format(opt.load))
        ckpt = torch.load(opt.load)
        start_epoch = ckpt['epoch']
        err_best = ckpt['err']
        glob_step = ckpt['step']
        lr_now = ckpt['lr']
        model.load_state_dict(ckpt['state_dict'])
        #refine_2d_model.load_state_dict[ckpt['refine_state_dict']]
        optimizer.load_state_dict(ckpt['optimizer'])
        #refine_2d_model_optimizer.load_state_dict(ckpt['refine_optimizer'])
        print(">>> ckpt loaded (epoch: {} | err: {})".format(
            start_epoch, err_best))

    # List of actions to use
    actions = data_utils.define_actions(opt.action)
    num_actions = len(actions)
    print(">>> actions to use (total: {}):".format(num_actions))
    pprint(actions, indent=4)
    print(">>>")

    # data loading
    print(">>> loading data")

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(opt.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data(
        actions, opt.data_dir, opt.camera_frame, rcams, opt.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use groundtruth 2D projections
    if opt.use_hg:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions(
            actions, opt.data_dir)
    else:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.create_2d_data(
            actions, opt.data_dir, rcams)

    #gt_train_set_2d, gt_test_set_2d, gt_data_mean_2d, gt_data_std_2d, gt_dim_to_ignore_2d, gt_dim_to_use_2d = data_utils.create_2d_data( actions, opt.data_dir, rcams )

    print("done reading and normalizing data.")

    step_time, loss = 0, 0
    current_epoch = start_epoch
    log_every_n_batches = 100

    cudnn.benchmark = True
    best_error = 10000
    save_flag = True  # write per-joint errors to the records file only once
    while current_epoch < opt.epochs:
        current_epoch = current_epoch + 1

        # === Load training batches for one epoch ===
        encoder_inputs, decoder_outputs = get_all_batches(opt,
                                                          train_set_2d,
                                                          train_set_3d,
                                                          training=True)

        nbatches = len(encoder_inputs)
        print("There are {0} train batches".format(nbatches))
        start_time = time.time()

        # === Loop through all the training batches ===
        current_step = 0
        for i in range(nbatches):

            if (i + 1) % log_every_n_batches == 0:
                # Print progress every log_every_n_batches batches
                print("Working on epoch {0}, batch {1} / {2}... \n".format(
                    current_epoch, i + 1, nbatches),
                      end="")

            model.train()

            if glob_step % lr_decay == 0 or glob_step == 1:
                lr_now = utils.lr_decay(optimizer, glob_step, lr_init,
                                        lr_decay, lr_gamma)
                #utils.lr_decay(refine_2d_model_optimizer, glob_step, lr_init, lr_decay, lr_gamma)

            enc_in = torch.from_numpy(encoder_inputs[i]).float()
            dec_out = torch.from_numpy(decoder_outputs[i]).float()

            inputs = Variable(enc_in.cuda())
            targets = Variable(dec_out.cuda())

            outputs = model(inputs)

            # calculate loss
            optimizer.zero_grad()

            step_loss = criterion(outputs, targets)
            step_loss.backward()

            if opt.max_norm:
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
                #nn.utils.clip_grad_norm_(refine_2d_model.parameters(), max_norm=1)

            optimizer.step()

            loss += float(step_loss)

            current_step += 1
            glob_step += 1
            # === end looping through training batches ===

        loss = loss / nbatches

        print("=============================\n"
              "Global step:         %d\n"
              "Learning rate:       %.2e\n"
              "Train loss avg:      %.4f\n"
              "=============================" % (glob_step, lr_now, loss))
        # === End training for an epoch ===

        # Free cached GPU memory
        torch.cuda.empty_cache()

        # === Testing after this epoch ===
        model.eval()
        if opt.evaluateActionWise:
            print("{0:=^12} {1:=^6}".format("Action",
                                            "mm"))  # line of 30 equal signs

            cum_err = 0
            record = ''
            for action in actions:

                print("{0:<12} ".format(action), end="")
                # Get 2d and 3d testing data for this action
                action_test_set_2d = get_action_subset(test_set_2d, action)
                action_test_set_3d = get_action_subset(test_set_3d, action)
                encoder_inputs, decoder_outputs = get_all_batches(
                    opt,
                    action_test_set_2d,
                    action_test_set_3d,
                    training=False)

                total_err, joint_err, step_time = evaluate_batches(
                    opt, criterion, model, data_mean_3d, data_std_3d,
                    dim_to_use_3d, dim_to_ignore_3d, data_mean_2d, data_std_2d,
                    dim_to_use_2d, dim_to_ignore_2d, current_step,
                    encoder_inputs, decoder_outputs)
                cum_err = cum_err + total_err

                print("{0:>6.2f}".format(total_err))

                record = record + "{}   :   {}  (mm) \n".format(
                    action, total_err)
            avg_val = cum_err / float(len(actions))
            print("{0:<12} {1:>6.2f}".format("Average", avg_val))
            print("{0:=^19}".format(''))

            f = open("records.txt", 'a')
            f.write("epoch: {} , avg_error: {}  loss : {} \n".format(
                current_epoch, avg_val, loss))

            if best_error > avg_val:
                print("=============================")
                print("==== save best record   =====")
                print("=============================")
                best_error = avg_val
                # save ckpt
                file_path = os.path.join(opt.ckpt, 'ckpt_last.pth.tar')
                torch.save(
                    {
                        'epoch': current_epoch,
                        'lr': lr_now,
                        'step': glob_step,
                        'err': avg_val,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict()
                    }, file_path)

                f.write("epoch: {} , avg_error: {} \n".format(
                    current_epoch, best_error))
                f.write(record)

            f.write("=======================================\n")
            f.close()

        else:

            n_joints = 17 if not opt.predict_14 else 14
            f = open("records.txt", 'a')  # per-joint errors are appended here

            encoder_inputs, decoder_outputs = get_all_batches(opt,
                                                              test_set_2d,
                                                              test_set_3d,
                                                              training=False)

            total_err, joint_err, step_time = evaluate_batches(
                opt, criterion, model, data_mean_3d, data_std_3d,
                dim_to_use_3d, dim_to_ignore_3d, data_mean_2d, data_std_2d,
                dim_to_use_2d, dim_to_ignore_2d, current_step, encoder_inputs,
                decoder_outputs, current_epoch)

            print("=============================\n"
                  "Step-time (ms):      %.4f\n"
                  "Val loss avg:        %.4f\n"
                  "Val error avg (mm):  %.2f\n"
                  "=============================" %
                  (1000 * step_time, loss, total_err))

            for i in range(n_joints):
                # joint number, width-5 right-aligned error, 2 decimal places
                print("Error in joint {0:02d} (mm): {1:>5.2f}".format(
                    i + 1, joint_err[i]))

                if save_flag:
                    f.write("Error in joint {0:02d} (mm): {1:>5.2f} \n".format(
                        i + 1, joint_err[i]))
            print("=============================")

            save_flag = False
            f.close()

    print("done in {0:.2f} ms".format(1000 * (time.time() - start_time)))
    # Reset global time and loss
    step_time, loss = 0, 0
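The training loop above refreshes the learning rate through utils.lr_decay every lr_decay steps. A minimal sketch of such a scheduler, assuming exponential decay driven by the global step:

def lr_decay(optimizer, step, lr_init, decay_step, gamma):
    """Sketch: lr = lr_init * gamma ** (step / decay_step), applied to all groups."""
    lr = lr_init * gamma ** (step / decay_step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr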
Example 6
def testFunc(opt):
    start_epoch = 0
    print("procrustes          {}".format(opt.procrustes))
    # create model
    print(">>> creating model")
    model = LinearModel(opt.batch_size, opt.predict_14)
    # = refine_2d_model(opt.batch_size,opt.predict_14)
    model = model.cuda()
    model.apply(weight_init)
    model.eval()
    #refine_2d_model = refine_2d_model.cuda()
    #refine_2d_model.apply(weight_init)
    print(">>> total params: {:.2f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0
    ))  #+ sum(p.numel() for p in refine_2d_model.parameters()) / 1000000.0))
    criterion = nn.MSELoss(reduction='mean').cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    #refine_2d_model_optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)

    # Load checkpoint
    print(">>> loading ckpt from '{}'".format(opt.load))
    ckpt = torch.load(opt.load)
    start_epoch = ckpt['epoch']
    err_best = ckpt['err']
    glob_step = ckpt['step']
    model.load_state_dict(ckpt['state_dict'])
    #refine_2d_model.load_state_dict[ckpt['refine_state_dict']]
    optimizer.load_state_dict(ckpt['optimizer'])
    #refine_2d_model_optimizer.load_state_dict(ckpt['refine_optimizer'])
    print(">>> ckpt loaded (epoch: {} | err: {})".format(
        start_epoch, err_best))

    # List of actions to use
    actions = data_utils.define_actions(opt.action)
    num_actions = len(actions)
    print(">>> actions to use (total: {}):".format(num_actions))
    pprint(actions, indent=4)
    print(">>>")

    # data loading
    print(">>> loading data")

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rcams = cameras.load_cameras(opt.cameras_path, SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data(
        actions, opt.data_dir, opt.camera_frame, rcams, opt.predict_14)

    # Read stacked hourglass 2D predictions if use_sh, otherwise use groundtruth 2D projections
    if opt.use_hg:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions(
            actions, opt.data_dir)
    else:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.create_2d_data(
            actions, opt.data_dir, rcams)

    #gt_train_set_2d, gt_test_set_2d, gt_data_mean_2d, gt_data_std_2d, gt_dim_to_ignore_2d, gt_dim_to_use_2d = data_utils.create_2d_data( actions, opt.data_dir, rcams )

    print("done reading and normalizing data.")

    cudnn.benchmark = True

    # === Testing after this epoch ===
    if opt.evaluateActionWise:
        print("{0:=^12} {1:=^6}".format("Action",
                                        "mm"))  # line of 30 equal signs

        cum_err = 0
        record = ''
        for action in actions:

            print("{0:<12} ".format(action), end="")
            # Get 2d and 3d testing data for this action
            action_test_set_2d = get_action_subset(test_set_2d, action)
            action_test_set_3d = get_action_subset(test_set_3d, action)
            encoder_inputs, decoder_outputs = get_all_batches(
                opt, action_test_set_2d, action_test_set_3d, rcams)

            total_err, joint_err, step_time = evaluate_batches(
                opt, criterion, model, data_mean_3d, data_std_3d,
                dim_to_use_3d, dim_to_ignore_3d, data_mean_2d, data_std_2d,
                dim_to_use_2d, dim_to_ignore_2d, glob_step, encoder_inputs,
                decoder_outputs)
            cum_err = cum_err + total_err

            print("{0:>6.2f}".format(total_err))

            record = record + "{}   :   {}  (mm) \n".format(action, total_err)
        avg_val = cum_err / float(len(actions))
        print("{0:<12} {1:>6.2f}".format("Average", avg_val))
        print("{0:=^19}".format(''))

        f = open(os.path.join(opt.ckpt, "records.txt"), 'a')
        f.write("Test --- epoch: {} , avg_error: {}  loss : {} \n".format(
            start_epoch, avg_val, err_best))
        f.write(record)
        f.write("=======================================\n")
        f.close()

    else:

        n_joints = 17 if not opt.predict_14 else 14
        loss = 0.0  # no validation loss is tracked in this branch
        f = open(os.path.join(opt.ckpt, "records.txt"), 'a')
        save_flag = True  # write per-joint errors to the file once

        encoder_inputs, decoder_outputs = get_all_batches(opt,
                                                          test_set_2d,
                                                          test_set_3d,
                                                          rcams,
                                                          training=False)

        total_err, joint_err, step_time = evaluate_batches(
            opt, criterion, model, data_mean_3d, data_std_3d, dim_to_use_3d,
            dim_to_ignore_3d, data_mean_2d, data_std_2d, dim_to_use_2d,
            dim_to_ignore_2d, glob_step, encoder_inputs, decoder_outputs,
            start_epoch)

        print("=============================\n"
              "Step-time (ms):      %.4f\n"
              "Val loss avg:        %.4f\n"
              "Val error avg (mm):  %.2f\n"
              "=============================" %
              (1000 * step_time, loss, total_err))

        for i in range(n_joints):
            # joint number, width-5 right-aligned error, 2 decimal places
            print("Error in joint {0:02d} (mm): {1:>5.2f}".format(
                i + 1, joint_err[i]))

            if save_flag:
                f.write("Error in joint {0:02d} (mm): {1:>5.2f} \n".format(
                    i + 1, joint_err[i]))
        print("=============================")

        save_flag = False
        f.close()
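Examples 5 to 7 initialize LinearModel with model.apply(weight_init), which calls the function on every submodule. A plausible sketch, assuming Kaiming initialization of the linear layers:

import torch.nn as nn

def weight_init(m):
    """Sketch: Kaiming-initialize Linear layers, leave other modules at defaults."""
    if isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)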
Example 7
def sample(opt):
    """Get samples from a model and visualize them"""

    actions = data_utils.define_actions(opt.action)

    # Load camera parameters
    SUBJECT_IDS = [1, 5, 6, 7, 8, 9, 11]
    rootPath = os.getcwd()
    rcams = cameras.load_cameras(os.path.join(rootPath, opt.cameras_path),
                                 SUBJECT_IDS)

    # Load 3d data and load (or create) 2d projections
    train_set_3d, test_set_3d, data_mean_3d, data_std_3d, dim_to_ignore_3d, dim_to_use_3d, train_root_positions, test_root_positions = data_utils.read_3d_data(
        actions, opt.data_dir, opt.camera_frame, rcams, opt.predict_14)

    if opt.use_hg:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.read_2d_predictions(
            actions, opt.data_dir)
    else:
        train_set_2d, test_set_2d, data_mean_2d, data_std_2d, dim_to_ignore_2d, dim_to_use_2d = data_utils.create_2d_data(
            actions, opt.data_dir, rcams)
    print("done reading and normalizing data.")

    # create model
    print(">>> creating model")
    model = LinearModel(opt.batch_size, opt.predict_14)
    model = model.cuda()
    model.apply(weight_init)

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)

    print(">>> total params: {:.2f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    print(">>> loading ckpt from '{}'".format(opt.load))

    ckpt = torch.load(opt.load)
    model.load_state_dict(ckpt['state_dict'])
    optimizer.load_state_dict(ckpt['optimizer'])
    print("Model loaded")
    model.eval()

    for key2d in test_set_2d.keys():

        (subj, b, fname) = key2d
        print("Subject: {}, action: {}, fname: {}".format(subj, b, fname))

        # keys should be the same if 3d is in camera coordinates
        key3d = key2d if opt.camera_frame else (subj, b, '{0}.h5'.format(
            fname.split('.')[0]))
        key3d = (subj, b, fname[:-3]
                 ) if (fname.endswith('-sh')) and opt.camera_frame else key3d

        enc_in = test_set_2d[key2d]
        n2d, _ = enc_in.shape
        dec_out = test_set_3d[key3d]
        n3d, _ = dec_out.shape
        assert n2d == n3d

        # Split into about-same-size batches
        enc_in = np.array_split(enc_in, n2d // opt.batch_size)
        dec_out = np.array_split(dec_out, n3d // opt.batch_size)
        all_poses_3d = []

        for bidx in range(len(enc_in)):

            # Dropout is already disabled by model.eval() above
            ei = torch.from_numpy(enc_in[bidx]).float()
            inputs = Variable(ei.cuda())
            outputs = model(inputs)

            # denormalize
            enc_in[bidx] = data_utils.unNormalizeData(enc_in[bidx],
                                                      data_mean_2d,
                                                      data_std_2d,
                                                      dim_to_ignore_2d)
            dec_out[bidx] = data_utils.unNormalizeData(dec_out[bidx],
                                                       data_mean_3d,
                                                       data_std_3d,
                                                       dim_to_ignore_3d)
            poses3d = data_utils.unNormalizeData(outputs.data.cpu().numpy(),
                                                 data_mean_3d, data_std_3d,
                                                 dim_to_ignore_3d)
            all_poses_3d.append(poses3d)

        # Put all the poses together
        enc_in, dec_out, poses3d = map(np.vstack,
                                       [enc_in, dec_out, all_poses_3d])

        # Convert back to world coordinates
        if opt.camera_frame:
            N_CAMERAS = 4
            N_JOINTS_H36M = 32

            # Add global position back
            dec_out = dec_out + np.tile(test_root_positions[key3d],
                                        [1, N_JOINTS_H36M])

            # Load the appropriate camera
            subj, _, sname = key3d

            cname = sname.split('.')[1]  # <-- camera name
            scams = {(subj, c + 1): rcams[(subj, c + 1)]
                     for c in range(N_CAMERAS)}  # cams of this subject
            scam_idx = [scams[(subj, c + 1)][-1] for c in range(N_CAMERAS)
                        ].index(cname)  # index of camera used
            the_cam = scams[(subj, scam_idx + 1)]  # <-- the camera used
            R, T, f, c, k, p, name = the_cam
            assert name == cname

            def cam2world_centered(data_3d_camframe):
                data_3d_worldframe = cameras.camera_to_world_frame(
                    data_3d_camframe.reshape((-1, 3)), R, T)
                data_3d_worldframe = data_3d_worldframe.reshape(
                    (-1, N_JOINTS_H36M * 3))
                # subtract root translation
                return data_3d_worldframe - np.tile(data_3d_worldframe[:, :3],
                                                    (1, N_JOINTS_H36M))

            # Apply inverse rotation and translation
            dec_out = cam2world_centered(dec_out)
            poses3d = cam2world_centered(poses3d)

    # Grab a random batch to visualize
    enc_in, dec_out, poses3d = map(np.vstack, [enc_in, dec_out, poses3d])
    idx = np.random.permutation(enc_in.shape[0])
    enc_in, dec_out, poses3d = enc_in[idx, :], dec_out[idx, :], poses3d[idx, :]

    # Visualize random samples
    import matplotlib.gridspec as gridspec

    # 1080p = 1920 x 1080
    fig = plt.figure(figsize=(19.2, 10.8))

    gs1 = gridspec.GridSpec(5, 9)  # 5 rows, 9 columns
    gs1.update(wspace=-0.00, hspace=0.05)  # set the spacing between axes.
    plt.axis('off')

    subplot_idx, exidx = 1, 1
    nsamples = 15
    for i in np.arange(nsamples):

        # Plot 2d pose
        ax1 = plt.subplot(gs1[subplot_idx - 1])
        p2d = enc_in[exidx, :]
        viz.show2Dpose(p2d, ax1)
        ax1.invert_yaxis()

        # Plot 3d gt
        ax2 = plt.subplot(gs1[subplot_idx], projection='3d')
        p3d = dec_out[exidx, :]
        viz.show3Dpose(p3d, ax2)

        # Plot 3d predictions
        ax3 = plt.subplot(gs1[subplot_idx + 1], projection='3d')
        p3d = poses3d[exidx, :]
        viz.show3Dpose(p3d, ax3, lcolor="#9b59b6", rcolor="#2ecc71")

        exidx = exidx + 1
        subplot_idx = subplot_idx + 3

    plt.show()
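cam2world_centered in Examples 1 and 7 wraps cameras.camera_to_world_frame. A minimal sketch of that transform, assuming the forward camera model X_cam = R.dot(X_world - T), so the inverse rotates back and re-adds the camera position:

import numpy as np

def camera_to_world_frame(P, R, T):
    """Sketch: map an Nx3 array of camera-frame points to world coordinates."""
    assert P.ndim == 2 and P.shape[1] == 3
    return (R.T.dot(P.T) + T).T  # inverse rotation, then translation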