Example 1
def add_keypoint_rcnn_blobs(
    blobs, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx
):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    max_overlaps = roidb['max_overlaps']
    gt_keypoints = roidb['gt_keypoints']

    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]
    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])
    vis_kp = gt_keypoints[ind_kp, 2, :] > 0
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible)
    )[0]

    kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size)
    if kp_fg_inds.size > kp_fg_rois_per_this_image:
        kp_fg_inds = np.random.choice(
            kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False
        )

    sampled_fg_rois = roidb['boxes'][kp_fg_inds]
    box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

    num_keypoints = gt_keypoints.shape[2]
    sampled_keypoints = -np.ones(
        (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
        dtype=gt_keypoints.dtype
    )
    for ii in range(len(sampled_fg_rois)):
        ind = box_to_gt_ind_map[ii]
        if ind >= 0:
            sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
            assert np.sum(sampled_keypoints[ii, 2, :]) > 0

    heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
        sampled_keypoints, sampled_fg_rois
    )

    shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1)
    heats = heats.reshape(shape)
    weights = weights.reshape(shape)

    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1)
    )
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights
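
Note: the _within_box helper used above (and again in Examples 4 and 6) is not shown in these snippets. The sketch below is a minimal NumPy version consistent with how it is called here (keypoints of shape N x 3 x K tested against boxes of shape N x 4, yielding an N x K boolean mask); treat it as an illustration rather than the exact library code.

import numpy as np

def _within_box(points, boxes):
    """Check which keypoints fall inside their assigned box.
    points: N x >=2 x K array; row 0 holds x, row 1 holds y
    boxes:  N x 4 array of (x1, y1, x2, y2)
    Returns an N x K boolean mask.
    """
    x_within = np.logical_and(
        points[:, 0, :] >= boxes[:, 0, np.newaxis],
        points[:, 0, :] <= boxes[:, 2, np.newaxis])
    y_within = np.logical_and(
        points[:, 1, :] >= boxes[:, 1, np.newaxis],
        points[:, 1, :] <= boxes[:, 3, np.newaxis])
    return np.logical_and(x_within, y_within)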
Example 2
    def forward(self, inputs, outputs):
        data = inputs[0].data
        keypoint_probs = inputs[1].data
        keypoint_rois = inputs[2].data

        # output indicator resolution
        M = self.resolution
        up_scale = self.up_scale
        num_rois = keypoint_rois.shape[0]
        num_keypoints = keypoint_probs.shape[1]

        # first expand the keypoint rois
        height, width = data.shape[2], data.shape[3]
        pad_rois = box_utils.expand_boxes(keypoint_rois[:, 1:5], up_scale)
        pad_rois = box_utils.clip_boxes_to_image(pad_rois, height, width)

        # get keypoint predictions and their probs;
        # output shape is (#rois, 3, #keypoints), where the 3 entries are (x, y, prob)
        pred_rois = keypoint_utils.probs_to_keypoints(keypoint_probs, keypoint_rois)
        
        # map keypoint positions onto the pad_rois
        # output shape is (#rois, #keypoints), with locations flattened out
        locations_on_pad_rois, _ = keypoint_utils.keypoints_to_heatmap_labels(
            pred_rois, pad_rois, M
        )
        locations_on_pad_rois = locations_on_pad_rois.astype(np.int32)

        # and now generate keypoint indicators
        keypoint_indicators = blob_utils.zeros((num_rois, num_keypoints, M**2))
        for i in range(num_rois):
            locations = locations_on_pad_rois[i]  # shape (#keypoints,)
            for k in range(num_keypoints):
                keypoint_indicators[i, k, locations[k]] = pred_rois[i, 2, k]

        # reshape to 4 dimensions
        keypoint_indicators = keypoint_indicators.reshape(
            (num_rois, num_keypoints, M, M)
        )

        outputs[0].reshape(keypoint_indicators.shape)
        outputs[0].data[...] = keypoint_indicators
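
Example 2 relies on box_utils.expand_boxes to grow each roi about its center before clipping. A plausible minimal implementation, consistent with the Detectron-style call above but offered only as a sketch:

import numpy as np

def expand_boxes(boxes, scale):
    """Scale each (x1, y1, x2, y2) box about its own center."""
    w_half = (boxes[:, 2] - boxes[:, 0]) * 0.5 * scale
    h_half = (boxes[:, 3] - boxes[:, 1]) * 0.5 * scale
    x_c = (boxes[:, 2] + boxes[:, 0]) * 0.5
    y_c = (boxes[:, 3] + boxes[:, 1]) * 0.5
    boxes_exp = np.zeros(boxes.shape)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp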
Example 3
def _sample_human_object(rois, rois_to_gt_ind, roidb, im_info):
    """
    Sample human rois and target_object rois.
    :param rois: rois corresponding to the feature map
    :param rois_to_gt_ind: mapping from each roi to its assigned gt box index
    :param roidb: boxes corresponding to the original image
    :return: dict of sampled blobs for the human-centric and interaction branches
    """
    human_num_per_image = int(cfg.VCOCO.HUMAN_NUM_PER_IM)
    target_object_num_per_image = int(cfg.VCOCO.TARGET_OBJECT_NUM_PER_IM)
    kp_human_num_per_image = int(cfg.VCOCO.KP_HUMAN_NUM_PER_IM)

    # Add keypoints
    all_human_gt_inds = np.where(roidb['gt_classes'] == 1)[0]
    gt_keypoints = roidb['gt_keypoints']

    # get gt human ids that have actions
    # ToDo: name change
    # add all humans (even those without actions) to the human-centric branch
    human_with_action_gt_inds = np.where(roidb['gt_actions'][:, 0] >= 0)[0]
    gt_objects_num = roidb['gt_actions'].shape[0]
    # human_with_action_gt_inds = np.where(roidb['gt_classes'][:gt_objects_num] == 1)[0]
    # gt_boxes, for calculating action target locations
    # roidb['boxes'] = gt_boxes + scaled rois (from the RPN module)
    gt_boxes = roidb['boxes'][:gt_objects_num, :]

    # -------------------------------------------------------------------------
    # Human-Centric Branch: sample human rois and calculate targets
    # -------------------------------------------------------------------------

    # get proposals (rois) assigned to gt humans with actions
    # and their corresponding target_objects
    rois_human_with_action_inds = []
    rois_human_without_action_inds = []

    for human_gt_i in all_human_gt_inds:
        if human_gt_i in human_with_action_gt_inds:
            rois_human_with_action_inds.append(
                np.where(rois_to_gt_ind == human_gt_i)[0])
        else:
            rois_human_without_action_inds.append(
                np.where(rois_to_gt_ind == human_gt_i)[0])

    rois_human_with_action_inds = np.concatenate(rois_human_with_action_inds)

    # select at most human_num_per_image human rois
    human_num_this_image = min(human_num_per_image,
                               rois_human_with_action_inds.size)
    if rois_human_with_action_inds.size > 0:
        rois_human_with_action_inds = npr.choice(rois_human_with_action_inds,
                                                 size=human_num_this_image,
                                                 replace=False)

    if cfg.VCOCO.KEYPOINTS_ON:
        if len(rois_human_without_action_inds) > 0:
            rois_human_without_action_inds = np.concatenate(
                rois_human_without_action_inds)
            # clamp at zero: the with-action rois may already fill the kp quota
            human_num_without_action = min(
                max(kp_human_num_per_image - rois_human_with_action_inds.size, 0),
                rois_human_without_action_inds.size)
            rois_human_without_action_inds = npr.choice(
                rois_human_without_action_inds,
                size=human_num_without_action,
                replace=False)
            rois_kp_inds = np.concatenate(
                [rois_human_with_action_inds, rois_human_without_action_inds])
            kp_inds_of_sampled_rois = np.zeros(rois_kp_inds.size,
                                               dtype=np.int32)
            kp_inds_of_sampled_rois[:rois_human_with_action_inds.size] = 1
        else:
            rois_kp_inds = rois_human_with_action_inds
            kp_inds_of_sampled_rois = np.ones(rois_human_with_action_inds.size,
                                              dtype=np.int32)

        sampled_kp_rois = rois[rois_kp_inds]
        sampled_keypoints = gt_keypoints[rois_to_gt_ind[rois_kp_inds]]
        heats, kp_weights = keypoints_to_heatmap_labels(
            sampled_keypoints, sampled_kp_rois[:, 1:] / float(im_info[2]))

        shape = (sampled_kp_rois.shape[0] * gt_keypoints.shape[2], )
        heats = heats.reshape(shape)
        kp_weights = kp_weights.reshape(shape)

        min_count = cfg.KRCNN.MIN_KEYPOINT_COUNT_FOR_VALID_MINIBATCH
        num_visible_keypoints = np.sum(kp_weights)
        kp_norm = num_visible_keypoints / (
            cfg.TRAIN.IMS_PER_BATCH * cfg.TRAIN.BATCH_SIZE_PER_IM *
            cfg.TRAIN.FG_FRACTION * cfg.KRCNN.NUM_KEYPOINTS)

    # get human action targets relative location
    human_rois = rois[rois_human_with_action_inds]
    human_action_labels = roidb['gt_actions'][
        rois_to_gt_ind[rois_human_with_action_inds]]
    human_action_labels[human_action_labels < 0] = 0

    rois_human_role_ids = roidb['gt_role_id'][
        rois_to_gt_ind[rois_human_with_action_inds]]
    # scale rois back to the original image size
    human_action_targets, action_target_weights = \
        _compute_action_targets(human_rois[:, 1:] / float(im_info[2]),
                                gt_boxes, rois_human_role_ids)

    # -------------------------------------------------------------------------
    # Interaction Branch: sample target_object rois and sample positive triplets
    # -------------------------------------------------------------------------

    # Select role objects
    #
    # get gt role object inds
    target_object_gt_inds = np.unique(rois_human_role_ids)
    target_object_gt_inds = target_object_gt_inds[np.where(
        target_object_gt_inds > -1)]

    # get rois assigned to gt role objects
    if target_object_gt_inds.size > 0:
        rois_target_object_inds = []
        for role_gt_i in target_object_gt_inds:
            rois_target_object_inds.append(
                np.where(rois_to_gt_ind == role_gt_i)[0])
        rois_target_object_inds = np.concatenate(rois_target_object_inds)
    else:
        # some actions don't have target_objects
        rois_target_object_inds = np.empty((0, ), dtype=np.int64)

    # select role objects
    # ToDo: cap at target_object_num_per_image, or leave unlimited?
    # target_object_num_this_image = min(target_object_num_per_image, rois_target_object_inds.size)
    target_object_num_this_image = rois_target_object_inds.size
    if rois_target_object_inds.size > 0:
        rois_target_object_inds = npr.choice(rois_target_object_inds,
                                             size=target_object_num_this_image,
                                             replace=False)
    target_object_rois = rois[rois_target_object_inds]
    # target_object_feature_mapping_index = mapping_original_inds[rois_target_object_inds]

    # Sample positive triplets
    #
    human_rois_inds, target_object_rois_inds, interaction_action_labels = \
        generate_positive_triplets(rois_human_with_action_inds, rois_target_object_inds,
                                   rois_to_gt_ind, roidb['gt_role_id'])
    interaction_batch_idx = np.full_like(human_rois_inds,
                                         rois[0, 0],
                                         dtype=np.int32)

    sampled_rois = np.vstack((human_rois, target_object_rois))
    human_inds_of_sampled_rois = np.zeros(sampled_rois.shape[0],
                                          dtype=np.int32)
    human_inds_of_sampled_rois[:human_rois.shape[0]] = 1

    if not cfg.VCOCO.KEYPOINTS_ON:
        return_dict = dict(
            boxes=sampled_rois,
            human_inds_of_sampled_boxes=human_inds_of_sampled_rois,
            human_action_labels=human_action_labels,
            human_action_targets=human_action_targets,
            action_target_weights=action_target_weights,
            interaction_human_inds=human_rois_inds,
            interaction_target_object_inds=target_object_rois_inds,
            interaction_action_labels=interaction_action_labels,
            interaction_batch_idx=interaction_batch_idx)
    else:
        return_dict = dict(
            boxes=sampled_rois,
            human_inds_of_sampled_boxes=human_inds_of_sampled_rois,
            human_action_labels=human_action_labels,
            human_action_targets=human_action_targets,
            action_target_weights=action_target_weights,
            interaction_human_inds=human_rois_inds,
            interaction_target_object_inds=target_object_rois_inds,
            interaction_action_labels=interaction_action_labels,
            interaction_batch_idx=interaction_batch_idx,
            keypoint_rois=sampled_kp_rois,
            keypoint_locations_int32=heats.astype(np.int32, copy=False),
            keypoint_weights=kp_weights,
            keypoint_loss_normalizer=np.array([kp_norm], dtype=np.float32),
        )
    return return_dict
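
All of these examples funnel sampled keypoints through keypoints_to_heatmap_labels, which quantizes each visible (x, y) keypoint into a flat cell index on an M x M heatmap laid over its roi and returns a matching binary weight. The simplified sketch below (boundary clamping omitted, and assuming rows 0/1/2 of keypoints hold x/y/visibility as in the examples) shows the core of that encoding:

import numpy as np

def keypoints_to_heatmap_labels(keypoints, rois, M=56):
    """keypoints: N x 3 x K array of (x, y, visibility); rois: N x 4.
    Returns N x K flat heatmap indices and N x K binary weights."""
    N, _, K = keypoints.shape
    heatmaps = np.zeros((N, K), dtype=np.float32)
    weights = np.zeros((N, K), dtype=np.float32)
    scale_x = M / (rois[:, 2] - rois[:, 0])
    scale_y = M / (rois[:, 3] - rois[:, 1])
    for k in range(K):
        vis = keypoints[:, 2, k] > 0
        x = np.floor((keypoints[:, 0, k] - rois[:, 0]) * scale_x)
        y = np.floor((keypoints[:, 1, k] - rois[:, 1]) * scale_y)
        # a keypoint only produces a label if it is visible and lands on the map
        valid = vis & (x >= 0) & (y >= 0) & (x < M) & (y < M)
        heatmaps[:, k] = (y * M + x) * valid
        weights[:, k] = valid
    return heatmaps, weights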
Example 4
def add_keypoint_rcnn_blobs_softmax(
    blobs, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx
):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    max_overlaps = roidb['max_overlaps']
    gt_keypoints = roidb['gt_keypoints']

    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]
    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])
    vis_kp = gt_keypoints[ind_kp, 2, :] > 0
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible)
    )[0]

    kp_fg_rois_per_this_image = np.minimum(fg_rois_per_image, kp_fg_inds.size)
    if kp_fg_inds.size > kp_fg_rois_per_this_image:
        kp_fg_inds = np.random.choice(
            kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False
        )

    if kp_fg_inds.shape[0] > 0:
        sampled_fg_rois = roidb['boxes'][kp_fg_inds]
        box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

        num_keypoints = gt_keypoints.shape[2]
        sampled_keypoints = -np.ones(
            (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
            dtype=gt_keypoints.dtype
        )
        for ii in range(len(sampled_fg_rois)):
            ind = box_to_gt_ind_map[ii]
            if ind >= 0:
                sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
                assert np.sum(sampled_keypoints[ii, 2, :]) > 0

        heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
            sampled_keypoints, sampled_fg_rois, M=cfg.KRCNN.HEATMAP_SIZE
        )

        shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1)
        heats = heats.reshape(shape)
        weights = weights.reshape(shape)

    else:  # If there are no fg keypoint rois (it does happen)
        # The network cannot handle empty blobs, so we must provide a heatmap.
        # We simply take the first bg roi, give it an all-zero heatmap, and
        # set its weights to zero (ignore label).
        roi_inds = np.where(roidb['gt_classes'] == 0)[0]
        # sampled_fg_rois is actually one bg roi, but that's ok because its
        # weights below are all zero, so it contributes nothing to the loss
        sampled_fg_rois = roidb['boxes'][roi_inds[0]].reshape((1, -1))
        # give it an all-zero heatmap blob
        heats = blob_utils.zeros((1 * cfg.KRCNN.NUM_KEYPOINTS, 1))
        # and set its weights to 0 (ignore label)
        weights = blob_utils.zeros((1 * cfg.KRCNN.NUM_KEYPOINTS, 1))

    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1)
    )
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights

    # Since this function may randomly sample a subset of the boxes as rois,
    # we need to make sure refined_keypoint_rois use the same subset, so we
    # also pass out the sampled inds.
    blobs['keypoint_fg_inds'] = kp_fg_inds.astype(np.int32, copy=False)
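
The fallback branch above fills the heatmap and weight blobs with blob_utils.zeros, and every example builds its batch-index column with blob_utils.ones. A sketch of these helpers, assuming Detectron-style blob utilities that pin the dtype the network expects:

import numpy as np

def zeros(shape, int32=False):
    """Zero-filled blob with a network-friendly dtype."""
    return np.zeros(shape, dtype=np.int32 if int32 else np.float32)

def ones(shape, int32=False):
    """One-filled blob with a network-friendly dtype."""
    return np.ones(shape, dtype=np.int32 if int32 else np.float32)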
Example 5
def add_refine_keypoints_blobs_softmax(blobs, roidb, fg_rois_per_image,
                                       fg_inds, im_scale, batch_idx, data):
    """Add Mask R-CNN keypoint specific blobs to the given blobs dictionary."""
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    gt_keypoints = roidb['gt_keypoints']
    # Load the kp_fg_inds generated by keypoint_rcnn.py so that keypoint_rois
    # and refined_keypoint_rois stay consistent; a mismatch between the two
    # causes serious problems during training.
    kp_fg_inds = blobs['keypoint_fg_inds']
    if kp_fg_inds.shape[0] > 0:
        sampled_fg_rois = roidb['boxes'][kp_fg_inds]
        box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

        # Let's expand the rois
        up_scale = cfg.REFINENET.UP_SCALE
        inp_h, inp_w = data.shape[2], data.shape[3]
        pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

        pad_fg_rois = box_utils.expand_boxes(sampled_fg_rois, up_scale)
        pad_fg_rois = box_utils.clip_boxes_to_image(pad_fg_rois, pad_img_h,
                                                    pad_img_w)

        num_keypoints = gt_keypoints.shape[2]
        sampled_keypoints = -np.ones(
            (len(pad_fg_rois), gt_keypoints.shape[1], num_keypoints),
            dtype=gt_keypoints.dtype)
        for ii in range(len(pad_fg_rois)):
            ind = box_to_gt_ind_map[ii]
            if ind >= 0:
                sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
                assert np.sum(sampled_keypoints[ii, 2, :]) > 0

        heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
            sampled_keypoints, pad_fg_rois, M=cfg.REFINENET.KRCNN.HEATMAP_SIZE)

        shape = (pad_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS, 1)
        heats = heats.reshape(shape)
        weights = weights.reshape(shape)

    else:  # If there are no fg keypoint rois (it does happen)
        # The network cannot handle empty blobs, so we must provide a heatmap.
        # We simply take the first bg roi, give it an all-zero heatmap, and
        # set its weights to zero (ignore label).
        roi_inds = np.where(roidb['gt_classes'] == 0)[0]
        # pad_fg_rois is actually one bg roi, but that's ok because its
        # weights below are all zero, so it contributes nothing to the loss
        pad_fg_rois = roidb['boxes'][roi_inds[0]].reshape((1, -1))
        # give it an all-zero heatmap blob
        heats = blob_utils.zeros((1 * cfg.KRCNN.NUM_KEYPOINTS, 1))
        # and set its weights to 0 (ignore label)
        weights = blob_utils.zeros((1 * cfg.KRCNN.NUM_KEYPOINTS, 1))

    pad_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_fg_rois.shape[0], 1))
    pad_fg_rois = np.hstack((repeated_batch_idx, pad_fg_rois))

    blobs['refined_keypoint_rois'] = pad_fg_rois
    blobs['refined_keypoint_locations_int32'] = heats.astype(np.int32,
                                                             copy=False)
    blobs['refined_keypoint_weights'] = weights
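
box_utils.clip_boxes_to_image, used above to pull the expanded rois back inside the unscaled image, can be assumed to look roughly like the following (a sketch, not necessarily the exact library code; note it clips in place and returns the same array):

import numpy as np

def clip_boxes_to_image(boxes, height, width):
    """Clip (x1, y1, x2, y2) boxes to lie within an image."""
    boxes[:, [0, 2]] = np.minimum(width - 1.0, np.maximum(0.0, boxes[:, [0, 2]]))
    boxes[:, [1, 3]] = np.minimum(height - 1.0, np.maximum(0.0, boxes[:, [1, 3]]))
    return boxes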
Example 6
def add_keypoint_rcnn_blobs(
        blobs, roidb, fg_rois_per_image, fg_inds, im_scale, batch_idx):
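    """Keypoint blobs for a video model: each sampled roi is a tube of
    time_dim boxes (4 coordinates per frame), and gt_keypoints stacks the
    per-frame keypoints along the last axis."""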
    # Note: gt_inds must match how they're computed in
    # datasets.json_dataset._merge_proposal_boxes_into_roidb
    gt_inds = np.where(roidb['gt_classes'] > 0)[0]
    max_overlaps = roidb['max_overlaps']
    gt_keypoints = roidb['gt_keypoints']

    ind_kp = gt_inds[roidb['box_to_gt_ind_map']]
    within_box = _within_box(gt_keypoints[ind_kp, :, :], roidb['boxes'])
    vis_kp = gt_keypoints[ind_kp, 2, :] > 0
    is_visible = np.sum(np.logical_and(vis_kp, within_box), axis=1) > 0
    kp_fg_inds = np.where(
        np.logical_and(max_overlaps >= cfg.TRAIN.FG_THRESH, is_visible))[0]

    kp_fg_rois_per_this_image = np.minimum(
        fg_rois_per_image, kp_fg_inds.size)
    if kp_fg_inds.size > kp_fg_rois_per_this_image:
        kp_fg_inds = np.random.choice(
            kp_fg_inds, size=kp_fg_rois_per_this_image, replace=False)

    if kp_fg_inds.shape[0] == 0:
        kp_fg_inds = gt_inds
    sampled_fg_rois = roidb['boxes'][kp_fg_inds]
    box_to_gt_ind_map = roidb['box_to_gt_ind_map'][kp_fg_inds]

    num_keypoints = gt_keypoints.shape[-1]
    sampled_keypoints = -np.ones(
        (len(sampled_fg_rois), gt_keypoints.shape[1], num_keypoints),
        dtype=gt_keypoints.dtype)
    for ii in range(len(sampled_fg_rois)):
        ind = box_to_gt_ind_map[ii]
        if ind >= 0:
            sampled_keypoints[ii, :, :] = gt_keypoints[gt_inds[ind], :, :]
            # assert np.sum(sampled_keypoints[ii, 2, :]) > 0

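    # Encode heatmap labels frame by frame: each roi carries 4 box coords per
    # frame, and the keypoints for frame t occupy the t-th block of
    # per_frame_nkps entries along the last axis.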
    all_heats = []
    all_weights = []
    time_dim = sampled_fg_rois.shape[-1] // 4
    per_frame_nkps = num_keypoints // time_dim
    for t in range(time_dim):
        heats, weights = keypoint_utils.keypoints_to_heatmap_labels(
            sampled_keypoints[..., t * per_frame_nkps: (t + 1) * per_frame_nkps],
            sampled_fg_rois[..., t * 4: (t + 1) * 4])
        all_heats.append(heats)
        all_weights.append(weights)
    heats = np.concatenate(all_heats, axis=-1)
    weights = np.concatenate(all_weights, axis=-1)

    shape = (sampled_fg_rois.shape[0] * cfg.KRCNN.NUM_KEYPOINTS * time_dim, 1)
    heats = heats.reshape(shape)
    weights = weights.reshape(shape)

    sampled_fg_rois *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones(
        (sampled_fg_rois.shape[0], 1))
    sampled_fg_rois = np.hstack((repeated_batch_idx, sampled_fg_rois))

    blobs['keypoint_rois'] = sampled_fg_rois
    blobs['keypoint_locations_int32'] = heats.astype(np.int32, copy=False)
    blobs['keypoint_weights'] = weights
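
To make the per-frame slicing in Example 6 concrete, here is a small self-contained walk-through of the index arithmetic, with hypothetical sizes (2 rois tracked over time_dim = 3 frames, 17 keypoints per frame):

import numpy as np

num_rois, time_dim, per_frame_nkps = 2, 3, 17
# tube rois: 4 box coords per frame, concatenated along the last axis
sampled_fg_rois = np.zeros((num_rois, 4 * time_dim))
# keypoints: (x, y, vis) rows, per-frame keypoints stacked along the last axis
sampled_keypoints = np.zeros((num_rois, 3, per_frame_nkps * time_dim))

for t in range(time_dim):
    frame_boxes = sampled_fg_rois[..., t * 4:(t + 1) * 4]
    frame_kps = sampled_keypoints[..., t * per_frame_nkps:(t + 1) * per_frame_nkps]
    assert frame_boxes.shape == (num_rois, 4)
    assert frame_kps.shape == (num_rois, 3, per_frame_nkps)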