Example #1
0
    def get_minibatch(self):
        """Assemble one training minibatch of relationship examples.

        Randomly samples annotations from ``self.rdata['annotation_train']``
        until ``self._batch_size`` subject/object pairs are collected. Each
        ground-truth subject/object box is matched against the precomputed
        detections for its image by IoU; a pair is kept only when both boxes
        have a detection with IoU >= 0.7 (and the two detections differ).

        Returns:
            dict: numpy-array blobs keyed 'classeme_s', 'classeme_o',
            'visual_s', 'visual_o', 'location_s', 'location_o', 'label'.
        """
        blobs = {}
        classeme_s = []
        classeme_o = []
        visual_s = []
        visual_o = []
        loc_s = []
        loc_o = []
        labels = []
        cnt = 0
        while cnt < self._batch_size:
            # Draw one annotation at random; skip images without VGG features.
            idx = np.random.choice(len(self.rdata['annotation_train']), 1)
            anno = self.rdata['annotation_train'][idx[0]]
            im_id = anno.filename.split('.')[0]
            if im_id not in self.vgg_data:
                continue
            classemes = self.vgg_data[im_id]['classemes']
            visuals = self.vgg_data[im_id]['visuals']
            locations = self.vgg_data[im_id]['locations']

            if not hasattr(anno, 'relationship'):
                continue
            # A single relationship may be stored as a bare object; normalize
            # to a sequence so the loop below is uniform.
            if not isinstance(anno.relationship, np.ndarray):
                anno.relationship = [anno.relationship]
            for r in xrange(len(anno.relationship)):
                if not hasattr(anno.relationship[r], 'phrase'):
                    continue
                predicate = anno.relationship[r].phrase[1]
                # Annotation boxes are stored (ymin, ymax, xmin, xmax);
                # convert to (xmin, ymin, xmax, ymax) for bbox_overlaps.
                ymin, ymax, xmin, xmax = anno.relationship[r].subBox
                sub_bbox = [xmin, ymin, xmax, ymax]
                ymin, ymax, xmin, xmax = anno.relationship[r].objBox
                obj_bbox = [xmin, ymin, xmax, ymax]
                overlaps = bbox_overlaps(
                    np.ascontiguousarray([sub_bbox, obj_bbox], dtype=np.float),
                    np.ascontiguousarray(locations, dtype=np.float))
                # Guard both axes: no GT rows or no detections means nothing
                # to match (replaces the old bare try/except around argmax).
                if overlaps.shape[0] == 0 or overlaps.shape[1] == 0:
                    continue

                # Top-30 detections per role, best IoU first; trim anything
                # below the 0.7 IoU floor from the tail.
                sub_sorted = overlaps[0].argsort()[-30:][::-1]
                obj_sorted = overlaps[1].argsort()[-30:][::-1]
                while len(sub_sorted) > 0 and overlaps[0][sub_sorted[-1]] < .7:
                    sub_sorted = sub_sorted[:-1]
                while len(obj_sorted) > 0 and overlaps[1][obj_sorted[-1]] < .7:
                    obj_sorted = obj_sorted[:-1]

                if len(sub_sorted) <= 0 or len(obj_sorted) <= 0:
                    continue

                # Only the single best-matching detection for each role.
                for s in sub_sorted[:1]:
                    for o in obj_sorted[:1]:
                        if s != o and cnt < self._batch_size:
                            # Encode each box relative to its counterpart.
                            sub_box_encoded = bbox_transform(
                                np.array([locations[o]]),
                                np.array([locations[s]]))[0]
                            obj_box_encoded = bbox_transform(
                                np.array([locations[s]]),
                                np.array([locations[o]]))[0]
                            # Predicate name -> integer label via metadata.
                            relation = self.meta['meta/pre/name2idx/' + predicate][...]
                            labels.append(np.float32(relation))
                            classeme_s.append(classemes[s])
                            classeme_o.append(classemes[o])
                            visual_s.append(visuals[s])
                            visual_o.append(visuals[o])
                            loc_s.append(sub_box_encoded)
                            loc_o.append(obj_box_encoded)
                            cnt += 1
                if cnt >= self._batch_size:
                    break

        blobs['classeme_s'] = np.array(classeme_s)
        blobs['classeme_o'] = np.array(classeme_o)
        blobs['visual_s'] = np.array(visual_s)
        blobs['visual_o'] = np.array(visual_o)
        blobs['location_s'] = np.array(loc_s)
        blobs['location_o'] = np.array(loc_o)
        blobs['label'] = np.array(labels)

        return blobs
    def forward(self, bottom, top):
        """Forward pass of a Caffe-style Python layer.

        Decodes the detector outputs for one image, matches detections to the
        image's ground-truth relationships by IoU, builds subject/object blobs
        for the relationship branch and copies them into ``top``. When the
        image has no relationships, no pair matches, or any step in the
        matching raises, the previously cached blob ``self._prev_blob`` is
        forwarded instead.

        Args:
            bottom: input blobs — [0] RPN rois, [1] bbox regression deltas,
                [2] per-class scores, [3] im_info, [4] image index,
                [5] image data (C, H, W).
            top: output blobs, indexed via ``self._name_to_top_map``.
        """
        # prep incoming data==========
        rpn_boxes = bottom[0].data.copy()
        bbox_pred = bottom[1].data
        scores = bottom[2].data
        im_info = bottom[3].data[0]  # assumes (height, width, scale) — TODO confirm
        im_idx = int(bottom[4].data)
        im_data = bottom[5].data[0, :, :, :].transpose((1, 2, 0)).copy()  # CHW -> HWC
        m = self.meta
        im_id = self._image_id[im_idx]
        r_anno = self.r_anno[im_id]
        # prep done============

        # prep blobs for forward
        blobs = {}
        s_classeme = []
        s_rois = []
        s_rois_encoded = []
        o_classeme = []
        o_rois = []
        o_rois_encoded = []
        relation_label = []

        gt_boxes = []
        if hasattr(r_anno, 'relationship'):
            # Undo the training scale factor to get image-coordinate boxes,
            # then apply the predicted regression deltas and clip to the image.
            rpn_boxes_img_coor = rpn_boxes[:, 1:5] / im_info[2]
            boxes = rpn_boxes_img_coor
            boxes = bbox_transform_inv(boxes, bbox_pred)
            boxes = clip_boxes(
                boxes, (im_info[0] / im_info[2], im_info[1] / im_info[2]))

            # Rescale pixel data to displayable uint8 at original size for the
            # visualize calls below.
            cv2.normalize(im_data, im_data, 255, 0, cv2.NORM_MINMAX)
            im_data = im_data.astype(np.uint8)

            origsz = (im_info[1] / im_info[2], im_info[0] / im_info[2])
            im_data = cv2.resize(im_data, origsz)
            thresh_final = .5  # final per-class confidence threshold

            res_locations = []
            res_classemes = []
            res_cls_confs = []
            boxes_tosort = []
            # Per-class thresholding + NMS over classes 1..100 (class 0 is
            # presumably background; the 100-class count is assumed — TODO
            # confirm against the detector's class list).
            for j in xrange(1, 101):
                inds = np.where(scores[:, j] > .3)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
                # cls_dets rows: [x1, y1, x2, y2, score, original RPN index]
                cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis], inds[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                # pred_boxes = clip_boxes(pred_boxes, im.shape)
                if len(cls_scores) <= 0:
                    boxes_tosort.append(cls_dets)
                    continue

                # res_loc rows: [x1, y1, x2, y2, original RPN index]
                res_loc = np.hstack((cls_boxes, inds[:, np.newaxis]))
                res_classeme = scores[inds]
                res_cls_conf = np.column_stack(
                    (np.zeros(cls_scores.shape[0]) + j, cls_scores))
                keep = nms(cls_dets[:, :5], .3)  # nms threshold
                cls_dets = cls_dets[keep, :]
                res_loc = res_loc[keep]
                res_classeme = res_classeme[keep]
                res_cls_conf = res_cls_conf[keep]
                res_classemes.extend(res_classeme)
                res_locations.extend(res_loc)
                res_cls_confs.extend(res_cls_conf)
                boxes_tosort.append(cls_dets)
            # NOTE(review): the bare `except` below silently falls back to the
            # cached blob on ANY error (including programming errors) —
            # consider narrowing the exception types.
            try:
                # final class confidence
                inds = np.where(
                    np.array(res_cls_confs)[:, 1] > thresh_final)[0]

                classemes = np.array(res_classemes)[inds]
                locations = np.array(res_locations)[inds]
                cls_confs = np.array(res_cls_confs)[inds]
                # decide what to pass to top

                # limit max
                w, h = self.meta['train/' + im_id +
                                 '/w'][...], self.meta['train/' + im_id +
                                                       '/h'][...]
                # A single relationship may be stored bare; normalize to a list.
                if not isinstance(r_anno.relationship, np.ndarray):
                    r_anno.relationship = [r_anno.relationship]
                for r in xrange(len(r_anno.relationship)):
                    if not hasattr(r_anno.relationship[r], 'phrase'):
                        continue
                    predicate = r_anno.relationship[r].phrase[1]
                    # Annotation boxes are stored (ymin, ymax, xmin, xmax);
                    # reorder to (xmin, ymin, xmax, ymax).
                    ymin, ymax, xmin, xmax = r_anno.relationship[r].subBox
                    sub_bbox = [xmin, ymin, xmax, ymax]
                    gt_boxes.append(sub_bbox)

                    ymin, ymax, xmin, xmax = r_anno.relationship[r].objBox

                    obj_bbox = [xmin, ymin, xmax, ymax]
                    gt_boxes.append(obj_bbox)
                    overlaps = bbox_overlaps(
                        np.ascontiguousarray([sub_bbox, obj_bbox],
                                             dtype=np.float),
                        np.ascontiguousarray(locations, dtype=np.float))
                    if overlaps.shape[0] == 0:
                        continue

                    # Top-40 detections by IoU for each role; trim entries
                    # below the 0.6 IoU floor from the tail.
                    sub_sorted = overlaps[0].argsort()[-40:][::-1]
                    obj_sorted = overlaps[1].argsort()[-40:][::-1]
                    while len(sub_sorted) > 0 and overlaps[0][
                            sub_sorted[-1]] < .6:
                        sub_sorted = sub_sorted[:-1]
                    while len(obj_sorted) > 0 and overlaps[1][
                            obj_sorted[-1]] < .6:
                        obj_sorted = obj_sorted[:-1]

                    if len(sub_sorted) <= 0 or len(obj_sorted) <= 0:
                        continue

                    cnt = 0
                    # Only the single best subject/object detections are used.
                    for s in sub_sorted[:1]:  # sub_idx:
                        for o in obj_sorted[:1]:  # obj_idx:
                            if s != o and cnt < 20:
                                sub_clsmemes = classemes[s]
                                obj_clsmemes = classemes[o]
                                # Encode each detection box relative to the
                                # whole image [0, 0, w, h].
                                sub_box_encoded = bbox_transform(
                                    np.array([[0, 0, w, h]]),
                                    np.array([locations[s]]))[0]
                                obj_box_encoded = bbox_transform(
                                    np.array([[0, 0, w, h]]),
                                    np.array([locations[o]]))[0]
                                relation = self.meta['meta/pre/name2idx/' +
                                                     predicate][...]
                                # all done, now we put forward
                                s_classeme.append(sub_clsmemes)
                                o_classeme.append(obj_clsmemes)
                                # locations[...][-1] is the original RPN row
                                # index stashed in res_loc above.
                                s_rois.append(rpn_boxes[locations[s][-1]])
                                o_rois.append(rpn_boxes[locations[o][-1]])
                                s_rois_encoded.append(sub_box_encoded)
                                o_rois_encoded.append(obj_box_encoded)
                                relation_label.append(np.float32(relation))
                                cnt += 1
                # final step copy all the stuff for forward
                blobs['s_classeme'] = np.array(s_classeme)
                blobs['o_classeme'] = np.array(o_classeme)
                blobs['s_rois'] = np.array(s_rois)
                blobs['o_rois'] = np.array(o_rois)
                blobs['s_rois_encoded'] = np.array(s_rois_encoded)
                blobs['o_rois_encoded'] = np.array(o_rois_encoded)
                blobs['relation_label'] = np.array(relation_label)
            except:
                blobs = self._prev_blob
            if blobs['s_classeme'].shape[0] == 0:
                # Nothing matched this image — forward the cached blob.
                blobs = self._prev_blob
        else:
            blobs = self._prev_blob
        # NOTE(review): when r_anno has no 'relationship', boxes_tosort,
        # rpn_boxes_img_coor and thresh_final are never assigned, so the
        # visualize call below would raise NameError — confirm whether that
        # branch is ever reached in practice.
        visualize_gt(im_data, gt_boxes)
        visualize(im_data, boxes_tosort, rpn_boxes_img_coor, m, thresh_final)
        for blob_name, blob in blobs.iteritems():
            top_ind = self._name_to_top_map[blob_name]
            # Reshape net's input blobs
            top[top_ind].reshape(*(blob.shape))
            # Copy data into net's input blobs
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

        # this becomes a dummy for forward in case things fail
        # (NOTE(review): presumably caches a single row with label -1 so a
        # failed later iteration forwards a harmless dummy; the condition
        # fires only when the label is not already -1 — confirm intent.)
        if blobs['relation_label'][0] != -1:
            for blob_name, blob in blobs.iteritems():
                blobs[blob_name] = blob[0, np.newaxis]
                if blob_name == 'relation_label':
                    blobs[blob_name][...] = -1
        self._prev_blob = blobs
Example #3
0
    def get_minibatch(self):
        """Build one minibatch of joint subject/object visual features.

        Cycles through ``self.imids`` (reshuffling after each full pass) and,
        for every ground-truth relationship, matches its subject and object
        boxes to precomputed detections by IoU (floor 0.7). Collection stops
        once ``self._batch_size`` pairs are gathered.

        Returns:
            dict: {'visual': stacked subject+object visual features,
                   'label': predicate label per pair}, both numpy arrays.
        """
        blobs = {}
        visual = []
        labels = []
        cnt = 0
        while cnt < self._batch_size:
            # Advance the image cursor; reshuffle once a full pass completes.
            if self.imidx >= len(self.imids):
                random.shuffle(self.imids)
                self.imidx = 0
            imid = self.imids[self.imidx]
            self.imidx += 1
            gt_rlp_labels = self.gt_labels[imid]['rlp_labels']
            gt_sub_boxes = self.gt_labels[imid]['sub_boxes']
            gt_obj_boxes = self.gt_labels[imid]['obj_boxes']
            visuals = self.vgg_data[imid]['visuals']
            locations = self.vgg_data[imid]['locations']

            for i in xrange(gt_rlp_labels.shape[0]):
                overlaps = bbox_overlaps(
                    np.array([gt_sub_boxes[i], gt_obj_boxes[i]]),
                    locations.astype(np.float))
                if overlaps.shape[0] == 0:
                    continue
                # Top-30 detections per role, best IoU first; trim anything
                # below the 0.7 IoU floor from the tail.
                sub_sorted = overlaps[0].argsort()[-30:][::-1]
                obj_sorted = overlaps[1].argsort()[-30:][::-1]
                while len(sub_sorted) > 0 and overlaps[0][sub_sorted[-1]] < .7:
                    sub_sorted = sub_sorted[:-1]
                while len(obj_sorted) > 0 and overlaps[1][obj_sorted[-1]] < .7:
                    obj_sorted = obj_sorted[:-1]
                if len(sub_sorted) <= 0 or len(obj_sorted) <= 0:
                    continue

                # Only the single best-matching detection for each role.
                for s in sub_sorted[:1]:
                    for o in obj_sorted[:1]:
                        if s != o and cnt < self._batch_size:
                            # Predicate label is column 1 of the rlp triple.
                            labels.append(np.float32(gt_rlp_labels[i][1]))
                            visual.append(np.hstack((visuals[s], visuals[o])))
                            cnt += 1
                if cnt >= self._batch_size:
                    break

        blobs['visual'] = np.array(visual)
        blobs['label'] = np.array(labels)

        return blobs