def get_minibatch(self):
    """Sample a training minibatch of subject/object detection pairs.

    Annotations are drawn at random, one at a time and independently of
    each other (so an image may be drawn more than once), from
    ``self.rdata['annotation_train']`` until ``self._batch_size`` pairs
    have been collected.  Annotations whose image id is missing from
    ``self.vgg_data`` or that carry no ``relationship`` attribute are
    skipped.

    For each annotated relationship the ground-truth subject and object
    boxes are compared with the image's precomputed detections through
    ``bbox_overlaps``.  The best-overlapping detection is taken for each
    role; the relationship is dropped when either best overlap is below
    0.7 or when both roles resolve to the same detection.

    Returns:
        dict: blob name -> ``np.ndarray``; row ``i`` of every array belongs
        to the same sampled pair.  Keys are ``classeme_s``/``classeme_o``,
        ``visual_s``/``visual_o`` (features of the matched subject/object
        detection), ``location_s``/``location_o`` (each box encoded
        relative to the other one with ``bbox_transform``) and ``label``
        (predicate index looked up in ``self.meta``).

    Note:
        The sampling loop only stops once enough pairs were found, so it
        does not terminate if no annotation can ever yield a valid pair.
    """
    annotations = self.rdata['annotation_train']
    max_candidates = 30  # detections considered per ground-truth box
    min_overlap = .7     # weakest overlap still accepted as a match

    def ranked_matches(overlap_row):
        # Detection indices ordered by decreasing overlap: keep at most
        # max_candidates, then drop the tail that falls below min_overlap.
        ranked = overlap_row.argsort()[-max_candidates:][::-1]
        while len(ranked) > 0 and overlap_row[ranked[-1]] < min_overlap:
            ranked = ranked[:-1]
        return ranked

    classeme_s, classeme_o = [], []
    visual_s, visual_o = [], []
    loc_s, loc_o = [], []
    labels = []

    cnt = 0
    while cnt < self._batch_size:
        idx = np.random.choice(len(annotations), 1)
        anno = annotations[idx[0]]
        im_id = anno.filename.split('.')[0]
        if im_id not in self.vgg_data:
            continue
        if not hasattr(anno, 'relationship'):
            continue

        features = self.vgg_data[im_id]
        classemes = features['classemes']
        visuals = features['visuals']
        locations = features['locations']

        # A lone relationship is not stored as an array; wrap it so it can
        # be iterated.  The list check keeps this idempotent: the wrapped
        # value is stored back on the annotation, and wrapping it again on
        # a later draw would hide the relationship inside a nested list.
        if not isinstance(anno.relationship, (np.ndarray, list)):
            anno.relationship = [anno.relationship]

        for rel in anno.relationship:
            if not hasattr(rel, 'phrase'):
                continue
            predicate = rel.phrase[1]
            # Boxes are stored as (ymin, ymax, xmin, xmax); reorder them to
            # (xmin, ymin, xmax, ymax) before computing overlaps.
            ymin, ymax, xmin, xmax = rel.subBox
            sub_bbox = [xmin, ymin, xmax, ymax]
            ymin, ymax, xmin, xmax = rel.objBox
            obj_bbox = [xmin, ymin, xmax, ymax]

            # Row 0: subject box vs. every detection; row 1: object box.
            overlaps = bbox_overlaps(
                np.ascontiguousarray([sub_bbox, obj_bbox], dtype=np.float64),
                np.ascontiguousarray(locations, dtype=np.float64))
            if overlaps.size == 0:
                # Nothing to match against.
                continue

            sub_sorted = ranked_matches(overlaps[0])
            obj_sorted = ranked_matches(overlaps[1])
            if len(sub_sorted) == 0 or len(obj_sorted) == 0:
                continue

            # Only the single best detection per role is used.
            s, o = sub_sorted[0], obj_sorted[0]
            if s == o:
                continue

            sub_visual = visuals[s]
            obj_visual = visuals[o]
            sub_clsmemes = classemes[s]
            obj_clsmemes = classemes[o]
            sub_box_encoded = bbox_transform(
                np.array([locations[o]]), np.array([locations[s]]))[0]
            obj_box_encoded = bbox_transform(
                np.array([locations[s]]), np.array([locations[o]]))[0]
            relation = self.meta['meta/pre/name2idx/' + predicate][...]

            labels.append(np.float32(relation))
            classeme_s.append(sub_clsmemes)
            classeme_o.append(obj_clsmemes)
            visual_s.append(sub_visual)
            visual_o.append(obj_visual)
            loc_s.append(sub_box_encoded)
            loc_o.append(obj_box_encoded)
            cnt += 1
            if cnt >= self._batch_size:
                break

    blobs = {}
    blobs['classeme_s'] = np.array(classeme_s)
    blobs['classeme_o'] = np.array(classeme_o)
    blobs['visual_s'] = np.array(visual_s)
    blobs['visual_o'] = np.array(visual_o)
    blobs['location_s'] = np.array(loc_s)
    blobs['location_o'] = np.array(loc_o)
    blobs['label'] = np.array(labels)
    return blobs
def forward(self, bottom, top):
    """Turn detector outputs for one image into relationship-classifier inputs.

    Detections are decoded from the proposals and box deltas, filtered per
    class by score and NMS, and matched against the ground-truth subject
    and object boxes of every annotated relationship of the image.  Each
    relationship whose best subject and object detections both overlap
    their ground-truth box by at least 0.6, and are two different
    detections, contributes one row to the output blobs.

    When the image has no relationship annotation, when no row could be
    built, or when anything fails while building the rows, the blobs
    stored by the previous call (``self._prev_blob``) are emitted instead.
    NOTE(review): this assumes ``self._prev_blob`` is initialised before
    the first fallback happens - it is not set in this method before use.

    Args:
        bottom: input blobs, read as follows: ``[0]`` proposals (columns
            1:5 are used as box coordinates), ``[1]`` box regression
            deltas, ``[2]`` per-class scores, ``[3]`` image info (entries
            0 and 1 are divided by entry 2, presumably height, width and
            scale - confirm), ``[4]`` index into ``self._image_id``,
            ``[5]`` image data.
        top: output blobs; each one named in ``self._name_to_top_map`` is
            reshaped and filled in place.

    Side effects:
        Stores a one-row copy of the emitted blobs, with the relation
        label set to -1, in ``self._prev_blob`` for later fallbacks.
    """
    # Unpack the incoming blobs.
    rpn_boxes = bottom[0].data.copy()
    bbox_pred = bottom[1].data
    scores = bottom[2].data
    im_info = bottom[3].data[0]
    im_idx = int(bottom[4].data)
    im_data = bottom[5].data[0, :, :, :].transpose((1, 2, 0)).copy()
    m = self.meta
    im_id = self._image_id[im_idx]
    r_anno = self.r_anno[im_id]

    blobs = {}
    s_classeme = []
    s_rois = []
    s_rois_encoded = []
    o_classeme = []
    o_rois = []
    o_rois_encoded = []
    relation_label = []
    gt_boxes = []

    if hasattr(r_anno, 'relationship'):
        # Bring the proposals back to the unscaled image and decode the
        # per-class boxes from the regression deltas.
        rpn_boxes_img_coor = rpn_boxes[:, 1:5] / im_info[2]
        boxes = bbox_transform_inv(rpn_boxes_img_coor, bbox_pred)
        boxes = clip_boxes(
            boxes, (im_info[0] / im_info[2], im_info[1] / im_info[2]))

        # Rescale the image to 0..255 and the unscaled size; it is only
        # used by the visualisation calls below.
        cv2.normalize(im_data, im_data, 255, 0, cv2.NORM_MINMAX)
        im_data = im_data.astype(np.uint8)
        origsz = (im_info[1] / im_info[2], im_info[0] / im_info[2])
        im_data = cv2.resize(im_data, origsz)

        thresh_final = .5
        res_locations = []
        res_classemes = []
        res_cls_confs = []
        boxes_tosort = []
        # Class 0 is skipped; classes 1..100 are processed one by one.
        for j in range(1, 101):
            inds = np.where(scores[:, j] > .3)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis], inds[:, np.newaxis])
            ).astype(np.float32, copy=False)
            if len(cls_scores) <= 0:
                boxes_tosort.append(cls_dets)
                continue
            # Each kept row: box coordinates followed by the proposal row
            # index it came from.
            res_loc = np.hstack((cls_boxes, inds[:, np.newaxis]))
            res_classeme = scores[inds]
            res_cls_conf = np.column_stack(
                (np.zeros(cls_scores.shape[0]) + j, cls_scores))
            keep = nms(cls_dets[:, :5], .3)  # nms threshold
            cls_dets = cls_dets[keep, :]
            res_classemes.extend(res_classeme[keep])
            res_locations.extend(res_loc[keep])
            res_cls_confs.extend(res_cls_conf[keep])
            boxes_tosort.append(cls_dets)

        try:
            # Keep only detections whose class confidence is high enough.
            # With no detection at all the column index below is invalid
            # and the handler falls back to the previous blobs.
            inds = np.where(
                np.array(res_cls_confs)[:, 1] > thresh_final)[0]
            classemes = np.array(res_classemes)[inds]
            locations = np.array(res_locations)[inds]

            w = self.meta['train/' + im_id + '/w'][...]
            h = self.meta['train/' + im_id + '/h'][...]

            # A lone relationship is not stored as an array; wrap it so it
            # can be iterated.  The list check keeps this idempotent: the
            # wrapped value is stored back on the annotation, and wrapping
            # it again on a later pass would hide the relationship inside
            # a nested list.
            if not isinstance(r_anno.relationship, (np.ndarray, list)):
                r_anno.relationship = [r_anno.relationship]

            for rel in r_anno.relationship:
                if not hasattr(rel, 'phrase'):
                    continue
                predicate = rel.phrase[1]
                # Boxes are stored as (ymin, ymax, xmin, xmax); reorder
                # them to (xmin, ymin, xmax, ymax).
                ymin, ymax, xmin, xmax = rel.subBox
                sub_bbox = [xmin, ymin, xmax, ymax]
                gt_boxes.append(sub_bbox)
                ymin, ymax, xmin, xmax = rel.objBox
                obj_bbox = [xmin, ymin, xmax, ymax]
                gt_boxes.append(obj_bbox)

                # Row 0: subject box vs. detections; row 1: object box.
                overlaps = bbox_overlaps(
                    np.ascontiguousarray(
                        [sub_bbox, obj_bbox], dtype=np.float64),
                    np.ascontiguousarray(locations, dtype=np.float64))
                if overlaps.shape[0] == 0:
                    continue

                # Up to 40 best detections per role, best first, with the
                # tail below 0.6 overlap trimmed off.
                sub_sorted = overlaps[0].argsort()[-40:][::-1]
                obj_sorted = overlaps[1].argsort()[-40:][::-1]
                while (len(sub_sorted) > 0
                       and overlaps[0][sub_sorted[-1]] < .6):
                    sub_sorted = sub_sorted[:-1]
                while (len(obj_sorted) > 0
                       and overlaps[1][obj_sorted[-1]] < .6):
                    obj_sorted = obj_sorted[:-1]
                if len(sub_sorted) <= 0 or len(obj_sorted) <= 0:
                    continue

                # Only the single best detection per role is used.
                s, o = sub_sorted[0], obj_sorted[0]
                if s == o:
                    continue

                sub_clsmemes = classemes[s]
                obj_clsmemes = classemes[o]
                sub_box_encoded = bbox_transform(
                    np.array([[0, 0, w, h]]),
                    np.array([locations[s]]))[0]
                obj_box_encoded = bbox_transform(
                    np.array([[0, 0, w, h]]),
                    np.array([locations[o]]))[0]
                relation = self.meta[
                    'meta/pre/name2idx/' + predicate][...]

                s_classeme.append(sub_clsmemes)
                o_classeme.append(obj_clsmemes)
                # The proposal row index sits in the last column next to
                # the box coordinates, so it may come back as a float;
                # NumPy rejects float indices, hence the int() cast.
                s_rois.append(rpn_boxes[int(locations[s][-1])])
                o_rois.append(rpn_boxes[int(locations[o][-1])])
                s_rois_encoded.append(sub_box_encoded)
                o_rois_encoded.append(obj_box_encoded)
                relation_label.append(np.float32(relation))

            blobs['s_classeme'] = np.array(s_classeme)
            blobs['o_classeme'] = np.array(o_classeme)
            blobs['s_rois'] = np.array(s_rois)
            blobs['o_rois'] = np.array(o_rois)
            blobs['s_rois_encoded'] = np.array(s_rois_encoded)
            blobs['o_rois_encoded'] = np.array(o_rois_encoded)
            blobs['relation_label'] = np.array(relation_label)
        except Exception:
            # Deliberate best effort: reuse the previous blobs on any
            # failure.  Exception (rather than a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            blobs = self._prev_blob

        if blobs['s_classeme'].shape[0] == 0:
            blobs = self._prev_blob

        # The visualisation needs the detections computed in this branch,
        # so it only runs for images that have relationship annotations.
        visualize_gt(im_data, gt_boxes)
        visualize(im_data, boxes_tosort, rpn_boxes_img_coor, m, thresh_final)
    else:
        blobs = self._prev_blob

    for blob_name, blob in blobs.items():
        top_ind = self._name_to_top_map[blob_name]
        # Reshape net's input blobs
        top[top_ind].reshape(*(blob.shape))
        # Copy data into net's input blobs
        top[top_ind].data[...] = blob.astype(np.float32, copy=False)

    # Keep a one-row dummy (label -1) to emit if a later call fails.  Blobs
    # that already are such a dummy are stored unchanged.
    if blobs['relation_label'][0] != -1:
        for blob_name in list(blobs):
            blobs[blob_name] = blobs[blob_name][0, np.newaxis]
            if blob_name == 'relation_label':
                blobs[blob_name][...] = -1
    self._prev_blob = blobs
def get_minibatch(self):
    """Assemble a minibatch of concatenated subject/object visual features.

    Images are visited in the order of ``self.imids``, continuing from
    ``self.imidx``; once the list is exhausted it is shuffled in place and
    traversal restarts from the beginning.  For every ground-truth
    relationship of an image, the subject and object boxes are compared
    with the image's precomputed detections through ``bbox_overlaps``.
    The best-overlapping detection is taken for each role; the
    relationship is dropped when either best overlap is below 0.7 or when
    both roles resolve to the same detection.

    Returns:
        dict with two ``np.ndarray`` entries whose rows correspond to each
        other: ``visual`` (subject features followed by object features,
        joined with ``np.hstack``) and ``label`` (element 1 of the
        relationship's ``rlp_labels`` row, as ``np.float32``).

    Side effects:
        Advances ``self.imidx`` and may shuffle ``self.imids``.

    Note:
        The loop only stops once ``self._batch_size`` samples were found,
        so it does not terminate if no image can ever yield a valid pair.
    """
    max_candidates = 30  # detections considered per ground-truth box
    min_overlap = .7     # weakest overlap still accepted as a match

    def ranked_matches(overlap_row):
        # Detection indices ordered by decreasing overlap: keep at most
        # max_candidates, then drop the tail that falls below min_overlap.
        ranked = overlap_row.argsort()[-max_candidates:][::-1]
        while len(ranked) > 0 and overlap_row[ranked[-1]] < min_overlap:
            ranked = ranked[:-1]
        return ranked

    visual = []
    labels = []
    cnt = 0
    while cnt < self._batch_size:
        if self.imidx >= len(self.imids):
            # Start a new pass over the images in a fresh random order.
            random.shuffle(self.imids)
            self.imidx = 0
        imid = self.imids[self.imidx]
        self.imidx += 1

        gt_rlp_labels = self.gt_labels[imid]['rlp_labels']
        gt_sub_boxes = self.gt_labels[imid]['sub_boxes']
        gt_obj_boxes = self.gt_labels[imid]['obj_boxes']
        visuals = self.vgg_data[imid]['visuals']
        locations = self.vgg_data[imid]['locations']

        for i in range(gt_rlp_labels.shape[0]):
            gt_rlp_label = gt_rlp_labels[i]
            # Row 0: subject box vs. every detection; row 1: object box.
            overlaps = bbox_overlaps(
                np.array([gt_sub_boxes[i], gt_obj_boxes[i]]),
                locations.astype(np.float64))
            if overlaps.shape[0] == 0:
                continue

            sub_sorted = ranked_matches(overlaps[0])
            obj_sorted = ranked_matches(overlaps[1])
            if len(sub_sorted) <= 0 or len(obj_sorted) <= 0:
                continue

            # Only the single best detection per role is used.
            s, o = sub_sorted[0], obj_sorted[0]
            if s != o and cnt < self._batch_size:
                labels.append(np.float32(gt_rlp_label[1]))
                visual.append(np.hstack((visuals[s], visuals[o])))
                cnt += 1
            if cnt >= self._batch_size:
                break

    blobs = {}
    blobs['visual'] = np.array(visual)
    blobs['label'] = np.array(labels)
    return blobs