def bbox(self, img):
    """Return dlib candidate object locations for ``img`` as an array.

    Runs ``dlib.find_candidate_object_locations`` with ``min_size=500`` and
    packs each candidate as ``[left, top, right, bottom]``.
    """
    candidates = []
    dlib.find_candidate_object_locations(img, candidates, min_size=500)
    corners = []
    for cand in candidates:
        corners.append([cand.left(), cand.top(), cand.right(), cand.bottom()])
    return np.array(corners)
def selective_search_dlib(img, max_img_size=(500, 500), kvals=(50, 200, 2),
                          min_size=2200, check=True):
    """Run dlib's selective search on a down-scaled copy of ``img``.

    The image is shrunk with ``scale_down_image`` for speed, and every
    candidate is multiplied by the returned ``resize_scale`` and truncated
    to int. Rectangles are returned as ``(x, y, width, height)`` tuples.

    When ``check`` is true, a message is printed if no rectangle was found
    and for every rectangle that has a non-positive size or falls outside
    the original image bounds. Nothing is filtered out.
    """
    full_h, full_w = img.shape[0:2]

    # Work on a smaller image; the scale maps results back afterwards.
    small_img, resize_scale = scale_down_image(img, max_img_size)

    # http://dlib.net/dlib/image_transforms/segment_image_abstract.h.html#find_candidate_object_locations
    found = []
    dlib.find_candidate_object_locations(small_img, found, kvals=kvals,
                                         min_size=min_size)

    rects = []
    for cand in found:
        rects.append((int(cand.left() * resize_scale),
                      int(cand.top() * resize_scale),
                      int(cand.width() * resize_scale),
                      int(cand.height() * resize_scale)))

    if not check:
        return rects

    if len(rects) == 0:
        print('No selective search rectangle (Please tune the parameters)')
    for rect in rects:
        x, y, w, h = rect[0], rect[1], rect[2], rect[3]
        x2 = x + w
        y2 = y + h
        is_valid = (x >= 0 and y >= 0 and x2 <= full_w and y2 <= full_h
                    and w > 0 and h > 0)
        if not is_valid:
            print('Invalid selective search rectangle, rect:{}, image:{}'.
                  format(rect, (full_h, full_w)))
    return rects
def demo(net, im, scale_factor, classes):
    """Detect object classes in an image using pre-computed object proposals.

    The image is shrunk by ``scale_factor``, dlib proposals are computed on
    the shrunk image and passed to ``im_detect``. NMS is applied per class.

    Returns ``[im2, cls, dets, CONF_THRESH]`` where ``cls`` and ``dets``
    belong to the last entry of ``classes``.
    """
    shrink = 1.0 / scale_factor
    im2 = cv2.resize(im, (0, 0), fx=shrink, fy=shrink)

    raw_proposals = []
    dlib.find_candidate_object_locations(im2, raw_proposals, min_size=70)

    # One row per proposal: left, top, right, bottom.
    obj_proposals = np.empty((len(raw_proposals), 4))
    for row, cand in enumerate(raw_proposals):
        obj_proposals[row] = [cand.left(), cand.top(),
                              cand.right(), cand.bottom()]

    # Detect all object classes and regress object bounds
    scores, boxes = im_detect(net, im2, obj_proposals)

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls in classes:
        cls_ind = CLASSES.index(cls)
        first_col = 4 * cls_ind
        cls_boxes = boxes[:, first_col:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        stacked = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))
        dets = stacked.astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
    return [im2, cls, dets, CONF_THRESH]
def dlib_selective_search(orig_img, img_scale, min_size, dedub_boxes=1. / 16):
    """Return de-duplicated dlib proposals as rows of five floats.

    Each row is ``[0., left, top, right, bottom]`` with every coordinate
    multiplied by ``img_scale`` and then by ``dedub_boxes``. Rows whose
    rounded values collide are collapsed to a single representative.
    """
    found = []
    dlib.find_candidate_object_locations(orig_img, found, min_size=min_size)

    # Leading 0. column: the original author was unsure of its meaning
    # ("maybe used for bg").
    rows = []
    for d in found:
        rows.append([
            0.,
            d.left() * img_scale * dedub_boxes,
            d.top() * img_scale * dedub_boxes,
            d.right() * img_scale * dedub_boxes,
            d.bottom() * img_scale * dedub_boxes,
        ])

    # Collapse each rounded row to one scalar so np.unique can spot repeats.
    weights = np.array([1, 1e3, 1e6, 1e9, 1e12])
    hashes = np.round(rows).dot(weights)
    _, first_seen, _ = np.unique(hashes, return_index=True,
                                 return_inverse=True)
    return np.array(rows)[first_seen, :]
def _propose_regions(self, im, kvals, min_size):
    """Wrap dlib's candidate locations for ``im`` in ``BoundingBox`` objects.

    ``kvals`` and ``min_size`` are passed positionally to
    ``dlib.find_candidate_object_locations``.
    """
    found = []
    dlib.find_candidate_object_locations(im, found, kvals, min_size)
    boxes = []
    for cand in found:
        boxes.append(
            BoundingBox(cand.left(), cand.top(), cand.right(), cand.bottom()))
    return boxes
def on_get(self, req, resp, name):
    """Run Fast R-CNN detection on a stored image and report the result.

    ``name`` is split on ``&`` and only the first element is used as the
    image file name inside ``self.storage_path``. Detections are drawn via
    ``vis_detections`` and the image is written back under a generated
    name with the same extension.

    The response body is
    ``<result file>&<result size / 1024>&<elapsed ms>&<cost>``.

    Raises:
        falcon.HTTPPreconditionFailed: if the Caffe model file is missing.
    """
    t1 = time()
    names = name.split("&")
    prototxt = os.path.join(cfg.ROOT_DIR, 'models/CaffeNet/test.prototxt')
    caffemodel = os.path.join(
        cfg.ROOT_DIR, 'data/fast_rcnn_models/'
        'caffenet_fast_rcnn_iter_40000.caffemodel')
    if not os.path.isfile(caffemodel):
        raise falcon.HTTPPreconditionFailed("Error", "Caffe model not found")

    caffe.set_mode_cpu()
    net = caffe.Net(prototxt, caffemodel, caffe.TEST)
    classes = read_synset(self.storage_path)

    ext = os.path.splitext(names[0])[1][1:]
    image_path = os.path.join(self.storage_path, names[0])
    im = cv2.imread(image_path)

    # Proposals: min_size is tied to the image width (np.size along axis 1).
    rects = []
    dlib.find_candidate_object_locations(im, rects, min_size=np.size(im, 1))
    obj_proposals = np.empty((len(rects), 4), dtype=int)
    for k, d in enumerate(rects):
        obj_proposals[k] = [d.left(), d.top(), d.right(), d.bottom()]

    scores, boxes = im_detect(net, im, obj_proposals)

    CONF_THRESH = 0.9
    NMS_THRESH = 0.3
    for cls in classes:
        if str(cls).startswith('__background__'):
            continue
        cls_ind = CLASSES.index(cls)
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        # Drop low-confidence rows before NMS.
        keep = np.where(cls_scores >= CONF_THRESH)[0]
        cls_boxes = cls_boxes[keep, :]
        cls_scores = cls_scores[keep]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)

    result_image_path = os.path.join(self.storage_path,
                                     "{0}.{1}".format(_generate_id(), ext))
    # guess_type returns a (type, encoding) tuple; comparing the tuple
    # itself to a string was always False, so the JPEG branch never ran.
    mime_type = mimetypes.guess_type(image_path, strict=False)[0]
    if mime_type == 'image/jpeg':
        cv2.imwrite(result_image_path, im,
                    [int(cv2.IMWRITE_JPEG_QUALITY), 100])
    else:
        cv2.imwrite(result_image_path, im)

    t2 = time() - t1
    # Combined size of input and output images, in MiB.
    data_size = os.path.getsize(image_path)
    data_size += os.path.getsize(result_image_path)
    data_size /= 1024.0 * 1024.0
    cost = estimate_cost.estimateCost(data_size, t2, data_size)

    resp.status = falcon.HTTP_200  # OK
    resp.body = os.path.split(result_image_path)[1] + '&' + str(
        os.path.getsize(result_image_path) / 1024) + '&' + str(
            t2 * 1000.0) + '&' + "{:2.5f}".format(cost)
def selective_search(self, image):
    """Return dlib candidate object locations for ``image``.

    Calls ``dlib.find_candidate_object_locations`` with ``min_size=500``
    and returns the list of rectangles it fills in, unchanged.
    """
    proposals = list()
    dlib.find_candidate_object_locations(image, proposals, min_size=500)
    return proposals
def get_selective_search(imgdir, name, imgtype):
    """Load ``imgdir/name.imgtype`` and return its dlib proposals.

    Each row of the returned array is ``[top, left, bottom, right]`` with
    1 added to every coordinate.
    """
    image_path = imgdir + '/' + name + '.' + imgtype
    img = io.imread(image_path)
    found = []
    dlib.find_candidate_object_locations(img, found, min_size=0)
    shifted = [
        [r.top() + 1, r.left() + 1, r.bottom() + 1, r.right() + 1]
        for r in found
    ]
    return np.array(shifted)
def get_obj_proposals(image_path):
    """Return dlib object proposals for the image at ``image_path``.

    The image is read with ``cv2.imread`` and searched with
    ``min_size=500`` and ``kvals=(100, 100, 1)`` rather than dlib's
    default ``kvals``.

    Returns:
        A float64 array of shape ``(N, 4)`` with rows
        ``[left, top, right, bottom]`` (the "fast-rcnn format" of the
        original comment); shape ``(0, 4)`` when nothing is found.
    """
    img = cv2.imread(image_path)
    rects = []
    dlib.find_candidate_object_locations(img, rects, min_size=500,
                                         kvals=(100, 100, 1))
    corners = [[r.left(), r.top(), r.right(), r.bottom()] for r in rects]
    # The builtin float replaces the removed np.float alias (same dtype).
    # reshape keeps the (0, 4) shape for an empty result.
    return np.array(corners, dtype=float).reshape(-1, 4)
def run_dlib_selective_search(image_name):
    """Read ``image_name`` and return its dlib proposals as an array.

    Rows are ordered ``[top, left, bottom, right]``; the search uses
    ``min_size=1000``.
    """
    img = io.imread(image_name)
    found = []
    dlib.find_candidate_object_locations(img, found, min_size=1000)
    return np.array(
        [[d.top(), d.left(), d.bottom(), d.right()] for d in found])
def run_dlib_selective_search(image_name):
    """Read ``image_name`` and return its dlib proposals as an array.

    Rows are ordered ``[left, top, right, bottom]``; the search uses
    ``min_size=400``.
    """
    img = io.imread(image_name)
    found = []
    dlib.find_candidate_object_locations(img, found, min_size=400)
    return np.array(
        [[d.left(), d.top(), d.right(), d.bottom()] for d in found])
def run_dlib_selective_search_written_to_file(image_name, filename):
    """Write dlib proposals for ``image_name`` to ``filename`` as CSV.

    The file gets a ``filename,xmin,ymin,xmax,ymax`` header followed by
    one row per candidate rectangle (``left, top, right, bottom``).
    The image path is also printed to stdout. Returns ``None``.
    """
    # The context manager closes the file even if reading or searching
    # fails; the original never closed it.
    with open(filename, 'w') as out:
        out.write('filename,xmin,ymin,xmax,ymax\n')
        print(image_name)
        img = io.imread(image_name)
        rects = []
        dlib.find_candidate_object_locations(img, rects, min_size=300)
        for d in rects:
            fields = [image_name, str(d.left()), str(d.top()),
                      str(d.right()), str(d.bottom())]
            out.write(','.join(fields) + '\n')
def get_obj_proposals(image_path):
    """Return dlib object proposals for the image at ``image_path``.

    The image is read with ``cv2.imread`` and searched with
    ``min_size=500`` and ``kvals=(100, 100, 1)`` rather than dlib's
    default ``kvals``.

    Returns:
        A float64 array of shape ``(N, 4)`` with rows
        ``[left, top, right, bottom]`` (the "fast-rcnn format" of the
        original comment); shape ``(0, 4)`` when nothing is found.
    """
    img = cv2.imread(image_path)
    rects = []
    dlib.find_candidate_object_locations(img, rects, min_size=500,
                                         kvals=(100, 100, 1))
    corners = [[r.left(), r.top(), r.right(), r.bottom()] for r in rects]
    # The builtin float replaces the removed np.float alias (same dtype).
    # reshape keeps the (0, 4) shape for an empty result.
    return np.array(corners, dtype=float).reshape(-1, 4)
def get_obj_proposals(bgr_img):
    """Return dlib proposals for a BGR image as a float32 ``(N, 4)`` array.

    The image is converted to RGB before the search (``min_size=20``).
    Rows are ``[xmin, ymin, xmax, ymax]``.
    """
    import dlib

    found = []
    rgb_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
    dlib.find_candidate_object_locations(rgb_img, found, min_size=20)

    proposals = np.zeros((len(found), 4))
    for row, cand in enumerate(found):
        proposals[row, :] = (cand.left(), cand.top(),
                             cand.right(), cand.bottom())
    return proposals.astype(np.float32)
def run_dlib_selective_search(image_name):
    """Read ``image_name`` and return its dlib proposals as an array.

    ``min_size`` comes from the ``min_building_size`` name defined outside
    this function. Rows use the ``[top, left, bottom, right]`` ordering
    that the original comment attributes to Matlab; no index offset is
    applied here.
    """
    img = io.imread(image_name)
    found = []
    dlib.find_candidate_object_locations(img, found,
                                         min_size=min_building_size)
    return np.array(
        [[d.top(), d.left(), d.bottom(), d.right()] for d in found])
def get_region_proposals(img, kvals=(50, 200, 3), min_size=20,
                         max_merging_iterations=50):
    """Return non-degenerate dlib proposals as an int32 array.

    Candidates whose ``(bottom - top) * (right - left)`` is not positive
    are dropped. Rows are ``(left, top, right, bottom)``.
    """
    found = []
    dlib.find_candidate_object_locations(
        img, found, kvals, min_size, max_merging_iterations)

    corners = ((r.left(), r.top(), r.right(), r.bottom()) for r in found)
    kept = [
        (x1, y1, x2, y2)
        for (x1, y1, x2, y2) in corners
        if (y2 - y1) * (x2 - x1) > 0
    ]
    return np.array(kept, dtype=np.int32)
def get_bboxes(orig_img, im_scale, min_size=500, dedup_boxes=1. / 16):
    """Return de-duplicated dlib proposals computed on a resized image.

    ``orig_img`` is resized by ``im_scale`` (bilinear) before the search.
    Rows of the float32 result are ``[0, left, top, right, bottom]``; rows
    that coincide after scaling by ``dedup_boxes`` and rounding are
    collapsed to one.
    """
    resized = cv2.resize(orig_img, None, None, fx=im_scale, fy=im_scale,
                         interpolation=cv2.INTER_LINEAR)
    found = []
    dlib.find_candidate_object_locations(resized, found, min_size=min_size)

    boxes = np.asarray(
        [[0, d.left(), d.top(), d.right(), d.bottom()] for d in found],
        dtype=np.float32)

    # Hash each coarsened row to a scalar; np.unique then finds repeats.
    weights = np.array([1, 1e3, 1e6, 1e9, 1e12])
    keys = np.round(boxes * dedup_boxes).dot(weights)
    _, first_seen, _ = np.unique(keys, return_index=True,
                                 return_inverse=True)
    return boxes[first_seen, :]
def selective_search(self, img, min_size=2200, max_img_size=(24, 24),
                     debug=False):
    """Show dlib proposals for ``img`` in an OpenCV window.

    The image is shrunk with ``self.resize_down_image``. Each candidate is
    multiplied by the returned scale, truncated to int and drawn on the
    shrunk image. The call then blocks on ``cv2.waitKey(0)``.
    ``debug`` is accepted but not used. Returns ``None``.
    """
    found = []
    img, scale = self.resize_down_image(img, max_img_size)
    dlib.find_candidate_object_locations(img, found, min_size=min_size)

    corners = []
    for cand in found:
        corners.append((int(cand.left() * scale),
                        int(cand.top() * scale),
                        int(cand.right() * scale),
                        int(cand.bottom() * scale)))

    for x1, y1, x2, y2 in corners:
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)

    cv2.imshow("Image", img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def get_selective_search_boxes(imgdir, name):
    """Return dlib proposals for ``imgdir/name``, or ``None`` if missing.

    Rows of the returned array are ``[left, top, right, bottom]`` with 1
    added to every coordinate.
    """
    filepath = imgdir + '/' + name
    if not os.path.exists(filepath):
        print('[ERROR]: %s is not exist!' % (filepath))
        return None

    img = io.imread(filepath)
    found = []
    dlib.find_candidate_object_locations(img, found, min_size=0)
    shifted = [
        [r.left() + 1, r.top() + 1, r.right() + 1, r.bottom() + 1]
        for r in found
    ]
    return np.array(shifted)
def findSelectiveSearchRois(img, kvals, minSize, max_merging_iterations, nmsThreshold):
    """Return shuffled selective-search ROIs, optionally thinned by NMS.

    ROIs are ``[left, top, right, bottom]`` lists. When ``nmsThreshold``
    is given it must lie strictly between 0 and 1; ``nmsPython`` is then
    run with each ROI's absolute area as the score column.
    """
    found = []
    dlib.find_candidate_object_locations(imconvertCv2Ski(img), found, kvals,
                                         minSize, max_merging_iterations)
    rois = [[d.left(), d.top(), d.right(), d.bottom()] for d in found]

    if nmsThreshold is not None:
        assert 0 < nmsThreshold < 1
        dets = []
        for roi in rois:
            area = abs((roi[2] - roi[0]) * (roi[3] - roi[1]))
            dets.append(ToFloats(roi) + [area])
        keepInds = nmsPython(np.array(dets), nmsThreshold)
        rois = [rois[i] for i in keepInds]

    # randomize ROI order to not introduce any unintended effects later
    random.shuffle(rois)
    return rois
def run_dlib_selective_search(im):
    """Return dlib proposals for a BGR image as an array.

    The image is converted BGR -> RGB before the search
    (``min_size=1000``). Rows are ``[left, top, right, bottom]``.
    """
    rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    found = []
    dlib.find_candidate_object_locations(rgb, found, min_size=1000)
    return np.array(
        [[d.left(), d.top(), d.right(), d.bottom()] for d in found])
def selective_search(img, w, h, ground_truth):
    """Split dlib proposals into positives and negatives by IoU.

    A proposal is positive when ``find_iou`` against ``ground_truth``
    exceeds 0.5 and negative when it is below 0.35; anything in between is
    discarded. Kept boxes are stored as
    ``[top / h, left / w, bottom / h, right / w]``.

    Returns ``(positives, negatives)`` as two lists.
    """
    found = []
    dlib.find_candidate_object_locations(img, found, min_size=500)

    positives = []
    negatives = []
    for cand in found:
        iou = find_iou(
            ground_truth,
            (cand.left(), cand.top(), cand.right(), cand.bottom()))
        if iou > 0.5:
            bucket = positives
        elif iou < 0.35:
            bucket = negatives
        else:
            continue
        bucket.append([cand.top() / h, cand.left() / w,
                       cand.bottom() / h, cand.right() / w])
    return positives, negatives
def get_object_proposals(self, image):
    """Return dlib selective-search proposals for ``image`` as an array.

    When ``self._bgr`` is True the image is converted BGR -> RGB first.
    Otherwise it is searched unchanged; the original left ``img`` unbound
    on that path and failed.

    Returns:
        Array with one ``[left, top, right, bottom]`` row per proposal
        (search run with ``min_size=250``).
    """
    if self._bgr is True:
        img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    else:
        img = image

    rects = []
    dlib.find_candidate_object_locations(img, rects, min_size=250)
    proposals = [[d.left(), d.top(), d.right(), d.bottom()] for d in rects]
    return np.array(proposals)
def get_bboxes(orig_img, im_scale, min_size, dedup_boxes=1. / 16):
    """Return de-duplicated dlib proposals scaled by ``im_scale``.

    The search runs on ``orig_img`` itself; the resulting float32 rows
    ``[0, left, top, right, bottom]`` are then multiplied by ``im_scale``.
    Rows that coincide after scaling by ``dedup_boxes`` and rounding are
    collapsed to one.
    """
    found = []
    dlib.find_candidate_object_locations(orig_img, found, min_size=min_size)

    boxes = np.asarray(
        [[0, d.left(), d.top(), d.right(), d.bottom()] for d in found],
        dtype=np.float32)
    # In-place scaling keeps the float32 dtype.
    boxes *= im_scale

    # Hash each coarsened row to a scalar; np.unique then finds repeats.
    weights = np.array([1, 1e3, 1e6, 1e9, 1e12])
    keys = np.round(boxes * dedup_boxes).dot(weights)
    _, first_seen, _ = np.unique(keys, return_index=True,
                                 return_inverse=True)
    return boxes[first_seen, :]
def _apply(self, img_msg):
    """Publish dlib proposals for an image message.

    The message is decoded as ``bgr8`` and searched with
    ``self.min_size``. Candidates whose ``width * height`` exceeds
    ``self.max_size`` are skipped. The rest are drawn on the image and
    appended to a ``RectArray`` that carries the input header. The
    annotated image goes to ``self.debug_pub_``, the array to ``self.pub_``.
    """
    bridge = cv_bridge.CvBridge()
    frame = bridge.imgmsg_to_cv2(img_msg, desired_encoding='bgr8')

    found = []
    dlib.find_candidate_object_locations(frame, found,
                                         min_size=self.min_size)

    rect_msg = RectArray()
    rect_msg.header = img_msg.header
    for d in found:
        box_w = d.right() - d.left()
        box_h = d.bottom() - d.top()
        if box_w * box_h > self.max_size:
            continue
        cv2.rectangle(frame, (d.left(), d.top()), (d.right(), d.bottom()),
                      (255, 0, 0), 3)
        rect_msg.rects.append(
            Rect(x=d.left(), y=d.top(), width=box_w, height=box_h))

    debug_msg = bridge.cv2_to_imgmsg(frame, encoding='bgr8')
    self.debug_pub_.publish(debug_msg)
    self.pub_.publish(rect_msg)
def selective_search_dlib(img, max_img_size=(500, 500), kvals=(50, 200, 2),
                          min_size=2200, check=True, debug_window=True):
    """Run dlib's selective search on a down-scaled copy of ``img``.

    The image is shrunk with ``_scale_down_image``, and every candidate is
    multiplied by the returned scale and truncated to int. Rectangles are
    returned as ``(x, y, width, height)`` tuples.

    With ``check`` set, an error is logged when no rectangle was found and
    for each rectangle that is empty or leaves the original image bounds.
    With ``debug_window`` set, the rectangles are drawn directly on the
    image that was passed in, shown in a window, and the call blocks on
    ``cv2.waitKey(0)``.
    """
    # Keep a handle on the caller's image for drawing before it is rebound.
    canvas = img if debug_window else None
    full_h, full_w = img.shape[0:2]

    # Resize the image for speed up
    small_img, resize_scale = _scale_down_image(img, max_img_size)

    found = []
    dlib.find_candidate_object_locations(small_img, found, kvals=kvals,
                                         min_size=min_size)
    rects = []
    for cand in found:
        rects.append((int(cand.left() * resize_scale),
                      int(cand.top() * resize_scale),
                      int(cand.width() * resize_scale),
                      int(cand.height() * resize_scale)))

    if check:
        if len(rects) == 0:
            logger.error(
                'No selective search rectangle (Please tune the parameters)')
        for rect in rects:
            x, y, w, h = rect[0], rect[1], rect[2], rect[3]
            x2 = x + w
            y2 = y + h
            is_valid = (x >= 0 and y >= 0 and x2 <= full_w and y2 <= full_h
                        and w > 0 and h > 0)
            if not is_valid:
                logger.error(
                    'Invalid selective search rectangle, rect:{}, image:{}'
                    .format(rect, (full_h, full_w)))

    if debug_window:
        for rect in rects:
            top_left = (rect[0], rect[1])
            bottom_right = (rect[0] + rect[2], rect[1] + rect[3])
            cv2.rectangle(canvas, top_left, bottom_right, (0, 255, 0))
        cv2.imshow('selective_search_dlib', canvas)
        cv2.waitKey(0)

    return rects
def get_bboxes(orig_img, im_scale, min_size, dedup_boxes=1./16):
    """Return de-duplicated dlib proposals scaled by ``im_scale``.

    The float32 rows ``[0, left, top, right, bottom]`` are multiplied by
    ``im_scale`` and printed. Rows that coincide after scaling by
    ``dedup_boxes`` and rounding are then collapsed to one.
    """
    found = []
    dlib.find_candidate_object_locations(orig_img, found, min_size=min_size)

    boxes = np.asarray(
        [[0, d.left(), d.top(), d.right(), d.bottom()] for d in found],
        dtype=np.float32)
    # In-place scaling keeps the float32 dtype.
    boxes *= im_scale
    print(boxes)

    # Hash each coarsened row to a scalar; np.unique then finds repeats.
    weights = np.array([1, 1e3, 1e6, 1e9, 1e12])
    keys = np.round(boxes * dedup_boxes).dot(weights)
    _, first_seen, _ = np.unique(keys, return_index=True,
                                 return_inverse=True)
    return boxes[first_seen, :]
def _apply(self, img_msg):
    """Publish dlib proposals for an image message.

    The message is decoded as ``bgr8``; an RGB copy is used for the search
    (``self.min_size``) while drawing happens on the BGR image. Candidates
    whose ``width * height`` exceeds ``self.max_size`` are skipped. The
    annotated image goes to ``self.debug_pub_`` and the ``RectArray``,
    which carries the input header, to ``self.pub_``.
    """
    bridge = cv_bridge.CvBridge()
    frame_bgr = bridge.imgmsg_to_cv2(img_msg, desired_encoding='bgr8')
    frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

    found = []
    dlib.find_candidate_object_locations(frame_rgb, found,
                                         min_size=self.min_size)

    rect_msg = RectArray()
    rect_msg.header = img_msg.header
    for d in found:
        box_w = d.right() - d.left()
        box_h = d.bottom() - d.top()
        if box_w * box_h > self.max_size:
            continue
        cv2.rectangle(frame_bgr, (d.left(), d.top()),
                      (d.right(), d.bottom()), (255, 0, 0), 3)
        rect_msg.rects.append(
            Rect(x=d.left(), y=d.top(), width=box_w, height=box_h))

    debug_msg = bridge.cv2_to_imgmsg(frame_bgr, encoding='bgr8')
    self.debug_pub_.publish(debug_msg)
    self.pub_.publish(rect_msg)
def perform_selective_search(img,ground_truth):
    """Split dlib proposals into positives and negatives by IoU.

    Positives have ``calc_2D_IOU`` above 0.5 against ``ground_truth``,
    negatives below 0.35; the rest are dropped. Kept boxes are stored as
    ``[top / h, left / w, bottom / h, right / w]``.

    NOTE(review): ``h`` and ``w`` are not defined in this function and
    must come from an enclosing scope - confirm against the module.

    When ``DEBUG_FLAG`` is set, one row per proposal is written to
    ``debug_fp_csv``. Returns ``(positives, negatives)`` as lists.
    """
    found = []
    dlib.find_candidate_object_locations(img, found, min_size=500)

    positives = []
    negatives = []
    for cand in found:
        iou = calc_2D_IOU(
            ground_truth,
            (cand.left(), cand.top(), cand.right(), cand.bottom()))
        if DEBUG_FLAG:
            row = [iou, ground_truth[0], ground_truth[1], ground_truth[2],
                   ground_truth[3]]
            row += [cand.left(), cand.top(), cand.right(), cand.bottom()]
            debug_fp_csv.writerow(row)
        if iou > 0.5:
            bucket = positives
        elif iou < 0.35:
            bucket = negatives
        else:
            continue
        bucket.append([cand.top() / h, cand.left() / w,
                       cand.bottom() / h, cand.right() / w])
    return positives, negatives
def perform_selective_search(img, w, h, ground_truth):
    """Split dlib proposals into positive and negative arrays by IoU.

    The image is first passed through ``perform_scale_down`` with a
    ``(500, 500)`` limit. Positives have ``calc_2D_IOU`` above 0.5 against
    ``ground_truth``, negatives below 0.35; the rest are dropped. Kept
    boxes are stored as ``[top / h, left / w, bottom / h, right / w]``.

    When ``DEBUG_FLAG`` is set, one row per proposal is written to
    ``debug_fp_csv``. Returns ``(positives, negatives)`` as numpy arrays.
    """
    found = []
    scaled = perform_scale_down(img, (500, 500))
    dlib.find_candidate_object_locations(scaled, found, kvals=(50, 200, 2),
                                         min_size=2200)

    positives = []
    negatives = []
    for cand in found:
        iou = calc_2D_IOU(
            ground_truth,
            (cand.left(), cand.top(), cand.right(), cand.bottom()))
        if DEBUG_FLAG:
            row = [iou, ground_truth[0], ground_truth[1], ground_truth[2],
                   ground_truth[3]]
            row += [cand.left(), cand.top(), cand.right(), cand.bottom()]
            debug_fp_csv.writerow(row)
        if iou > 0.5:
            bucket = positives
        elif iou < 0.35:
            bucket = negatives
        else:
            continue
        bucket.append([cand.top() / h, cand.left() / w,
                       cand.bottom() / h, cand.right() / w])
    return np.asarray(positives), np.asarray(negatives)
def perform_selective_search(img,w,h,ground_truth):
    """Split dlib proposals into positive and negative arrays by IoU.

    The image is first passed through ``perform_scale_down`` with a
    ``(500, 500)`` limit. Positives have ``calc_2D_IOU`` above 0.5 against
    ``ground_truth``, negatives below 0.35; the rest are dropped. Kept
    boxes are stored as ``[top / h, left / w, bottom / h, right / w]``.

    When ``DEBUG_FLAG`` is set, one row per proposal is written to
    ``debug_fp_csv``. Returns ``(positives, negatives)`` as numpy arrays.
    """
    found = []
    scaled = perform_scale_down(img, (500, 500))
    dlib.find_candidate_object_locations(scaled, found, kvals=(50, 200, 2),
                                         min_size=2200)

    positives = []
    negatives = []
    for cand in found:
        iou = calc_2D_IOU(
            ground_truth,
            (cand.left(), cand.top(), cand.right(), cand.bottom()))
        if DEBUG_FLAG:
            row = [iou, ground_truth[0], ground_truth[1], ground_truth[2],
                   ground_truth[3]]
            row += [cand.left(), cand.top(), cand.right(), cand.bottom()]
            debug_fp_csv.writerow(row)
        if iou > 0.5:
            bucket = positives
        elif iou < 0.35:
            bucket = negatives
        else:
            continue
        bucket.append([cand.top() / h, cand.left() / w,
                       cand.bottom() / h, cand.right() / w])
    return np.asarray(positives), np.asarray(negatives)
def get_selective_search_boxes(imgpath):
    """Return dlib proposals for the image at ``imgpath``.

    Returns ``None`` when the path does not exist, when ``load_image``
    returns ``None``, or when no proposal is found. Otherwise returns an
    array with rows ``[left, top, right, bottom]``, each coordinate
    incremented by 1.
    """
    if not os.path.exists(imgpath):
        print('[ERROR]: %s is not exist!' % (imgpath))
        return None

    img = load_image(imgpath)
    if img is None:
        return None

    found = []
    dlib.find_candidate_object_locations(img, found, min_size=0)
    shifted = [
        [r.left() + 1, r.top() + 1, r.right() + 1, r.bottom() + 1]
        for r in found
    ]
    if len(shifted) == 0:
        return None
    return np.array(shifted)
def selective_search(image_in):
    """Crop every dlib proposal out of ``image_in``.

    The input is converted with ``np.array``, searched with
    ``min_size=5000`` and ``max_merging_iterations=200``, and each
    non-empty candidate region is cropped and passed through
    ``cv2.COLOR_BGR2RGB``.

    Returns:
        ``(image_list, roi)`` where ``image_list`` holds the crops and
        ``roi`` is a one-element list containing the kept boxes, each a
        ``[top, left, bottom, right]`` float row.
    """
    image = np.array(image_in)
    rect_list = []
    dlib.find_candidate_object_locations(
        image, rect_list, min_size=5000, max_merging_iterations=200)

    windows = np.empty([len(rect_list), 4])
    for ii, cand in enumerate(rect_list):
        windows[ii] = [cand.top(), cand.left(), cand.bottom(), cand.right()]

    image_list = []
    bbox = []
    for rect in windows:
        top, left, bottom, right = (int(v) for v in rect)
        # Rows are indexed by y (top:bottom) and columns by x (left:right);
        # the original sliced them the other way round and so cropped a
        # transposed region.
        tmp_img = image[top:bottom, left:right].copy()
        rows, cols, _ = tmp_img.shape
        if rows * cols > 0:
            tmp_img = cv2.cvtColor(tmp_img, cv2.COLOR_BGR2RGB)
            image_list.append(tmp_img)
            bbox.append(rect)

    return image_list, [bbox]
def perform_selective_search(img, ground_truth):
    """Split dlib proposals into positives and negatives by IoU.

    Positives have ``calc_2D_IOU`` above 0.5 against ``ground_truth``,
    negatives below 0.35; the rest are dropped. Kept boxes are stored as
    ``[top / h, left / w, bottom / h, right / w]``.

    NOTE(review): ``h`` and ``w`` are not defined in this function and
    must come from an enclosing scope - confirm against the module.

    When ``DEBUG_FLAG`` is set, one row per proposal is written to
    ``debug_fp_csv``. Returns ``(positives, negatives)`` as lists.
    """
    found = []
    dlib.find_candidate_object_locations(img, found, min_size=500)

    positives = []
    negatives = []
    for cand in found:
        iou = calc_2D_IOU(
            ground_truth,
            (cand.left(), cand.top(), cand.right(), cand.bottom()))
        if DEBUG_FLAG:
            row = [iou, ground_truth[0], ground_truth[1], ground_truth[2],
                   ground_truth[3]]
            row += [cand.left(), cand.top(), cand.right(), cand.bottom()]
            debug_fp_csv.writerow(row)
        if iou > 0.5:
            bucket = positives
        elif iou < 0.35:
            bucket = negatives
        else:
            continue
        bucket.append([cand.top() / h, cand.left() / w,
                       cand.bottom() / h, cand.right() / w])
    return positives, negatives
def demo(net, im, scale_factor, classes):
    """Detect object classes in an image using pre-computed object proposals.

    Proposals come from dlib on the unscaled image; ``scale_factor`` and
    ``classes`` are accepted but not used. The ``im_detect`` call is timed
    and reported. Every entry of ``CLASSES`` after the first then gets NMS
    and ``vis_detections``.

    Returns ``[im, cls, dets, CONF_THRESH]`` for the last class processed.
    """
    raw_proposals = []
    dlib.find_candidate_object_locations(im, raw_proposals, min_size=70)

    # One row per proposal: left, top, right, bottom.
    obj_proposals = np.empty((len(raw_proposals), 4))
    for row, cand in enumerate(raw_proposals):
        obj_proposals[row] = [cand.left(), cand.top(),
                              cand.right(), cand.bottom()]

    # Detect all object classes and regress object bounds
    timer = Timer()
    timer.tic()
    scores, boxes = im_detect(net, im, obj_proposals)
    timer.toc()
    print('Detection took {:.3f}s for {:d} object proposals'.format(
        timer.total_time, boxes.shape[0]))

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    # Index 0 is skipped as background, so numbering starts at 1.
    for cls_ind, cls in enumerate(CLASSES[1:], 1):
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        stacked = np.hstack((cls_boxes, cls_scores[:, np.newaxis]))
        dets = stacked.astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        vis_detections(im, cls, dets, thresh=CONF_THRESH)
    return [im, cls, dets, CONF_THRESH]
#     python setup.py install --yes USE_AVX_INSTRUCTIONS
# if you have a CPU that supports AVX instructions, since this makes some
# things run faster.
#
# Compiling dlib should work on any operating system so long as you have
# CMake and boost-python installed. On Ubuntu, this can be done easily by
# running the command:
#     sudo apt-get install libboost-python-dev cmake
#
# Also note that this example requires scikit-image which can be installed
# via the command:
#     pip install scikit-image
# Or downloaded from http://scikit-image.org/download.html.
import dlib
from skimage import io

image_file = '../examples/faces/2009_004587.jpg'
img = io.imread(image_file)

# dlib appends one rectangle per candidate object to this list.
rects = []
dlib.find_candidate_object_locations(img, rects, min_size=500)

print("number of rectangles found {}".format(len(rects)))
report = "Detection {}: Left: {} Top: {} Right: {} Bottom: {}"
for k, d in enumerate(rects):
    print(report.format(k, d.left(), d.top(), d.right(), d.bottom()))
def perform_selective_search(img, ground_truth, gt2):
    """Build positive and negative training boxes around ``ground_truth``.

    Two sources feed the positives:

    * dlib proposals from the scaled-down image, mapped back with the
      scale from ``perform_scale_down``, whose ``calc_2D_IOU`` against
      ``ground_truth`` exceeds 0.5;
    * copies of the ground-truth box shifted in steps of 10% of its width
      and height (from -50% up to, but excluding, +50%), clamped to the
      image, that still have IoU above 0.5.

    Negatives are proposals with IoU <= 0. The hard/easy negative
    sub-sampling of earlier revisions is not active.

    Box layout depends on ``VIS_FLAG``: integer ``[x1, y1, x2, y2]`` when
    set, otherwise
    ``[y1 / h, x1 / w, y2 / h, x2 / w, center_x, center_y, width, height]``
    where ``h`` and ``w`` are the original image height and width.
    ``gt2`` is accepted but not used.

    Returns ``(positives, negatives)``: plain lists when ``VIS_FLAG`` is
    set, float32 numpy arrays otherwise.
    """
    h = float(img.shape[0])
    w = float(img.shape[1])

    def _clamp(value, upper):
        # Limit a coordinate to the [0, upper] range.
        if value < 0.0:
            return 0.0
        if value > upper:
            return upper
        return value

    def _pack(x1, y1, x2, y2):
        # Encode one box in the layout selected by VIS_FLAG.
        if VIS_FLAG:
            return [int(x1), int(y1), int(x2), int(y2)]
        box_w = x2 - x1
        box_h = y2 - y1
        center_x = x1 + (box_w / 2.0)
        center_y = y1 + (box_h / 2.0)
        return [y1 / h, x1 / w, y2 / h, x2 / w,
                center_x, center_y, box_w, box_h]

    found = []
    img, scale = perform_scale_down(img, (500, 500))
    dlib.find_candidate_object_locations(img, found, kvals=(50, 200, 2),
                                         min_size=1200)

    positives = []
    negatives = []
    for cand in found:
        # Map the proposal back to original-image coordinates.
        x1 = cand.left() * scale
        y1 = cand.top() * scale
        x2 = cand.right() * scale
        y2 = cand.bottom() * scale
        iou = calc_2D_IOU(ground_truth, (x1, y1, x2, y2))
        if DEBUG_FLAG:
            # The debug row keeps the raw (still scaled-down) coordinates.
            debug_fp_csv.writerow([
                iou, ground_truth[0], ground_truth[1], ground_truth[2],
                ground_truth[3],
                cand.left(), cand.top(), cand.right(), cand.bottom()
            ])
        if iou > 0.50:
            positives.append(_pack(x1, y1, x2, y2))
        elif iou <= 0.0:
            negatives.append(_pack(x1, y1, x2, y2))

    # Jittering the ground truth
    gt_x1 = ground_truth[0]
    gt_y1 = ground_truth[1]
    gt_x2 = ground_truth[2]
    gt_y2 = ground_truth[3]
    gt_w = gt_x2 - gt_x1
    gt_h = gt_y2 - gt_y1
    x_shifts = np.arange(-0.5 * gt_w, 0.5 * gt_w, 0.1 * gt_w).tolist()
    y_shifts = np.arange(-0.5 * gt_h, 0.5 * gt_h, 0.1 * gt_h).tolist()
    for dx in x_shifts:
        for dy in y_shifts:
            new_x1 = _clamp(gt_x1 + dx, w)
            new_y1 = _clamp(gt_y1 + dy, h)
            new_x2 = _clamp(gt_x2 + dx, w)
            new_y2 = _clamp(gt_y2 + dy, h)
            iou = calc_2D_IOU(ground_truth,
                              (new_x1, new_y1, new_x2, new_y2))
            if iou > 0.50:
                positives.append(_pack(new_x1, new_y1, new_x2, new_y2))

    if VIS_FLAG:
        return positives, negatives
    return (np.asarray(positives).astype(np.float32),
            np.asarray(negatives).astype(np.float32))
def test(model, test_set_images, output_path=None, annotations_path=None,
         print_probs=False, bbox_model=None):
    """
    Given a model and a test set, perform inference and save results.

    For every image file found in ``test_set_images`` the function runs
    dlib's selective search, classifies each proposal with ``model``,
    keeps proposals whose class-1 probability exceeds 0.5, and reduces them
    with two rounds of non-maximum suppression (overlap 0.3, then a 0.75
    probability cut, then overlap 0.5 ranked by area). Statistics and the
    final boxes are printed to stdout.

    Parameters
    ----------
    model : Keras model
        Trained model to be used for inference
    test_set_images : string
        Path to test set images
    output_path : string
        Path to save images with bounding boxes. Images whose output file
        already exists there are skipped.
    annotations_path : string
        Path to ground truth annotations to compute mAP. Ground truth is
        looked up for the 'cat' class only.
    print_probs : boolean
        Boolean to define if the probability should be printed
        along with the bounding box
    bbox_model : Keras model
        Trained regression model for bounding box regression. When given,
        its per-proposal output (cast to int) is added to the accepted
        boxes before NMS.

    Returns
    -------
    None
    """
    image_extensions = ('jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff')

    # Make output directory if required
    if (output_path is not None) and (not os.path.exists(output_path)):
        os.makedirs(output_path)

    mAP = 0
    mAP_count = 0

    # Iterate over all test images
    for f in os.listdir(test_set_images):
        # Filter out non image files. splitext keeps any extra dots in the
        # base name, so "a.b.jpg" maps to annotation "a.b" rather than "a".
        img_base, ext = os.path.splitext(f)
        if ext[1:].lower() not in image_extensions:
            print("Not an Image: " + f)
            continue
        print("Image: " + f)

        # Check if this image has already been processed
        if output_path is not None:
            img_out_path = os.path.join(output_path, f + ".png")
            if os.path.exists(img_out_path):
                print("Already processed.")
                continue

        # Load actual image
        im = Image.open(os.path.join(test_set_images, f))
        img = normalize_image(np.array(im))

        # Perform Selective Search
        regions = []
        dlib.find_candidate_object_locations(img, regions, min_size=500)
        print(len(regions))
        candidates = []
        for i, r in enumerate(regions):
            candidates.append([r.left(), r.top(), r.right(), r.bottom()])
            if i > 2000:  # Hard limit number of proposals
                break

        # Extract all proposed sub-images
        X_test = np.zeros((len(candidates), 128, 128, 3))
        for crop_idx, bbox in enumerate(candidates):
            cropped_im = im.crop(bbox)
            cropped_im = cropped_im.resize((128, 128),
                                           resample=Image.BILINEAR)
            X_test[crop_idx, :, :, :] = normalize_image(np.array(cropped_im))

        # Perform inference on extracted proposals
        X_test = X_test.astype('float32')
        X_test /= 255
        y_pred = model.predict(X_test, batch_size=128, verbose=True)
        if bbox_model is not None:
            y_bbox_pred = bbox_model.predict(X_test, batch_size=128,
                                             verbose=True)

        # Extract positive proposals
        keep = [i for i in range(y_pred.shape[0]) if y_pred[i, 1] > 0.5]
        accepted_bboxes = [candidates[i] for i in keep]
        accepted_bboxes_probs = [y_pred[i, 1] for i in keep]

        # Perform NMS to reduce number of proposals
        if not accepted_bboxes:
            accepted_bboxes = np.array(accepted_bboxes)
            final_bboxes = np.array([])
            final_probs = np.array([])
        else:
            accepted_bboxes = np.stack(accepted_bboxes, axis=0)
            accepted_bboxes_probs = np.array(accepted_bboxes_probs)
            if bbox_model is not None:
                accepted_bboxes_reg = np.stack(
                    [y_bbox_pred[i] for i in keep], axis=0)
                # Builtin int replaces the removed np.int alias.
                accepted_bboxes += accepted_bboxes_reg.astype(int)
            print("%s %s" % (accepted_bboxes.shape,
                             accepted_bboxes_probs.shape))
            final_bboxes, final_probs, _ = non_max_suppression_fast(
                accepted_bboxes, accepted_bboxes_probs, 0.3)
            filtered_idx = np.where(final_probs > 0.75)[0]
            final_bboxes = final_bboxes[filtered_idx]
            final_probs = final_probs[filtered_idx]
            areas = (final_bboxes[:, 2] - final_bboxes[:, 0]) * (
                final_bboxes[:, 3] - final_bboxes[:, 1])
            final_bboxes, final_probs, _ = non_max_suppression_fast(
                final_bboxes, final_probs, 0.5, areas=areas)

        # Print stats (the double space matches the historical output)
        print("Total candidates:  %s" % y_pred.shape[0])
        print("Positive candidates:  %s" % accepted_bboxes.shape[0])
        print("Final candidates:  %s" % final_bboxes.shape[0])

        # If annotations are available, compute mAP
        if annotations_path is not None:
            gt_bboxes = get_bounding_boxes(img_base, 'cat',
                                           data_path=annotations_path)
            for gt_bbox in gt_bboxes:
                ious = [IoU(gt_bbox, pred_bbox) for pred_bbox in final_bboxes]
                if not ious:
                    # No predictions: this ground-truth box is not counted.
                    continue
                # At most one prediction is a true positive per ground-truth
                # box; all remaining predictions count as false positives.
                tp = 1 if max(ious) > 0.5 else 0
                fp = len(ious) - tp
                mAP += float(tp) / float(tp + fp)
                mAP_count += 1

        # If output path is available, write image with bboxes to file
        if output_path is not None:
            im = Image.open(os.path.join(test_set_images, f))
            width, height = im.size
            line_width = int(max(im.size) * 0.005)
            draw = ImageDraw.Draw(im)
            for i in range(final_bboxes.shape[0]):
                # Handle potential out of bounds because of regression.
                # curr_bbox is a view, so final_bboxes is clamped in place.
                curr_bbox = final_bboxes[i]
                curr_bbox[0] = max(min(curr_bbox[0], width), 0)
                curr_bbox[1] = max(min(curr_bbox[1], height), 0)
                curr_bbox[2] = max(min(curr_bbox[2], width), 0)
                curr_bbox[3] = max(min(curr_bbox[3], height), 0)
                draw.line(rect2lines(curr_bbox), fill="green",
                          width=line_width)
                if print_probs:
                    draw.text((curr_bbox[0], curr_bbox[1]),
                              str(final_probs[i]))
            del draw
            im.save(img_out_path, "PNG")

        # Print bounding boxes to stdout
        for bbox_idx in range(final_bboxes.shape[0]):
            print("Bbox %d: %s" % (bbox_idx + 1, final_bboxes[bbox_idx]))

    # Print final mAP score
    if (annotations_path is not None) and (mAP_count > 0):
        print("mAP score: %0.2f" % (float(mAP) / mAP_count))
# Demo script: run dlib's selective search on the image named on the command
# line and display the proposals using the helpers from dip.selective_search.
import argparse  # required by the ArgumentParser below

import dlib
from skimage import io
import cv2
from dip import selective_search

parser = argparse.ArgumentParser()
parser.add_argument('filename')
args = parser.parse_args()

image_file = args.filename
img = io.imread(image_file)

# Locations of candidate objects will be saved into rects
rects = []
dlib.find_candidate_object_locations(
    img,
    rects,
    kvals=(50, 200, 3),
    min_size=20,
    max_merging_iterations=50,
)

dst = selective_search.draw_rects(img, rects)
decomposed = selective_search.decompose(img, rects, num=3)

# Show the annotated image first, then each decomposed image in turn.
for img in [dst] + decomposed:
    io.imshow(img)
    io.show()
def _clamp_to_range(value, upper):
    """Clamp ``value`` to the closed interval [0.0, upper]."""
    return min(max(value, 0.0), upper)


def _format_rect(x1, y1, x2, y2, img_w, img_h):
    """Build one output row for a box given by its corner coordinates.

    With VIS_FLAG set the row is ``[x1, y1, x2, y2]`` truncated to ints.
    Otherwise it is ``[y1/h, x1/w, y2/h, x2/w, center_x, center_y, width,
    height]``: corners normalised by the image size, followed by the centre
    and size in pixels.
    """
    if VIS_FLAG:
        return [int(x1), int(y1), int(x2), int(y2)]
    box_w = x2 - x1
    box_h = y2 - y1
    return [y1 / img_h, x1 / img_w, y2 / img_h, x2 / img_w,
            x1 + (box_w / 2.0), y1 + (box_h / 2.0), box_w, box_h]


def perform_selective_search(img, ground_truth, gt2):
    """
    Collect positive and negative box samples for one ground-truth box.

    The image is scaled down to at most 500x500 with perform_scale_down,
    dlib's selective search is run on it, and every proposal is scaled back
    and compared with ``ground_truth`` through calc_2D_IOU. Proposals with
    IoU > 0.5 become positives and proposals with IoU <= 0 become negatives.
    Extra positives are produced by shifting the ground-truth box in steps
    of 10% of its width/height, from -50% up to (but excluding) +50%,
    clamping to the image and keeping shifts whose IoU is still > 0.5.

    Parameters
    ----------
    img : array
        Image to search; only ``img.shape[0]`` (height) and ``img.shape[1]``
        (width) are read here before it is handed to perform_scale_down.
    ground_truth : sequence
        Ground-truth box; elements 0..3 are read as x1, y1, x2, y2.
    gt2 : any
        Unused; kept for interface compatibility.

    Returns
    -------
    (positives, negatives)
        Two lists of ``[x1, y1, x2, y2]`` int rows when VIS_FLAG is set,
        otherwise two float32 arrays built from 8-value rows (see
        _format_rect).
    """
    max_size = (500, 500)
    h = float(img.shape[0])
    w = float(img.shape[1])
    img, scale = perform_scale_down(img, max_size)

    rects = []
    dlib.find_candidate_object_locations(img, rects, kvals=(50, 200, 2),
                                         min_size=1200)

    filter_positive_rects = []
    filter_negative_rects = []

    for rect in rects:
        # Map the proposal back to the coordinates of the unscaled image.
        x1 = rect.left() * scale
        y1 = rect.top() * scale
        x2 = rect.right() * scale
        y2 = rect.bottom() * scale

        iou = calc_2D_IOU(ground_truth, (x1, y1, x2, y2))
        if DEBUG_FLAG:
            debug_fp_csv.writerow([iou,
                                   ground_truth[0], ground_truth[1],
                                   ground_truth[2], ground_truth[3],
                                   rect.left(), rect.top(),
                                   rect.right(), rect.bottom()])

        if iou > 0.50:
            filter_positive_rects.append(_format_rect(x1, y1, x2, y2, w, h))
        elif iou <= 0.0:
            filter_negative_rects.append(_format_rect(x1, y1, x2, y2, w, h))

    # Jittering the ground truth
    gt_x1 = ground_truth[0]
    gt_y1 = ground_truth[1]
    gt_x2 = ground_truth[2]
    gt_y2 = ground_truth[3]
    gt_w = gt_x2 - gt_x1
    gt_h = gt_y2 - gt_y1
    w_list = np.arange(-0.5 * gt_w, 0.5 * gt_w, 0.1 * gt_w).tolist()
    h_list = np.arange(-0.5 * gt_h, 0.5 * gt_h, 0.1 * gt_h).tolist()

    for w_shift in w_list:
        for h_shift in h_list:
            new_x1 = _clamp_to_range(gt_x1 + w_shift, w)
            new_y1 = _clamp_to_range(gt_y1 + h_shift, h)
            new_x2 = _clamp_to_range(gt_x2 + w_shift, w)
            new_y2 = _clamp_to_range(gt_y2 + h_shift, h)
            iou = calc_2D_IOU(ground_truth, (new_x1, new_y1, new_x2, new_y2))
            if iou > 0.50:
                filter_positive_rects.append(
                    _format_rect(new_x1, new_y1, new_x2, new_y2, w, h))

    if VIS_FLAG:
        return filter_positive_rects, filter_negative_rects
    return (np.asarray(filter_positive_rects).astype(np.float32),
            np.asarray(filter_negative_rects).astype(np.float32))
def pred_bboxes(orig_img, min_size, index):
    """
    Run dlib's selective search and return the proposals as a float array.

    Parameters
    ----------
    orig_img : array
        Image passed unchanged to dlib.find_candidate_object_locations.
    min_size : number
        Forwarded as the ``min_size`` argument of the dlib call.
    index : any
        Unused; kept for interface compatibility.

    Returns
    -------
    numpy.ndarray
        float64 array of shape (N, 5) with rows
        ``[0, left, top, right, bottom]``. The leading column is always 0
        (presumably a batch index - confirm against the caller). When no
        proposals are found the shape is (0, 5), so column indexing still
        works.
    """
    rects = []
    dlib.find_candidate_object_locations(orig_img, rects, min_size=min_size)
    rows = [[0, d.left(), d.top(), d.right(), d.bottom()] for d in rects]
    # reshape keeps the 5-column layout even when rows is empty.
    return np.asarray(rows, dtype=np.float64).reshape(-1, 5)
# Example script: list the selective-search proposals dlib finds in a single
# image from the dlib examples directory.
import dlib
from skimage import io

image_file = '/home/jps/LIBRARIES/dlib/dlib/examples/faces/2009_004587.jpg'
img = io.imread(image_file)

# dlib appends one rectangle per candidate object to this list.
rects = []
dlib.find_candidate_object_locations(img, rects, min_size=500)

print("number of rectangles found {}".format(len(rects)))

# One line per proposal, numbered from zero.
for k, d in enumerate(rects):
    corners = (d.left(), d.top(), d.right(), d.bottom())
    print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
        k, *corners))
def process_voc_detection_data(save_file, res=128):
    """
    Extract positive and negative samples from the VOC dataset.

    For every class in ``classes`` and every ground-truth box of every image
    of that class, the function stores the ground-truth crop, up to 4
    selective-search proposals with IoU > 0.5 (positives) and up to 3
    proposals with IoU < 0.3 (background). Each positive carries the
    per-coordinate delta ``ground_truth - proposal``; ground-truth crops and
    background crops carry a zero delta. Everything is written with
    ``np.savez_compressed`` under the keys ``samples_<class>``,
    ``bboxdeltas_<class>``, ``samples_background`` and
    ``bboxdeltas_background``.

    Parameters
    ----------
    save_file : string
        path to save file so we don't have to do this over and over again.
    res : number
        Resolution to which the samples must be rescaled

    Returns
    -------
    None
    """
    image_sets = [get_class_image_names(c) for c in classes]
    bounding_boxes = []
    for c in classes:
        print("Extracting boxes for %s" % (c,))
        bounding_boxes.append([get_bounding_boxes(image_name, c)
                               for image_name in image_sets[get_class_idx(c)]])

    all_samples = {}
    background_samples = []
    background_bbox_deltas = []

    for c in classes:
        print("Extracting images for %s" % (c,))
        class_idx = get_class_idx(c)
        num_images = len(image_sets[class_idx])
        num_samples = sum(len(bboxes) for bboxes in bounding_boxes[class_idx])
        samples = []
        bbox_deltas = []
        # At least 1, so classes with fewer than 10 images do not trigger a
        # modulo-by-zero below.
        progress_meter = max(1, int(0.10 * num_images))

        for image_idx, image_name in enumerate(image_sets[class_idx]):
            if (image_idx + 1) % progress_meter == 0:
                print("%d %% done"
                      % int(100.0 * (image_idx + 1) / num_images))

            image_bboxes = bounding_boxes[class_idx][image_idx]
            if not image_bboxes:
                continue

            # Load the image and run selective search once per image; the
            # proposals do not depend on the ground-truth box.
            im = Image.open(DATA_PATH + 'JPEGImages/' + image_name + ".jpg")
            img = np.array(im)
            regions = []
            dlib.find_candidate_object_locations(img, regions, min_size=500)
            region_bboxes = [[r.left(), r.top(), r.right(), r.bottom()]
                             for r in regions]

            for bbox in image_bboxes:
                positive_bounding_boxes = []
                negative_bounding_boxes = []
                for curr_bbox in region_bboxes:
                    # Stop scanning once both pools hold more than 10 boxes.
                    if (len(positive_bounding_boxes) > 10
                            and len(negative_bounding_boxes) > 10):
                        break
                    iou = IoU(bbox, curr_bbox)
                    if iou > 0.5:
                        positive_bounding_boxes.append(curr_bbox)
                    if iou < 0.3:
                        negative_bounding_boxes.append(curr_bbox)

                random.shuffle(positive_bounding_boxes)
                random.shuffle(negative_bounding_boxes)
                positive_bounding_boxes = positive_bounding_boxes[:4]
                negative_bounding_boxes = negative_bounding_boxes[:3]

                # Get ground_truth
                cropped_im = im.crop(bbox)
                cropped_im = cropped_im.resize((res, res),
                                               resample=Image.BILINEAR)
                samples.append(np.array(cropped_im))
                bbox_deltas.append([0, 0, 0, 0])

                # Get positive samples
                for p_bbox in positive_bounding_boxes:
                    cropped_im = im.crop(p_bbox)
                    cropped_im = cropped_im.resize((res, res),
                                                   resample=Image.BILINEAR)
                    samples.append(np.array(cropped_im))
                    bbox_deltas.append([
                        bbox[0] - p_bbox[0],
                        bbox[1] - p_bbox[1],
                        bbox[2] - p_bbox[2],
                        bbox[3] - p_bbox[3],
                    ])

                # Get negative samples
                for n_bbox in negative_bounding_boxes:
                    cropped_im = im.crop(n_bbox)
                    cropped_im = cropped_im.resize((res, res),
                                                   resample=Image.BILINEAR)
                    background_samples.append(np.array(cropped_im))
                    background_bbox_deltas.append([0, 0, 0, 0])

        samples = np.stack(samples, axis=0)
        bbox_deltas = np.stack(bbox_deltas, axis=0)
        print("%s %s %s %s" % (c, samples.shape, bbox_deltas.shape,
                               num_samples))
        all_samples["samples_" + c] = samples
        all_samples["bboxdeltas_" + c] = bbox_deltas

    background_samples = np.stack(background_samples, axis=0)
    background_bbox_deltas = np.stack(background_bbox_deltas, axis=0)
    print("background %s %s" % (background_samples.shape,
                                background_bbox_deltas.shape))
    all_samples["samples_background"] = background_samples
    all_samples["bboxdeltas_background"] = background_bbox_deltas
    np.savez_compressed(save_file, **all_samples)