def test_camera():
    video_capture = cv2.VideoCapture(0)
    pnet = PNet()
    rnet = RNet()
    onet = ONet()
    frame_counter = 0
    boxes, landmarks = None, None
    while True:
        ret, frame = video_capture.read()
        if ret:
            # run the full cascade only on every 10th frame; reuse the
            # previous detections in between
            if frame_counter % 10 == 0:
                boxes = pnet.detect(frame)
                if len(boxes) > 0:
                    boxes = rnet.detect(frame, boxes)
                    if len(boxes) > 0:
                        boxes, landmarks = onet.detect(frame, boxes)
                    else:
                        boxes, landmarks = None, None
            if boxes is not None:
                draw(frame, boxes, landmarks)
            cv2.imshow('image', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        frame_counter += 1
def main(_):
    device = "cpu"
    print("Use device: {}".format(device))

    env = gym.make(FLAGS.env)
    p_net = PNet(env.observation_space, env.action_space, FLAGS.hid_num)
    v_net = VNet(env.observation_space, FLAGS.hid_num)
    p_net.to(device)
    v_net.to(device)
    agent = Agent(p_net, v_net, None, None, device)

    # Load a previously saved model checkpoint
    max_rew = -1e6
    model_filename_base = os.path.join(
        FLAGS.data_dir, "models",
        "model_" + FLAGS.env + "_PPO_H" + str(FLAGS.hid_num))
    print("Load best model: {}".format(model_filename_base))
    load_info = agent.load_model(model_filename_base, "best")
    if load_info:
        max_rew = load_info["max_rew"]
        print("Max reward: {0}".format(max_rew))
    else:
        print("Model file not found")
        exit(0)

    test(env, agent, device)
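The `test` helper called above is not shown in this excerpt. For reference, a minimal sketch of such an evaluation loop, assuming the classic gym API (`reset()` returns the observation, `step()` returns a 4-tuple) and a hypothetical `agent.get_action(obs)` method; the real Agent interface may differ:

import gym

def test(env, agent, device, episodes=10):
    """Roll out a few episodes and report the average reward.
    NOTE: agent.get_action is an assumed method name."""
    total = 0.0
    for _ in range(episodes):
        obs = env.reset()
        done = False
        while not done:
            action = agent.get_action(obs)  # hypothetical API
            obs, rew, done, _ = env.step(action)
            total += rew
    print("Average reward over {} episodes: {}".format(episodes, total / episodes))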
def load(self):
    sess = self.m_session
    p_path, r_path, o_path = self.m_model_path[0], None, None
    if len(self.m_model_path) >= 2:
        r_path = self.m_model_path[1]
    if len(self.m_model_path) == 3:
        o_path = self.m_model_path[2]

    if p_path:
        with tf.variable_scope('pnet'):
            data = tf.placeholder(tf.float32, (None, None, None, 3), 'input')
            pnet = PNet({'data': data})
            pnet.load(os.path.join(self.m_model_path[0], 'det1.npy'), sess)
        self.pnet = lambda img: sess.run(
            ('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'),
            feed_dict={'pnet/input:0': img})
    if r_path:
        with tf.variable_scope('rnet'):
            data = tf.placeholder(tf.float32, (None, 24, 24, 3), 'input')
            rnet = RNet({'data': data})
            rnet.load(os.path.join(self.m_model_path[1], 'det2.npy'), sess)
        self.rnet = lambda img: sess.run(
            ('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'),
            feed_dict={'rnet/input:0': img})
    if o_path:
        with tf.variable_scope('onet'):
            data = tf.placeholder(tf.float32, (None, 48, 48, 3), 'input')
            onet = ONet({'data': data})
            onet.load(os.path.join(self.m_model_path[2], 'det3.npy'), sess)
        self.onet = lambda img: sess.run(
            ('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'),
            feed_dict={'onet/input:0': img})
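The lambdas built above each take an NHWC float batch and return the network's regression and probability tensors. A hedged usage sketch, where `loader` stands for whatever object ran `load()` (a placeholder name), and the (x - 127.5) * 0.0078125 normalization is the usual MTCNN convention rather than something this snippet guarantees:

import numpy as np

img = np.random.randint(0, 255, (1, 240, 320, 3)).astype(np.float32)
img = (img - 127.5) * 0.0078125        # common MTCNN preprocessing (assumption)
reg, prob = loader.pnet(img)           # 'loader' is hypothetical
print(reg.shape, prob.shape)           # PNet is fully convolutional: output maps, not vectors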
def export_to_pb():
    pnet = PNet()
    rnet = RNet()
    onet = ONet()
    pnet.export_to_pb()
    rnet.export_to_pb()
    onet.export_to_pb()
def test_img():
    pnet = PNet()
    rnet = RNet()
    onet = ONet()
    img = cv2.imread('C:\\Users\\lenovo\\Desktop\\0_Parade_Parade_0_693.jpg')
    boxes = pnet.detect(img)
    boxes = rnet.detect(img, boxes)
    boxes, landmarks = onet.detect(img, boxes)
    draw(img, boxes, landmarks)
    cv2.imshow('image', img)
    cv2.waitKey()
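Both test_camera and test_img rely on a `draw` helper that is not shown here. A minimal sketch, under the assumption that box rows are (x1, y1, x2, y2, score) and landmark rows interleave five (x, y) points, as the MtcnnDetector class further below produces:

import cv2

def draw(img, boxes, landmarks):
    """Draw detections in place. Adapt if the real layouts differ."""
    for box in boxes:
        x1, y1, x2, y2 = [int(v) for v in box[:4]]
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    if landmarks is not None:
        for lm in landmarks:
            for x, y in zip(lm[0::2], lm[1::2]):
                cv2.circle(img, (int(x), int(y)), 2, (0, 0, 255), -1)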
def __init__(self, min_face_size=20.0, thresholds=[0.6, 0.7, 0.8],
             nms_thresholds=[0.7, 0.7, 0.7], device=None):
    # Select the device
    if device in ['gpu', 'cuda']:
        if not torch.cuda.is_available():
            print("cuda not available, using cpu instead")
            self.device = torch.device('cpu')
        else:
            self.device = torch.device('cuda')
    elif device in ['cpu', 'none']:
        self.device = torch.device('cpu')
    else:
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')
    print("Using {}...\n".format(self.device))

    self.thresholds = thresholds
    self.nms_thresholds = nms_thresholds
    self.min_face_size = min_face_size
    self.empty_float = torch.tensor([], dtype=torch.float, device=self.device)

    self.pnet = PNet().to(device=self.device).eval()
    self.rnet = RNet().to(device=self.device).eval()
    self.onet = ONet().to(device=self.device).eval()
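Assuming the __init__ above belongs to a detector class, here called FaceDetector purely as a placeholder name, construction is just:

detector = FaceDetector(device='cuda')   # falls back to CPU with a warning if CUDA is absent
detector_auto = FaceDetector()           # device=None: picks cuda when available, else cpu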
def __init__(self):
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        self.device = 'cuda'
        self.tensor = torch.cuda.FloatTensor
    else:
        self.device = 'cpu'
        self.tensor = torch.FloatTensor
    self._pnet = PNet().to(self.device).eval()
    self._rnet = RNet().to(self.device).eval()
    self._onet = ONet().to(self.device).eval()
    self.scales = [0.3, 0.15, 0.07, 0.035]
    self.thresholds = [0.7, 0.8, 0.9]
    self.nms_thresholds = [0.7, 0.7, 0.7]
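The constructor above hardcodes four pyramid scales. For comparison, a sketch (not taken from this codebase) of how a scale pyramid is usually derived from a minimum face size, which is what the MtcnnDetector class further below does with its min_face and scale parameters:

def build_scale_pyramid(h, w, min_face_size=20, net_size=12, factor=0.709):
    """Return scales so that faces of min_face_size map to net_size pixels."""
    scale = net_size / float(min_face_size)
    min_side = min(h, w) * scale
    scales = []
    while min_side >= net_size:
        scales.append(scale)
        scale *= factor
        min_side *= factor
    return scales

# e.g. build_scale_pyramid(480, 640) starts [0.6, 0.4254, 0.3016, ...]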
def __init__(self, min_face=20, thresh=[0.6, 0.7, 0.7], scale=0.79,
             stride=2, cellsize=12):
    self.min_face = min_face
    self.thresh = thresh
    self.scale = scale
    self.stride = stride
    self.cellsize = cellsize
    self.pnet = PNet()
    self.rnet = RNet()
    self.onet = ONet()
    self._load_state(self.pnet)
    self._load_state(self.rnet)
    self._load_state(self.onet)
def train(args):
    if torch.cuda.is_available():
        device = torch.device("cuda")
        torch.cuda.set_device(args.cuda)
    else:
        device = torch.device("cpu")

    if args.net == "pnet":
        model = PNet(device)
    elif args.net == "rnet":
        model = RNet()
    elif args.net == "onet":
        model = ONet()
    else:
        raise Exception("Net Type Error!")

    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), args.lr, args.momentum)

    transformed_data = WIDER_Dataset(
        data_path, anno_filename,
        transforms.Compose([Resize((12, 12)), Normalize(), To_Tensor()]))
    trainloader = DataLoader(transformed_data, batch_size=1, shuffle=True,
                             collate_fn=transformed_data.collate_fn,
                             num_workers=4, pin_memory=True)

    # model.to(device=device)
    for epoch in range(args.epoch):
        model.train()
        for i_batch, (images, boxes) in enumerate(trainloader):
            # .type()/.to() are not in-place; their results must be reassigned
            images = images.type(torch.DoubleTensor).to(device=device)
            boxes[0] = boxes[0].to(device=device, dtype=torch.float)
            output = model(images)
            print(output.cpu())
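The loop above stops at printing the raw network output. A minimal sketch of the missing optimization step, assuming boxes[0] carries integer class labels for the batch (the snippet does not show the label format):

import torch

def train_step(model, images, boxes, loss_func, optimizer, device):
    """One optimization step; label layout is an assumption."""
    images = images.type(torch.DoubleTensor).to(device=device)
    labels = boxes[0].to(device=device, dtype=torch.long)  # CrossEntropyLoss expects long labels
    output = model(images)
    loss = loss_func(output, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()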
def main(_):
    device = get_device(FLAGS.use_gpu)
    print("Use device: {}".format(device))

    # Create the directories used for saving models
    data_dir = FLAGS.data_dir
    create_directory(data_dir)
    create_directory(os.path.join(data_dir, "models"))

    env = gym.make(FLAGS.env)
    p_net = PNet(env.observation_space, env.action_space, FLAGS.hid_num)
    v_net = VNet(env.observation_space, FLAGS.hid_num)
    print(p_net)
    print(v_net)
    p_net.to(device)
    v_net.to(device)
    optim_p = ralamb.Ralamb(p_net.parameters(), lr=FLAGS.lr,
                            weight_decay=FLAGS.weight_decay)
    optim_v = ralamb.Ralamb(v_net.parameters(), lr=FLAGS.lr,
                            weight_decay=FLAGS.weight_decay)
    agent = Agent(p_net, v_net, optim_p, optim_v, device)

    if FLAGS.use_discrim:
        expert_filename = os.path.join(FLAGS.data_dir, "expert_data",
                                       "taxi_expert.pkl")
        print("Load expert data: ", expert_filename)
        with open(expert_filename, "rb") as f:
            expert_traj = Trajectory()
            expert_epis = pickle.load(f)
            for epi in expert_epis:
                epi["next_obs"] = np.append(epi["obs"][1:], epi["obs"][0])
                expert_traj.append(epi)
            expert_traj.to_tensor(device)
        pseudo_rew_net = VNet(env.observation_space, FLAGS.hid_num)
        shaping_val_net = VNet(env.observation_space, FLAGS.hid_num)
        print(pseudo_rew_net)
        print(shaping_val_net)
        pseudo_rew_net.to(device)
        shaping_val_net.to(device)
        optim_discrim = ralamb.Ralamb(
            list(pseudo_rew_net.parameters()) + list(shaping_val_net.parameters()),
            lr=FLAGS.lr,
            weight_decay=FLAGS.weight_decay,
        )
        discrim = Discriminator(pseudo_rew_net, shaping_val_net,
                                optim_discrim, device)
    else:
        discrim = None
        expert_traj = None

    # Load a previously saved model checkpoint
    max_rew = -1e6
    model_filename_base = os.path.join(
        FLAGS.data_dir, "models",
        "model_" + FLAGS.env + "_PPO_H" + str(FLAGS.hid_num))
    discrim_filename_base = None
    if FLAGS.resume:
        print("Load last model")
        load_info = agent.load_model(model_filename_base, "last")
        if load_info:
            max_rew = load_info["max_rew"]
            print("Max reward: {0}".format(max_rew))
        else:
            print("Model file not found")
        if FLAGS.use_discrim:
            discrim_filename_base = os.path.join(
                FLAGS.data_dir, "models",
                "discrim_" + FLAGS.env + "_AIRL_H" + str(FLAGS.hid_num))
            discrim.load_model(discrim_filename_base, "last")

    train(env, agent, max_rew, model_filename_base, device, discrim,
          discrim_filename_base, expert_traj)
    test(env, agent, device)
from model import detect_faces, show_bboxes, PNet, RNet, ONet
from PIL import Image
import numpy as np


def test(filename, save_name, model):
    image = Image.open(filename)
    bounding_boxes, landmarks = detect_faces(image, model)
    show_bboxes(image, bounding_boxes, facial_landmarks=landmarks).save(save_name)


if __name__ == "__main__":
    pnet = PNet()
    rnet = RNet()
    onet = ONet()
    model = (pnet, rnet, onet)
    # test("test_1.jpg", "1.jpg", model)
    # test("test_2.jpg", "2.jpg", model)
    # test("test_3.jpg", "3.jpg", model)
    test("test.jpg", "1.jpg", model)
print(predicted)
if lb == predicted:
    correct += 1
total += 1
# print(res[i])
print("iteration:{}/{} test accuracy: {}% correct:{} total:{} loss:{}".format(
    i, total_step, correct / total * 100, correct, total, loss.item()))


from pfs import create_buffer

print("Starting...")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
game_batch = 20
mbuffer = Memory(53248)
# create_buffer(mbuffer, train_data=False)
mbuffer = torch.load('testdata.txt')
print('data loaded')
p4 = PNet().to(device)
p4.load_state_dict(torch.load('robot-net_5.txt'), False)  # strict=False
print('model loaded')
test(p4, mbuffer, 16 * 1, 0.000001, 400, device)
# torch.save(p4.state_dict(), 'robot-net.txt')
class MtcnnDetector(object):
    """ MTCNN detector

    Params:
        prefix: {str} checkpoint
    """

    def __init__(self, min_face=20, thresh=[0.6, 0.7, 0.7], scale=0.79,
                 stride=2, cellsize=12, use_cuda=True):
        self.min_face = min_face
        self.thresh = thresh
        self.scale = scale
        self.stride = stride
        self.cellsize = cellsize
        self.pnet = PNet()
        self.rnet = RNet()
        self.onet = ONet()
        self._load_state(self.pnet)
        self._load_state(self.rnet)
        self._load_state(self.onet)
        if cuda.is_available() and use_cuda:
            self.pnet.cuda()
            self.rnet.cuda()
            self.onet.cuda()
        self.pnet.eval()
        self.rnet.eval()
        self.onet.eval()
        self.use_cuda = use_cuda

    def _load_state(self, net):
        ckpt = '../mtcnn_py/ckptdir/{}.pkl'.format(net._get_name())
        if not os.path.exists(ckpt):
            return
        print("load state from {}".format(ckpt))
        ckpt = torch.load(ckpt, map_location='cuda'
                          if torch.cuda.is_available() else 'cpu')
        net.load_state_dict(ckpt['net_state'])

    def detect_image(self, image):
        """ Detect faces over a single image

        Params:
            image: {ndarray(H, W, C)}
        """
        boxes, boxes_c, landmark = self._detect_pnet(image)
        boxes, boxes_c, landmark = self._detect_rnet(image, boxes_c)
        boxes, boxes_c, landmark = self._detect_onet(image, boxes_c)
        return boxes_c, landmark

    def _detect_pnet(self, image):
        """
        Params:
            image: {ndarray(1, C, H, W)}
        Returns:
            boxes:   {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
            boxes_c: {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
            landmark: None
        """
        NETSIZE = 12

        def _resize_image(image, scale):
            """ resize image according to scale
            Params:
                image: {ndarray(h, w, c)}
                scale: {float}
            """
            h, w, c = image.shape
            hn = int(h * scale); wn = int(w * scale)
            resized = cv2.resize(image, (wn, hn), interpolation=cv2.INTER_LINEAR)
            return resized

        def _generate_box(cls_map, reg_map, thresh, scale):
            """ generate boxes
            Params:
                cls_map: {ndarray(h, w)}
                reg_map: {ndarray(4, h, w)}
                thresh:  {float}
                scale:   {float}
            Returns:
                bboxes: {ndarray(n_boxes, 9)} x1, y1, x2, y2, score,
                        offsetx1, offsety1, offsetx2, offsety2
            """
            idx = np.where(cls_map > thresh)
            if idx[0].size == 0:
                return np.array([])
            x1 = np.round(self.stride * idx[1] / scale)
            y1 = np.round(self.stride * idx[0] / scale)
            x2 = np.round((self.stride * idx[1] + self.cellsize) / scale)
            y2 = np.round((self.stride * idx[0] + self.cellsize) / scale)
            # print("current scale: {} current size: {}".format(scale, self.cellsize / scale))
            score = cls_map[idx[0], idx[1]]
            reg = np.array([reg_map[i, idx[0], idx[1]] for i in range(4)])
            boxes = np.vstack([x1, y1, x2, y2, score, reg]).T
            return boxes

        # ======================= generate boxes ===========================
        cur_scale = NETSIZE / self.min_face
        cur_img = _resize_image(image, cur_scale)
        all_boxes = None
        while min(cur_img.shape[:-1]) >= NETSIZE:
            ## forward network
            X = ToTensor()(cur_img).unsqueeze(0)
            if cuda.is_available() and self.use_cuda:
                X = X.cuda()
            with torch.no_grad():
                y_pred = self.pnet(X)[0].cpu().detach().numpy()

            ## generate bbox
            cls_map = sigmoid(y_pred[0, :, :])
            reg_map = y_pred[1:5, :, :]
            boxes = _generate_box(cls_map, reg_map, self.thresh[0], cur_scale)

            ## update scale
            cur_scale *= self.scale
            cur_img = _resize_image(image, cur_scale)
            if boxes.size == 0:
                continue

            ## nms
            # boxes = boxes[self._nms(boxes[:, :5], 0.6, 'Union')]
            # show_bbox(image.copy(), boxes[:, :5])

            ## save bbox
            if all_boxes is None:
                all_boxes = boxes
            else:
                all_boxes = np.concatenate([all_boxes, boxes], axis=0)
        # ====================================================================

        if all_boxes is None:
            return np.array([]), np.array([]), None

        ## nms
        all_boxes = all_boxes[self._nms(all_boxes[:, 0:5], 0.6, 'Union')]

        ## parse
        boxes = all_boxes[:, :4]                    # (n_boxes, 4)
        score = all_boxes[:, 4].reshape((-1, 1))    # (n_boxes, 1)
        offset = all_boxes[:, 5:]                   # (n_boxes, 4)

        # refine bbox
        boxes_c = self._cal_box(boxes, offset)

        ## concat
        boxes = np.concatenate([boxes, score], axis=1)
        boxes_c = np.concatenate([boxes_c, score], axis=1)

        ## landmark
        landmark = None

        return boxes, boxes_c, landmark

    def _detect_rnet(self, image, bboxes):
        """
        Params:
            image:  {ndarray(H, W, C)}
            bboxes: {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
        Returns:
            boxes:   {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
            boxes_c: {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
            landmark: {ndarray(n_boxes, 10)}
        """
        NETSIZE = 24

        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), None

        bboxes = self._square(bboxes)
        patches = self._crop_patch(image, bboxes, NETSIZE)

        ## forward network
        X = torch.cat(list(map(lambda x: ToTensor()(x).unsqueeze(0), patches)), dim=0)
        if cuda.is_available() and self.use_cuda:
            X = X.cuda()
        with torch.no_grad():
            y_pred = self.rnet(X).cpu().detach().numpy()   # (n_boxes, 15)
        scores = sigmoid(y_pred[:, 0])                     # (n_boxes,)
        offset = y_pred[:, 1:5]                            # (n_boxes, 4)
        landmark = y_pred[:, 5:]                           # (n_boxes, 10)

        ## update score
        bboxes[:, -1] = scores

        ## filter
        idx = scores > self.thresh[1]
        bboxes = bboxes[idx]        # (n_boxes, 5)
        offset = offset[idx]        # (n_boxes, 4)
        landmark = landmark[idx]    # (n_boxes, 10)
        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), None

        ## nms
        idx = self._nms(bboxes, 0.5)
        bboxes = bboxes[idx]
        offset = offset[idx]
        landmark = landmark[idx]

        ## landmark
        landmark = self._cal_landmark(bboxes[:, :-1], landmark)

        bboxes_c = self._cal_box(bboxes[:, :-1], offset)
        bboxes_c = np.concatenate([bboxes_c, bboxes[:, -1].reshape((-1, 1))], axis=1)
        return bboxes, bboxes_c, landmark

    def _detect_onet(self, image, bboxes):
        """
        Params:
            image:  {ndarray(H, W, C)}
            bboxes: {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
        Returns:
            boxes:   {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
            boxes_c: {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
            landmark: {ndarray(n_boxes, 10)}
        """
        NETSIZE = 48

        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), np.array([])

        bboxes = self._square(bboxes)
        patches = self._crop_patch(image, bboxes, NETSIZE)

        ## forward network
        X = torch.cat(list(map(lambda x: ToTensor()(x).unsqueeze(0), patches)), dim=0)
        if cuda.is_available() and self.use_cuda:
            X = X.cuda()
        with torch.no_grad():
            y_pred = self.onet(X).cpu().detach().numpy()   # (n_boxes, 15)
        scores = sigmoid(y_pred[:, 0])                     # (n_boxes,)
        offset = y_pred[:, 1:5]                            # (n_boxes, 4)
        landmark = y_pred[:, 5:]                           # (n_boxes, 10)

        ## update score
        bboxes[:, -1] = scores

        ## filter
        idx = scores > self.thresh[2]
        bboxes = bboxes[idx]        # (n_boxes, 5)
        offset = offset[idx]        # (n_boxes, 4)
        landmark = landmark[idx]    # (n_boxes, 10)
        if bboxes.shape[0] == 0:
            return np.array([]), np.array([]), np.array([])

        ## nms
        idx = self._nms(bboxes, 0.5, mode='Minimum')
        bboxes = bboxes[idx]
        offset = offset[idx]
        landmark = landmark[idx]

        ## landmark
        landmark = self._cal_landmark(bboxes[:, :-1], landmark)

        bboxes_c = self._cal_box(bboxes[:, :-1], offset)
        bboxes_c = np.concatenate([bboxes_c, bboxes[:, -1].reshape((-1, 1))], axis=1)
        return bboxes, bboxes_c, landmark

    @classmethod
    def _cal_box(cls, boxes, offset):
        """ refine boxes
        Params:
            boxes:  {ndarray(n_boxes, 4)} unrefined boxes
            offset: {ndarray(n_boxes, 4)} boxes offset
        Returns:
            boxes_c: {ndarray(n_boxes, 4)} refined boxes
        Notes:
            offset = (gt - square) / size of square box
            => gt = square + offset * size of square box (*)
            where
            - `offset`, `gt`, `square` are ndarrays
            - `size of square box` is a number
        """
        ## square boxes' heights and widths
        x1, y1, x2, y2 = np.hsplit(boxes, 4)        # (n_boxes, 1)
        w = x2 - x1 + 1; h = y2 - y1 + 1            # (n_boxes, 1)
        bsize = np.hstack([w, h] * 2)               # (n_boxes, 4)
        bbase = np.hstack([x1, y1, x2, y2])         # (n_boxes, 4)
        ## refine
        boxes_c = bbase + offset * bsize
        return boxes_c

    @classmethod
    def _cal_landmark(cls, boxes, offset):
        """ calculate landmark
        Params:
            boxes:  {ndarray(n_boxes, 4)} unrefined boxes
            offset: {ndarray(n_boxes, 10)} landmark offset
        Returns:
            landmark: {ndarray(n_boxes, 10)} landmark location
        Notes:
            offset_x = (gt_x - square_x1) / size of square box
            => gt_x = square_x1 + offset_x * size of square box (*)
            offset_y = (gt_y - square_y1) / size of square box
            => gt_y = square_y1 + offset_y * size of square box (*)
            where
            - `offset_{}`, `gt_{}`, `square_{}1` are ndarrays
            - `size of square box` is a number
        """
        ## square boxes' heights and widths
        x1, y1, x2, y2 = np.hsplit(boxes, 4)        # (n_boxes, 1)
        w = x2 - x1 + 1; h = y2 - y1 + 1            # (n_boxes, 1)
        bsize = np.hstack([w, h] * 5)               # (n_boxes, 10)
        bbase = np.hstack([x1, y1] * 5)             # (n_boxes, 10)
        ## refine
        landmark = bbase + offset * bsize
        return landmark

    @classmethod
    def _nms(cls, dets, thresh, mode="Union"):
        """
        Params:
            dets:   {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
            thresh: {float} retain overlap <= thresh
            mode:   {str} 'Union' or 'Minimum'
        Returns:
            idx: {list[int]} indexes to keep
        Notes:
            greedily select boxes with high confidence,
            keep boxes with overlap <= thresh,
            rule out overlap > thresh;
            if thresh == 1.0, keep all
        """
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        x2 = dets[:, 2]
        y2 = dets[:, 3]
        scores = dets[:, 4]

        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]

        idx = []
        while order.size > 0:
            i = order[0]
            idx.append(i)

            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])

            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h

            if mode == "Union":
                ovr = inter / (areas[i] + areas[order[1:]] - inter)
            elif mode == "Minimum":
                ovr = inter / np.minimum(areas[i], areas[order[1:]])

            inds = np.where(ovr <= thresh)[0]
            order = order[inds + 1]
        return idx

    @classmethod
    def _square(cls, bbox):
        """ convert rectangular bbox to square bbox
        Params:
            bbox: {ndarray(n_boxes, 5)}
        Returns:
            bbox_s: {ndarray(n_boxes, 5)}
        """
        ## rectangular boxes' heights and widths
        x1, y1, x2, y2, score = np.hsplit(bbox, 5)  # (n_boxes, 1)
        w = x2 - x1 + 1; h = y2 - y1 + 1            # (n_boxes, 1)
        maxsize = np.maximum(w, h)                  # (n_boxes, 1)

        ## square boxes' corners
        x1 = x1 + w / 2 - maxsize / 2
        y1 = y1 + h / 2 - maxsize / 2
        x2 = x1 + maxsize - 1
        y2 = y1 + maxsize - 1

        bbox_s = np.hstack([x1, y1, x2, y2, score])
        return bbox_s

    @classmethod
    def _crop_patch(cls, image, bbox_s, size):
        """ crop patches from image
        Params:
            image:  {ndarray(H, W, C)}
            bbox_s: {ndarray(n_boxes, 5)} squared bbox
            size:   {int} output patch side length
        Returns:
            patches: {list[ndarray(h, w, c)]}
        """

        def locate(bbox, imh, imw):
            """
            Params:
                bbox: {ndarray(n_boxes, 5)} x1, y1, x2, y2, score
                imh, imw: {float} size of input image
            Returns:
                oriloc, dstloc: {ndarray(n_boxes, 4)} x1, y1, x2, y2
            """
            ## original boxes' heights and widths
            x1, y1, x2, y2, score = np.hsplit(bbox_s, 5)    # (n_boxes, 1)
            x1, y1, x2, y2 = list(map(lambda x: x.astype('int').reshape(-1),
                                      [x1, y1, x2, y2]))
            w = x2 - x1 + 1; h = y2 - y1 + 1                # (n_boxes, 1)

            ## destination boxes
            xx1 = np.zeros_like(x1)
            yy1 = np.zeros_like(y1)
            xx2 = w.copy() - 1
            yy2 = h.copy() - 1

            ## left side out of image
            i = x1 < 0
            xx1[i] = 0 + (0 - x1[i])
            x1[i] = 0
            ## top side out of image
            i = y1 < 0
            yy1[i] = 0 + (0 - y1[i])
            y1[i] = 0
            ## right side out of image
            i = x2 > imw - 1
            xx2[i] = (w[i] - 1) + (imw - 1 - x2[i])
            x2[i] = imw - 1
            ## bottom side out of image
            i = y2 > imh - 1
            yy2[i] = (h[i] - 1) + (imh - 1 - y2[i])
            y2[i] = imh - 1

            return [x1, y1, x2, y2, xx1, yy1, xx2, yy2]

        imh, imw, _ = image.shape
        x1, y1, x2, y2, score = np.hsplit(bbox_s, 5)
        pw = x2 - x1 + 1; ph = y2 - y1 + 1
        pshape = np.hstack([ph, pw,
                            3 * np.ones(shape=(score.shape[0], 1))]).astype('int')  # (n_boxes, 3)
        # keep = np.bitwise_or(pw > 0, ph > 0).reshape(-1)
        # pshape = pshape[keep]; bbox_s = bbox_s[keep]
        n_boxes = bbox_s.shape[0]

        x1, y1, x2, y2, xx1, yy1, xx2, yy2 = locate(bbox_s, imh, imw)   # (n_boxes, 1)

        patches = []
        for i_boxes in range(n_boxes):
            patch = np.zeros(shape=pshape[i_boxes], dtype='uint8')
            patch[yy1[i_boxes]: yy2[i_boxes], xx1[i_boxes]: xx2[i_boxes]] = \
                image[y1[i_boxes]: y2[i_boxes], x1[i_boxes]: x2[i_boxes]]
            patch = cv2.resize(patch, (size, size))
            patches += [patch]
        return patches
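A minimal usage sketch for the detector class above, assuming the checkpoint files referenced in _load_state exist; detect_image returns refined (x1, y1, x2, y2, score) rows plus landmarks:

import cv2

detector = MtcnnDetector(min_face=24, use_cuda=True)
image = cv2.imread('test.jpg')                    # HxWxC ndarray
boxes_c, landmarks = detector.detect_image(image)
for x1, y1, x2, y2, score in boxes_c:
    cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
cv2.imshow('detections', image)
cv2.waitKey()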