def detect(net, img_path, thresh, wid, hei):
    """Detect objects in one image, draw them and save the annotated copy.

    The image is resized to ``(wid, hei)`` when both are positive, otherwise
    it is scaled so that its area becomes 1700 * 1200 pixels.  Every
    detection whose score is at least ``thresh`` is drawn as a red rectangle
    on the image re-read with OpenCV, which is then written to
    ``args.save_dir`` under the original base name.

    Args:
        net: Detection network, called as ``net(x)``.
        img_path: Path of the image to process.
        thresh: Minimum score for a detection to be drawn.
        wid: Target width; ignored unless both ``wid`` and ``hei`` are > 0.
        hei: Target height; ignored unless both ``wid`` and ``hei`` are > 0.

    Returns:
        Elapsed seconds from just before the forward pass until all boxes
        have been drawn.
    """
    with Image.open(img_path) as pil_img:
        # The preprocessing below needs exactly three channels, so every
        # mode (grayscale, palette, RGBA, ...) is normalised to RGB.
        img = np.array(pil_img.convert('RGB'))

    if wid > 0 and hei > 0:
        image = cv2.resize(img, (wid, hei))
    else:
        max_im_shrink = np.sqrt(1700 * 1200 / (img.shape[0] * img.shape[1]))
        image = cv2.resize(img, None, None, fx=max_im_shrink,
                           fy=max_im_shrink, interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    with torch.no_grad():
        t1 = time.time()
        y = net(x)
    detections = y.data

    # Box coordinates are multiplied by the original width/height.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    for i in range(detections.size(1)):
        j = 0
        # Stop at the first score below thresh (assumes scores are sorted in
        # descending order per class -- TODO confirm) and never index past
        # the last detection slot.
        while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
            # OpenCV drawing functions require integer pixel coordinates.
            left_up = (int(pt[0]), int(pt[1]))
            right_bottom = (int(pt[2]), int(pt[3]))
            cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)
            j += 1

    t2 = time.time()
    cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img)
    return t2 - t1
def detect_face(net, img, thresh):
    """Run ``net`` on an image array and return the boxes above ``thresh``.

    Args:
        net: Detection network, called as ``net(x)``.
        img: Image array whose first two axes are height and width.
        thresh: Minimum score for a detection to be returned.

    Returns:
        A list of ``[x, y, w, h, score]`` entries, where ``x``/``y`` is the
        top-left corner and ``w``/``h`` the box size in integer pixels, and
        ``score`` is the raw score element taken from the network output.
    """
    x = to_chw_bgr(img)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    y = net(x)
    detections = y.data
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    bboxes = []
    for i in range(detections.size(1)):
        j = 0
        # Bounded so that a class whose every slot passes the threshold does
        # not index past the end of the detection tensor.
        while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
            score = detections[0, i, j, 0]
            # ``np.int`` was removed in NumPy 1.24; the builtin is equivalent.
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            bboxes.append(
                [pt[0], pt[1], pt[2] - pt[0], pt[3] - pt[1], score])
            j += 1
    return bboxes
def detect_face(net, img, shrink):
    """Detect on an optionally rescaled image and map boxes back.

    Args:
        net: Detection network, called as ``net(x)``.
        img: Image array whose first two axes are height and width.
        shrink: Resize factor applied to both axes before detection; the
            returned coordinates are divided by it again.

    Returns:
        Array with one row ``[xmin, ymin, xmax, ymax, score]`` for every
        entry of class index 1 whose score is at least ``args.thresh``.
    """
    resized = img
    if shrink != 1:
        resized = cv2.resize(img, None, None, fx=shrink, fy=shrink,
                             interpolation=cv2.INTER_LINEAR)

    blob = to_chw_bgr(resized).astype('float32')
    blob -= cfg.img_mean
    blob = blob[[2, 1, 0], :, :]
    batch = Variable(torch.from_numpy(blob).unsqueeze(0))
    if use_cuda:
        batch = batch.cuda()

    # Only class index 1 of the first batch element is read.
    rows = net(batch).data.cpu().numpy()[0, 1]

    res_h = resized.shape[0]
    res_w = resized.shape[1]
    det = np.column_stack((
        res_w * rows[:, 1] / shrink,
        res_h * rows[:, 2] / shrink,
        res_w * rows[:, 3] / shrink,
        res_h * rows[:, 4] / shrink,
        rows[:, 0],
    ))
    return det[det[:, 4] >= args.thresh, :]
def detect_face(net, img, thresh):
    """Detect on a copy scaled to a 640-pixel long side and return boxes.

    Args:
        net: Detection network, called as ``net(x)``.
        img: Image array of shape ``(height, width, channels)``.
        thresh: Minimum score for a detection to be returned.

    Returns:
        A list of ``[x1, y1, x2, y2, score]`` entries in the pixel
        coordinates of ``img``.  ``y1`` is moved down by 20% of the box
        height (plus one pixel), so it becomes a float.
    """
    height, width, _ = img.shape
    im_shrink = 640.0 / max(height, width)
    image = cv2.resize(img, None, None, fx=im_shrink, fy=im_shrink,
                       interpolation=cv2.INTER_LINEAR).copy()

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    y = net(x)
    detections = y.data
    # Scaling by the original size maps boxes straight back onto ``img``.
    scale = torch.Tensor([width, height, width, height])

    bboxes = []
    for i in range(detections.size(1)):
        j = 0
        # Bounded so the loop cannot run past the last detection slot.
        while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
            score = detections[0, i, j, 0]
            # ``np.int`` was removed in NumPy 1.24; the builtin is equivalent.
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            box = [pt[0], pt[1], pt[2], pt[3], score]
            # Lower the top edge by a fifth of the box height.
            box[1] += 0.2 * (box[3] - box[1] + 1)
            bboxes.append(box)
            j += 1
    return bboxes
def detect(net, img_path, thresh):
    """Detect objects in an image file and return them sorted by score.

    The image is scaled so that its area becomes 2000 * 2000 pixels before
    being passed through ``net``.

    Args:
        net: Detection network, called as ``net(x)``.
        img_path: Path of the image to process.
        thresh: Minimum score for a detection to be returned.

    Returns:
        Array with one row ``[x1, y1, x2, y2, score]`` per detection, ordered
        by descending score, or an empty 1-D array when nothing passes the
        threshold (in which case no timing line is printed).
    """
    with Image.open(img_path) as pil_img:
        # Anything other than three channels (palette, RGBA, grayscale, ...)
        # breaks the mean subtraction below, so always normalise to RGB.
        img = np.array(pil_img.convert('RGB'))

    max_im_shrink = np.sqrt(2000 * 2000 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    if cfg.rescale:
        x = 1 / 255. * (x + 127.5)
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    t1 = time.time()
    y = net(x)
    detections = y.data
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    detect_results = []
    for i in range(detections.size(1)):
        j = 0
        while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
            score = detections[0, i, j, 0]
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            detect_results.append(
                [pt[0], pt[1], pt[2], pt[3], float(score)])
            j += 1

    if not detect_results:
        return np.array([])

    detect_results = np.array(detect_results)
    order = detect_results[:, 4].ravel().argsort()[::-1]
    det = detect_results[order, :]

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))
    return det
def detect(net, img_path, thresh):
    """Detect objects in an image, draw scored boxes and save the result.

    The working resolution is an area of 320 * 240 pixels when the
    environment variable ``IS_QVGA_MONO`` equals ``'1'`` and 640 * 480
    otherwise (including when the variable is unset).  When
    ``cfg.IS_MONOCHROME`` is set the three channels are collapsed into one
    weighted luminance channel before the forward pass.

    Args:
        net: Detection network, called as ``net(x)``.
        img_path: Path of the image to process.
        thresh: Minimum score for a detection to be drawn.

    Returns:
        None.  The annotated image is written to ``args.save_dir``.
    """
    with Image.open(img_path) as pil_img:
        img = np.array(pil_img.convert('RGB'))

    # .get() avoids a KeyError when the variable is not exported.
    if os.environ.get('IS_QVGA_MONO') == '1':
        target_area = 320 * 240
    else:
        target_area = 640 * 480
    max_im_shrink = np.sqrt(target_area / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    if cfg.IS_MONOCHROME:
        # Weighted sum of the three channels -> single-channel input.
        x = 0.299 * x[0] + 0.587 * x[1] + 0.114 * x[2]
        x = torch.from_numpy(x).unsqueeze(0).unsqueeze(0)
    else:
        x = torch.from_numpy(x).unsqueeze(0)
    if use_cuda:
        x = x.cuda()

    t1 = time.time()
    y = net(x)
    detections = y.data
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    for i in range(detections.size(1)):
        j = 0
        # Bounded so the loop cannot run past the last detection slot.
        while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
            score = float(detections[0, i, j, 0])
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
            # OpenCV drawing functions require integer pixel coordinates.
            left_up = (int(pt[0]), int(pt[1]))
            right_bottom = (int(pt[2]), int(pt[3]))
            j += 1

            cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)
            conf = "{:.3f}".format(score)
            point = (left_up[0], left_up[1] - 5)
            cv2.putText(img, conf, point, cv2.FONT_HERSHEY_COMPLEX,
                        0.6, (0, 255, 0), 1)

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))
    cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img)
def detect(net, img_path, thresh):
    """Detect faces in an image, draw labelled boxes and save the result.

    The image is scaled to an area of 1500 * 1000 pixels for detection.
    Each detection scoring at least ``thresh`` is drawn as a red rectangle
    with its score on a filled label above the box, and the annotated image
    is written to ``args.save_dir``.

    Args:
        net: Detection network, called as ``net(x)``.
        img_path: Path of the image to process.
        thresh: Minimum score for a detection to be drawn and counted.

    Returns:
        None.  The path, the number of drawn detections and the elapsed time
        are printed.
    """
    with Image.open(img_path) as pil_img:
        # Normalise every mode to three channels for the preprocessing.
        img = np.array(pil_img.convert('RGB'))

    max_im_shrink = np.sqrt(1500 * 1000 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    t1 = time.time()
    y = net(x)
    detections = y.data
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    font = cv2.FONT_HERSHEY_SIMPLEX
    cnt_faces = 0
    for i in range(detections.size(1)):
        j = 0
        # Bounded so the loop cannot run past the last detection slot.
        while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
            cnt_faces += 1
            score = float(detections[0, i, j, 0])
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            # Plain Python ints are accepted by every OpenCV binding.
            left_up = (int(pt[0]), int(pt[1]))
            right_bottom = (int(pt[2]), int(pt[3]))
            j += 1

            cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)
            conf = "{:.2f}".format(score)
            text_size, baseline = cv2.getTextSize(conf, font, 0.3, 1)
            p1 = (left_up[0], left_up[1] - text_size[1])
            # Filled background so the score stays readable on any image.
            cv2.rectangle(img,
                          (p1[0] - 1, p1[1] - 2 - baseline),
                          (p1[0] + text_size[0], p1[1] + text_size[1]),
                          [255, 0, 0], -1)
            cv2.putText(img, conf, (p1[0], p1[1] + baseline), font, 0.3,
                        (255, 255, 255), 1, 8)

    t2 = time.time()
    print('detect:{}, num_faces:{}, timer:{}'.format(img_path, cnt_faces,
                                                     t2 - t1))
    cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img)
def detect(net, img_path, thresh):
    """Detect faces in an image and save each one as a separate crop.

    Every detection scoring at least ``thresh`` is padded by 5 pixels on all
    sides (clipped to the image), cropped from the original image and saved
    in ``args.save_dir``.  File names are the last six characters of
    ``name_ss + str(name_count)`` plus the source extension; the
    module-level counter ``name_count`` is incremented for every crop.

    Args:
        net: Detection network, called as ``net(x)``.
        img_path: Path of the image to process.
        thresh: Minimum score for a detection to be cropped.

    Returns:
        None.
    """
    global name_count

    with Image.open(img_path) as pil_img:
        # One RGB copy serves both the detector input and the crops.
        origin_img = pil_img.convert('RGB')
    img = np.array(origin_img)
    height, width, _ = img.shape

    max_im_shrink = np.sqrt(1500 * 1000 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    y = net(x)
    detections = y.data
    scale = torch.Tensor([width, height, width, height])

    # Extension of the source image, including the leading dot.
    img_ext = os.path.splitext(img_path)[1]

    for i in range(detections.size(1)):
        j = 0
        # Bounded so the loop cannot run past the last detection slot.
        while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            xmin = max(0, int(pt[0]) - 5)
            ymin = max(0, int(pt[1]) - 5)
            xmax = min(width, int(pt[2]) + 5)
            ymax = min(height, int(pt[3]) + 5)
            crop_img = origin_img.crop((xmin, ymin, xmax, ymax))

            name_count += 1
            filename = (name_ss + str(name_count))[-6:]
            # os.path.join keeps the crop inside save_dir even when the
            # directory is given without a trailing separator.
            out_path = os.path.join(args.save_dir, filename + img_ext)
            print(out_path)
            crop_img.save(out_path)
            j += 1
def detect(net, img_path, thresh, save_crops):
    """Detect faces, optionally save padded crops, and save an annotated copy.

    Args:
        net: Detection network, called as ``net(x)``.
        img_path: Path of the image to process.
        thresh: Minimum score for a detection to be used.
        save_crops: When true, each detection is additionally written to
            ``args.save_dir`` as ``face<k>-<image name>``, where ``k`` is the
            1-based index of the detection within its class.  Crops are
            enlarged by 10% of the box size on every side and clipped to the
            image.

    Returns:
        None.  The annotated image is written to ``args.save_dir`` under the
        original base name.
    """
    with Image.open(img_path) as pil_img:
        # Normalise every mode to three channels; grayscale input would
        # otherwise fail on the shape unpacking and the mean subtraction.
        img = np.array(pil_img.convert('RGB'))
    height, width, _ = img.shape
    # Crops come from an untouched copy so that boxes and labels drawn for
    # earlier detections never leak into later crops.
    clean_img = img.copy() if save_crops else None

    max_im_shrink = np.sqrt(1200 * 1100 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    t1 = time.time()
    y = net(x)
    detections = y.data
    scale = torch.Tensor([width, height, width, height])

    base_name = os.path.basename(img_path)
    # Defined before the loop so it exists even without any class slice.
    image_filename = os.path.join(args.save_dir, base_name)
    font = cv2.FONT_HERSHEY_SIMPLEX

    for i in range(detections.size(1)):
        j = 0
        # Bounded so the loop cannot run past the last detection slot.
        while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
            score = float(detections[0, i, j, 0])
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            x1, y1, x2, y2 = (int(v) for v in pt[:4])
            left_up, right_bottom = (x1, y1), (x2, y2)
            j += 1

            if save_crops:
                pad_y = int(0.1 * (y2 - y1))
                pad_x = int(0.1 * (x2 - x1))
                # Clamp explicitly: a negative slice start would wrap
                # around to the far side of the array.
                top = max(0, y1 - pad_y)
                bottom = min(height, y2 + pad_y)
                left = max(0, x1 - pad_x)
                right = min(width, x2 + pad_x)
                face = clean_img[top:bottom, left:right]
                if face.size:  # skip degenerate boxes; cvtColor rejects them
                    cv2.imwrite(
                        os.path.join(args.save_dir,
                                     'face{}-'.format(j) + base_name),
                        cv2.cvtColor(face, cv2.COLOR_RGB2BGR))

            cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)
            conf = "{:.2f}".format(score)
            text_size, baseline = cv2.getTextSize(conf, font, 0.3, 1)
            p1 = (left_up[0], left_up[1] - text_size[1])
            cv2.rectangle(img,
                          (p1[0] - 1, p1[1] - 2 - baseline),
                          (p1[0] + text_size[0], p1[1] + text_size[1]),
                          [255, 0, 0], -1)
            cv2.putText(img, conf, (p1[0], p1[1] + baseline), font, 0.3,
                        (255, 255, 255), 1, 8)

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))
    # The array is RGB (loaded through PIL); OpenCV writes BGR.
    cv2.imwrite(image_filename, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
def detect(net, im, thresh, pre_bbox):
    """Blur the detected regions of a frame and return it with the boxes.

    ``im`` is converted from BGR to RGB, scaled to an area of 1200 * 1000
    pixels and passed through ``net``.  For each detection scoring at least
    ``thresh`` a mask from ``generate_mask`` is cleared in ``mask_img``; the
    result keeps ``im`` where ``mask_img`` is one and uses a 15x15 box blur
    of ``im`` where it is zero.

    Args:
        net: Detection network, called as ``net(x)``.
        im: Frame as a BGR array (it is passed to ``cv2.cvtColor`` with
            ``COLOR_BGR2RGB``).
        thresh: Minimum score for a detection to be used.
        pre_bbox: Boxes handed to ``find_no_match_box`` together with the
            boxes of this frame -- presumably the previous frame's result;
            verify against the caller.

    Returns:
        Tuple ``(result_img, now_bbox)``: the composited frame and the list
        of boxes found in this call extended by ``add_bbox``.
    """
    img = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    if img.mode == 'L':
        img = img.convert('RGB')
    img = np.array(img)
    height, width, _ = img.shape
    max_im_shrink = np.sqrt(1200 * 1000 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink, interpolation=cv2.INTER_LINEAR)
    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    y = net(x)
    detections = y.data
    # Box coordinates are multiplied by the frame width/height.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    # 1 = keep the original pixel, 0 = take the blurred pixel.
    mask_img = np.ones(im.shape, np.int8)
    kernel_size = 15
    blur_img = cv2.blur(im, (kernel_size, kernel_size))
    now_bbox = []
    for i in range(detections.size(1)):
        j = 0
        # NOTE(review): no upper bound on j; this indexes past the tensor if
        # every slot of a class passes the threshold.
        while detections[0, i, j, 0] >= thresh:
            score = detections[0, i, j, 0]
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            now_bbox.append(pt)
            j += 1
            # x and y are rebound here; the input tensor and network output
            # of the same names are not used again below.
            x, y, w, h = pt[0], pt[1], pt[2] - pt[0], pt[3] - pt[1]
            # Arguments: frame height, frame width, half of the longer box
            # side, box centre x, box centre y -- presumably a disc-shaped
            # boolean mask; confirm in generate_mask.
            mask = generate_mask(im.shape[0], im.shape[1], max(w, h) / 2, x + w / 2, y + h / 2)
            mask_img[mask] = [0, 0, 0]
    add_bbox = find_no_match_box(pre_bbox, now_bbox)
    for bbox in add_bbox:
        # NOTE(review): i and j are leftovers from the detection loop above,
        # and each bbox is indexed with four indices as if it were the
        # detection tensor.  This looks copied from that loop -- confirm
        # what find_no_match_box returns before relying on this branch.
        score = bbox[0, i, j, 0]
        pt = bbox[0, i, j, 1:] * scale.numpy()
        j += 1
        x, y, w, h = pt[0], pt[1], pt[2] - pt[0], pt[3] - pt[1]
        mask = generate_mask(im.shape[0], im.shape[1], max(w, h) / 2, x + w / 2, y + h / 2)
        mask_img[mask] = [0, 0, 0]
    now_bbox += add_bbox
    # Complement of mask_img: 1 where the blurred pixel is used.
    mask_img_verse = np.ones(img.shape, np.int8) - mask_img
    result_img = mask_img * im + mask_img_verse * blur_img
    return result_img, now_bbox
def detect(net, img_path, thresh,use_cuda): img = Image.open(img_path) if img.mode == 'L': img = img.convert('RGB') img = np.array(img) height, width, _ = img.shape #max_im_shrink = np.sqrt(1400 * 1200 / (img.shape[0] * img.shape[1])) max_im_shrink=1.8 image = cv2.resize(img, None, None, fx=max_im_shrink,fy=max_im_shrink, interpolation=cv2.INTER_LINEAR) #这种是对原图像width,height进行缩放,fx,fy表示x和y轴的缩放比例 #image = cv2.resize(img, (800, 600)) x = to_chw_bgr(image) x = x.astype('float32') x -= cfg.img_mean x = x[[2, 1, 0], :, :] x = Variable(torch.from_numpy(x).unsqueeze(0)) if use_cuda: x = x.cuda() t1 = time.time() y = net(x) detections = y.data scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img = cv2.imread(img_path, cv2.IMREAD_COLOR) img1=img.copy() left=[] right=[] score=[] for i in range(detections.size(1)): j = 0 #print(detections[0, i, j, 0]) while detections[0, i, j, 0] >= 0.8: score.append((detections[0, i, j, 0],j)) j += 1 if len(score)>0: score=sorted(score,key=lambda value:value[0],reverse=True) #以list中tuple的第一个元素进行降序排列 #sorted()函数不改变原list,sort()函数会改变,默认升序,reverse=True实现降序 num=0 for score in score: if num>1: #检测到的包含手的概率最高的两个框 break index=int(score[1]) score=score[0] pt = (detections[0, i, index, 1:] * scale).cpu().numpy() left_up, right_bottom = (pt[0], pt[1]), (pt[2], pt[3]) #j += 1 cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2) #左上和右下角坐标 left.append(left_up) right.append(right_bottom) conf = "{:.3f}".format(score) #保留3位小数 point = (int(left_up[0]), int(left_up[1] - 5)) cv2.putText(img, conf, point, cv2.FONT_HERSHEY_COMPLEX,0.6, (0, 255, 0), 1) num+=1 #else: #continue #对于for..else循环,执行完里面的for循环后再执行else,continue结束当前for循环(外面的),执行下次for循环 #break hand_left=None hand_right=None #得到左手的框 if (len(left)==2): if left[0][0]<left[1][0]: left_point=left[0] else: left_point=left[1] for i in range(len(left)): if left_point==left[i]: left_axis=i else: right_axis=i left_width=right[left_axis][0]-left_point[0] 
left_height=right[left_axis][1]-left_point[1] #print('坐标是({},{})'.format(left_width,left_height)) #左上坐标 if(int(left_point[0]-0.3*left_width)<0): new_left_x=0 else: new_left_x=int(left_point[0]-0.3*left_width) if(int(left_point[1]-0.3*left_height)<0): new_left_y=0 else: new_left_y=int(left_point[1]-0.3*left_height) new_left_point=(new_left_x,new_left_y) #左下坐标 #一开始左手和右手的框就有重叠,右边框就不扩充了 if not(int(right[left_axis][0]>int(left[right_axis][0]))): if(int(right[left_axis][0]+0.3*left_width)>int(left[right_axis][0])): new_left_x1=int(left[right_axis][0]) else: new_left_x1=int(right[left_axis][0]+0.3*left_width) else: new_left_x1=int(right[left_axis][0]) if(int(right[left_axis][1]+0.3*left_height)>height): new_left_y1=height else: new_left_y1=int(right[left_axis][1]+0.3*left_height) new_left_point1=(new_left_x1,new_left_y1) #得到右手的框 right_point=left[right_axis] #左上角坐标 right_width=right[right_axis][0]-right_point[0] right_height=right[right_axis][1]-right_point[1] #print('坐标是({},{})'.format(right_width,right_height)) #一开始左手和右手的框就有重叠,左边框就不扩充了 if not(int(right_point[0])<int(right[left_axis][0])): if(int(right_point[0]-0.3*right_width)<int(right[left_axis][0])): new_right_x=int(right[left_axis][0]) else: new_right_x=int(right_point[0]-0.3*right_width) else: new_right_x=int(right_point[0]) if(int(right_point[1]-0.4*right_height)<0): new_right_y=0 else: new_right_y=int(right_point[1]-0.4*right_height) new_right_point=(new_right_x,new_right_y) #右下坐标 if(int(right[right_axis][0]+0.3*right_width)>width): new_right_x1=width else: new_right_x1=int(right[right_axis][0]+0.3*right_width) if(int(right[right_axis][1]+0.4*right_height)>height): new_right_y1=height else: new_right_y1=int(right[right_axis][1]+0.4*right_height) new_right_point1=(new_right_x1,new_right_y1) box1=(new_left_point,new_left_point1) box2=(new_right_point,new_right_point1) t2 = time.time() #print('detect:{} timer:{}'.format(img_path, t2 - t1)) 
#cropImg_right=img1[new_right_point[1]:new_right_point1[1],new_right_point[0]:new_right_point1[0]] #cv2.imwrite(os.path.join('.', os.path.basename(img_path)), cropImg_right) return box1,box2 elif len(left==1):
def saveTracesNpy(net, img_list):
    """Dump per-image activation traces of ``net`` as ``.npy`` files.

    Ensures that ``args.save_traces_npy_dir`` and its ``inputs`` and
    ``outputs`` sub-directories exist (printing a message and returning if
    one cannot be created), then runs every image of ``img_list`` through
    ``net`` and saves the slices of ``activation['prepatch']`` and
    ``activation['rnn_model']`` as ``trace_<image index>_<k>.npy``.

    Args:
        net: Network exposing ``rnn_model.inputDims``, ``rnn_model.nRows``
            and ``rnn_model.nCols``; called as ``net(x)``.
        img_list: Image file names relative to ``args.image_folder``.

    Returns:
        None.
    """
    trace_root = args.save_traces_npy_dir
    required_dirs = (
        trace_root,
        os.path.join(trace_root, 'inputs'),
        os.path.join(trace_root, 'outputs'),
    )
    for folder in required_dirs:
        if not os.path.isdir(folder):
            try:
                os.mkdir(folder)
            except OSError:
                print("Creation of the directory %s failed" % folder)
                return

    rnn = net.rnn_model
    inputDims, nRows, nCols = rnn.inputDims, rnn.nRows, rnn.nCols

    for count, img_path in enumerate(img_list):
        pil_img = Image.open(os.path.join(args.image_folder, img_path))
        img = np.array(pil_img.convert('RGB'))

        # Scale to an area of 640 * 480 pixels.
        max_im_shrink = np.sqrt(640 * 480 / (img.shape[0] * img.shape[1]))
        image = cv2.resize(img, None, None, fx=max_im_shrink,
                           fy=max_im_shrink, interpolation=cv2.INTER_LINEAR)

        blob = to_chw_bgr(image).astype('float32')
        blob -= cfg.img_mean
        blob = blob[[2, 1, 0], :, :]
        batch = Variable(torch.from_numpy(blob).unsqueeze(0))
        if use_cuda:
            batch = batch.cuda()

        # The output is not used; the call is made so that ``activation``
        # holds this image's values (presumably filled by forward hooks
        # registered elsewhere -- verify).
        net(batch)

        patches = torch.cat(
            torch.unbind(activation['prepatch'], dim=2), dim=0)
        patches = torch.reshape(patches, (-1, inputDims, nRows, nCols))
        rnnX = activation['rnn_model']

        # Move the leading axis to the end so that k indexes one trace.
        patches_all = torch.stack(
            torch.split(patches, split_size_or_sections=1, dim=0), dim=-1)
        rnnX_all = torch.stack(
            torch.split(rnnX, split_size_or_sections=1, dim=0), dim=-1)

        for k in range(patches_all.shape[-1]):
            suffix = str(count) + '_' + str(k) + '.npy'
            patches_tosave = (
                patches_all[0, :, :, :, k].cpu().numpy().transpose(1, 2, 0))
            rnnX_tosave = rnnX_all[0, :, k].cpu().numpy()
            np.save(trace_root + '/inputs/trace_' + suffix, patches_tosave)
            np.save(trace_root + '/outputs/trace_' + suffix, rnnX_tosave)
def _expand_hand_box(top_left, bottom_right, ratio_x, ratio_y, width, height):
    """Grow a box by a fraction of its size per side, clipped to the image."""
    box_w = bottom_right[0] - top_left[0]
    box_h = bottom_right[1] - top_left[1]
    x0 = max(0, int(top_left[0] - ratio_x * box_w))
    y0 = max(0, int(top_left[1] - ratio_y * box_h))
    x1 = min(width, int(bottom_right[0] + ratio_x * box_w))
    y1 = min(height, int(bottom_right[1] + ratio_y * box_h))
    return (x0, y0), (x1, y1)


def detect_hand(net, img_path, thresh, use_cuda, save_path):
    """Detect the left and right hand in an image and return enlarged boxes.

    For each class index 1 ('left') and 2 ('right') the highest-scoring
    detection with a score of at least 0.2 is kept.  The kept boxes are
    enlarged so the hand sits near the centre of the crop, drawn on the
    image together with score and hand label, and the annotated image is
    written to ``save_path`` whenever at least one hand was found.

    When both hands are found, the inner edge of each box is grown at most
    up to the facing edge of the other hand's box, and is not grown at all
    if the two detections already overlap horizontally.  This assumes the
    left hand lies to the left of the right hand.

    Args:
        net: Detection network, called as ``net(x)``.
        img_path: Path of the image to process.
        thresh: Not used; the score threshold is fixed at 0.2.  Kept so the
            call signature stays unchanged.
        use_cuda: Move the input tensor to the GPU when true.
        save_path: Directory for the annotated image; created if missing.

    Returns:
        Tuple ``(left_box, right_box, h_type)``.  Each box is
        ``((x0, y0), (x1, y1))`` in integer pixels, or ``None`` when that
        hand was not detected; ``h_type`` lists the detected hand labels.
    """
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    with Image.open(img_path) as pil_img:
        # Normalise every mode to three channels for the preprocessing.
        img = np.array(pil_img.convert('RGB'))
    height, width, _ = img.shape

    # Fixed region handed to process_img (original note: handles the pixels
    # above the piano keyboard).
    Rect = (32, 499, 1260, 639)
    img = process_img(img, Rect)

    # Fixed magnification applied to both axes before detection.
    max_im_shrink = 1.5
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    y = net(x)
    detections = y.data
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)

    left = []     # top-left corner of each kept detection
    right = []    # bottom-right corner of each kept detection
    h_type = []   # hand label of each kept detection
    num_class = ['left', 'right']
    red = (0, 0, 255)

    # Class 0 is the background; class 1 is the left hand, 2 the right hand.
    for i in range(1, detections.size(1)):
        print('the probability of {} is {}'.format(num_class[i - 1],
                                                   detections[0, i, 0, 0]))

        # Candidates are collected per class: reusing one list across
        # classes would let a class inherit another class's score and index.
        candidates = []
        j = 0
        while j < detections.size(2) and detections[0, i, j, 0] >= 0.2:
            candidates.append((float(detections[0, i, j, 0]), j))
            j += 1
        if not candidates:
            continue

        # Only the most confident box of each hand is kept.
        score, index = max(candidates, key=lambda item: item[0])
        pt = (detections[0, i, index, 1:] * scale).cpu().numpy()
        left_up, right_bottom = (pt[0], pt[1]), (pt[2], pt[3])

        # OpenCV drawing functions require integer pixel coordinates.
        cv2.rectangle(img,
                      (int(pt[0]), int(pt[1])),
                      (int(pt[2]), int(pt[3])),
                      red, 2)
        left.append(left_up)
        right.append(right_bottom)
        h_type.append(num_class[i - 1])

        conf = "{:.3f}".format(score)
        point = (int(left_up[0]), int(left_up[1] - 5))
        point1 = (int(right_bottom[0] - 20), int(left_up[1] - 5))
        cv2.putText(img, conf, point, cv2.FONT_HERSHEY_COMPLEX, 0.6, red, 2)
        cv2.putText(img, str(num_class[i - 1]), point1,
                    cv2.FONT_HERSHEY_COMPLEX, 0.7, red, 2)

    hand_left = None
    hand_right = None
    out_file = os.path.join(save_path, os.path.basename(img_path))

    if len(left) == 2:
        box1 = box2 = None
        for i, hand_type in enumerate(h_type):
            other = 1 - i
            top_left, bottom_right = left[i], right[i]
            box_w = bottom_right[0] - top_left[0]
            if hand_type == 'left':
                (x0, y0), (x1, y1) = _expand_hand_box(
                    top_left, bottom_right, 0.3, 0.3, width, height)
                if not (bottom_right[0] > int(left[other][0])):
                    # No overlap: grow right, but stop at the other hand.
                    x1 = min(int(bottom_right[0] + 0.3 * box_w),
                             int(left[other][0]))
                else:
                    # Already overlapping: keep the detected right edge so
                    # the crop does not take in more of the other hand.
                    x1 = int(bottom_right[0])
                box1 = ((x0, y0), (x1, y1))
            if hand_type == 'right':
                (x0, y0), (x1, y1) = _expand_hand_box(
                    top_left, bottom_right, 0.3, 0.4, width, height)
                if not (int(top_left[0]) < int(right[other][0])):
                    # No overlap: grow left, but stop at the other hand.
                    x0 = max(int(top_left[0] - 0.3 * box_w),
                             int(right[other][0]))
                else:
                    x0 = int(top_left[0])
                box2 = ((x0, y0), (x1, y1))
        cv2.rectangle(img, box1[0], box1[1], red, 2)
        cv2.rectangle(img, box2[0], box2[1], red, 2)
        cv2.imwrite(out_file, img)
        return box1, box2, h_type

    if len(left) == 1:
        box = _expand_hand_box(left[0], right[0], 0.3, 0.3, width, height)
        cv2.rectangle(img, box[0], box[1], red, 2)
        cv2.imwrite(out_file, img)
        if h_type[0] == 'left':
            return box, hand_right, h_type
        return hand_left, box, h_type

    return hand_left, hand_right, h_type
def detect(net, img_path, thresh, gt_path):
    """Detect faces, log them to a text file and save an annotated image.

    Ground-truth boxes loaded from ``gt_path`` are drawn in green and
    detections scoring at least ``thresh`` in red with a score label.  Each
    detection is also written as ``x1 y1 x2 y2 score`` to
    ``<args.save_dir>/txt/<image name>.txt``; the annotated image goes to
    ``<args.save_dir>/png/``.

    Args:
        net: Detection network, called as ``net(x)``.
        img_path: Path of the image to process.
        thresh: Minimum score for a detection to be used.
        gt_path: Passed to ``load_gt`` to obtain the ground-truth boxes.

    Returns:
        None.
    """
    img_name = img_path.split('/')[-1].split('.')[0]
    out_f = "{}/txt/{}.txt".format(args.save_dir, img_name)

    with Image.open(img_path) as pil_img:
        # Normalise every mode to three channels for the preprocessing.
        img = np.array(pil_img.convert('RGB'))

    max_im_shrink = np.sqrt(1500 * 1000 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    t1 = time.time()
    y = net(x)
    detections = y.data
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)

    # Plot ground truth.
    gt = load_gt(gt_path)
    for box in gt:
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]),
                      (0, 255, 78), 1)

    font = cv2.FONT_HERSHEY_SIMPLEX
    # The context manager closes the file even if drawing raises.
    with open(out_f, "w") as fout:
        for i in range(detections.size(1)):
            j = 0
            while (j < detections.size(2)
                   and detections[0, i, j, 0] >= thresh):
                score = float(detections[0, i, j, 0])
                pt = (detections[0, i, j, 1:]
                      * scale).cpu().numpy().astype(int)
                # Plain Python ints are accepted by every OpenCV binding.
                left_up = (int(pt[0]), int(pt[1]))
                right_bottom = (int(pt[2]), int(pt[3]))
                j += 1

                cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 1)
                conf = "{:.2f}".format(score)
                text_size, baseline = cv2.getTextSize(conf, font, 0.3, 1)
                p1 = (left_up[0], left_up[1] - text_size[1])
                fout.write('{} {} {} {} {}\n'.format(
                    pt[0], pt[1], pt[2], pt[3], score))
                cv2.rectangle(img,
                              (p1[0] - 1, p1[1] - 2 - baseline),
                              (p1[0] + text_size[0], p1[1] + text_size[1]),
                              [255, 0, 0], -1)
                cv2.putText(img, conf, (p1[0], p1[1] + baseline), font, 0.3,
                            (255, 255, 255), 1, 8)

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))
    cv2.imwrite(
        os.path.join(args.save_dir, "png", os.path.basename(img_path)), img)
def detect(net, img_path, thresh, imgName):
    """Detect faces in an image and write them to a text file.

    Each detection scoring strictly above ``thresh`` is written as
    ``x1 y1 x2 y2 score`` to ``args.res_dir`` in a file named after
    ``imgName`` with its extension replaced by ``.txt``.

    Args:
        net: Detection network, called as ``net(x)``.
        img_path: Path of the image to process.
        thresh: Score threshold (exclusive).
        imgName: Image file name used to derive the result file name.

    Returns:
        None.  The elapsed time is printed.
    """
    with Image.open(img_path) as pil_img:
        # Normalise every mode to three channels for the preprocessing.
        img = np.array(pil_img.convert('RGB'))

    max_im_shrink = np.sqrt(1500 * 1000 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    # Per-channel mean hard-coded here instead of cfg.img_mean.
    x -= np.array([103.939, 116.779,
                   123.68])[:, np.newaxis, np.newaxis].astype('float32')
    x = x[[2, 1, 0], :, :]
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    t1 = time.time()
    y = net(x)
    detections = y.data
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    # Swap only the extension: str.replace('png', 'txt') would also rewrite
    # 'png' inside the stem and would leave e.g. '.jpg' names unchanged, so
    # the text would be written into a file named like the image.
    out_name = os.path.splitext(imgName)[0] + '.txt'
    with open(os.path.join(args.res_dir, out_name), "w") as f:
        for i in range(detections.size(1)):
            j = 0
            while (j < detections.size(2)
                   and detections[0, i, j, 0] > thresh):
                pt = (detections[0, i, j, 1:]
                      * scale).cpu().numpy().astype(int)
                score = float(detections[0, i, j, 0])
                f.write("%f %f %f %f %f\n"
                        % (pt[0], pt[1], pt[2], pt[3], score))
                j += 1

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))
def test_net(save_folder, net, dataset, thresh=0.05):
    """Run ``net`` over a dataset, save visualisations and evaluate.

    For every image the detections above ``thresh`` are stored per class in
    ``all_boxes[class][image]`` as float32 rows ``[x1, y1, x2, y2, score]``.
    Detections scoring above 0.6 are drawn in green and the image is saved
    as ``<index + 1>.jpg`` in the output directory.  Finally ``all_boxes``
    is pickled to ``detections.pkl`` and handed to ``evaluate_detections``.

    Args:
        save_folder: Parent directory of the ``sfd_hand`` output directory.
        net: Detection network, called as ``net(x)``.
        dataset: Supports ``len()`` and ``pull_image(i)``.
        thresh: Minimum score (exclusive) for a detection to be stored.

    Returns:
        None.
    """
    num_images = len(dataset)
    # NOTE(review): sized for two classes (index 0 plus one detected
    # class); a network with more classes would index past this list.
    all_boxes = [[[] for _ in range(num_images)] for _ in range(2)]

    _t = {'im_detect': Timer(), 'misc': Timer()}
    output_dir = get_output_dir(os.path.join(save_folder, 'sfd_hand'),
                                set_type)
    det_file = os.path.join(output_dir, 'detections.pkl')

    for i in range(num_images):
        img = dataset.pull_image(i)
        h, w, _ = img.shape
        # Scale to an area of 1700 * 1200 pixels.
        shrink = np.sqrt(1700 * 1200 / (img.shape[0] * img.shape[1]))
        image = cv2.resize(img, None, None, fx=shrink, fy=shrink,
                           interpolation=cv2.INTER_LINEAR)

        x = to_chw_bgr(image)
        x = x.astype('float32')
        x -= cfg.img_mean
        x = x[[2, 1, 0], :, :]
        x = Variable(torch.from_numpy(x).unsqueeze(0))
        if use_cuda:
            x = x.cuda()

        _t['im_detect'].tic()
        detections = net(x).data
        detect_time = _t['im_detect'].toc(average=False)

        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            # Boolean-mask indexing returns a new tensor, so the in-place
            # scaling below does not touch ``detections``.
            dets = dets[dets[:, 0] > thresh]
            # A 2-D tensor never has dim() == 0, so emptiness has to be
            # tested through the element count.
            if dets.numel() == 0:
                continue

            boxes = dets[:, 1:]
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            boxes_np = boxes.cpu().numpy()
            cls_dets = np.hstack(
                (boxes_np, scores[:, np.newaxis])).astype(np.float32,
                                                          copy=False)
            all_boxes[j][i] = cls_dets

            # Only confident detections are drawn.
            for box in boxes_np[scores > 0.6]:
                leftup = (int(box[0]), int(box[1]))
                right_bottom = (int(box[2]), int(box[3]))
                cv2.rectangle(img, leftup, right_bottom, (0, 255, 0), 2)

        save_file = os.path.join(output_dir, '{}.jpg'.format(i + 1))
        cv2.imwrite(save_file, img)
        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    detect_time))

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset)
def _expand_hand_boxes(top_lefts, bottom_rights, width, height):
    """Pad two hand boxes, clamp them to the image and keep them apart.

    Args:
        top_lefts: Two ``(x, y)`` top-left corners.
        bottom_rights: The matching two ``(x, y)`` bottom-right corners.
        width: Image width used as the right-hand clamp.
        height: Image height used as the bottom clamp.

    Returns:
        ``(box1, box2)`` where ``box1`` belongs to the box whose top-left
        corner lies further left and ``box2`` to the other one; each box is
        ``((x0, y0), (x1, y1))`` with integer coordinates.
    """
    # The box with the smaller top-left x is the left one.  Computing both
    # indices directly also covers two identical corners.
    li = 0 if top_lefts[0][0] < top_lefts[1][0] else 1
    ri = 1 - li

    l_x0, l_y0 = top_lefts[li]
    l_x1, l_y1 = bottom_rights[li]
    r_x0, r_y0 = top_lefts[ri]
    r_x1, r_y1 = bottom_rights[ri]

    l_w, l_h = l_x1 - l_x0, l_y1 - l_y0
    r_w, r_h = r_x1 - r_x0, r_y1 - r_y0

    # Left box: grow by 30% of its size on every side, clamped to the image.
    new_l_x0 = max(int(l_x0 - 0.3 * l_w), 0)
    new_l_y0 = max(int(l_y0 - 0.3 * l_h), 0)
    if l_x1 > int(r_x0):
        # The two boxes already overlap: do not grow towards the other box.
        new_l_x1 = int(l_x1)
    else:
        # Grow to the right, but never past the other box's left edge.
        new_l_x1 = min(int(l_x1 + 0.3 * l_w), int(r_x0))
    new_l_y1 = min(int(l_y1 + 0.3 * l_h), height)

    # Right box: 30% horizontally, 40% vertically.
    if int(r_x0) < int(l_x1):
        # Overlapping boxes: keep the original left edge.
        new_r_x0 = int(r_x0)
    else:
        # Grow to the left, but never past the left box's right edge.
        new_r_x0 = max(int(r_x0 - 0.3 * r_w), int(l_x1))
    new_r_y0 = max(int(r_y0 - 0.4 * r_h), 0)
    new_r_x1 = min(int(r_x1 + 0.3 * r_w), width)
    new_r_y1 = min(int(r_y1 + 0.4 * r_h), height)

    box1 = ((new_l_x0, new_l_y0), (new_l_x1, new_l_y1))
    box2 = ((new_r_x0, new_r_y0), (new_r_x1, new_r_y1))
    return box1, box2


def detect(net, img_path, thresh, use_cuda):
    """Detect one box per hand class and return two padded crop boxes.

    The image is enlarged by a factor of 1.5, preprocessed and passed through
    ``net``.  For every class except class 0 the highest-scoring detection
    with score >= 0.7 is drawn (box, score and class label) on the image,
    and the annotated image is written to a fixed output directory.  When
    exactly two boxes were found they are padded, clamped to the image and
    returned.

    Args:
        net: Callable network; ``net(x).data`` is indexed as
            ``[0, class, k, 0]`` for the score and ``[0, class, k, 1:]`` for
            the four box values.
        img_path: Path of the image to process.
        thresh: Accepted for interface compatibility but not used; the score
            cut-off is the fixed value 0.7.
            NOTE(review): confirm whether callers expect ``thresh`` to apply.
        use_cuda: When true, the input tensor is moved to the GPU.

    Returns:
        ``(box1, box2)`` with ``box = ((x0, y0), (x1, y1))`` when exactly two
        boxes were detected, otherwise ``(None, None)``.
    """
    img = Image.open(img_path)
    if img.mode == 'L':
        img = img.convert('RGB')
    img = np.array(img)
    height, width, _ = img.shape
    print(height, width)

    # Scale width and height of the original image by a fixed factor.
    max_im_shrink = 1.5
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # reverse the channel order

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    with torch.no_grad():  # inference only
        y = net(x)
    detections = y.data
    # Multiplier that maps box values back to original-image pixels.
    scale = torch.Tensor([width, height, width, height])

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)

    score_floor = 0.7
    # Index 0 is the background class; 1 is the left hand, 2 the right hand.
    class_names = ['left', 'right']
    top_lefts = []
    bottom_rights = []

    for i in range(1, detections.size(1)):
        print('the pro of {} is {}'.format(class_names[i - 1],
                                           detections[0, i, 0, 0]))

        # Collect this class's confident detections.  The list is rebuilt for
        # every class so that scores/indices of one class are never applied
        # to the rows of another.
        candidates = []
        j = 0
        while (j < detections.size(2)
               and detections[0, i, j, 0] >= score_floor):
            candidates.append((float(detections[0, i, j, 0]), j))
            j += 1
        if not candidates:
            continue

        # Keep only the most confident box of the class; lower-scoring boxes
        # are sometimes false positives.
        score, index = max(candidates, key=lambda item: item[0])
        pt = (detections[0, i, index, 1:] * scale).cpu().numpy()
        left_up, right_bottom = (pt[0], pt[1]), (pt[2], pt[3])

        # OpenCV drawing functions take integer pixel coordinates.
        cv2.rectangle(img, (int(pt[0]), int(pt[1])),
                      (int(pt[2]), int(pt[3])), (0, 0, 255), 2)
        top_lefts.append(left_up)
        bottom_rights.append(right_bottom)

        conf = "{:.3f}".format(score)
        point = (int(left_up[0]), int(left_up[1] - 5))
        point1 = (int(right_bottom[0] - 20), int(left_up[1] - 5))
        cv2.putText(img, conf, point, cv2.FONT_HERSHEY_COMPLEX, 0.6,
                    (0, 0, 255), 2)
        cv2.putText(img, str(class_names[i - 1]), point1,
                    cv2.FONT_HERSHEY_COMPLEX, 0.7, (0, 0, 255), 2)
        cv2.imwrite(
            os.path.join(
                '/home/lj/cy/openpose/piano/test_piano/image/point_dir',
                os.path.basename(img_path)), img)

    # With two boxes, left/right is decided by box position.
    # NOTE(review): crossed hands would be labelled the wrong way round.
    if len(top_lefts) != 2:
        return None, None
    return _expand_hand_boxes(top_lefts, bottom_rights, width, height)
def detect(net, img_path, thresh):
    """Detect objects in one image, apply NMS and save the annotated image.

    The image is rescaled so that its area is roughly 1700 x 1200 pixels,
    preprocessed and passed through ``net``.  All detections with score
    >= ``thresh`` are gathered as ``[x1, y1, x2, y2, score]`` rows (pixel
    coordinates of the original image), filtered with ``my_nms`` (second
    argument 0.3) and drawn as red rectangles.  The result is written to
    ``args.save_dir`` under the image's base name.

    Args:
        net: Callable network; ``net(x).data`` is indexed as
            ``[0, class, k, 0]`` for the score and ``[0, class, k, 1:]`` for
            the four box values.
        img_path: Path of the image to process.
        thresh: Minimum score (inclusive) for a detection to be kept.

    Returns:
        None.  The elapsed time is printed and the image is saved.
    """
    img = Image.open(img_path)
    if img.mode == 'L':
        img = img.convert('RGB')
    img = np.array(img)
    height, width, _ = img.shape

    # Rescale so that the resized image covers about 1700 * 1200 pixels.
    max_im_shrink = np.sqrt(1700 * 1200 / (height * width))
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # reverse the channel order

    x = Variable(torch.from_numpy(x).unsqueeze(0), requires_grad=False)
    print(x.shape)
    if use_cuda:
        x = x.cuda()

    t1 = time.time()
    with torch.no_grad():  # inference only
        y = net(x)
    detections = y.data
    # Multiplier built from the original image shape (w, h, w, h).
    scale = torch.Tensor([width, height, width, height])

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)

    candidates = []
    for i in range(detections.size(1)):
        j = 0
        # Column 0 of each detection row is the score.
        while (j < detections.size(2)
               and detections[0, i, j, 0] >= thresh):
            # Plain float so the rows form a homogeneous numeric array.
            score = float(detections[0, i, j, 0])
            # left-up x, left-up y, right-bottom x, right-bottom y
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
            candidates.append([pt[0], pt[1], pt[2], pt[3], score])
            j += 1

    # Without detections there is nothing to suppress or draw.
    if candidates:
        boxes = np.array(candidates)
        # my_nms receives its own copy, as before.  Index with the kept
        # indices directly: wrapping them in another list adds a leading
        # axis on current NumPy.
        keep = my_nms(boxes.copy(), 0.3)
        boxes = boxes[keep]
        for pt in boxes:
            left_up = (int(pt[0]), int(pt[1]))
            right_bottom = (int(pt[2]), int(pt[3]))
            cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))

    cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img)
def detect(net, im, thresh):
    """Blur the regions of ``im`` that are covered by detections.

    ``im`` is converted from BGR to RGB channel order, rescaled so that its
    area is roughly 1200 x 1000 pixels, preprocessed and passed through
    ``net``.  For each detection with score >= ``thresh`` a mask is obtained
    from ``generate_mask``; pixels selected by any mask are taken from a
    15 x 15 box-blurred copy of ``im``, all other pixels from ``im`` itself.

    Args:
        net: Callable network; ``net(x).data`` is indexed as
            ``[0, class, k, 0]`` for the score and ``[0, class, k, 1:]`` for
            the four box values.
        im: Input image array in BGR channel order (it is passed to
            ``cv2.cvtColor`` with ``COLOR_BGR2RGB``).
        thresh: Minimum score (inclusive) for a detection to be blurred.

    Returns:
        The composited image array, same height/width/channels as ``im``.
    """
    # Images handed in as BGR arrays always have three channels after the
    # conversion, so no grayscale handling is needed here.
    img = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    height, width, _ = img.shape

    # Rescale so that the resized image covers about 1200 * 1000 pixels.
    max_im_shrink = np.sqrt(1200 * 1000 / (height * width))
    image = cv2.resize(img, None, None, fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # reverse the channel order

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    with torch.no_grad():  # inference only
        detections = net(x).data
    # Multiplier that maps box values back to original-image pixels.
    scale = torch.Tensor([width, height, width, height])

    # 1 where the original pixel is kept, 0 where the blurred one is used.
    mask_img = np.ones(im.shape, np.int8)
    kernel_size = 15
    blur_img = cv2.blur(im, (kernel_size, kernel_size))

    for i in range(detections.size(1)):
        j = 0
        while (j < detections.size(2)
               and detections[0, i, j, 0] >= thresh):
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            j += 1

            left, top = pt[0], pt[1]
            box_w, box_h = pt[2] - pt[0], pt[3] - pt[1]
            # NOTE(review): generate_mask is a project helper; the arguments
            # look like (rows, cols, radius, centre x, centre y) -- confirm.
            mask = generate_mask(im.shape[0], im.shape[1],
                                 max(box_w, box_h) / 2,
                                 left + box_w / 2, top + box_h / 2)
            mask_img[mask] = [0, 0, 0]

    mask_img_verse = np.ones(img.shape, np.int8) - mask_img
    result_img = mask_img * im + mask_img_verse * blur_img
    return result_img