Exemple #1
0
def detect(net, img_path, thresh, wid, hei):
    """Run the detector on one image file, draw the boxes and save the result.

    Args:
        net: detection network, called as ``net(x)`` on a 1xCxHxW float
            tensor. Its output is read as ``[0, class, k, 0]`` for the score
            and ``[0, class, k, 1:]`` for the box, which is multiplied by the
            source image width/height.
        img_path: path of the image to process.
        thresh: minimum score for a detection to be drawn.
        wid: network input width; used only when both ``wid`` and ``hei``
            are positive.
        hei: network input height; see ``wid``.

    Returns:
        Seconds elapsed between the start of the forward pass and the end of
        the drawing loop.

    Side effects:
        Writes the annotated image to ``args.save_dir`` under the base name
        of ``img_path``.
    """
    img = Image.open(img_path)
    if img.mode == 'L':
        img = img.convert('RGB')
    img = np.array(img)

    if wid > 0 and hei > 0:
        image = cv2.resize(img, (wid, hei))
    else:
        # Rescale so the image covers roughly 1700x1200 pixels of area.
        max_im_shrink = np.sqrt(1700 * 1200 / (img.shape[0] * img.shape[1]))
        image = cv2.resize(img,
                           None,
                           None,
                           fx=max_im_shrink,
                           fy=max_im_shrink,
                           interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # swap the first and third channel

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    with torch.no_grad():
        t1 = time.time()

        y = net(x)
        detections = y.data
        # Box coordinates are multiplied by the size of the source image.
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

        # Draw on the image as OpenCV reads it (BGR), not on the PIL array.
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)

        num_slots = detections.size(2)
        for i in range(detections.size(1)):
            j = 0
            # Stop at the first score below the threshold (presumably the
            # network returns detections sorted by score -- confirm), and
            # never read past the last slot.
            while j < num_slots and detections[0, i, j, 0] >= thresh:
                pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
                # OpenCV drawing functions need plain integer coordinates.
                left_up = (int(pt[0]), int(pt[1]))
                right_bottom = (int(pt[2]), int(pt[3]))
                cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)
                j += 1

        t2 = time.time()

    cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img)

    return t2 - t1
def detect_face(net, img, thresh):
    """Detect faces in an image array and return them as x/y/width/height.

    Args:
        net: detection network, called as ``net(x)``; its output is read as
            ``[0, class, k, 0]`` (score) and ``[0, class, k, 1:]`` (box).
        img: image as an H x W x C array.
        thresh: minimum score for a detection to be kept.

    Returns:
        A list of ``[x, y, w, h, score]`` lists. Coordinates are integers in
        pixels of ``img``; ``score`` is the value read from the network
        output.
    """
    x = to_chw_bgr(img)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # swap the first and third channel

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    y = net(x)
    detections = y.data
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    bboxes = []
    num_slots = detections.size(2)
    for i in range(detections.size(1)):
        j = 0
        # Bounded so a fully populated class cannot index past the last slot.
        while j < num_slots and detections[0, i, j, 0] >= thresh:
            score = detections[0, i, j, 0]
            # ``np.int`` was removed in NumPy 1.24; builtin ``int`` selects
            # the same dtype.
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            bboxes.append([pt[0], pt[1], pt[2] - pt[0], pt[3] - pt[1], score])
            j += 1

    return bboxes
Exemple #3
0
def detect_face(net, img, shrink):
    """Detect faces on a rescaled copy of ``img``.

    Args:
        net: detection network, called as ``net(x)``; only class index 1 of
            its output is read.
        img: image as an H x W x C array.
        shrink: resize factor applied to ``img`` before detection. Box
            coordinates are divided by it again afterwards.

    Returns:
        An N x 5 array with columns ``xmin, ymin, xmax, ymax, score``,
        containing only the rows whose score is at least ``args.thresh``.
    """
    if shrink != 1:
        img = cv2.resize(img,
                         None,
                         None,
                         fx=shrink,
                         fy=shrink,
                         interpolation=cv2.INTER_LINEAR)

    blob = to_chw_bgr(img).astype('float32')
    blob -= cfg.img_mean
    blob = blob[[2, 1, 0], :, :]  # swap the first and third channel

    batch = Variable(torch.from_numpy(blob).unsqueeze(0))
    if use_cuda:
        batch = batch.cuda()

    output = net(batch).data.cpu().numpy()
    faces = output[0, 1, :, :]

    rows, cols = img.shape[0], img.shape[1]
    det = np.column_stack((
        cols * faces[:, 1] / shrink,
        rows * faces[:, 2] / shrink,
        cols * faces[:, 3] / shrink,
        rows * faces[:, 4] / shrink,
        faces[:, 0],
    ))

    confident = det[:, 4] >= args.thresh
    return det[confident, :]
def detect_face(net, img, thresh):
    """Detect faces on a copy of ``img`` resized so its longer side is 640.

    Args:
        net: detection network, called as ``net(x)``; its output is read as
            ``[0, class, k, 0]`` (score) and ``[0, class, k, 1:]`` (box).
        img: image as an H x W x C array.
        thresh: minimum score for a detection to be kept.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax, score]`` lists in pixels of the
        original ``img``. ``ymin`` is moved down by 20% of the box height, so
        it is a float while the other coordinates are integers.
    """
    height, width, _ = img.shape
    im_shrink = 640.0 / max(height, width)
    image = cv2.resize(img, None, None, fx=im_shrink,
                       fy=im_shrink, interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # swap the first and third channel

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    y = net(x)
    detections = y.data
    # Scale by the size of the original image, not the resized one.
    scale = torch.Tensor([width, height, width, height])

    bboxes = []
    num_slots = detections.size(2)
    for i in range(detections.size(1)):
        j = 0
        # Bounded so a fully populated class cannot index past the last slot.
        while j < num_slots and detections[0, i, j, 0] >= thresh:
            score = detections[0, i, j, 0]
            # ``np.int`` was removed in NumPy 1.24; builtin ``int`` selects
            # the same dtype.
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            box = [pt[0], pt[1], pt[2], pt[3], score]
            # Shift the top edge down by 20% of the box height.
            box[1] += 0.2 * (box[3] - box[1] + 1)
            bboxes.append(box)
            j += 1

    return bboxes
Exemple #5
0
def detect(net, img_path, thresh):
    """Detect objects in an image file and return them sorted by score.

    Args:
        net: detection network, called as ``net(x)``; its output is read as
            ``[0, class, k, 0]`` (score) and ``[0, class, k, 1:]`` (box).
        img_path: path of the image to process.
        thresh: minimum score for a detection to be kept.

    Returns:
        An N x 5 array with columns ``xmin, ymin, xmax, ymax, score`` ordered
        by descending score, or an empty 1-D array when nothing passes the
        threshold (in that case no timing line is printed).
    """
    picture = Image.open(img_path)
    if picture.mode == 'L':
        picture = picture.convert('RGB')
    img = np.array(picture)

    height, width, _ = img.shape
    # Rescale so the image covers roughly 2000x2000 pixels of area.
    max_im_shrink = np.sqrt(2000 * 2000 / (height * width))
    image = cv2.resize(img, None, None,
                       fx=max_im_shrink, fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image).astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # swap the first and third channel

    # Optional rescaling of the mean-subtracted input.
    if cfg.rescale:
        x = 1 / 255. * (x + 127.5)

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()

    t1 = time.time()
    detections = net(x).data
    scale = torch.Tensor([width, height, width, height])

    found = []
    for cls in range(detections.size(1)):
        k = 0
        while k < detections.size(2) and detections[0, cls, k, 0] >= thresh:
            confidence = detections[0, cls, k, 0]
            box = (detections[0, cls, k, 1:] * scale).cpu().numpy().astype(int)
            found.append([box[0], box[1], box[2], box[3], float(confidence)])
            k += 1

    found = np.array(found)

    # An empty list becomes a 1-D array; there is nothing to sort.
    if found.ndim == 1:
        return np.array([])

    ranking = found[:, 4].ravel().argsort()[::-1]
    det = found[ranking, :]

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))

    return det
Exemple #6
0
def detect(net, img_path, thresh):
    """Run the detector on one image file, draw boxes with scores and save it.

    The image is rescaled to about 320x240 pixels of area when the
    environment variable ``IS_QVGA_MONO`` equals ``'1'`` and to about 640x480
    otherwise (including when the variable is unset). When
    ``cfg.IS_MONOCHROME`` is set the three input channels are collapsed into
    one weighted sum before the forward pass.

    Args:
        net: detection network, called as ``net(x)``; its output is read as
            ``[0, class, k, 0]`` (score) and ``[0, class, k, 1:]`` (box).
        img_path: path of the image to process.
        thresh: minimum score for a detection to be drawn.

    Side effects:
        Prints the elapsed time and writes the annotated image to
        ``args.save_dir`` under the base name of ``img_path``.
    """
    img = Image.open(img_path)
    img = img.convert('RGB')
    img = np.array(img)
    height, width = img.shape[:2]

    # ``.get`` avoids a KeyError when the variable is not defined.
    if os.environ.get('IS_QVGA_MONO') == '1':
        target_area = 320 * 240
    else:
        target_area = 640 * 480
    max_im_shrink = np.sqrt(target_area / (height * width))

    image = cv2.resize(img,
                       None,
                       None,
                       fx=max_im_shrink,
                       fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)
    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # swap the first and third channel

    if cfg.IS_MONOCHROME:
        # Weighted channel sum -> single-channel 1x1xHxW input.
        x = 0.299 * x[0] + 0.587 * x[1] + 0.114 * x[2]
        x = torch.from_numpy(x).unsqueeze(0).unsqueeze(0)
    else:
        x = torch.from_numpy(x).unsqueeze(0)
    if use_cuda:
        x = x.cuda()

    t1 = time.time()
    y = net(x)
    detections = y.data
    scale = torch.Tensor([width, height, width, height])

    # Draw on the image as OpenCV reads it (BGR), not on the PIL array.
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)

    num_slots = detections.size(2)
    for i in range(detections.size(1)):
        j = 0
        # Bounded so a fully populated class cannot index past the last slot.
        while j < num_slots and detections[0, i, j, 0] >= thresh:
            score = float(detections[0, i, j, 0])
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
            # OpenCV drawing functions need plain integer coordinates.
            left_up = (int(pt[0]), int(pt[1]))
            right_bottom = (int(pt[2]), int(pt[3]))
            j += 1
            cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)
            conf = "{:.3f}".format(score)
            # Score label sits 5 pixels above the top-left corner.
            point = (left_up[0], int(pt[1] - 5))
            cv2.putText(img, conf, point, cv2.FONT_HERSHEY_COMPLEX, 0.6,
                        (0, 255, 0), 1)

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))

    cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img)
Exemple #7
0
def detect(net, img_path, thresh):
    """Run the detector on one image file, draw labelled boxes and save it.

    Args:
        net: detection network, called as ``net(x)``; its output is read as
            ``[0, class, k, 0]`` (score) and ``[0, class, k, 1:]`` (box).
        img_path: path of the image to process.
        thresh: minimum score for a detection to be drawn.

    Side effects:
        Prints the number of detections and the elapsed time, and writes the
        annotated image to ``args.save_dir`` under the base name of
        ``img_path``.
    """
    img = Image.open(img_path)
    if img.mode == 'L':
        img = img.convert('RGB')

    img = np.array(img)
    # Rescale so the image covers roughly 1500x1000 pixels of area.
    max_im_shrink = np.sqrt(1500 * 1000 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img,
                       None,
                       None,
                       fx=max_im_shrink,
                       fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # swap the first and third channel

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    t1 = time.time()
    y = net(x)
    detections = y.data
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    # Draw on the image as OpenCV reads it (BGR), not on the PIL array.
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    cnt_faces = 0
    num_slots = detections.size(2)
    for i in range(detections.size(1)):
        j = 0
        # Bounded so a fully populated class cannot index past the last slot.
        while j < num_slots and detections[0, i, j, 0] >= thresh:
            cnt_faces += 1
            score = float(detections[0, i, j, 0])
            # ``tolist`` yields plain Python ints, which every OpenCV
            # version accepts as point coordinates.
            pt = (detections[0, i, j, 1:] *
                  scale).cpu().numpy().astype(int).tolist()
            left_up, right_bottom = (pt[0], pt[1]), (pt[2], pt[3])
            j += 1
            cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)

            # Filled background rectangle with the score text on top of it.
            conf = "{:.2f}".format(score)
            text_size, baseline = cv2.getTextSize(conf,
                                                  cv2.FONT_HERSHEY_SIMPLEX,
                                                  0.3, 1)
            p1 = (left_up[0], left_up[1] - text_size[1])
            cv2.rectangle(img, (p1[0] - 2 // 2, p1[1] - 2 - baseline),
                          (p1[0] + text_size[0], p1[1] + text_size[1]),
                          [255, 0, 0], -1)
            cv2.putText(img, conf, (p1[0], p1[1] + baseline),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1, 8)

    t2 = time.time()
    print('detect:{}, num_faces:{}, timer:{}'.format(img_path, cnt_faces,
                                                     t2 - t1))
    cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img)
def detect(net, img_path, thresh):
    """Detect objects in an image file and save each one as a cropped image.

    Every detection is padded by 5 pixels on each side (clipped to the image
    bounds), cropped from the source image and saved in ``args.save_dir``.
    The file name is the last six characters of ``name_ss`` followed by the
    module-level counter ``name_count``, which is incremented for every crop
    (presumably ``name_ss`` is a zero-padding prefix -- confirm where it is
    defined). The crop keeps the extension of ``img_path``.

    Args:
        net: detection network, called as ``net(x)``; its output is read as
            ``[0, class, k, 0]`` (score) and ``[0, class, k, 1:]`` (box).
        img_path: path of the image to process.
        thresh: minimum score for a detection to be cropped.
    """
    import os  # local import: this block did not use ``os`` before

    global name_count

    # Open the file once, load it as RGB and release the handle right away.
    with Image.open(img_path) as handle:
        origin_img = handle.convert('RGB')

    img = np.array(origin_img)
    height, width = img.shape[:2]
    # Rescale so the image covers roughly 1500x1000 pixels of area.
    max_im_shrink = np.sqrt(1500 * 1000 / (height * width))
    image = cv2.resize(img, None, None, fx=max_im_shrink,
                       fy=max_im_shrink, interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # swap the first and third channel

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    y = net(x)
    detections = y.data
    scale = torch.Tensor([width, height, width, height])

    # Extension of the source image, without the leading dot.
    img_type = os.path.splitext(img_path)[1].lstrip('.')

    num_slots = detections.size(2)
    for i in range(detections.size(1)):
        j = 0
        # Bounded so a fully populated class cannot index past the last slot.
        while j < num_slots and detections[0, i, j, 0] >= thresh:
            pt = (detections[0, i, j, 1:] *
                  scale).cpu().numpy().astype(int).tolist()
            xmin = max(0, pt[0] - 5)
            ymin = max(0, pt[1] - 5)
            xmax = min(width, pt[2] + 5)
            ymax = min(height, pt[3] + 5)
            crop_img = origin_img.crop((xmin, ymin, xmax, ymax))

            name_count += 1
            filename = (name_ss + str(name_count))[-6:]
            # Join properly so a save_dir without a trailing separator still
            # puts the file inside the directory; print the path that is
            # actually written.
            out_path = os.path.join(args.save_dir, filename + '.' + img_type)
            print(out_path)
            crop_img.save(out_path)
            j += 1
Exemple #9
0
def detect(net, img_path, thresh, save_crops):
    """Detect faces in an image file, optionally save crops, save annotation.

    Args:
        net: detection network, called as ``net(x)``; its output is read as
            ``[0, class, k, 0]`` (score) and ``[0, class, k, 1:]`` (box).
        img_path: path of the image to process.
        thresh: minimum score for a detection to be used.
        save_crops: when true, every detection is also written as its own
            image, enlarged by 10% of the box size on each side.

    Side effects:
        Prints the elapsed time. Writes the annotated image to
        ``args.save_dir`` under the base name of ``img_path``; crops are
        written next to it as ``face<k>-<basename>`` where ``k`` counts from
        1 within each class.
    """
    img = Image.open(img_path)
    # Grayscale, palette or alpha images do not have the three channels the
    # rest of this function relies on.
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = np.array(img)
    height, width = img.shape[:2]

    # Rescale so the image covers roughly 1200x1100 pixels of area.
    max_im_shrink = np.sqrt(1200 * 1100 / (height * width))
    image = cv2.resize(img,
                       None,
                       None,
                       fx=max_im_shrink,
                       fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # swap the first and third channel

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    t1 = time.time()
    y = net(x)
    detections = y.data
    scale = torch.Tensor([width, height, width, height])

    base_name = os.path.basename(img_path)
    image_filename = os.path.join(args.save_dir, base_name)
    # Crops come from an untouched copy so boxes and labels drawn for earlier
    # detections never show up inside a later crop.
    pristine = img.copy() if save_crops else None

    num_slots = detections.size(2)
    for i in range(detections.size(1)):
        j = 0
        # Bounded so a fully populated class cannot index past the last slot.
        while j < num_slots and detections[0, i, j, 0] >= thresh:
            score = float(detections[0, i, j, 0])
            pt = (detections[0, i, j, 1:] *
                  scale).cpu().numpy().astype(int).tolist()
            left_up, right_bottom = (pt[0], pt[1]), (pt[2], pt[3])
            j += 1

            if save_crops:
                # Enlarge the box by 10% of its size on every side.
                pad_y = int(0.1 * (pt[3] - pt[1]))
                pad_x = int(0.1 * (pt[2] - pt[0]))
                # Clamp to the image: a negative slice bound would wrap
                # around to the other side of the array.
                top = min(height, max(0, pt[1] - pad_y))
                bottom = min(height, max(0, pt[3] + pad_y))
                left = min(width, max(0, pt[0] - pad_x))
                right = min(width, max(0, pt[2] + pad_x))
                face = pristine[top:bottom, left:right]
                if face.size:
                    cv2.imwrite(
                        os.path.join(args.save_dir,
                                     'face{}-'.format(j) + base_name),
                        cv2.cvtColor(face, cv2.COLOR_RGB2BGR))

            cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)

            # Filled background rectangle with the score text on top of it.
            conf = "{:.2f}".format(score)
            text_size, baseline = cv2.getTextSize(conf,
                                                  cv2.FONT_HERSHEY_SIMPLEX,
                                                  0.3, 1)
            p1 = (left_up[0], left_up[1] - text_size[1])
            cv2.rectangle(img, (p1[0] - 2 // 2, p1[1] - 2 - baseline),
                          (p1[0] + text_size[0], p1[1] + text_size[1]),
                          [255, 0, 0], -1)
            cv2.putText(img, conf, (p1[0], p1[1] + baseline),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1, 8)

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))

    # ``img`` is an RGB array; OpenCV writes BGR.
    cv2.imwrite(image_filename, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
Exemple #10
0
def detect(net, im, thresh, pre_bbox):
    """Blur the detected regions of a frame.

    Every detection above ``thresh`` is masked with ``generate_mask`` (called
    with half of the longer box side and the box centre) and the masked
    pixels are replaced by a 15x15 box-blurred version of the frame. Boxes
    returned by ``find_no_match_box(pre_bbox, now_bbox)`` are masked as well.

    Args:
        net: detection network, called as ``net(x)``; its output is read as
            ``[0, class, k, 0]`` (score) and ``[0, class, k, 1:]`` (box).
        im: frame as an H x W x 3 array in BGR channel order (it is converted
            with ``COLOR_BGR2RGB`` before detection).
        thresh: minimum score for a detection to be used.
        pre_bbox: boxes from an earlier call, passed on to
            ``find_no_match_box``.

    Returns:
        ``(result_img, now_bbox)``: the blurred frame and the list of boxes
        (``xmin, ymin, xmax, ymax`` in pixels) that were masked.
    """
    img = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

    # Rescale so the image covers roughly 1200x1000 pixels of area.
    max_im_shrink = np.sqrt(1200 * 1000 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img,
                       None,
                       None,
                       fx=max_im_shrink,
                       fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)
    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # swap the first and third channel

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    detections = net(x).data
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    # 1 where the original pixel is kept, 0 where the blurred one is used.
    mask_img = np.ones(im.shape, np.int8)
    kernel_size = 15
    blur_img = cv2.blur(im, (kernel_size, kernel_size))

    def hide(box):
        """Zero the mask over the region that covers ``box``."""
        bx, by = box[0], box[1]
        bw, bh = box[2] - box[0], box[3] - box[1]
        mask = generate_mask(im.shape[0], im.shape[1],
                             max(bw, bh) / 2, bx + bw / 2, by + bh / 2)
        mask_img[mask] = [0, 0, 0]

    now_bbox = []
    num_slots = detections.size(2)
    for i in range(detections.size(1)):
        j = 0
        # Bounded so a fully populated class cannot index past the last slot.
        while j < num_slots and detections[0, i, j, 0] >= thresh:
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            now_bbox.append(pt)
            hide(pt)
            j += 1

    # NOTE(review): assumes find_no_match_box returns boxes in the same
    # pixel-coordinate format as the entries of pre_bbox / now_bbox. The
    # previous code indexed each box like the 4-D network output, which
    # cannot work on a 4-element box -- confirm against find_no_match_box.
    add_bbox = find_no_match_box(pre_bbox, now_bbox)
    for bbox in add_bbox:
        hide(bbox)
    now_bbox += add_bbox

    mask_img_verse = np.ones(img.shape, np.int8) - mask_img
    result_img = mask_img * im + mask_img_verse * blur_img
    return result_img, now_bbox
def detect(net, img_path, thresh,use_cuda):
    img = Image.open(img_path)
    if img.mode == 'L':
        img = img.convert('RGB')
    img = np.array(img)
    height, width, _ = img.shape
    #max_im_shrink = np.sqrt(1400 * 1200 / (img.shape[0] * img.shape[1]))
    max_im_shrink=1.8
    image = cv2.resize(img, None, None, fx=max_im_shrink,fy=max_im_shrink, interpolation=cv2.INTER_LINEAR)    #这种是对原图像width,height进行缩放,fx,fy表示x和y轴的缩放比例
    #image = cv2.resize(img, (800, 600))
    
    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    t1 = time.time()
    y = net(x) 
    detections = y.data
    scale = torch.Tensor([img.shape[1], img.shape[0],
                          img.shape[1], img.shape[0]])

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    img1=img.copy()
    left=[]
    right=[]
    score=[]
    for i in range(detections.size(1)):     
        j = 0
        #print(detections[0, i, j, 0])
        while detections[0, i, j, 0] >= 0.8:
            score.append((detections[0, i, j, 0],j))
            j += 1
        if len(score)>0:
            score=sorted(score,key=lambda value:value[0],reverse=True)  #以list中tuple的第一个元素进行降序排列
            #sorted()函数不改变原list,sort()函数会改变,默认升序,reverse=True实现降序
        num=0
        for score in score:    
            if num>1:         #检测到的包含手的概率最高的两个框
                break
            index=int(score[1])    
            score=score[0]

            pt = (detections[0, i, index, 1:] * scale).cpu().numpy()
            left_up, right_bottom = (pt[0], pt[1]), (pt[2], pt[3])
            #j += 1
            cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)   #左上和右下角坐标
            left.append(left_up)
            right.append(right_bottom)
            conf = "{:.3f}".format(score)    #保留3位小数
            point = (int(left_up[0]), int(left_up[1] - 5))
            cv2.putText(img, conf, point, cv2.FONT_HERSHEY_COMPLEX,0.6, (0, 255, 0), 1)
            num+=1
        #else:
            #continue  #对于for..else循环,执行完里面的for循环后再执行else,continue结束当前for循环(外面的),执行下次for循环
        #break

    hand_left=None
    hand_right=None
    #得到左手的框
    if (len(left)==2):
        if left[0][0]<left[1][0]:
            left_point=left[0]
        else:
            left_point=left[1]
        for i in range(len(left)):
            if left_point==left[i]:
                left_axis=i
            else:
                right_axis=i

        left_width=right[left_axis][0]-left_point[0]
        left_height=right[left_axis][1]-left_point[1]
        #print('坐标是({},{})'.format(left_width,left_height))
        #左上坐标
        if(int(left_point[0]-0.3*left_width)<0):
            new_left_x=0
        else:
            new_left_x=int(left_point[0]-0.3*left_width)
        if(int(left_point[1]-0.3*left_height)<0):
            new_left_y=0
        else:
            new_left_y=int(left_point[1]-0.3*left_height)
        new_left_point=(new_left_x,new_left_y)  
        #左下坐标
        #一开始左手和右手的框就有重叠,右边框就不扩充了
        if not(int(right[left_axis][0]>int(left[right_axis][0]))):
            if(int(right[left_axis][0]+0.3*left_width)>int(left[right_axis][0])):
                new_left_x1=int(left[right_axis][0])
            else:
                new_left_x1=int(right[left_axis][0]+0.3*left_width)
        else:
            new_left_x1=int(right[left_axis][0])
            
        if(int(right[left_axis][1]+0.3*left_height)>height):
            new_left_y1=height
        else:
            new_left_y1=int(right[left_axis][1]+0.3*left_height)
        new_left_point1=(new_left_x1,new_left_y1)
        #得到右手的框
        right_point=left[right_axis]   #左上角坐标
        right_width=right[right_axis][0]-right_point[0]
        right_height=right[right_axis][1]-right_point[1]
        #print('坐标是({},{})'.format(right_width,right_height))
        #一开始左手和右手的框就有重叠,左边框就不扩充了
        if not(int(right_point[0])<int(right[left_axis][0])):
            if(int(right_point[0]-0.3*right_width)<int(right[left_axis][0])):
                new_right_x=int(right[left_axis][0])
            else:
                new_right_x=int(right_point[0]-0.3*right_width)
        else:
            new_right_x=int(right_point[0])

        if(int(right_point[1]-0.4*right_height)<0):
            new_right_y=0
        else:
            new_right_y=int(right_point[1]-0.4*right_height)
        new_right_point=(new_right_x,new_right_y)  
        #右下坐标
        if(int(right[right_axis][0]+0.3*right_width)>width):
            new_right_x1=width
        else:
            new_right_x1=int(right[right_axis][0]+0.3*right_width)
        if(int(right[right_axis][1]+0.4*right_height)>height):
            new_right_y1=height
        else:
            new_right_y1=int(right[right_axis][1]+0.4*right_height)
        new_right_point1=(new_right_x1,new_right_y1)
        box1=(new_left_point,new_left_point1)
        box2=(new_right_point,new_right_point1)
        t2 = time.time()
        #print('detect:{} timer:{}'.format(img_path, t2 - t1))
        #cropImg_right=img1[new_right_point[1]:new_right_point1[1],new_right_point[0]:new_right_point1[0]]
        #cv2.imwrite(os.path.join('.', os.path.basename(img_path)), cropImg_right)
        return box1,box2
    elif len(left==1):
Exemple #12
0
def saveTracesNpy(net, img_list):
    """Run the network over a list of images and dump activations as .npy.

    For each image the entries ``activation['prepatch']`` and
    ``activation['rnn_model']`` are read after the forward pass (presumably
    they are filled by forward hooks registered elsewhere -- confirm). The
    first is reshaped to ``(-1, inputDims, nRows, nCols)`` using the sizes
    stored on ``net.rnn_model``. Every slice ``k`` along the first dimension
    is saved as ``inputs/trace_<image index>_<k>.npy`` (transposed to
    rows x cols x inputDims) and ``outputs/trace_<image index>_<k>.npy``
    below ``args.save_traces_npy_dir``.

    Args:
        net: network, called as ``net(x)``; must expose
            ``rnn_model.inputDims``, ``rnn_model.nRows`` and
            ``rnn_model.nCols``.
        img_list: image file names relative to ``args.image_folder``.

    Returns:
        None. Returns early, after printing a message, when one of the output
        directories cannot be created.
    """
    root_dir = args.save_traces_npy_dir
    inputs_dir = os.path.join(root_dir, 'inputs')
    outputs_dir = os.path.join(root_dir, 'outputs')
    for directory in (root_dir, inputs_dir, outputs_dir):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError:
            print("Creation of the directory %s failed" % directory)
            return

    inputDims = net.rnn_model.inputDims
    nRows = net.rnn_model.nRows
    nCols = net.rnn_model.nCols

    for count, img_path in enumerate(img_list):
        # Load the pixels and release the file handle right away.
        with Image.open(os.path.join(args.image_folder, img_path)) as handle:
            img = np.array(handle.convert('RGB'))

        # Rescale so the image covers roughly 640x480 pixels of area.
        max_im_shrink = np.sqrt(640 * 480 / (img.shape[0] * img.shape[1]))
        image = cv2.resize(img,
                           None,
                           None,
                           fx=max_im_shrink,
                           fy=max_im_shrink,
                           interpolation=cv2.INTER_LINEAR)

        x = to_chw_bgr(image)
        x = x.astype('float32')
        x -= cfg.img_mean
        x = x[[2, 1, 0], :, :]  # swap the first and third channel

        x = Variable(torch.from_numpy(x).unsqueeze(0))
        if use_cuda:
            x = x.cuda()
        # Only the recorded activations are used; the output is discarded.
        net(x)

        patches = activation['prepatch']
        patches = torch.cat(torch.unbind(patches, dim=2), dim=0)
        patches = torch.reshape(patches, (-1, inputDims, nRows, nCols))

        rnnX = activation['rnn_model']

        # Move the leading dimension to the end so each trace is one slice
        # of the last axis.
        patches_all = torch.stack(torch.split(patches,
                                              split_size_or_sections=1,
                                              dim=0),
                                  dim=-1)
        rnnX_all = torch.stack(torch.split(rnnX,
                                           split_size_or_sections=1,
                                           dim=0),
                               dim=-1)

        for k in range(patches_all.shape[-1]):
            patches_tosave = patches_all[0, :, :, :,
                                         k].cpu().numpy().transpose(1, 2, 0)
            rnnX_tosave = rnnX_all[0, :, k].cpu().numpy()
            trace_name = 'trace_{}_{}.npy'.format(count, k)
            np.save(os.path.join(inputs_dir, trace_name), patches_tosave)
            np.save(os.path.join(outputs_dir, trace_name), rnnX_tosave)
Exemple #13
0
def _expand_hand_box(top_left, bottom_right, ratio_x, ratio_y, width, height):
    """Grow a box by a fraction of its own size, clipped to the image.

    Returns ``(x0, y0, x1, y1)`` as ints: the left/right edges move out by
    ``ratio_x`` times the box width, the top/bottom edges by ``ratio_y``
    times the box height, limited to ``[0, width]`` and ``[0, height]``.
    """
    box_w = bottom_right[0] - top_left[0]
    box_h = bottom_right[1] - top_left[1]
    x0 = max(0, int(top_left[0] - ratio_x * box_w))
    y0 = max(0, int(top_left[1] - ratio_y * box_h))
    x1 = min(width, int(bottom_right[0] + ratio_x * box_w))
    y1 = min(height, int(bottom_right[1] + ratio_y * box_h))
    return x0, y0, x1, y1


def detect_hand(net, img_path, thresh, use_cuda, save_path):
    """Detect the left and right hand in an image and return enlarged boxes.

    Class index 0 of the network output is skipped (background); index 1 is
    labelled 'left' and index 2 'right'. For each class only the highest
    scoring detection with a score of at least 0.2 is used. The chosen boxes
    are enlarged so the hand sits nearer the centre of the crop. When both
    hands are found, the facing edges are enlarged only up to the other
    hand's box, and not at all if the boxes already overlap. This assumes
    the left hand is to the left of the right hand.

    Args:
        net: detection network, called as ``net(x)``; its output is read as
            ``[0, class, k, 0]`` (score) and ``[0, class, k, 1:]`` (box).
        img_path: path of the image to process.
        thresh: not used; the score cut-off is fixed at 0.2.
        use_cuda: move the input tensor to the GPU when true.
        save_path: directory for the annotated image; created if missing.

    Returns:
        ``(left_box, right_box, h_type)``. Each box is
        ``((x0, y0), (x1, y1))`` in integer pixels, or ``None`` when that
        hand was not found. ``h_type`` lists the labels of the hands found,
        in detection order.

    Side effects:
        Prints the top score of every class. When at least one hand is found
        the annotated image is written to ``save_path`` under the base name
        of ``img_path``.
    """
    os.makedirs(save_path, exist_ok=True)
    img = Image.open(img_path)
    if img.mode == 'L':
        img = img.convert('RGB')
    img = np.array(img)
    height, width = img.shape[:2]

    # Pre-process a fixed rectangle of the frame (per the original comment,
    # the pixels above the piano keyboard -- see process_img).
    Rect = (32, 499, 1260, 639)
    img = process_img(img, Rect)

    # Fixed enlargement factor applied to both axes before detection.
    max_im_shrink = 1.5
    image = cv2.resize(img,
                       None,
                       None,
                       fx=max_im_shrink,
                       fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)
    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]  # swap the first and third channel

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    y = net(x)
    detections = y.data
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    # Draw on the image as OpenCV reads it (BGR), not on the PIL array.
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    left = []  # top-left corner of every chosen box
    right = []  # bottom-right corner of every chosen box
    h_type = []  # 'left' / 'right' label of every chosen box
    num_class = ['left', 'right']

    num_slots = detections.size(2)
    for i in range(1, detections.size(1)):
        label = num_class[i - 1]
        print('the probability of {} is {}'.format(label,
                                                   detections[0, i, 0, 0]))
        # Candidates are collected per class. Sharing one list across
        # classes would let slot indices of one class select rows of another.
        candidates = []
        j = 0
        while j < num_slots and detections[0, i, j, 0] >= 0.2:
            candidates.append((float(detections[0, i, j, 0]), j))
            j += 1
        if not candidates:
            continue

        # Keep only the most confident box of this class.
        score, index = max(candidates, key=lambda item: item[0])
        pt = (detections[0, i, index, 1:] * scale).cpu().numpy()
        left_up, right_bottom = (pt[0], pt[1]), (pt[2], pt[3])
        left.append(left_up)
        right.append(right_bottom)
        h_type.append(label)

        # OpenCV drawing functions need plain integer coordinates.
        cv2.rectangle(img, (int(pt[0]), int(pt[1])),
                      (int(pt[2]), int(pt[3])), (0, 0, 255), 2)
        conf = "{:.3f}".format(score)
        point = (int(left_up[0]), int(left_up[1] - 5))
        point1 = (int(right_bottom[0] - 20), int(left_up[1] - 5))
        cv2.putText(img, conf, point, cv2.FONT_HERSHEY_COMPLEX, 0.6,
                    (0, 0, 255), 2)
        cv2.putText(img, str(label), point1, cv2.FONT_HERSHEY_COMPLEX, 0.7,
                    (0, 0, 255), 2)

    out_file = os.path.join(save_path, os.path.basename(img_path))
    hand_left = None
    hand_right = None

    if len(left) == 2:
        li = h_type.index('left')
        ri = h_type.index('right')

        # Left hand: 30% margin; the right edge is handled separately below.
        lx0, ly0, _, ly1 = _expand_hand_box(left[li], right[li], 0.3, 0.3,
                                            width, height)
        left_width = right[li][0] - left[li][0]
        if right[li][0] > int(left[ri][0]):
            # Boxes already overlap: do not enlarge towards the other hand.
            lx1 = int(right[li][0])
        else:
            # Enlarge at most up to the left edge of the right-hand box.
            lx1 = min(int(right[li][0] + 0.3 * left_width), int(left[ri][0]))

        # Right hand: 30% horizontal and 40% vertical margin; the left edge
        # is handled separately below.
        _, ry0, rx1, ry1 = _expand_hand_box(left[ri], right[ri], 0.3, 0.4,
                                            width, height)
        right_width = right[ri][0] - left[ri][0]
        if int(left[ri][0]) < int(right[li][0]):
            # Boxes already overlap: do not enlarge towards the other hand.
            rx0 = int(left[ri][0])
        else:
            # Enlarge at most up to the right edge of the left-hand box.
            rx0 = max(int(left[ri][0] - 0.3 * right_width),
                      int(right[li][0]))

        box1 = ((lx0, ly0), (lx1, ly1))
        box2 = ((rx0, ry0), (rx1, ry1))
        cv2.rectangle(img, box1[0], box1[1], (0, 0, 255), 2)
        cv2.rectangle(img, box2[0], box2[1], (0, 0, 255), 2)
        cv2.imwrite(out_file, img)
        return box1, box2, h_type

    if len(left) == 1:
        # A single hand gets a 30% margin on every side.
        x0, y0, x1, y1 = _expand_hand_box(left[0], right[0], 0.3, 0.3, width,
                                          height)
        box = ((x0, y0), (x1, y1))
        cv2.rectangle(img, box[0], box[1], (0, 0, 255), 2)
        cv2.imwrite(out_file, img)
        if h_type[0] == 'left':
            return box, hand_right, h_type
        return hand_left, box, h_type

    return hand_left, hand_right, h_type
def detect(net, img_path, thresh, gt_path):
    """Detect on one image, dump the boxes to text and save an annotated copy.

    The image is rescaled so that its area is about 1500 * 1000 pixels, the
    mean ``cfg.img_mean`` is subtracted and the result is fed to ``net``.
    Ground-truth boxes returned by ``load_gt(gt_path)`` and every detection
    whose score is at least ``thresh`` are drawn on the image.

    Side effects:
        * ``<args.save_dir>/txt/<image stem>.txt`` receives one
          ``x1 y1 x2 y2 score`` line per kept detection.
        * ``<args.save_dir>/png/<image file name>`` receives the annotated
          image.
        * The elapsed time (inference plus drawing) is printed.

    Args:
        net: Callable network; ``net(x).data`` is indexed as
            ``[0, class, rank, 0]`` for the score and ``[0, class, rank, 1:]``
            for the box, which is multiplied by the image width/height.
        img_path: Path of the image to process (``/``-separated).
        thresh: Minimum score for a detection to be kept.
        gt_path: Passed unchanged to ``load_gt``; each returned box is read
            as ``box[0..3]`` = x1, y1, x2, y2.
    """
    img_name = img_path.split('/')[-1].split('.')[0]
    out_f = "{}/txt/{}.txt".format(args.save_dir, img_name)

    img = Image.open(img_path)
    if img.mode == 'L':
        img = img.convert('RGB')
    img = np.array(img)

    max_im_shrink = np.sqrt(1500 * 1000 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img,
                       None,
                       None,
                       fx=max_im_shrink,
                       fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    t1 = time.time()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        y = net(x)
    detections = y.data
    # (w, h, w, h) of the original image, used to rescale the boxes.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    # Draw on the image as OpenCV reads it (BGR channel order).
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)

    # Plot ground truth.
    for box in load_gt(gt_path):
        cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (0, 255, 78),
                      1)

    # The context manager closes the result file even if drawing fails.
    with open(out_f, "w") as fout:
        for i in range(detections.size(1)):
            j = 0
            # Stop at the first entry below the threshold for this class.
            while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
                score = float(detections[0, i, j, 0])
                # Plain Python ints for both OpenCV and the text output.
                x1, y1, x2, y2 = (detections[0, i, j, 1:] *
                                  scale).cpu().numpy().astype(int).tolist()
                j += 1

                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 1)
                fout.write('{} {} {} {} {}\n'.format(x1, y1, x2, y2, score))

                # Confidence label on a filled background above the box.
                conf = "{:.2f}".format(score)
                text_size, baseline = cv2.getTextSize(
                    conf, cv2.FONT_HERSHEY_SIMPLEX, 0.3, 1)
                p1 = (x1, y1 - text_size[1])
                cv2.rectangle(img, (p1[0] - 2 // 2, p1[1] - 2 - baseline),
                              (p1[0] + text_size[0], p1[1] + text_size[1]),
                              [255, 0, 0], -1)
                cv2.putText(img, conf, (p1[0], p1[1] + baseline),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.3, (255, 255, 255), 1,
                            8)

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))

    cv2.imwrite(os.path.join(args.save_dir, "png", os.path.basename(img_path)),
                img)
Exemple #15
0
def detect(net, img_path, thresh, imgName):
    """Run the detector on one image and write its boxes to a text file.

    The image is rescaled so that its area is about 1500 * 1000 pixels, a
    fixed per-channel mean is subtracted and the result is fed to ``net``.
    Every detection whose score is strictly greater than ``thresh`` is
    written as one ``x1 y1 x2 y2 score`` line (``%f`` formatting) to
    ``<args.res_dir>/<imgName without extension>.txt``. The elapsed time is
    printed.

    Args:
        net: Callable network; ``net(x).data`` is indexed as
            ``[0, class, rank, 0]`` for the score and ``[0, class, rank, 1:]``
            for the box, which is multiplied by the image width/height.
        img_path: Path of the image to process.
        thresh: Detections must score strictly above this value.
        imgName: Image file name used to derive the result file name.
    """
    img = Image.open(img_path)
    if img.mode == 'L':
        img = img.convert('RGB')
    img = np.array(img)

    max_im_shrink = np.sqrt(1500 * 1000 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img,
                       None,
                       None,
                       fx=max_im_shrink,
                       fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)

    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= np.array([103.939, 116.779, 123.68])[:, np.newaxis,
                                              np.newaxis].astype('float32')
    x = x[[2, 1, 0], :, :]

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    t1 = time.time()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        y = net(x)
    detections = y.data
    # (w, h, w, h) of the original image, used to rescale the boxes.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    # Swap only the extension: str.replace('png', 'txt') would also rewrite a
    # "png" inside the stem and would leave non-png names untouched.
    res_name = os.path.splitext(imgName)[0] + '.txt'
    with open(os.path.join(args.res_dir, res_name), "w") as f:
        for i in range(detections.size(1)):
            j = 0
            # Stop at the first entry that does not exceed the threshold.
            while j < detections.size(2) and detections[0, i, j, 0] > thresh:
                pt = (detections[0, i, j, 1:] *
                      scale).cpu().numpy().astype(int)
                score = float(detections[0, i, j, 0])
                f.write("%f %f %f %f %f\n" %
                        (pt[0], pt[1], pt[2], pt[3], score))
                j += 1

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))
Exemple #16
0
def test_net(save_folder, net, dataset, thresh=0.05):
    """Run the detector over a dataset, save visualisations and evaluate.

    For every image the detections scoring above ``thresh`` are stored in
    ``all_boxes[class][image]`` as an ``N x 5`` float32 array of
    ``x1, y1, x2, y2, score`` in original-image pixels. Boxes scoring above
    0.6 are drawn on the image, which is saved as ``<index + 1>.jpg`` in the
    output directory. Finally ``all_boxes`` is pickled to ``detections.pkl``
    and handed to ``evaluate_detections``.

    Args:
        save_folder: Base folder; results go below ``<save_folder>/sfd_hand``
            as resolved by ``get_output_dir`` with the module-level
            ``set_type``.
        net: Callable network; ``net(x).data`` is indexed as
            ``[0, class, rank, :]`` with the score in column 0.
        dataset: Object providing ``len()`` and ``pull_image(i)``.
        thresh: Minimum score (exclusive) for a detection to be recorded.
    """
    num_images = len(dataset)

    # all_boxes[cls][img]; only two class slots (0 and 1) are allocated, so
    # the network is expected to output at most two classes.
    all_boxes = [[[] for _ in range(num_images)] for _ in range(2)]
    _t = {'im_detect': Timer(), 'misc': Timer()}

    output_dir = get_output_dir(os.path.join(save_folder, 'sfd_hand'),
                                set_type)
    det_file = os.path.join(output_dir, 'detections.pkl')

    for i in range(num_images):
        img = dataset.pull_image(i)
        h, w, _ = img.shape
        # Rescale so that the image area is about 1700 * 1200 pixels.
        shrink = np.sqrt(1700 * 1200 / (img.shape[0] * img.shape[1]))
        image = cv2.resize(img,
                           None,
                           None,
                           fx=shrink,
                           fy=shrink,
                           interpolation=cv2.INTER_LINEAR)

        x = to_chw_bgr(image)
        x = x.astype('float32')
        x -= cfg.img_mean
        x = x[[2, 1, 0], :, :]
        x = Variable(torch.from_numpy(x).unsqueeze(0))
        if use_cuda:
            x = x.cuda()
        _t['im_detect'].tic()
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            detections = net(x).data
        detect_time = _t['im_detect'].toc(average=False)

        # Class 0 is skipped.
        for j in range(1, detections.size(1)):
            dets = detections[0, j, :]
            # Keep the rows whose score (column 0) exceeds the threshold.
            dets = dets[dets[:, 0] > thresh]
            # The filtered tensor is always 2-D, so emptiness has to be
            # tested on the row count; a dim() == 0 test never fires.
            if dets.size(0) == 0:
                continue
            boxes = dets[:, 1:]
            # Scale the box coordinates by the original image width/height.
            boxes[:, 0] *= w
            boxes[:, 2] *= w
            boxes[:, 1] *= h
            boxes[:, 3] *= h
            scores = dets[:, 0].cpu().numpy()
            boxes_np = boxes.cpu().numpy()
            cls_dets = np.hstack(
                (boxes_np, scores[:, np.newaxis])).astype(np.float32,
                                                          copy=False)
            all_boxes[j][i] = cls_dets

            # Only confident boxes are drawn on the saved visualisation.
            fin_mask = np.where(scores > 0.6)[0]
            for bbox in boxes_np[fin_mask]:
                leftup = (int(bbox[0]), int(bbox[1]))
                right_bottom = (int(bbox[2]), int(bbox[3]))
                cv2.rectangle(img, leftup, right_bottom, (0, 255, 0), 2)

        save_file = os.path.join(output_dir, '{}.jpg'.format(i + 1))
        cv2.imwrite(save_file, img)

        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    detect_time))

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    evaluate_detections(all_boxes, output_dir, dataset)
Exemple #17
0
def detect(net, img_path, thresh, use_cuda):
    """Detect one box per hand class and return two enlarged crop boxes.

    The image is up-scaled by a fixed factor of 1.5 and fed to ``net``. For
    every non-background class the highest-scoring detection with a score of
    at least 0.7 is drawn (box, score, and the label 'left' or 'right') and
    the annotated image is written to a fixed directory. When exactly two
    boxes were found they are ordered by their x position (not by class) and
    each is enlarged: 30 % horizontally, 30 % (left box) or 40 % (right box)
    vertically, clipped to the image and never across the facing edge of the
    other box.

    Args:
        net: Callable network; ``net(x).data`` is indexed as
            ``[0, class, rank, 0]`` for the score and ``[0, class, rank, 1:]``
            for the box, which is multiplied by the image width/height.
        img_path: Path of the image to process.
        thresh: Unused; the score cut-off is fixed at 0.7. Kept so existing
            callers keep working.
        use_cuda: Move the input tensor to the GPU when true.

    Returns:
        ``(box1, box2)`` where each box is ``((x1, y1), (x2, y2))`` in integer
        pixels and ``box1`` is the box further to the left, or
        ``(None, None)`` unless exactly two boxes were detected.
    """
    img = Image.open(img_path)
    if img.mode == 'L':
        img = img.convert('RGB')
    img = np.array(img)
    height, width, _ = img.shape
    print(height, width)
    max_im_shrink = 1.5
    # fx / fy scale the original width and height by the same factor.
    image = cv2.resize(img,
                       None,
                       None,
                       fx=max_im_shrink,
                       fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)
    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        y = net(x)
    detections = y.data
    # (w, h, w, h) of the original image, used to rescale the boxes.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    left = []  # top-left corner of each kept box
    right = []  # bottom-right corner of each kept box
    num_class = ['left', 'right']
    # Class 0 is the background; class 1 is the left hand, class 2 the right.
    for i in range(1, detections.size(1)):
        print('the pro of {} is {}'.format(num_class[i - 1],
                                           detections[0, i, 0, 0]))
        # Collect (score, rank) for this class only. The list must start
        # empty for every class, otherwise a rank taken from another class
        # would be used to index this class' detections.
        scores = []
        j = 0
        while j < detections.size(2) and detections[0, i, j, 0] >= 0.7:
            scores.append((float(detections[0, i, j, 0]), j))
            j += 1
        if not scores:
            continue

        # Keep only the most confident box per class; weaker ones are often
        # false positives.
        score, index = max(scores, key=lambda value: value[0])
        pt = (detections[0, i, index, 1:] * scale).cpu().numpy()
        left_up, right_bottom = (pt[0], pt[1]), (pt[2], pt[3])
        # OpenCV drawing functions need integer pixel coordinates.
        cv2.rectangle(img, (int(pt[0]), int(pt[1])),
                      (int(pt[2]), int(pt[3])), (0, 0, 255), 2)
        left.append(left_up)
        right.append(right_bottom)

        conf = "{:.3f}".format(score)
        point = (int(left_up[0]), int(left_up[1] - 5))
        point1 = (int(right_bottom[0] - 20), int(left_up[1] - 5))
        cv2.putText(img, conf, point, cv2.FONT_HERSHEY_COMPLEX, 0.6,
                    (0, 0, 255), 2)
        cv2.putText(img, str(num_class[i - 1]), point1,
                    cv2.FONT_HERSHEY_COMPLEX, 0.7, (0, 0, 255), 2)

    cv2.imwrite(
        os.path.join('/home/lj/cy/openpose/piano/test_piano/image/point_dir',
                     os.path.basename(img_path)), img)

    if len(left) != 2:
        return None, None

    # With two boxes, the one whose top-left corner is further left is treated
    # as the left hand. NOTE(review): this is probably wrong for crossed hands.
    left_axis = 0 if left[0][0] < left[1][0] else 1
    right_axis = 1 - left_axis

    lx0, ly0 = left[left_axis]
    lx1, ly1 = right[left_axis]
    rx0, ry0 = left[right_axis]
    rx1, ry1 = right[right_axis]

    left_width = lx1 - lx0
    left_height = ly1 - ly0
    right_width = rx1 - rx0
    right_height = ry1 - ry0

    # Left box: grow so the hand sits near the centre, clipped to the image.
    new_left_x = max(0, int(lx0 - 0.3 * left_width))
    new_left_y = max(0, int(ly0 - 0.3 * left_height))
    if int(lx1) > int(rx0):
        # The two boxes already overlap: do not grow the right edge.
        new_left_x1 = int(lx1)
    else:
        # Grow the right edge, but not past the right box's left edge.
        new_left_x1 = min(int(lx1 + 0.3 * left_width), int(rx0))
    new_left_y1 = min(height, int(ly1 + 0.3 * left_height))

    # Right box: same idea, mirrored, with 40 % vertical growth.
    if int(rx0) < int(lx1):
        # The two boxes already overlap: do not grow the left edge.
        new_right_x = int(rx0)
    else:
        # Grow the left edge, but not past the left box's right edge.
        new_right_x = max(int(rx0 - 0.3 * right_width), int(lx1))
    new_right_y = max(0, int(ry0 - 0.4 * right_height))
    new_right_x1 = min(width, int(rx1 + 0.3 * right_width))
    new_right_y1 = min(height, int(ry1 + 0.4 * right_height))

    # Each box is (top-left, bottom-right).
    box1 = ((new_left_x, new_left_y), (new_left_x1, new_left_y1))
    box2 = ((new_right_x, new_right_y), (new_right_x1, new_right_y1))
    return box1, box2
Exemple #18
0
def detect(net, img_path, thresh):
    """Detect on one image, apply NMS and save the image with the kept boxes.

    The image is rescaled so that its area is about 1700 * 1200 pixels, the
    mean ``cfg.img_mean`` is subtracted and the result is fed to ``net``.
    All detections scoring at least ``thresh`` (across every class) are
    collected as ``[x1, y1, x2, y2, score]`` rows, filtered with
    ``my_nms(rows, 0.3)`` and drawn on the image, which is written to
    ``<args.save_dir>/<image file name>``. The elapsed time is printed.

    Args:
        net: Callable network; ``net(x).data`` is indexed as
            ``[0, class, rank, 0]`` for the score and ``[0, class, rank, 1:]``
            for the box, which is multiplied by the image width/height.
        img_path: Path of the image to process.
        thresh: Minimum score for a detection to be considered.
    """
    img = Image.open(img_path)
    if img.mode == 'L':
        img = img.convert('RGB')
    img = np.array(img)

    max_im_shrink = np.sqrt(1700 * 1200 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img,
                       None,
                       None,
                       fx=max_im_shrink,
                       fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)
    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]

    x = Variable(torch.from_numpy(x).unsqueeze(0), requires_grad=False)
    print(x.shape)
    if use_cuda:
        x = x.cuda()
    t1 = time.time()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        y = net(x)
    detections = y.data
    # (w, h, w, h) of the original image, used to rescale the boxes.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    img = cv2.imread(img_path, cv2.IMREAD_COLOR)

    candidates = []
    for i in range(detections.size(1)):
        j = 0
        # Column 0 is the score; stop at the first entry below the threshold.
        while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
            # Plain float: a tensor (possibly on the GPU) inside the list
            # would break the conversion to a NumPy array below.
            score = float(detections[0, i, j, 0])
            # left-up x, left-up y, right-bottom x, right-bottom y
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy()
            candidates.append([pt[0], pt[1], pt[2], pt[3], score])
            j += 1

    # Nothing above the threshold: skip NMS and save the image unchanged.
    if candidates:
        dclone = np.array(candidates)
        # NOTE(review): my_nms is assumed to return the indices of the rows
        # to keep -- confirm against its definition.
        keep = np.asarray(my_nms(dclone, 0.3), dtype=int)
        # Index with the index array itself; wrapping it in another list
        # adds a leading axis on current NumPy versions.
        for pt in dclone[keep]:
            left_up = (int(pt[0]), int(pt[1]))
            right_bottom = (int(pt[2]), int(pt[3]))
            cv2.rectangle(img, left_up, right_bottom, (0, 0, 255), 2)

    t2 = time.time()
    print('detect:{} timer:{}'.format(img_path, t2 - t1))
    cv2.imwrite(os.path.join(args.save_dir, os.path.basename(img_path)), img)
Exemple #19
0
def detect(net, im, thresh):
    """Blur every detected region of a frame and return the composite.

    The frame is converted from BGR to RGB, rescaled so that its area is
    about 1200 * 1000 pixels, mean-subtracted with ``cfg.img_mean`` and fed
    to ``net``. For every detection scoring at least ``thresh`` a mask is
    requested from ``generate_mask`` with a radius of half the longer box
    side and the box centre; masked pixels are taken from a 15 x 15 box-blurred
    copy of ``im``, all other pixels from ``im`` itself.

    Args:
        net: Callable network; ``net(x).data`` is indexed as
            ``[0, class, rank, 0]`` for the score and ``[0, class, rank, 1:]``
            for the box, which is multiplied by the image width/height.
        im: 3-channel BGR image array (as accepted by
            ``cv2.cvtColor(..., cv2.COLOR_BGR2RGB)``).
        thresh: Minimum score for a detection to be blurred.

    Returns:
        Array of the same shape as ``im`` holding the blended image. The
        blend multiplies ``im`` by an ``int8`` mask, so the dtype follows
        NumPy's promotion rules (presumably ``int16`` for ``uint8`` input --
        TODO confirm with callers).
    """
    # BGR2RGB always yields a 3-channel array, so no grayscale handling is
    # needed before reading the shape.
    img = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

    max_im_shrink = np.sqrt(1200 * 1000 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img,
                       None,
                       None,
                       fx=max_im_shrink,
                       fy=max_im_shrink,
                       interpolation=cv2.INTER_LINEAR)
    x = to_chw_bgr(image)
    x = x.astype('float32')
    x -= cfg.img_mean
    x = x[[2, 1, 0], :, :]

    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        out = net(x)
    detections = out.data
    # (w, h, w, h) of the original image, used to rescale the boxes.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    # 1 = keep the original pixel, 0 = take the blurred pixel.
    mask_img = np.ones(im.shape, np.int8)
    kernel_size = 15
    blur_img = cv2.blur(im, (kernel_size, kernel_size))
    for i in range(detections.size(1)):
        j = 0
        # Stop at the first entry below the threshold for this class.
        while j < detections.size(2) and detections[0, i, j, 0] >= thresh:
            pt = (detections[0, i, j, 1:] * scale).cpu().numpy().astype(int)
            j += 1
            box_x, box_y = pt[0], pt[1]
            box_w, box_h = pt[2] - pt[0], pt[3] - pt[1]
            # NOTE(review): generate_mask presumably returns a boolean
            # (rows, cols) mask of a disc with the given radius and centre --
            # confirm against its definition.
            mask = generate_mask(im.shape[0], im.shape[1],
                                 max(box_w, box_h) / 2, box_x + box_w / 2,
                                 box_y + box_h / 2)
            mask_img[mask] = [0, 0, 0]

    mask_img_verse = np.ones(im.shape, np.int8) - mask_img
    result_img = mask_img * im + mask_img_verse * blur_img
    return result_img