Example #1
def rnet_detect_faces(img,
                      bounding_boxes,
                      r_model_path,
                      thresholds=0.7,
                      nms_thresholds=0.6):
    with torch.no_grad():
        _, rnet, _ = create_mtcnn_model(r_model_path=r_model_path)

        img_boxes = get_image_boxes(bounding_boxes, img, size=24)
        img_boxes = img_boxes.float()
        img_boxes = img_boxes.to(
            torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
        probs, offsets, _ = rnet(img_boxes)
        probs = probs.to("cpu").numpy()  # shape [n_boxes, 1]
        offsets = offsets.to("cpu").numpy()  # shape [n_boxes, 4]

        keep = np.where(probs > thresholds)[0]

        if len(keep) > 0:
            bounding_boxes = bounding_boxes[keep]
            bounding_boxes[:, 4] = probs[keep].reshape((-1, ))
            offsets = offsets[keep]
        else:
            return []  # nothing cleared the score threshold

        keep = nms(bounding_boxes, nms_thresholds)

        bounding_boxes = bounding_boxes[keep]
        bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])

        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        return bounding_boxes
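
This stage leans on the shared box helpers `calibrate_box` and `convert_to_square`, which are not shown on this page. Below is a minimal sketch of what they typically look like in MTCNN codebases -- an assumption about this repo's helpers, matching the offset semantics spelled out in the comments of Example #3:

import numpy as np

def calibrate_box(bboxes, offsets):
    """Refine boxes [n, 5] = [x1, y1, x2, y2, score] with regression offsets [n, 4]."""
    w = (bboxes[:, 2] - bboxes[:, 0] + 1.0).reshape(-1, 1)
    h = (bboxes[:, 3] - bboxes[:, 1] + 1.0).reshape(-1, 1)
    # x1 += tx1*w, y1 += ty1*h, x2 += tx2*w, y2 += ty2*h
    bboxes[:, 0:4] = bboxes[:, 0:4] + np.hstack([w, h, w, h]) * offsets
    return bboxes

def convert_to_square(bboxes):
    """Expand each box to a square around the same center."""
    square = bboxes.copy()
    w = bboxes[:, 2] - bboxes[:, 0] + 1.0
    h = bboxes[:, 3] - bboxes[:, 1] + 1.0
    side = np.maximum(w, h)
    square[:, 0] = bboxes[:, 0] + w * 0.5 - side * 0.5
    square[:, 1] = bboxes[:, 1] + h * 0.5 - side * 0.5
    square[:, 2] = square[:, 0] + side - 1.0
    square[:, 3] = square[:, 1] + side - 1.0
    return square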
Example #2
def gen_rnet_sample_data(data_dir, anno_file, det_boxs_file):

    neg_save_dir = os.path.join(data_dir, "24/negative")
    pos_save_dir = os.path.join(data_dir, "24/positive")
    part_save_dir = os.path.join(data_dir, "24/part")

    for dir_path in [neg_save_dir, pos_save_dir, part_save_dir]:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)


    # load ground truth from annotation file
    # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image

    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    image_size = 24
    net = "rnet"

    im_idx_list = list()
    gt_boxes_list = list()
    num_of_images = len(annotations)
    print "processing %d images in total" % num_of_images

    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_idx = annotation[0]

        boxes = np.array(list(map(float, annotation[1:])),
                         dtype=np.float32).reshape(-1, 4)
        im_idx_list.append(im_idx)
        gt_boxes_list.append(boxes)


    save_path = config.ANNO_STORE_DIR
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    f1 = open(os.path.join(save_path, 'pos_%d.txt' % image_size), 'w')
    f2 = open(os.path.join(save_path, 'neg_%d.txt' % image_size), 'w')
    f3 = open(os.path.join(save_path, 'part_%d.txt' % image_size), 'w')

    # pickle files must be opened in binary mode under Python 3
    # (use `import pickle` in place of Python 2's cPickle)
    with open(det_boxs_file, 'rb') as det_handle:
        det_boxes = pickle.load(det_handle)
    print(len(det_boxes), num_of_images)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # index of neg, pos and part face, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0
    for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
        if image_done % 100 == 0:
            print("%d images done" % image_done)
        image_done += 1

        if dets.shape[0] == 0:
            continue
        img = cv2.imread(im_idx)
        dets = convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        for box in dets:
            x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
            width = x_right - x_left + 1
            height = y_bottom - y_top + 1

            # ignore box that is too small or beyond image border
            if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
                continue

            # compute intersection over union (IoU) between current box and all gt boxes
            Iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)

            # save negative images and write label
            if np.max(Iou) < 0.3:
                # IoU with every gt must be below 0.3
                save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                f2.write(save_file + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
            else:
                # find gt_box with the highest iou
                idx = np.argmax(Iou)
                assigned_gt = gts[idx]
                x1, y1, x2, y2 = assigned_gt

                # compute bbox reg label
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                # save positive and part-face images and write labels
                if np.max(Iou) >= 0.65:
                    save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                    f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1

                elif np.max(Iou) >= 0.4:
                    save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                    f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' %
                             (offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
    f1.close()
    f2.close()
    f3.close()
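
The labeling above hinges on `IoU(box, gts)`, which scores one candidate box against every ground-truth box at once. A plausible sketch of that helper, assumed rather than taken from this repo:

import numpy as np

def IoU(box, boxes):
    """IoU between one box [x1, y1, x2, y2, ...] and an array of gt boxes [n, 4]."""
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    # intersection rectangle of `box` with every gt box
    xx1 = np.maximum(box[0], boxes[:, 0])
    yy1 = np.maximum(box[1], boxes[:, 1])
    xx2 = np.minimum(box[2], boxes[:, 2])
    yy2 = np.minimum(box[3], boxes[:, 3])
    inter = np.maximum(0, xx2 - xx1 + 1) * np.maximum(0, yy2 - yy1 + 1)
    return inter / (box_area + areas - inter)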
Example #3
def pnet_detect_faces(image,
                      p_model_path,
                      min_face_size=12.0,
                      thresholds=0.6,
                      nms_thresholds=0.7):
    with torch.no_grad():
        pnet, _, _ = create_mtcnn_model(p_model_path=p_model_path)

        # build an image pyramid
        h = image.shape[0]
        w = image.shape[1]
        max_face_size = min(w, h)

        min_detection_size = 12
        factor = 0.707  # sqrt(0.5)
        # in a sense, applying P-Net is equivalent to sliding a 12x12 window with stride 2
        stride = 2
        cell_size = 12

        bounding_boxes = []
        while min_face_size <= max_face_size:
            current_scale = min_detection_size / min_face_size

            img_h = math.ceil(h * current_scale)  # round up -- this rounding may be slightly problematic
            img_w = math.ceil(w * current_scale)

            resize_img = cv2.resize(image, (img_w, img_h),
                                    interpolation=cv2.INTER_AREA)

            # preprocessing must match exactly what was used during training
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            img = trainTransform(resize_img).unsqueeze(0).to(device)
            probs, offsets, _ = pnet(img)

            probs = probs.to('cpu').squeeze(0).numpy()
            offsets = offsets.to('cpu').numpy()
            # probs: [1, out_h, out_w] face scores; offsets: [1, 4, out_h, out_w] box regressions

            # indices of positions that may contain a face -- these correspond to sliding-window locations
            inds = np.where(probs > thresholds)

            # no face found at this scale
            if inds[0].size == 0:
                bounding_boxes.append(None)
            else:
                reg_x1, reg_y1, reg_x2, reg_y2 = [
                    offsets[0, i, inds[1], inds[2]] for i in range(4)
                ]

                # they are defined as:
                # w = x2 - x1 + 1
                # h = y2 - y1 + 1
                # x1_true = x1 + tx1*w
                # x2_true = x2 + tx2*w
                # y1_true = y1 + ty1*h
                # y2_true = y2 + ty2*h

                offsets = np.array([reg_x1, reg_y1, reg_x2, reg_y2])
                score = probs[0, inds[1], inds[2]]

                # P-Net was applied to a scaled image,
                # so the bounding boxes must be rescaled back
                _bounding_boxes = np.vstack([
                    np.round((stride * inds[2] + 1.0) / current_scale),  # x1
                    np.round((stride * inds[1] + 1.0) / current_scale),  # y1
                    np.round((stride * inds[2] + 1.0 + cell_size) /
                             current_scale),  # x2
                    np.round((stride * inds[1] + 1.0 + cell_size) /
                             current_scale),  # y2
                    score,
                    offsets
                ])

                # print("offsets=",offsets)

                boxes = _bounding_boxes.T

                keep = nms(boxes[:, 0:5], overlap_threshold=0.5)

                bounding_boxes.append(boxes[keep])

            min_face_size /= factor  # grows from 12 up to the image's shorter side

        if bounding_boxes:
            # collect boxes (plus offsets and scores) from the different scales
            bounding_boxes = [i for i in bounding_boxes if i is not None]
            if len(bounding_boxes) == 0:
                return bounding_boxes

            bounding_boxes = np.vstack(bounding_boxes)

            keep = nms(bounding_boxes[:, 0:5], nms_thresholds)
            bounding_boxes = bounding_boxes[keep]

            # use the offsets predicted by P-Net to refine the boxes:
            # nudge x1, y1, x2, y2 in proportion to w and h
            bounding_boxes = calibrate_box(bounding_boxes[:, 0:5],
                                           bounding_boxes[:, 5:])
            # shape [n_boxes, 5]
            # convert the detected boxes to squares
            bounding_boxes = convert_to_square(bounding_boxes)
            bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
            # landmarks are not needed here
        return bounding_boxes
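
Every stage on this page prunes candidates with `nms`. The implementation is not shown; a standard greedy non-maximum suppression matching both call styles used here (a positional threshold and the `overlap_threshold=` keyword) would look roughly like this -- a sketch, not necessarily the repo's exact version:

import numpy as np

def nms(boxes, overlap_threshold=0.5):
    """Greedy NMS over boxes [n, 5] = [x1, y1, x2, y2, score]; returns kept row indices."""
    if len(boxes) == 0:
        return []
    x1, y1, x2, y2, score = [boxes[:, i] for i in range(5)]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = np.argsort(score)[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # overlap of the best remaining box with all the others
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (area[i] + area[order[1:]] - inter)
        order = order[1:][iou <= overlap_threshold]
    return keep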
Example #4
    def gen_ro_data(self,
                    batch_idx_list,
                    mm_det,
                    pattern,
                    image_size,
                    vis=False):
        imgs_list = []
        offsets_list = []
        labels_list = []

        # draw three overlapping random subsets of the sample ids:
        # ~2/9 may yield positives, ~3/9 part faces, ~4/9 negatives
        random.shuffle(batch_idx_list)
        batch_idx_list_pos = batch_idx_list[
            0:max(1, int(len(batch_idx_list) * 2 / 9))]

        random.shuffle(batch_idx_list)
        batch_idx_list_part = batch_idx_list[
            0:max(1, int(len(batch_idx_list) * 3 / 9))]

        random.shuffle(batch_idx_list)
        batch_idx_list_neg = batch_idx_list[
            0:max(1, int(len(batch_idx_list) * 4 / 9))]

        pos_num_sum = 0
        part_num_sum = 0
        neg_num_sum = 0

        pos_num_sum_thr = 200 + random.randint(-20, 20)
        part_num_sum_thr = 250 + random.randint(-30, 30)
        neg_num_sum_thr = 400 + random.randint(-50, 50)
        #---------------------------------------------
        random.shuffle(batch_idx_list)
        for kk in range(len(batch_idx_list)):

            jj = batch_idx_list[kk]
            #---------------------------------------------
            if (pos_num_sum >= pos_num_sum_thr) and (
                    part_num_sum >= part_num_sum_thr) and (neg_num_sum >=
                                                           neg_num_sum_thr):
                break

            if not ((jj in batch_idx_list_pos) or
                    (jj in batch_idx_list_part) or (jj in batch_idx_list_neg)):
                continue
            annotation = self.annotations[jj].strip().split(' ')
            # image path
            im_path = annotation[0]

            # ground-truth boxes
            gts = np.array(list(map(float, annotation[1:])),
                           dtype=np.float32).reshape(-1, 4)
            if self.flag_debug:
                print('   {} {} '.format(jj, im_path))

            #load image
            read_img_path = os.path.join(self.path_img, im_path + '.jpg')

            img = cv2.imread(read_img_path)
            boxes_align = None
            if pattern == 'R-Net':
                _, boxes_align = mm_det.detect_pnet(img)
            elif pattern == 'O-Net':
                boxes_align, _ = mm_det.detect_face(img)
            if boxes_align is None:
                continue

            if boxes_align.shape[0] == 0:
                continue
            dets = convert_to_square(boxes_align)
            dets[:, 0:4] = np.round(dets[:, 0:4])

            if vis:
                for i in range(len(boxes_align)):
                    box = boxes_align[i]
                    x_min = int(box[0])
                    y_min = int(box[1])
                    x_max = int(box[2])
                    y_max = int(box[3])
                    cv2.rectangle(img, (x_min, y_min), (x_max, y_max),
                                  (0, 255, 225), 2)
                cv2.namedWindow('im', 0)
                cv2.imshow('im', img)
                cv2.waitKey(1)
            #--------------------------------------------- randomly jitter the detected boxes slightly
            dets_r = []
            for ii in range(len(dets)):
                x_left, y_top, x_right, y_bottom, _ = dets[ii]
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1
                edge = min(width, height)
                r_offset = min(random.random() * 6., edge * 0.2)
                dets_r.append([
                    dets[ii][0] + random.uniform(-r_offset, r_offset),
                    dets[ii][1] + random.uniform(-r_offset, r_offset),
                    dets[ii][2] + random.uniform(-r_offset, r_offset),
                    dets[ii][3] + random.uniform(-r_offset, r_offset),
                    dets[ii][4]
                ])
            dets = np.array(dets_r)
            #-------------------------------------------------------------------

            for box in dets:
                x_left, y_top, x_right, y_bottom, _ = box.astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1

                # ignore box that is too small or beyond image border
                if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[
                        1] - 1 or y_bottom > img.shape[0] - 1:
                    continue

                # compute intersection over union (IoU) between current box and all gt boxes
                Iou = IoU(box, gts)
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]

                # keep as a negative sample: IoU with every gt below 0.3,
                # subject to the negative quota and subset membership
                if np.max(Iou) < 0.3 and (
                        (neg_num_sum < (pos_num_sum * 3.5)) or
                        (neg_num_sum < neg_num_sum_thr)) and (
                            jj in batch_idx_list_neg):
                    resized_im = cv2.resize(cropped_im,
                                            (image_size, image_size),
                                            interpolation=random.randint(0, 4))  # random cv2 interpolation flag (0..4)
                    resized_im = img_agu(resized_im)

                    if random.random() > 0.5:  # horizontal flip (offsets stay zero for negatives)
                        resized_im = cv2.flip(resized_im, 1)

                    img_ = resized_im.transpose(2, 0, 1)
                    #--------------
                    imgs_list.append(img_)
                    offsets_list.append((0, 0, 0, 0))
                    labels_list.append(0.0)
                    neg_num_sum += 1
                else:
                    # find gt_box with the highest iou
                    idx = np.argmax(Iou)
                    assigned_gt = gts[idx]
                    x1, y1, x2, y2 = assigned_gt

                    # compute bbox reg label
                    offset_x1 = (x1 - x_left) / float(width)
                    offset_y1 = (y1 - y_top) / float(height)
                    offset_x2 = (x2 - x_right) / float(width)
                    offset_y2 = (y2 - y_bottom) / float(height)

                    # save positive and part-face images and write labels
                    if np.max(Iou) >= 0.65 and (jj in batch_idx_list_pos):
                        resized_im = cv2.resize(cropped_im,
                                                (image_size, image_size),
                                                interpolation=random.randint(
                                                    0, 4))
                        resized_im = img_agu(resized_im)

                        if random.random() > 0.5:
                            # horizontal flip: the x-offsets must be mirrored along with the image
                            img_ = cv2.flip(resized_im, 1)
                            offset_x1, offset_x2 = -offset_x2, -offset_x1
                            img_ = img_.transpose(2, 0, 1)
                        else:
                            img_ = resized_im.transpose(2, 0, 1)
                        #--------------
                        imgs_list.append(img_)
                        offsets_list.append(
                            (offset_x1, offset_y1, offset_x2, offset_y2))
                        labels_list.append(1.)

                        pos_num_sum += 1

                    elif np.max(Iou) >= 0.4 and (jj in batch_idx_list_part):

                        resized_im = cv2.resize(cropped_im,
                                                (image_size, image_size),
                                                interpolation=random.randint(
                                                    0, 4))
                        resized_im = img_agu(resized_im)

                        if random.random() > 0.5:
                            # horizontal flip: the x-offsets must be mirrored along with the image
                            img_ = cv2.flip(resized_im, 1)
                            offset_x1, offset_x2 = -offset_x2, -offset_x1
                            img_ = img_.transpose(2, 0, 1)
                        else:
                            img_ = resized_im.transpose(2, 0, 1)
                        #--------------
                        imgs_list.append(img_)
                        offsets_list.append(
                            (offset_x1, offset_y1, offset_x2, offset_y2))
                        labels_list.append(-1.)

                        part_num_sum += 1

        return imgs_list, offsets_list, labels_list, pos_num_sum, part_num_sum, neg_num_sum
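
The method returns plain Python lists. A hedged sketch of turning one generated batch into training tensors follows; `sampler` and `mm_det` are hypothetical stand-ins for the dataset object and a trained detector wrapper, and the /255 normalization is an assumption -- match whatever transform the networks were actually trained with:

import numpy as np
import torch

idx_list = list(range(len(sampler.annotations)))
imgs, offsets, labels, n_pos, n_part, n_neg = sampler.gen_ro_data(
    idx_list, mm_det, pattern='R-Net', image_size=24)

# assumes at least one sample was produced
imgs_t = torch.from_numpy(np.stack(imgs)).float() / 255.0  # [N, 3, 24, 24]
offsets_t = torch.tensor(offsets, dtype=torch.float32)     # [N, 4]
labels_t = torch.tensor(labels, dtype=torch.float32)       # [N]: 1 pos, -1 part, 0 neg
print(n_pos, n_part, n_neg)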
Example #5
def gen_sample_data(data_dir, anno_file, det_boxs_file, prefix_path=''):

    landmark_save_dir = os.path.join(data_dir, "48/landmark")

    if not os.path.exists(landmark_save_dir):
        os.makedirs(landmark_save_dir)

    # load ground truth from annotation file
    # format of each line: image/path [x1,y1,x2,y2] for each gt_box in this image

    with open(anno_file, 'r') as f:
        annotations = f.readlines()

    image_size = 48
    net = "onet"

    im_idx_list = list()
    gt_boxes_list = list()
    gt_landmark_list = list()
    num_of_images = len(annotations)
    print "processing %d images in total" % num_of_images

    for annotation in annotations:
        annotation = annotation.strip().split(' ')
        im_idx = annotation[0]

        boxes = np.array(list(map(float, annotation[1:5])),
                         dtype=np.float32).reshape(-1, 4)
        landmarks = np.array(list(map(float, annotation[5:])),
                             dtype=np.float32).reshape(-1, 10)

        im_idx_list.append(im_idx)
        gt_boxes_list.append(boxes)
        gt_landmark_list.append(landmarks)

    save_path = config.ANNO_STORE_DIR
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    f = open(os.path.join(save_path, 'landmark_48.txt'), 'w')

    # pickle files must be opened in binary mode under Python 3
    # (use `import pickle` in place of Python 2's cPickle)
    with open(det_boxs_file, 'rb') as det_handle:
        det_boxes = pickle.load(det_handle)
    print(len(det_boxes), num_of_images)
    assert len(det_boxes) == num_of_images, "incorrect detections or ground truths"

    # index of landmark faces, used as their image names
    p_idx = 0
    image_done = 0
    for im_idx, dets, gts, landmark in zip(im_idx_list, det_boxes,
                                           gt_boxes_list, gt_landmark_list):
        if image_done % 100 == 0:
            print("%d images done" % image_done)
        image_done += 1

        if dets.shape[0] == 0:
            continue
        img = cv2.imread(os.path.join(prefix_path, im_idx))
        dets = convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])

        for box in dets:
            x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
            width = x_right - x_left + 1
            height = y_bottom - y_top + 1

            # ignore box that is too small or beyond image border
            if width < 20 or x_left < 0 or y_top < 0 or x_right > img.shape[
                    1] - 1 or y_bottom > img.shape[0] - 1:
                continue

            # compute intersection over union (IoU) between current box and all gt boxes
            Iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)

            # skip boxes whose IoU with every gt is below 0.3
            if np.max(Iou) < 0.3:
                continue
            else:
                # find gt_box with the highest iou
                idx = np.argmax(Iou)
                assigned_gt = gts[idx]
                x1, y1, x2, y2 = assigned_gt

                # compute bbox reg label
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                offset_left_eye_x = (landmark[0, 0] - x_left) / float(width)
                offset_left_eye_y = (landmark[0, 1] - y_top) / float(height)

                offset_right_eye_x = (landmark[0, 2] - x_left) / float(width)
                offset_right_eye_y = (landmark[0, 3] - y_top) / float(height)

                offset_nose_x = (landmark[0, 4] - x_left) / float(width)
                offset_nose_y = (landmark[0, 5] - y_top) / float(height)

                offset_left_mouth_x = (landmark[0, 6] - x_left) / float(width)
                offset_left_mouth_y = (landmark[0, 7] - y_top) / float(height)

                offset_right_mouth_x = (landmark[0, 8] - x_left) / float(width)
                offset_right_mouth_y = (landmark[0, 9] - y_top) / float(height)

                # save landmark samples and write labels
                if np.max(Iou) >= 0.65:
                    save_file = os.path.join(landmark_save_dir,
                                             "%s.jpg" % p_idx)

                    f.write(save_file + ' -2 %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f \n' % \
                            (offset_x1, offset_y1, offset_x2, offset_y2, \
                             offset_left_eye_x, offset_left_eye_y, offset_right_eye_x, offset_right_eye_y,
                             offset_nose_x, offset_nose_y, offset_left_mouth_x, offset_left_mouth_y,
                             offset_right_mouth_x, offset_right_mouth_y))

                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1

    f.close()
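
Each line written above packs a path, the marker label -2, four box offsets, and ten landmark offsets. A hedged sketch of how a loader might parse one line back (the function name is illustrative, not from this repo):

def parse_landmark_line(line):
    """Parse 'path -2 ox1 oy1 ox2 oy2 l1 ... l10' back into its parts."""
    parts = line.split()  # split() also swallows the trailing space before '\n'
    path, label = parts[0], int(parts[1])  # label -2 marks a landmark sample
    values = [float(v) for v in parts[2:]]
    return path, label, values[:4], values[4:14]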
Example #6
def detect_rnet(img, dets, rnet_path):
    h, w, c = img.shape
    if dets is None:
        return None, None
    _, rnet, _ = creat_mtcnn_net(r_model_path=rnet_path)

    dets = convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])

    num_boxes = dets.shape[0]
    cropped_ims_tensors = []

    for i in range(num_boxes):
        x1, y1, x2, y2, _ = [int(v) for v in dets[i]]  # avoid shadowing `c` from img.shape

        # clamp the box to the image borders
        x1 = max(x1, 0)
        y1 = max(y1, 0)
        x2 = min(x2, w - 1)
        y2 = min(y2, h - 1)

        tmp = img[y1:y2 + 1, x1:x2 + 1, :]

        crop_im = cv2.resize(tmp, (24, 24), interpolation=cv2.INTER_LINEAR)
        crop_im_tensor = transforms.ToTensor()(crop_im)

        cropped_ims_tensors.append(crop_im_tensor)

    feed_imgs = torch.stack(cropped_ims_tensors)
    cls_map_r, reg_r = rnet(feed_imgs)

    cls_map_np = cls_map_r.detach().numpy()
    reg_np = reg_r.detach().numpy()

    keep_inds = np.where(cls_map_np > 0.7)[0]  # hard-coded R-Net score threshold

    if len(keep_inds) > 0:
        boxes = dets[keep_inds]
        cls = cls_map_np[keep_inds]
        reg = reg_np[keep_inds]
    else:
        return None, None

    keep = nms(boxes, 0.7)

    if len(keep) > 0:
        keep_cls = cls[keep]
        keep_boxes = boxes[keep]
        keep_reg = reg[keep]
    else:
        return None, None

    bw = keep_boxes[:, 2] - keep_boxes[:, 0] + 1
    bh = keep_boxes[:, 3] - keep_boxes[:, 1] + 1

    align_topx = keep_boxes[:, 0] + keep_reg[:, 0] * bw
    align_topy = keep_boxes[:, 1] + keep_reg[:, 1] * bh
    align_bottomx = keep_boxes[:, 2] + keep_reg[:, 2] * bw
    align_bottomy = keep_boxes[:, 3] + keep_reg[:, 3] * bh

    boxes_align = np.vstack(
        [align_topx, align_topy, align_bottomx, align_bottomy, keep_cls[:, 0]])

    boxes_align = boxes_align.T

    return keep_boxes, boxes_align
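
A minimal end-to-end usage sketch, assuming a companion first-stage `detect_pnet(img, pnet_path)` with the same conventions (only the R-Net stage is shown on this page, and the weight paths are placeholders):

import cv2

img = cv2.imread('test.jpg')

# hypothetical P-Net stage; returns candidate boxes [n, 5] or None
dets = detect_pnet(img, 'weights/pnet.pt')

keep_boxes, boxes_align = detect_rnet(img, dets, 'weights/rnet.pt')
if boxes_align is not None:
    for x1, y1, x2, y2, score in boxes_align:
        cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
cv2.imwrite('out.jpg', img)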