Code example #1
    def bbox_transform(self, bbox):
        num_priors = self.priors.shape[0]
        if bbox is None or len(bbox) == 0:
            return np.zeros(
                (num_priors,
                 4)).astype(np.float32), np.zeros(num_priors).astype(np.int64)
        elif isinstance(bbox, np.ndarray):
            height, width = self.image_size
            gt_label = None
            gt_box = bbox
            # an odd number of columns means the last column holds the class label
            if bbox.shape[-1] % 2 == 1:
                gt_box = bbox[:, :-1]
                gt_label = bbox[:, -1]

            # normalize box coordinates to the [0, 1] range
            gt_box[:, 0::2] /= width
            gt_box[:, 1::2] /= height

            # match priors (default boxes) and ground truth boxes
            if gt_box is not None and len(gt_box) > 0:
                truths = to_tensor(gt_box).float()
                labels = to_tensor(gt_label).long()
                loc_t, conf_t = match(truths, self.priors.data, (0.1, 0.2),
                                      labels, self.gt_overlap_tolerance)

                return to_numpy(loc_t).astype(
                    np.float32), to_numpy(conf_t).astype(np.int64)
            return np.zeros(
                (num_priors,
                 4)).astype(np.float32), np.zeros(num_priors).astype(np.int64)
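
A minimal usage sketch for bbox_transform (here `box_provider` stands in for an instance of the class above with `priors`, `image_size` and `gt_overlap_tolerance` already configured; the ground-truth boxes are made-up pixel coordinates with a trailing class label):

    import numpy as np

    gt = np.array([[48., 60., 112., 140., 1.],     # xmin, ymin, xmax, ymax, label
                   [200., 30., 260., 96., 2.]], dtype=np.float32)
    loc_t, conf_t = box_provider.bbox_transform(gt)
    # loc_t: (num_priors, 4) float32 offsets, conf_t: (num_priors,) int64 labels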
Code example #2
File: pytorch_embedded.py  Project: priest671/trident
    def get_enumerators(self,
                        *args,
                        negative_case=None,
                        n=10,
                        exclude_samples=True):
        # Get the list of words that are, overall, closest to the input cases
        # but farthest from the negative cases (negative_case).
        positive_correlate = 0
        negative_correlate = 0
        exclude_list = []
        for arg in args:
            positive_correlate += element_cosine_distance(
                self.get_words_vector(arg), self.weight)[0]
            if exclude_samples:
                # exclude the query words themselves from the returned list
                exclude_list.append(self.word2idx(arg))

        correlate = positive_correlate
        if negative_case is not None:
            if isinstance(negative_case, str):
                negative_case = [negative_case]
            if isinstance(negative_case, (list, tuple)):
                for arg in negative_case:
                    negative_correlate += element_cosine_distance(
                        self.get_words_vector(arg), self.weight)[0]
                correlate = positive_correlate - negative_correlate
        sorted_idxes = argsort(correlate, descending=True)
        sorted_idxes = sorted_idxes[:n + len(exclude_list)]
        sorted_idxes = to_tensor([
            idx for idx in sorted_idxes if idx.item() not in exclude_list
        ]).long()
        probs = to_list(correlate[sorted_idxes])[:n]
        words = [self.idx2word(idx.item()) for idx in sorted_idxes][:n]
        return OrderedDict(zip(words, probs))
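
An illustrative call (assumes `embedding` is an instance of the embedding class in pytorch_embedded.py; the query words are arbitrary placeholders):

    result = embedding.get_enumerators('dog', 'puppy', negative_case='cat', n=5)
    for word, score in result.items():
        print(word, score)   # words close to the positive cases and far from the negative case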
Code example #3
File: pytorch_embedded.py  Project: priest671/trident
    def find_similar(self,
                     reprt: (str, Tensor),
                     n: int = 10,
                     ignore_indexes=None):
        # Query the nearest words in the embedding space given a word or a vector.
        reprt_idx = None
        if ignore_indexes is None:
            ignore_indexes = []
        if isinstance(reprt, str):
            reprt_idx = self.word2idx(reprt)
            ignore_indexes.append(reprt_idx)
            reprt = self.weight[reprt_idx].expand_dims(
                0) if reprt in self._vocabs else None
        if is_tensor(reprt):
            correlate = element_cosine_distance(reprt, self.weight)[0]
            sorted_idxes = argsort(correlate, descending=True)

            sorted_idxes = sorted_idxes[:n + len(ignore_indexes)]
            sorted_idxes = to_tensor([
                idx for idx in sorted_idxes if idx.item() not in ignore_indexes
            ]).long()
            probs = to_list(correlate[sorted_idxes])[:n]
            words = [self.idx2word(idx.item()) for idx in sorted_idxes][:n]
            return OrderedDict(zip(words, probs))
        else:
            raise ValueError('Valid reprt should be a word or a tensor.')
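
An illustrative call of find_similar (same assumed `embedding` instance; a tensor representation can be passed instead of a word):

    neighbors = embedding.find_similar('dog', n=5)
    print(list(neighbors.keys()))   # the five nearest words, excluding the query itself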
Code example #4
    def forward(self, confidence, locations, target_confidence,
                target_locations):
        """Compute classification loss and smooth l1 loss.

        Args:
            confidence (batch_size, num_priors, num_classes): class predictions.
            locations (batch_size, num_priors, 4): predicted locations.
            target_confidence (batch_size, num_priors): real labels of all the priors.
            target_locations (batch_size, num_priors, 4): real boxes corresponding all the priors.
        """
        num_classes = confidence.size(2)
        num_batch = confidence.size(0)

        confidence_logit = softmax(confidence, -1)
        confidence_logit_probs, confidence_logit_idxs = confidence_logit.max(
            -1)
        probs_mask = confidence_logit_probs > 0.5
        label_mask = confidence_logit_idxs > 0

        pos_target_mask_all = target_confidence > 0
        pos_infer_mask_all = (pos_target_mask_all.float() +
                              probs_mask.float() + label_mask.float() == 3)

        decode_locations_all = decode(
            locations, self.priors, (self.center_variance, self.size_variance))
        decode_target_locations_all = decode(
            target_locations, self.priors,
            (self.center_variance, self.size_variance))
        giou_np = 0.0
        giou = 0.0
        overlaps = 0.0
        num_boxes = 0
        for i in range(num_batch):
            pos_target_mask = pos_target_mask_all[i]
            pos_infer_mask = pos_infer_mask_all[i]
            decode_locations = decode_locations_all[i][pos_infer_mask, :]
            decode_target_locations = decode_target_locations_all[i][
                pos_target_mask, :]
            num_boxes += decode_target_locations.shape[0]
            if decode_target_locations.shape[0] > 0 and decode_locations.shape[
                    0] > 0:
                giou = giou + (1 - (bbox_giou(decode_locations,
                                              decode_target_locations).sum(0) /
                                    decode_target_locations.shape[0])).sum()
                overlaps = overlaps + (-log(
                    clip(jaccard(decode_locations, decode_target_locations),
                         min=1e-8)).sum(0) /
                                       decode_target_locations.shape[0]).sum()
            elif decode_target_locations.shape[
                    0] == 0 and decode_locations.shape[0] == 0:
                pass
            else:
                giou = giou + 1
                overlaps = overlaps - log(to_tensor(1e-8))

        giou = giou / num_boxes
        overlaps = overlaps / num_boxes
        return giou
Code example #5
    def __init__(self, priors, center_variance, size_variance):
        """Implement SSD Multibox Loss.

        Basically, Multibox loss combines classification loss
         and Smooth L1 regression loss.
        """
        super(IoULoss, self).__init__()
        self.center_variance = center_variance
        self.size_variance = size_variance
        self.priors = to_tensor(priors)
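
A minimal construction sketch (assumes `priors` is a prior-box tensor such as the one produced by generate_priors in code example #8 below; the 0.1 / 0.2 variances mirror the pair used in bbox_transform above and are only illustrative):

    criterion = IoULoss(priors, center_variance=0.1, size_variance=0.2)
    # later: loss = criterion(confidence, locations, target_confidence, target_locations)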
Code example #6
File: pytorch_mtcnn.py  Project: AllanYiin/trident
    def rerec(self, bboxA, img_shape):
        """Convert bboxA to square."""
        bboxA = to_numpy(bboxA)
        h = bboxA[:, 3] - bboxA[:, 1]
        w = bboxA[:, 2] - bboxA[:, 0]
        max_len = np.maximum(w, h)

        bboxA[:, 0] = bboxA[:, 0] - 0.5 * (max_len - w)
        bboxA[:, 1] = bboxA[:, 1] - 0.5 * (max_len - h)
        bboxA[:, 2] = bboxA[:, 0] + max_len
        bboxA[:, 3] = bboxA[:, 1] + max_len
        return to_tensor(bboxA)
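
A minimal sketch of rerec (assumes `detector` exposes the method above; the input box is illustrative):

    import numpy as np

    boxes = np.array([[10., 20., 50., 100., 0.9]], dtype=np.float32)   # a 40 x 80 box with its score
    square = detector.rerec(boxes, img_shape=(240, 320, 3))
    # the box grows to an 80 x 80 square around the same centre: [-10., 20., 70., 100., 0.9];
    # the negative coordinate is why clip_boxes_to_image is applied afterwards in the pipeline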
Code example #7
    def build_discriminator():
        layers = []
        layers.append(
            Conv2d((5, 5), 32, strides=1, auto_pad=True, use_bias=False, activation=activation, name='first_layer'))
        layers.append(Conv2d_Block((3, 3), 64, strides=2, auto_pad=True, use_spectral=use_spectral, use_bias=False,
                                   activation=activation, normalization=discriminator_norm, name='second_layer'))
        filter = 64
        current_width = image_width // 2
        i = 0
        while current_width > 8:
            filter = filter * 2 if i % 2 == 1 else filter
            if discriminator_build_block == BuildBlockMode.base.value:
                layers.append(
                    Conv2d_Block((3, 3), num_filters=filter, strides=2, auto_pad=True, use_spectral=use_spectral, use_bias=False,
                                 activation=activation, normalization=discriminator_norm,
                                 name='base_block{0}'.format(i)))
            elif discriminator_build_block == BuildBlockMode.resnet.value:
                layers.extend(resnet_block(num_filters=filter, strides=2, activation=activation, use_spectral=use_spectral,
                                           normalization=discriminator_norm, name='resnet_block{0}'.format(i)))

            elif discriminator_build_block == BuildBlockMode.bottleneck.value:
                layers.append(
                    bottleneck_block(num_filters=filter, strides=2, reduce=2, activation=activation, use_spectral=use_spectral,
                                     normalization=discriminator_norm, name='bottleneck_block{0}'.format(i)))

            current_width = current_width // 2
            i = i + 1
        if use_self_attention:
            layers.insert(-2, SelfAttention(8, name='self_attention'))
        if use_dropout:
            layers.insert(-1, Dropout(0.5))
        layers.append(Conv2d_Block((3, 3), 128, strides=2, auto_pad=True, use_bias=False, activation='leaky_relu', use_spectral=use_spectral, normalization=discriminator_norm,
                                   name='last_conv'))
        layers.append(Flatten())
        if use_minibatch_discrimination:
            layers.append(MinibatchDiscriminationLayer(name='minibatch_dis'))
        layers.append(Dense(1, use_bias=False, name='fc'))
        layers.append(Sigmoid())
        dis = Sequential(layers, name='discriminator')
        out = dis(to_tensor(TensorShape([None, 3, image_width, image_width]).get_dummy_tensor()).to(get_device()))
        if use_spectral:
            new_layers = []
            for layer in dis:
                if isinstance(layer, Dense):
                    new_layers.append(torch.nn.utils.spectral_norm(layer))
                else:
                    new_layers.append(layer)
            return Sequential(new_layers, name='discriminator')
        else:
            return dis
Code example #8
def generate_priors(feature_map_list, shrinkage_list, image_size, min_boxes, clamp=True) -> torch.Tensor:
    priors = []
    for index in range(0, len(feature_map_list[0])):
        scale_w = image_size[0] / shrinkage_list[0][index]
        scale_h = image_size[1] / shrinkage_list[1][index]
        for j in range(0, feature_map_list[1][index]):
            for i in range(0, feature_map_list[0][index]):
                x_center = (i + 0.5) / scale_w
                y_center = (j + 0.5) / scale_h

                for min_box in min_boxes[index]:
                    w = min_box / image_size[0]
                    h = min_box / image_size[1]
                    priors.append([x_center, y_center, w, h])
    print("priors nums:{}".format(len(priors)))
    priors = to_tensor(priors).to(get_device())  # .view(-1, 4)
    if clamp:
        torch.clamp(priors, 0.0, 1.0, out=priors)
    return priors
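
A usage sketch for generate_priors (the values below follow a common 320 x 240 SSD-lite face-detector configuration and are assumptions, not values taken from this project):

    image_size = [320, 240]                                   # width, height
    feature_map_list = [[40, 20, 10, 5], [30, 15, 8, 4]]      # per-level widths / heights
    shrinkage_list = [[8, 16, 32, 64], [8, 16, 32, 64]]       # stride per level
    min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
    priors = generate_priors(feature_map_list, shrinkage_list, image_size, min_boxes)
    # priors: (num_priors, 4) tensor of (x_center, y_center, w, h), clamped to [0, 1]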
Code example #9
    def boxes_nms(self, box_scores, overlap_threshold=0.5, top_k=-1):
        """Non-maximum suppression.
        Arguments:
            box_scores: a float numpy array of shape [n, 5],
                where each row is (xmin, ymin, xmax, ymax, score).
            overlap_threshold: IoU threshold above which a box is suppressed.
            top_k: keep at most this many boxes (-1 keeps all survivors).
        Returns:
            the selected rows of box_scores (boxes together with their scores).
        """
        # if there are no valid candidate boxes, return an empty list
        box_scores = to_tensor(box_scores)
        if len(box_scores) == 0:
            return []
        score = box_scores[:, 4]
        boxes = box_scores[:, :4]
        # indices of the boxes that survive suppression
        picked = []
        # sort by confidence score in ascending order
        indexes = argsort(score, descending=False)

        while len(indexes) > 0:
            # the last entry is therefore the one with the highest confidence;
            # add it to the picked list
            current = indexes[-1]
            picked.append(current.item())

            # compute the IoU between the current box and all remaining candidates

            if 0 < top_k == len(picked) or len(indexes) == 1:
                break
            current_box = boxes[current, :]
            current_score = score[current]
            # all entries except the last one are the remaining boxes
            indexes = indexes[:-1]
            rest_boxes = boxes[indexes, :]
            iou = self.iou_of(
                rest_boxes,
                expand_dims(current_box, axis=0),
            )
            # keep boxes whose IoU with the current box does not exceed the threshold; drop the rest
            indexes = indexes[iou <= overlap_threshold]
        return box_scores[picked]
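
A minimal sketch of calling boxes_nms (assumes `detector` is an instance of the class above; the candidate boxes and scores are made-up values):

    import numpy as np

    candidates = np.array([[10., 10., 60., 60., 0.90],
                           [12., 12., 62., 62., 0.80],   # heavy overlap with the first box
                           [100., 40., 150., 90., 0.70]], dtype=np.float32)
    kept = detector.boxes_nms(candidates, overlap_threshold=0.5)
    # kept holds the surviving (xmin, ymin, xmax, ymax, score) rows: the 0.90 and 0.70 boxes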
Code example #10
    def forward(self, confidence, locations, target_confidence,
                target_locations):
        """Compute classification loss and smooth l1 loss.

        Args:
            confidence (batch_size, num_priors, num_classes): class predictions.
            locations (batch_size, num_priors, 4): predicted locations.
            labels (batch_size, num_priors): real labels of all the priors.
            boxes (batch_size, num_priors, 4): real boxes corresponding all the priors.
        """
        num_classes = confidence.size(2)

        # derived from cross_entropy = -sum(log(p))
        with torch.no_grad():
            loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
            mask = hard_negative_mining(loss, target_confidence,
                                        self.neg_pos_ratio)
        weight = to_tensor(np.array([0.05, 1, 5, 20, 10]))
        classification_loss = F.cross_entropy(confidence[mask, :].reshape(
            -1, num_classes),
                                              target_confidence[mask],
                                              weight=weight,
                                              reduction='sum')
        # classification_loss += 0.1*F.cross_entropy(confidence.reshape(-1, num_classes), target_confidence.reshape(
        # -1), weight=weight, reduction='sum')

        pos_mask = target_confidence > 0
        locations = locations[pos_mask, :].reshape(-1, 4)
        target_locations = target_locations[pos_mask, :].reshape(-1, 4)
        smooth_l1_loss = F.mse_loss(locations,
                                    target_locations,
                                    reduction='sum')  # MSE is used here in place of a true smooth L1
        smooth_l1_loss += F.l1_loss(locations[:, 2:4].exp(),
                                    target_locations[:, 2:4].exp(),
                                    reduction='sum')
        num_pos = target_locations.size(0)
        return (smooth_l1_loss + classification_loss) / num_pos
Code example #11
File: pytorch_mtcnn.py  Project: AllanYiin/trident
    def get_image_pyrimid(self, img, min_size=None, factor=0.709):
        if min_size is None:
            min_size = self.min_size
        min_face_area = (min_size, min_size)
        h = img.shape[0]
        w = img.shape[1]
        minl = np.amin([h, w])
        m = 12.0 / min_size
        minl = minl * m
        # create scale pyramid
        scales = []
        images = []
        factor_count = 0
        while minl >= 12:
            scales += [m * np.power(factor, factor_count)]
            scaled_img = rescale(scales[-1])(img.copy())
            if img is not None:
                for func in self.preprocess_flow:
                    if inspect.isfunction(func):
                        scaled_img = func(scaled_img)
            images.append(to_tensor(image_backend_adaption(scaled_img)))
            minl = minl * factor
            factor_count += 1
        return images, scales
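
A minimal sketch (assumes `detector` is the MTCNN detector instance from pytorch_mtcnn.py and `img` is an HWC image array; min_size=20 is an illustrative value):

    images, scales = detector.get_image_pyrimid(img, min_size=20)
    # images[i] is the tensor of img rescaled by scales[i]; scales shrink by `factor`
    # (default 0.709) until the shorter side, scaled by 12 / min_size, drops below 12 px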
Code example #12
    def infer_single_image(self, img, scale=1):
        if self._model.built:
            try:
                self._model.to(self.device)
                self._model.eval()
                if self._model.input_spec.object_type is None:
                    self._model.input_spec.object_type = ObjectType.rgb
                img = image2array(img)
                if img.shape[-1] == 4:
                    img = img[:, :, :3]
                img_orig = img.copy()
                rescale_scale = 1
                for func in self.preprocess_flow:
                    if (inspect.isfunction(func) or isinstance(
                            func,
                            Transform)) and func is not image_backend_adaption:
                        img = func(img, spec=self._model.input_spec)
                        if (inspect.isfunction(func) and func.__qualname__
                                == 'resize.<locals>.img_op') or (
                                    isinstance(func, Transform)
                                    and func.name == 'resize'):
                            rescale_scale = func.scale
                    else:
                        print(func)

                img = image_backend_adaption(img)
                inp = to_tensor(np.expand_dims(img, 0)).to(
                    torch.device("cuda" if self._model.weights[0].data.
                                 is_cuda else "cpu")).to(
                                     self._model.weights[0].data.dtype)

                confidence, boxes = self._model(inp)
                boxes = boxes[0]
                confidence = confidence[0]
                probs, label = confidence.data.max(-1)

                mask = probs > self.detection_threshold
                probs = probs[mask]
                label = label[mask]
                boxes = boxes[mask, :]
                mask = label > 0
                probs = probs[mask]
                label = label[mask]
                boxes = boxes[mask, :]

                if boxes is not None and len(boxes) > 0:
                    box_probs = concate([
                        boxes.float(),
                        label.reshape(-1, 1).float(),
                        probs.reshape(-1, 1).float()
                    ],
                                        axis=1)
                    if len(boxes) > 1:
                        box_probs, keep = self.hard_nms(
                            box_probs,
                            nms_threshold=self.nms_threshold,
                            top_k=-1,
                        )

                    boxes = box_probs[:, :4]
                    boxes[:, 0::2] *= self._model.input_spec.shape.dims[-1]
                    boxes[:, 1::2] *= self._model.input_spec.shape.dims[-2]
                    boxes[:, :4] /= rescale_scale

                    # boxes = boxes * (1 / scale[0])
                    return img_orig, to_numpy(boxes), to_numpy(
                        box_probs[:,
                                  4]).astype(np.int32), to_numpy(box_probs[:,
                                                                           5])
                else:
                    return img_orig, None, None, None
            except:
                PrintException()
        else:
            raise ValueError('the model is not built yet.')
Code example #13
File: pytorch_mtcnn.py  Project: AllanYiin/trident
    def forward(self, x, scale):
        inp = x.expand_dims(0)
        boxes = self.pnet(inp)
        boxes_list = []
        if boxes is not None and len(boxes) > 0:
            box = boxes[:, :4] / scale
            score = boxes[:, 4:]
            boxes = concate([box.round_(), score], axis=1)
            if len(boxes) > 0:
                boxes_list.append(boxes)

        #######################################
        #########pnet finish
        #######################################
        if len(boxes_list) > 0:
            boxes = to_tensor(concate(boxes_list, axis=0))

            # print('total {0} boxes in pnet in all scale '.format(len(boxes)))
            boxes = clip_boxes_to_image(boxes, (x.shape[0], x.shape[1]))
            boxes = nms(boxes, threshold=self.detection_threshold[0])
            print('pnet:{0} boxes '.format(len(boxes)))
            # print('total {0} boxes after nms '.format(len(boxes)))
            # score = to_numpy(boxes[:, 4]).reshape(-1)
            if boxes is not None:
                # prepare rnet input

                boxes = self.rerec(boxes, x.shape)
                new_arr = np.zeros((boxes.shape[0], 3, 24, 24))

                for k in range(boxes.shape[0]):
                    box = boxes[k]
                    crop_img = x.copy()[int(box[1]):int(box[3]),
                                        int(box[0]):int(box[2]), :]
                    if crop_img.shape[0] > 0 and crop_img.shape[1] > 0:
                        new_arr[k] = Resize(
                            (24, 24))(crop_img / 255.0).transpose([2, 0, 1])
                    # else:
                    #     print(box)
                new_arr = to_tensor(new_arr)
                r_output1_list = []
                r_output2_list = []
                r_output3_list = []
                if len(new_arr) > 16:
                    for i in range(len(new_arr) // 16 + 1):
                        if i * 16 < len(new_arr):
                            r_out1, r_out2, r_out3 = self.rnet(
                                new_arr[i * 16:(i + 1) * 16, :, :, :])
                            r_output1_list.append(r_out1)
                            r_output2_list.append(r_out2)
                            r_output3_list.append(r_out3)
                    r_out1 = concate(r_output1_list, axis=0)
                    r_out2 = concate(r_output2_list, axis=0)
                    r_out3 = concate(r_output3_list, axis=0)
                else:
                    r_out1, r_out2, r_out3 = self.rnet(new_arr)

                probs = to_numpy(r_out1)
                keep = np.where(probs[:, 0] > self.detection_threshold[1])[0]
                r_out1 = r_out1[keep]
                boxes = boxes[keep]
                boxes[:, 4] = r_out1[:, 0]
                r_out2 = r_out2[keep]
                boxes = calibrate_box(boxes, r_out2)

                #######################################
                #########rnet finish
                #######################################

                boxes = nms(boxes,
                            threshold=self.detection_threshold[1],
                            image_size=(x.shape[0], x.shape[1]),
                            min_size=self.min_size)
                print('rnet:{0} boxes '.format(len(boxes)))
                # print('total {0} boxes after nms '.format(len(boxes)))
                boxes = clip_boxes_to_image(boxes, (x.shape[0], x.shape[1]))
                boxes = self.rerec(boxes, x.shape)
                new_arr = np.zeros((boxes.shape[0], 3, 48, 48))

                for k in range(boxes.shape[0]):
                    box = boxes[k]
                    crop_img = x.copy()[int(box[1]):int(box[3]),
                                        int(box[0]):int(box[2]), :]
                    if crop_img.shape[0] > 0 and crop_img.shape[1] > 0:
                        new_arr[k] = Resize(
                            (48, 48))(crop_img / 255.0).transpose([2, 0, 1])
                    # else:
                    #     print(box)

                new_arr = to_tensor(new_arr)
                o_out1, o_out2, o_out3 = self.onet(new_arr)
                probs = to_numpy(o_out1)
                keep = np.where(probs[:, 0] > self.detection_threshold[2])[0]
                o_out1 = o_out1[keep]
                boxes = boxes[keep]

                boxes[:, 4] = o_out1[:, 0]
                o_out2 = o_out2[keep]
                o_out3 = o_out3[keep]
                boxes = calibrate_box(boxes, o_out2)

                landmarks_x = boxes[:, 0:1] + o_out3[:, 0::2] * (
                    boxes[:, 2:3] - boxes[:, 0:1] + 1)
                landmarks_y = boxes[:, 1:2] + o_out3[:, 1::2] * (
                    boxes[:, 3:4] - boxes[:, 1:2] + 1)

                boxes = concate([boxes, landmarks_x, landmarks_y], axis=-1)
Code example #14
    def infer_single_image(self, img, **kwargs):
        if self.model.built:
            self.model.to(self.device)
            self.model.eval()
            img = image2array(img)
            if img.shape[-1] == 4:
                img = img[:, :, :3]

            imgs, scales = self.get_image_pyrimid(img)
            boxes_list = []
            for i in range(len(scales)):
                scaled_img = imgs[i]
                inp = to_tensor(expand_dims(scaled_img, 0)).to(
                    torch.device("cuda" if self.pnet.weights[0].data.
                                 is_cuda else "cpu")).to(
                                     self.pnet.weights[0].data.dtype)

                boxes = self.pnet(inp)
                if boxes is not None and len(boxes) > 0:
                    scale = scales[i]
                    box = boxes[:, :4] / scale
                    score = boxes[:, 4:]
                    boxes = torch.cat([box.round_(), score], dim=1)
                    if len(boxes) > 0:
                        boxes_list.append(boxes)

            #######################################
            #########pnet finish
            #######################################
            if len(boxes_list) > 0:
                boxes = to_tensor(torch.cat(boxes_list, dim=0))

                #print('total {0} boxes in pnet in all scale '.format(len(boxes)))
                boxes = clip_boxes_to_image(boxes,
                                            (img.shape[0], img.shape[1]))
                boxes = self.boxes_nms(
                    boxes, overlap_threshold=self.detection_threshould[0])
                if self.verbose:
                    print('pnet:{0} boxes '.format(len(boxes)))
                #print('total {0} boxes after nms '.format(len(boxes)))
                #score = to_numpy(boxes[:, 4]).reshape(-1)
                if boxes is not None:
                    #prepare rnet input

                    boxes = self.rerec(boxes, img.shape)
                    new_arr = np.zeros((boxes.shape[0], 3, 24, 24))

                    for k in range(boxes.shape[0]):
                        box = boxes[k]
                        crop_img = img.copy()[int(box[1]):int(box[3]),
                                              int(box[0]):int(box[2]), :]
                        if crop_img.shape[0] > 0 and crop_img.shape[1] > 0:
                            new_arr[k] = resize((24, 24))(crop_img).transpose(
                                [2, 0, 1]) / 255.0
                        # else:
                        #     print(box)
                    new_arr = to_tensor(new_arr)
                    r_output1_list = []
                    r_output2_list = []
                    r_output3_list = []
                    if len(new_arr) > 16:
                        for i in range(len(new_arr) // 16 + 1):
                            if i * 16 < len(new_arr):
                                r_out1, r_out2, r_out3 = self.rnet(
                                    new_arr[i * 16:(i + 1) * 16, :, :, :])
                                r_output1_list.append(r_out1)
                                r_output2_list.append(r_out2)
                                r_output3_list.append(r_out3)
                        r_out1 = torch.cat(r_output1_list, dim=0)
                        r_out2 = torch.cat(r_output2_list, dim=0)
                        r_out3 = torch.cat(r_output3_list, dim=0)
                    else:
                        r_out1, r_out2, r_out3 = self.rnet(new_arr)

                    probs = r_out1
                    keep = probs[:, 0] > self.detection_threshould[1]
                    r_out1 = r_out1[keep]

                    boxes = boxes[keep]
                    if len(boxes) == 0:
                        return boxes
                    boxes[:, 4] = r_out1[:, 0]
                    r_out2 = r_out2[keep]
                    boxes = calibrate_box(boxes, r_out2)

                    #######################################
                    #########rnet finish
                    #######################################
                    boxes = self.boxes_nms(
                        boxes, overlap_threshold=self.detection_threshould[1])
                    if self.verbose:
                        print('rnet:{0} boxes '.format(len(boxes)))
                    #print('total {0} boxes after nms '.format(len(boxes)))
                    boxes = clip_boxes_to_image(boxes,
                                                (img.shape[0], img.shape[1]))
                    boxes = self.rerec(to_tensor(boxes), img.shape)
                    new_arr = np.zeros((boxes.shape[0], 3, 48, 48))

                    for k in range(boxes.shape[0]):
                        box = boxes[k]
                        crop_img = img.copy()[int(box[1]):int(box[3]),
                                              int(box[0]):int(box[2]), :]
                        if crop_img.shape[0] > 0 and crop_img.shape[1] > 0:
                            new_arr[k] = resize((48, 48))(crop_img).transpose(
                                [2, 0, 1]) / 255.0
                        # else:
                        #     print(box)

                    new_arr = to_tensor(new_arr)
                    o_out1, o_out2, o_out3 = self.onet(new_arr)
                    probs = o_out1
                    keep = probs[:, 0] > self.detection_threshould[2]
                    o_out1 = o_out1[keep]
                    boxes = boxes[keep]
                    if len(boxes) == 0:
                        return boxes
                    boxes[:, 4] = o_out1[:, 0]
                    o_out2 = o_out2[keep]
                    o_out3 = o_out3[keep]
                    boxes = calibrate_box(boxes, o_out2)

                    landmarks_x = boxes[:, 0:1] + o_out3[:, 0::2] * (
                        boxes[:, 2:3] - boxes[:, 0:1] + 1)
                    landmarks_y = boxes[:, 1:2] + o_out3[:, 1::2] * (
                        boxes[:, 3:4] - boxes[:, 1:2] + 1)

                    boxes = torch.cat([boxes, landmarks_x, landmarks_y],
                                      dim=-1)

                    #######################################
                    #########onet finish
                    #######################################
                    boxes = self.boxes_nms(
                        boxes, overlap_threshold=self.detection_threshould[2])
                    if self.verbose:
                        print('onet:{0} boxes '.format(len(boxes)))
                    return to_numpy(boxes)
            else:
                return None
            #idx=int(np.argmax(result,-1)[0])

        else:
            raise ValueError('the model is not built yet.')