class Predictor(nn.Module):
    """Detect logos with EfficientDet-D5 and label each crop with a Keras model.

    ``forward`` takes an image path, runs the single-class detector on a
    resized square copy of the image, applies non-maximum suppression, maps
    the surviving boxes back to the original resolution and classifies every
    crop into one of the names listed in ``self.mapper``.
    """

    # Side length (pixels) of the square image fed to the detector.
    IMAGE_SIZE = 512
    # Side length (pixels) of the square crops fed to the classifier.
    CLF_SIZE = 224
    # IoU threshold passed to torchvision.ops.nms.
    NMS_IOU_THRESHOLD = 0.1

    def __init__(self,
                 detect_path='weights/best-checkpoint.bin',
                 model_arc='weights/model_eff_arc.json',
                 weights='weights/model_best_acc.h5'):
        """Load the classifier and the detector.

        Args:
            detect_path: Path to the detector checkpoint read by
                ``load_detect``.
            model_arc: Path to the JSON file describing the Keras classifier
                architecture.
            weights: Path to the Keras classifier weights.
        """
        super().__init__()

        # Classifier: architecture from JSON, weights from a separate file.
        with open(model_arc, 'r') as f:
            self.model = tf.keras.models.model_from_json(f.read())
        self.model.load_weights(weights)
        # Class names indexed by the classifier's argmax output.
        self.mapper = [
            'TH', 'ACB', 'Acecook', 'Addidas', 'Agribank', 'Bidv', 'Big C',
            'Cai Lan', 'Chinsu', 'Colgate', 'FPT', 'Habeco', 'Hai Ha',
            'Jollibee', 'KFC', 'Kinh Do', 'Lotte mart', 'Mbbank new',
            'Mbbank old', 'Neptune', 'Nike', 'Pepsi', 'Petrolimex',
            'Phuc Long', 'Samsung', 'SHB', 'Techcombank', 'The Coffe House',
            'The gioi di dong', 'TPbank', 'Vietcombank', 'Vietinbank',
            'Viettel', 'Vinamilk', 'Vinfast', 'Vinmart', 'Vifon', 'Vnpt',
            'Vpbank'
        ]

        # Detector: resize to the fixed input size, then convert to a tensor.
        self.transform = A.Compose([
            A.Resize(height=self.IMAGE_SIZE, width=self.IMAGE_SIZE, p=1.0),
            ToTensor(),
        ])
        self.load_detect(detect_path)

    def preprocess_detect(self, path_img):
        """Read an image from disk and return a float CPU batch of size one.

        Args:
            path_img: Path of the image to load.

        Returns:
            The transformed image with a leading batch dimension added.
        """
        image = cv2.imread(path_img)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transform(image=image)['image']
        return image.unsqueeze(0).cpu().float()

    def preprocess_clf(self, batch):
        """Resize every crop to the classifier input size and divide by 255.

        Args:
            batch: Iterable of image arrays (crops of the original image).

        Returns:
            A NumPy array stacking the resized, rescaled crops.
        """
        size = (self.CLF_SIZE, self.CLF_SIZE)
        return np.array([cv2.resize(img, size) / 255.0 for img in batch])

    def make_predictions(self, images, score_threshold=0.21):
        """Run the detector and keep detections above ``score_threshold``.

        Args:
            images: Batched image tensor; only ``images.shape[0]`` (the batch
                size) is inspected here.
            score_threshold: Detections whose score is not strictly greater
                than this value are dropped.

        Returns:
            A one-element list wrapping a list with one dict per image. Each
            dict has the keys ``'boxes'`` (columns 0-3 of the detector
            output), ``'scores'`` (column 4) and ``'labels'`` (column 5).
        """
        batch_size = images.shape[0]
        size = self.IMAGE_SIZE
        with torch.no_grad():
            det = self.net(
                images, {
                    'img_scale': torch.tensor([1.] * batch_size).float().cpu(),
                    'img_size': torch.tensor([size, size]).cpu()
                })

        predictions = []
        for i in range(batch_size):
            # Convert once per image instead of once per column.
            det_i = det[i].detach().cpu().numpy()
            scores = det_i[:, 4]
            indexes = np.where(scores > score_threshold)[0]
            predictions.append({
                'boxes': det_i[indexes, :4],
                'scores': scores[indexes],
                'labels': det_i[indexes, 5]
            })
        # The extra list level is part of the existing return contract.
        return [predictions]

    def load_detect(self, checkpoint_path):
        """Build the EfficientDet-D5 detector and load its checkpoint on CPU.

        Args:
            checkpoint_path: Path to a checkpoint whose ``'model_state_dict'``
                entry holds the network weights.
        """
        config = get_efficientdet_config('tf_efficientdet_d5')
        config.image_size = [self.IMAGE_SIZE, self.IMAGE_SIZE]
        net = EfficientDet(config, pretrained_backbone=False)
        net.reset_head(num_classes=1)

        checkpoint = torch.load(checkpoint_path,
                                map_location=torch.device('cpu'))
        net.load_state_dict(checkpoint['model_state_dict'])
        # Release the checkpoint before wrapping the network.
        del checkpoint
        gc.collect()

        self.net = DetBenchPredict(net)
        self.net.eval()
        self.net.cpu()

    def forward(self, path_test):
        """Detect and classify logos in the image stored at ``path_test``.

        Args:
            path_test: Path of the image to analyse.

        Returns:
            A tuple ``(image_original, result)``. ``image_original`` is the
            RGB image at its original resolution. ``result`` is a list with
            one dict per detection holding ``'box'`` (x1, y1, x2, y2 in
            original-image pixels, truncated to whole numbers), ``'label'``,
            ``'score_detect'`` and ``'score_clf'``. The list is empty when
            nothing is detected.
        """
        images = self.preprocess_detect(path_test)
        s_t = time.time()
        predictions = self.make_predictions(images)
        print(time.time() - s_t)
        detection = predictions[0][0]

        image_original = cv2.imread(path_test)
        image_original = cv2.cvtColor(image_original, cv2.COLOR_BGR2RGB)

        # Nothing above the score threshold: indexing a 1-D empty array with
        # [:, 0] would raise, so return early.
        if len(detection['boxes']) == 0:
            return image_original, []

        keep_idx = torchvision.ops.nms(
            torch.from_numpy(detection['boxes']),
            torch.from_numpy(detection['scores']),
            self.NMS_IOU_THRESHOLD).numpy()
        size = self.IMAGE_SIZE
        boxes = detection['boxes'][keep_idx].astype(np.float32).clip(
            min=0, max=size - 1)
        scores = detection['scores'][keep_idx]

        # Map boxes from detector-input coordinates to the original image and
        # truncate to whole pixels (values are non-negative after the clip).
        h, w = image_original.shape[0], image_original.shape[1]
        scale = np.array([w / size, h / size, w / size, h / size],
                         dtype=np.float32)
        boxes = np.trunc(boxes * scale)

        # Drop boxes that collapsed to zero width/height; an empty crop would
        # make cv2.resize fail in preprocess_clf.
        valid = (boxes[:, 2] > boxes[:, 0]) & (boxes[:, 3] > boxes[:, 1])
        boxes, scores = boxes[valid], scores[valid]
        if len(boxes) == 0:
            return image_original, []

        batch_clf = [
            image_original[int(y1):int(y2), int(x1):int(x2), :]
            for x1, y1, x2, y2 in boxes
        ]

        tensor = self.preprocess_clf(batch_clf)
        time_clf = time.time()
        print(tensor.shape)
        predictions = self.model.predict(tensor)
        print(f"clf:{time.time()-time_clf}")

        result = []
        for box, score, prediction in zip(boxes, scores, predictions):
            idx = int(np.argmax(prediction))
            result.append({
                'box': box,
                'label': self.mapper[idx],
                'score_detect': score,
                'score_clf': prediction[idx]
            })
        return image_original, result
# Example #2
# 0
class Predictor(nn.Module):
    """Detect regions with EfficientDet-D0 and return their crops.

    ``forward`` takes an image path, runs the single-class detector on a
    resized square copy of the image, applies non-maximum suppression, maps
    the surviving boxes back to the original resolution and returns the
    cropped regions together with their boxes.
    """

    # Side length (pixels) of the square image fed to the detector.
    IMAGE_SIZE = 384
    # IoU threshold passed to torchvision.ops.nms.
    NMS_IOU_THRESHOLD = 0.1

    def __init__(self, detect_path):
        """Set up the input transform and load the detector.

        Args:
            detect_path: Path to the detector checkpoint read by
                ``load_detect``.
        """
        super().__init__()
        # Resize to the fixed detector input size, then convert to a tensor.
        self.transform = A.Compose([
            A.Resize(height=self.IMAGE_SIZE, width=self.IMAGE_SIZE, p=1.0),
            ToTensor(),
        ])
        self.load_detect(detect_path)

    def preprocess_detect(self, path_img):
        """Read an image from disk and return a float CPU batch of size one.

        Args:
            path_img: Path of the image to load.

        Returns:
            The transformed image with a leading batch dimension added.
        """
        image = cv2.imread(path_img)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transform(image=image)['image']
        return image.unsqueeze(0).cpu().float()

    def make_predictions(self, images, score_threshold=0.25):
        """Run the detector and keep detections above ``score_threshold``.

        Args:
            images: Batched image tensor; only ``images.shape[0]`` (the batch
                size) is inspected here.
            score_threshold: Detections whose score is not strictly greater
                than this value are dropped.

        Returns:
            A one-element list wrapping a list with one dict per image. Each
            dict has the keys ``'boxes'`` (columns 0-3 of the detector
            output), ``'scores'`` (column 4) and ``'labels'`` (column 5).
        """
        batch_size = images.shape[0]
        size = self.IMAGE_SIZE
        with torch.no_grad():
            det = self.net(
                images, {
                    'img_scale': torch.tensor([1.] * batch_size).float().cpu(),
                    'img_size': torch.tensor([size, size]).cpu()
                })

        predictions = []
        for i in range(batch_size):
            # Convert once per image instead of once per column.
            det_i = det[i].detach().cpu().numpy()
            scores = det_i[:, 4]
            indexes = np.where(scores > score_threshold)[0]
            predictions.append({
                'boxes': det_i[indexes, :4],
                'scores': scores[indexes],
                'labels': det_i[indexes, 5]
            })
        # The extra list level is part of the existing return contract.
        return [predictions]

    def load_detect(self, checkpoint_path):
        """Build the EfficientDet-D0 detector and load its checkpoint on CPU.

        Args:
            checkpoint_path: Path to a checkpoint whose ``'model_state_dict'``
                entry holds the network weights.
        """
        config = get_efficientdet_config('tf_efficientdet_d0')
        config.image_size = [self.IMAGE_SIZE, self.IMAGE_SIZE]
        net = EfficientDet(config, pretrained_backbone=False)
        net.reset_head(num_classes=1)

        checkpoint = torch.load(checkpoint_path,
                                map_location=torch.device('cpu'))
        net.load_state_dict(checkpoint['model_state_dict'])
        # Release the checkpoint before wrapping the network.
        del checkpoint
        gc.collect()

        self.net = DetBenchPredict(net)
        self.net.eval()
        self.net.cpu()

    def forward(self, path_test):
        """Detect regions in the image at ``path_test`` and crop them out.

        Args:
            path_test: Path of the image to analyse.

        Returns:
            A tuple ``(batch_recognize, boxes)``. ``batch_recognize`` is a
            list of RGB crops taken from the original-resolution image and
            ``boxes`` is a float32 array of x1, y1, x2, y2 coordinates in
            original-image pixels, one row per crop. Both are empty lists
            when nothing is detected.
        """
        images = self.preprocess_detect(path_test)
        s_t = time.time()
        predictions = self.make_predictions(images)
        print(time.time() - s_t)
        detection = predictions[0][0]

        # Test the number of rows: comparing an (N, 4) array with ``[]`` does
        # not give a usable boolean.
        if len(detection['boxes']) == 0:
            return [], []

        keep_idx = torchvision.ops.nms(
            torch.from_numpy(detection['boxes']),
            torch.from_numpy(detection['scores']),
            self.NMS_IOU_THRESHOLD).numpy()
        size = self.IMAGE_SIZE
        # Clip to the detector input extent (the bound used to be 511, which
        # belongs to a 512-pixel input, not this 384-pixel one).
        boxes = detection['boxes'][keep_idx].astype(np.float32).clip(
            min=0, max=size - 1)

        image_original = cv2.imread(path_test)
        image_original = cv2.cvtColor(image_original, cv2.COLOR_BGR2RGB)
        h, w = image_original.shape[0], image_original.shape[1]

        # Map boxes from detector-input coordinates to the original image.
        scale = np.array([w / size, h / size, w / size, h / size],
                         dtype=np.float32)
        boxes = boxes * scale

        batch_recognize = [
            image_original[int(y1):int(y2), int(x1):int(x2), :]
            for x1, y1, x2, y2 in boxes
        ]
        return batch_recognize, boxes