def start_evaluation(test_data_loader: DataLoader, model: FasterRCNN,
                     device: str, epoch: int, logger: Logger,
                     args: argparse.Namespace) -> Tensor:
    """
    Evaluate the model on the test set and return its average confidence.

    For every image the mean of the predicted ``scores`` is collected (images
    without any detection are skipped); the return value is the mean of these
    per-image means.

    :param test_data_loader: Data loader for test data; each batch is indexed
        as ``(images, targets)`` where every target has a ``'labels'`` entry
    :param model: Model that is being tested
    :param device: Device for the computation
    :param epoch: Current epoch (only used for logging)
    :param logger: Logger for logging handling
    :param args: Arguments; ``args.print_status`` is the logging interval in
        batches (a falsy value disables the per-batch status log)
    :return: 0-dim CPU tensor with the average score, ``0.0`` if the model
        produced no detection at all
    """
    logger.info(f'Start evaluation after {epoch} epochs')
    model.eval()
    scores = []
    for idx, result in enumerate(test_data_loader):
        images = [image.to(device) for image in result[0]]
        targets = result[1]
        with torch.no_grad():
            outputs = model(images)
        for element in outputs:
            if len(element['scores']) != 0:
                scores.append(torch.mean(element['scores']))
        # Log every `print_status`-th batch. The previous truthiness test
        # (`idx % print_status`) logged every batch *except* those and
        # divided by zero when the interval was 0.
        if outputs and args.print_status and idx % args.print_status == 0:
            last_idx = len(outputs) - 1
            element = outputs[last_idx]
            predicted_labels = element['labels']
            true_labels = targets[last_idx]['labels']
            logger.info(
                f'Scores {element["scores"]} \n'
                f'Labels predicted: {predicted_labels} Groundtruth labels: {true_labels}'
            )
    if not scores:
        # The mean of an empty tensor is NaN; report "no confidence" instead.
        logger.warning('No detections produced during evaluation')
        return torch.tensor(0.0)
    avg_score = torch.mean(torch.stack(scores).float().cpu())
    return avg_score
Exemple #2
0
def demo():
    """Assemble a Faster R-CNN on MobileNetV2 features and print its
    predictions for two random images."""
    # Only the convolutional trunk of the pre-trained classifier is used.
    # FasterRCNN reads `out_channels` from the backbone; MobileNetV2's
    # feature extractor ends with 1280 channels.
    feature_extractor = torchvision.models.mobilenet_v2(
        pretrained=True).features
    feature_extractor.out_channels = 1280

    # One feature map -> one inner tuple each: 5 sizes x 3 aspect ratios
    # gives 15 anchors per spatial location.
    rpn_anchors = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512), ),
        aspect_ratios=((0.5, 1.0, 2.0), ),
    )

    # RoI cropping operates on the single feature map, named '0', and
    # rescales every crop to 7x7.
    pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'],
        output_size=7,
        sampling_ratio=2,
    )

    detector = FasterRCNN(
        feature_extractor,
        num_classes=2,
        rpn_anchor_generator=rpn_anchors,
        box_roi_pool=pooler,
    )
    detector.eval()

    # Images in one batch may have different spatial sizes.
    batch = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    print(detector(batch))
Exemple #3
0
class SegmentationNetwork(nn.Module):
    """Thin ``nn.Module`` wrapper around a ``FasterRCNN`` detector.

    When no backbone is supplied, a randomly initialised ResNet-18 with its
    head stripped by ``remove_backbone_head`` is used.
    """

    def __init__(self,
                 backbone=None,
                 output_channels=2,
                 backbone_output_channels=512):
        super().__init__()

        if not backbone:
            backbone, _ = remove_backbone_head(resnet18(pretrained=False))

        # FasterRCNN needs the channel count of the backbone's output
        # (512 for the default ResNet-18 trunk).
        backbone.out_channels = backbone_output_channels
        self.segmentation_network = FasterRCNN(
            backbone, num_classes=output_channels)

    def forward(self, x, boxes=None):
        """Delegate to the wrapped detector, passing ``boxes`` as targets."""
        return self.segmentation_network(x, boxes)

    def infer(self, x):
        """Switch to eval mode, run the detector on ``x`` and return the
        result of ``convert_bounding_box_inference`` on its output."""
        self.eval()
        self.segmentation_network.eval()
        detections = self.segmentation_network(x)
        return convert_bounding_box_inference(detections)
class ResNet50_FasterRCNN:
    """Faster R-CNN object detector on a ResNet-50 FPN backbone.

    The wrapped network is stored in ``self.model``; every method simply
    forwards to it.
    """

    def __init__(self, pretrained=False):
        # 4 object classes plus the background class.
        n_classes = 4 + 1
        fpn_backbone = resnet_fpn_backbone('resnet50', pretrained=pretrained)
        anchors = AnchorGenerator(sizes=(40, 60, 150, 200, 250),
                                  aspect_ratios=(0.7, 1.0, 1.3))
        self.model = FRCNN(fpn_backbone,
                           num_classes=n_classes,
                           rpn_anchor_generator=anchors)

    def train(self):
        """Put the wrapped model into training mode."""
        self.model.train()

    def to(self, device):
        """Move the wrapped model to ``device``."""
        self.model.to(device)

    def eval(self):
        """Put the wrapped model into evaluation mode."""
        self.model.eval()

    def parameters(self):
        """Return the parameters of the wrapped model."""
        return self.model.parameters()

    def get_state_dict(self):
        """Return the state dict of the wrapped model."""
        return self.model.state_dict()

    def set_state_dict(self, state_dict):
        """Load ``state_dict`` into the wrapped model."""
        self.model.load_state_dict(state_dict)

    def fit_batch(self, images, target):
        """Call the model with images and targets and return its output."""
        return self.model(images, target)

    def predict_batch(self, images):
        """Call the model with images only and return its output."""
        return self.model(images)
Exemple #5
0
class TorchDetector:
    """
    Torch object detector

    Wraps a Faster R-CNN (ResNet-50 FPN backbone, 8 classes) whose weights
    are loaded from the checkpoint named in the configuration.
    """
    def __init__(self, config, logger):
        """
        :param config: mapping with the keys 'threshold', 'model' (checkpoint
            file holding a 'model_state_dict' entry) and 'device'
        :param logger: logger used for debug output
        """
        self._logger = logger
        self._threshold = config['threshold']
        modelfile = config['model']
        self._device = config['device']  # cpu, cuda, cuda:0
        backbone = resnet_fpn_backbone('resnet50', False)
        self._model = FasterRCNN(backbone, 8)  # 8 classes
        checkpoint = torch.load(modelfile, map_location=self._device)
        self._model.load_state_dict(checkpoint['model_state_dict'])
        device = torch.device(self._device)
        self._model.to(device)
        self._model.eval()

    def stop(self):
        """
        Destruction
        """

    def detectObjects(self, img) -> List[e.DetectedObject]:
        """
        Implementation of detector interface

        Resizes the image to 1600x800, normalizes it, runs the model and
        hands the (label, score, box) triples to
        ``ObjectDetector.getDetectedObjectsCollection`` together with the
        factors needed to scale boxes back to the original image size.

        :param img: image array whose ``shape`` is (height, width, channels)
        """
        wsize = 1600
        hsize = 800
        _pretransform = A.Compose([
            A.Resize(hsize, wsize),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ])

        image_tensor = _pretransform(image=img)['image']

        tstart = time.time()

        # Inference only: without no_grad() every call would record an
        # autograd graph that is thrown away immediately. Calling the module
        # (instead of .forward) also keeps registered hooks working.
        with torch.no_grad():
            outputs = self._model(
                image_tensor.unsqueeze(0).float().to(device=self._device))

        detection = outputs[0]
        classes = detection['labels'].detach().cpu().numpy()
        scores = detection['scores'].detach().cpu().numpy()
        boxes = detection['boxes'].detach().cpu().numpy()

        self._logger.debug(
            f'Torch model inferring time: {time.time() - tstart}')

        result = zip(classes, scores, boxes)

        # Factors mapping coordinates in the resized image back to the input.
        h, w, _ = img.shape
        wscale = w / wsize
        hscale = h / hsize

        return ObjectDetector.getDetectedObjectsCollection(
            result, hscale, wscale, self._threshold, False)
Exemple #6
0
class Detect:
    """Faster R-CNN (VGG-16 features) detector that returns, for each known
    label, the box with the highest score."""

    # Label id predicted by the model -> key used in the result dict.
    # Ids 7 ('secrete') and 8 ('sign') were disabled in the original code.
    _LABEL_NAMES = {
        1: 'send',
        2: 'number',
        3: 'date',
        4: 'quote',
        5: 'header',
        6: 'motto',
    }

    def __init__(self):
        super().__init__()
        backbone = torchvision.models.vgg16(pretrained=False).features
        backbone.out_channels = 512
        anchor_sizes = ((8, 16, 32, 64, 128, 256, 512), )
        aspect_ratios = ((1 / 2, 1 / 3, 1 / 4, 1 / 5, 1 / 6, 1 / math.sqrt(2),
                          1, 2, math.sqrt(2), 3, 4, 5, 6, 7, 8), )
        anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                           aspect_ratios=aspect_ratios)
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3', '4'],
            output_size=7,
            sampling_ratio=2)
        self.model = FasterRCNN(backbone,
                                num_classes=7,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)
        self.device = torch.device('cpu')
        # map_location keeps a checkpoint saved on a GPU loadable on a
        # CPU-only machine.
        self.model.load_state_dict(
            torch.load('2.pth', map_location=self.device))
        self.model.to(self.device)
        self.model.eval()

    def forward(self, img):
        """Detect objects in one image.

        :param img: array-like in (height, width, channels) layout with
            values in 0..255
        :return: dict mapping a label name ('send', 'number', 'date',
            'quote', 'header', 'motto') to the box tensor with the highest
            score for that label; labels without a detection are absent
        """
        img = torch.as_tensor(img, dtype=torch.float32) / 255
        img = img.permute((2, 0, 1))
        with torch.no_grad():
            # The original referenced an undefined global `model` here.
            output = self.model([img.to(self.device)])
        detection = output[0]

        best_score = {}
        result = {}
        for box, label, score in zip(detection['boxes'], detection['labels'],
                                     detection['scores']):
            name = self._LABEL_NAMES.get(int(label))
            if name is None:
                continue
            score = float(score)
            # Missing entries compare as -inf so the first detection of a
            # label always wins (the original raised KeyError here).
            if score > best_score.get(name, float('-inf')):
                best_score[name] = score
                result[name] = box
        return result
Exemple #7
0
# Inference configuration: which checkpoint to load and how to batch images.
EPOCH = 250
CLASSES = 3
DEVICE = torch.device("cuda")
BATCH_SIZE = 10

# Faster R-CNN on VGG-19 features (512 output channels), single feature map.
anchor_generator = AnchorGenerator(sizes=((32, 64), ),
                                   aspect_ratios=((0.6, 1.0, 1.6), ))
backbone = torchvision.models.vgg19(pretrained=False).features
backbone.out_channels = 512
model = FasterRCNN(backbone,
                   num_classes=CLASSES,
                   rpn_anchor_generator=anchor_generator)
model.load_state_dict(
    torch.load('models_new/' + 'model_' + str(EPOCH) + '.pth'))
model.to(DEVICE)
model.eval()
start_time = time.time()
ear_count = 0
# Walk <data_path><dataset_name><type><ear><CT>/ and collect the JPEG
# slices of every CT directory.
for T in types:
    for E in ears:
        CTs = os.listdir(data_path + dataset_name + T + E)
        for CT in CTs:
            print('current path:{}'.format(data_path + dataset_name + T + E +
                                           CT))
            ear_count += 1
            img_names = glob.glob(data_path + dataset_name + T + E + CT +
                                  '/*.jpg')
            # Sort in place by file name. The original called sorted() and
            # discarded the result, leaving the list in glob's arbitrary
            # order.
            img_names.sort(key=lambda x: x.split('\\')[-1])
            with torch.no_grad():
                start, end = 0, BATCH_SIZE
                path = data_path + result_name + T + E + CT
Exemple #8
0
class FasterRCNNFood:
    """Faster R-CNN detector bundled with its optimizer, LR scheduler and
    checkpoint handling.

    The network is available as ``self.model``; training uses Adam with a
    step learning-rate schedule.
    """

    def __init__(self,
                 backbone_name: str,
                 pretrained: bool = True,
                 finetune: bool = True,
                 num_classes: int = 2):
        """
        Args:
            backbone_name (str): name handed to ``build_backbone``
            pretrained (bool = True): forwarded to ``build_backbone``
            finetune (bool = True): forwarded to ``build_backbone``
            num_classes (int = 2): number of classes of the detector
        """
        self.__pretrained = pretrained
        self.__num_classes = num_classes
        self.__model_name = backbone_name
        backbone = build_backbone(backbone_name, pretrained, finetune)

        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                           aspect_ratios=((0.5, 1.0, 2.0), ))

        # NOTE(review): newer torchvision releases name feature maps with
        # strings ('0'); confirm which release this targets before changing.
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                        output_size=7,
                                                        sampling_ratio=2)

        self.model = FasterRCNN(backbone=backbone,
                                num_classes=num_classes,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)

        # Only parameters that are not frozen are optimized.
        self.params = [p for p in self.model.parameters() if p.requires_grad]
        self.optimizer = torch.optim.Adam(params=self.params,
                                          lr=0.005,
                                          weight_decay=0.0005)

        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer=self.optimizer, step_size=3, gamma=0.1)

    def train(self,
              data_loader: DataLoader,
              data_loader_test: DataLoader,
              num_epochs: int = 10,
              use_cuda: bool = True,
              epoch_save_ckpt: Union[int, list] = None,
              dir: str = None):
        """
        Method to train FasterRCNNFood model.
        Args:
            data_loader (torch.utils.data.DataLoader): data loader to train model on
            data_loader_test (torch.utils.data.DataLoader): data loader to evaluate model on
            num_epochs (int = 10): number of epoch to train model
            use_cuda (bool = True): use cuda or not
            epoch_save_ckpt (list or int): Epoch(s) at which you want to save the model.
                If -1 save only last epoch. If None no checkpoint is saved.
            dir (str = "models/): Directory where model are saved under the name "{model_name}_{date}_ep{epoch}.pth"
        """
        # Normalize the checkpoint spec to a set of epochs. The original
        # evaluated `epoch in epoch_save_ckpt` directly and raised TypeError
        # for None (the default) and for any plain int other than -1.
        if epoch_save_ckpt is None:
            save_epochs = set()
        elif isinstance(epoch_save_ckpt, int):
            if epoch_save_ckpt == -1:
                save_epochs = {num_epochs - 1}
            else:
                save_epochs = {epoch_save_ckpt}
        else:
            save_epochs = set(epoch_save_ckpt)

        out_dir = Path(dir if dir else "models")
        out_dir.mkdir(parents=True, exist_ok=True)
        # choose device
        if use_cuda and torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")

        self.model.to(device)
        writer = SummaryWriter()

        for epoch in range(num_epochs):
            # train for one epoch, printing every 50 iterations
            train_one_epoch(self.model,
                            self.optimizer,
                            data_loader,
                            device,
                            epoch,
                            print_freq=50,
                            writer=writer)
            # update the learning rate
            self.lr_scheduler.step()
            # evaluate on the test dataset
            evaluate(self.model,
                     data_loader_test,
                     device=device,
                     writer=writer,
                     epoch=epoch)
            # save checkpoint
            if epoch in save_epochs:
                self.save_checkpoint(out_dir.as_posix(), epoch)
        writer.close()
        print("That's it!")

    def save_checkpoint(self, dir: str, epoch: int):
        """
        Save a model checkpoint at a given epoch.
        Args:
            dir: dir folder to save the .pth file
            epoch: epoch the model is
        """
        state = {
            'epoch': epoch + 1,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'num_classes': self.__num_classes,
            'pretrained': self.__pretrained,
            "model_name": self.__model_name
        }
        now = datetime.now()
        filename = "{model_name}_{date}_ep{epoch}.pth".format(
            model_name=self.__model_name,
            date=now.strftime("%b%d_%H-%M"),
            epoch=epoch)
        target = Path(dir) / filename
        torch.save(state, target)
        # The original built this message but never printed it.
        print("Checkpoint saved : {}".format(target))

    def predict(self, dataset, idx):
        """
        Run the model on a single dataset item on the CPU.
        Args:
            dataset: indexable dataset returning (image, target) pairs
            idx: index of the item to predict
        Returns:
            the image (on CPU) and the model's prediction for it
        """
        img, _ = dataset[idx]
        # Tensor.to() is not in-place; the result has to be kept.
        img = img.to("cpu")
        self.model.eval()
        self.model.to("cpu")
        with torch.no_grad():
            pred = self.model([img])
        return img, pred[0]

    @staticmethod
    def load_checkpoint(filename: str,
                        cuda: bool = True) -> ("FasterRCNNFood", int):
        """
        Load a model checkpoint to continue training.
        Args:
            filename (str): filename/path of the checkpoint.pth
            cuda (bool = True): use cuda

        Returns:
            (FasterRCNNFood) model
            (int) number of epoch + 1 the model was trained with
            None is returned (after printing a message) when the file does
            not exist.
        """
        device = torch.device("cuda") if (
            cuda and torch.cuda.is_available()) else torch.device("cpu")
        if not Path(filename).exists():
            print("=> no checkpoint found at '{}'".format(filename))
            return None

        print("=> loading checkpoint '{}'".format(filename))
        checkpoint = torch.load(filename, map_location=device)
        # Load params
        pretrained = checkpoint['pretrained']
        num_classes = checkpoint["num_classes"]
        start_epoch = checkpoint['epoch']
        model_name = checkpoint['model_name']
        # Build model key/architecture. num_classes must be passed by
        # keyword: the third positional parameter is `finetune`.
        model = FasterRCNNFood(model_name,
                               pretrained,
                               num_classes=num_classes)
        # Update model and optimizer
        model.model.load_state_dict(checkpoint['state_dict'])
        model.optimizer.load_state_dict(checkpoint['optimizer'])

        model.model = model.model.to(device)
        # now individually transfer the optimizer parts...
        for state in model.optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(device)

        print("=> loaded checkpoint '{}' (epoch {})".format(
            filename, checkpoint['epoch']))
        return model, start_epoch

    @staticmethod
    def load_for_inference(filename: str,
                           cuda: bool = True) -> "FasterRCNNFood":
        """
        Load a model checkpoint to make inference.
        Args:
            filename (str): filename/path of the checkpoint.pth
            cuda (bool = True): use cuda
        Returns:
            (FasterRCNNFood) model in eval mode, or None (after printing a
            message) when the file does not exist.
        """
        device = torch.device("cuda") if (
            cuda and torch.cuda.is_available()) else torch.device("cpu")
        if not Path(filename).exists():
            print("=> no checkpoint found at '{}'".format(filename))
            return None

        print("=> loading checkpoint '{}'".format(filename))
        checkpoint = torch.load(filename, map_location=device)
        # Load params
        pretrained = checkpoint['pretrained']
        num_classes = checkpoint["num_classes"]
        model_name = checkpoint['model_name']
        # Build model key/architecture. num_classes must be passed by
        # keyword: the third positional parameter is `finetune`.
        model = FasterRCNNFood(model_name,
                               pretrained,
                               num_classes=num_classes)
        # Update model weights
        model.model.load_state_dict(checkpoint['state_dict'])
        model.model = model.model.to(device)
        model.model = model.model.eval()

        print("=> loaded checkpoint '{}'".format(filename))
        return model
Exemple #9
0
                                                       num_classes)

    return model

# Startup notice (the message is in Chinese). In English: "Note: download
# engine.py, utils.py, transforms.py, coco_eval.py and coco_utils.py from
# https://github.com/pytorch/vision/tree/master/references/detection and copy
# them into this directory."
print("注意:从https://github.com/pytorch/vision/tree/master/references/detection下载:engine.py, utils.py, transforms.py,coco_eval.py,coco_utils.py拷贝到本目录")

# Helper functions for data augmentation / transformation
def get_transform(train):
    """Compose the image transforms: tensor conversion always, plus a random
    horizontal flip (probability 0.5) when ``train`` is truthy."""
    steps = [T.ToTensor()]
    if train:
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)

# Test the forward() method (optional)
'''
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
data_loader = torch.utils.data.DataLoader(
                                        dataset, batch_size=2, shuffle=True, num_workers=4,
                                        collate_fn=utils.collate_fn)
# For Training
images,targets = next(iter(data_loader))
images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]
output = model(images,targets)   # Returns losses and detections
# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)           # Returns predictions
'''
Exemple #10
0
class FasterRCNNMODEL:
    """Helper that builds a Faster R-CNN detector and runs training and
    validation loops on it."""
    #TODO: Later on enable passing params params

    def __init__(self, model_params=None):
        """
        :param model_params: reserved for user defined model parameters
            (currently stored but not used)
        """
        self.params = model_params
        self.model = None
        self.optimizer = None
        self.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')

    def set_backbone(self, backbone):
        """
        backbone is a string containing the backbone we want to use in the model. add more options

        :param backbone: backbone name; 'mobilenet_v2' and 'resnet50' are
            supported (matched case-insensitively as substrings)
        :raises NotImplementedError: for VGG backbones (not implemented yet)
        :raises ValueError: for any other unknown backbone name
        """
        name = backbone.lower()
        if 'vgg' in name:
            # The original branch was a placeholder string that silently left
            # self.backbone unset; fail loudly instead.
            raise NotImplementedError(
                'VGG backbones are not supported yet: {}'.format(backbone))
        elif 'mobilenet_v2' in name:
            self.backbone = torchvision.models.mobilenet_v2(
                pretrained=True).features
            self.backbone.out_channels = 1280
        elif 'resnet50' in name:
            # torchvision's ResNet has no `.features` attribute. Keep all
            # layers except the final average pool and fc layer; the last
            # residual stage outputs 2048 channels.
            resnet = torchvision.models.resnet50(pretrained=True)
            self.backbone = torch.nn.Sequential(
                *list(resnet.children())[:-2])
            self.backbone.out_channels = 2048
        else:
            raise ValueError('Unknown backbone: {}'.format(backbone))

    def set_model(self):
        """
        Set model and determine configuration
        :return: None, generate self.model to be used for training and testing
        """
        # Default values: box_score_thresh = 0.05, box_nms_thresh = 0.5
        kwargs = {
            'box_score_thresh': 0.3,
            'box_nms_thresh': 0.3,
            'box_detections_per_img': 6
        }
        num_classes = 7
        self.model = FasterRCNN(self.backbone,
                                num_classes=num_classes,
                                **kwargs)

        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)

        # Allow Multiple GPUs:
        # if torch.cuda.device_count() > 1:
        #     self.model = nn.DataParallel(self.model)

        self.model = self.model.to(self.device)

        # TODO: Enable user defined model params. Until then every trainable
        # parameter is optimized; the original left `params` unbound (and
        # crashed) whenever model_params was given.
        params = [p for p in self.model.parameters() if p.requires_grad]

        self.optimizer = torch.optim.SGD(params, lr=0.01)

    def train_model(self, train_loader, num_epochs):
        """
        Train (only!) of the model
        :param train_loader: DataLoader object
        :param num_epochs: int. Number of epochs to train the model
        :return: None,
        """
        self.model.train()  # Set to training mode
        for epoch in range(num_epochs):
            last_loss = None
            for images, targets in train_loader:
                images = [image.to(self.device) for image in images]
                targets = [{k: v.to(self.device)
                            for k, v in t.items()} for t in targets]

                # Zero Gradients
                self.optimizer.zero_grad()

                # In training mode the model returns a dict of losses
                loss_dict = self.model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                losses.backward()

                # Update weights
                self.optimizer.step()
                last_loss = losses.item()

            # Loss of the last batch of the epoch; nothing to report when
            # the loader is empty.
            if last_loss is not None:
                print('Train Loss = {:.4f}'.format(last_loss))

    def train_eval_model(self, train_loader, val_loader, num_epochs):
        """
        Train model and evaluate performance after each epoch
        :param train_loader: DataLoader object. Training images and targets
        :param val_loader: DataLoader object. validation images and targets;
            yields (image names, images, targets)
        :param num_epochs: int. Number of epochs for training and validation
        :return:
        """
        # For evaluation
        imgs_name_list = []
        bbox_list = []
        labels_list = []

        for epoch in range(num_epochs):
            is_last_epoch = epoch == num_epochs - 1
            train_loss = 0
            val_loss = 0
            self.model.train()  # Set to training mode
            with torch.set_grad_enabled(True):
                for images, targets in train_loader:
                    # Pass data to GPU
                    images = [image.to(self.device) for image in images]
                    targets = [{k: v.to(self.device)
                                for k, v in t.items()} for t in targets]

                    # Zero Gradients
                    self.optimizer.zero_grad()

                    # Calculate Loss
                    loss_dict = self.model(images, targets)
                    losses = sum(loss for loss in loss_dict.values())
                    train_loss += losses.item() * len(images)

                    # Backward Prop & Update weights
                    losses.backward()
                    self.optimizer.step()

                print('Train Loss = {:.4f}'.format(train_loss /
                                                   len(train_loader.dataset)))

            # TODO: Calculate Dice and IoU loss for it

            with torch.no_grad():
                for idx, (imgs_name, images, targets) in enumerate(val_loader):
                    # The loss dict is only produced in training mode, so the
                    # validation loss is computed in train mode without
                    # gradients.
                    self.model.train()
                    images = [image.to(self.device) for image in images]
                    targets = [{k: v.to(self.device)
                                for k, v in t.items()} for t in targets]

                    loss_dict = self.model(images, targets)
                    losses = sum(loss for loss in loss_dict.values())
                    val_loss += losses.item() * len(images)

                    if is_last_epoch:
                        self.model.eval()  # Set model to evaluate performance
                        predictions = self.model(images)

                        # Think of moving all this into gen_out_file - Looks nicer
                        imgs_name_list.extend(imgs_name)
                        bbox_list.extend([
                            pred['boxes'].int().cpu().tolist()
                            for pred in predictions
                        ])
                        labels_list.extend([
                            pred['labels'].int().cpu().tolist()
                            for pred in predictions
                        ])

                        # Optional - SEE the performance on the second last
                        # batch (reuses the predictions computed above
                        # instead of running the model a second time)
                        if idx == (len(val_loader) - 2):
                            MiscUtils.view(images,
                                           predictions,
                                           k=len(images),
                                           model_type='faster_rcnn')

                DataUtils.gen_out_file('output_file.txt', imgs_name_list,
                                       bbox_list, labels_list)
                print('Validation Loss = {:.4f}'.format(
                    val_loss / len(val_loader.dataset)))
Exemple #11
0
class glimpse_network(nn.Module):
    """
    Glimpse network: fuses "what" was seen with "where" it was seen into
    a single glimpse feature vector `g_t`.

    - "what": the glimpse `phi` produced by the retina, passed through a
      ResNet-50 feature extractor.
    - "where": the location `l_t_prev` the glimpse was taken from.

    Each input goes through its own fc layer with ReLU, then through a
    second fc layer; the two results are summed and rectified:

        `g_t = relu( fc( fc(l) ) + fc( fc(phi) ) )`

    Args
    ----
    - h_g: hidden layer size of the fc layer for `phi`.
    - h_l: hidden layer size of the fc layer for `l`.
    - g: size of the square patches in the glimpses extracted
      by the retina.
    - k: number of patches to extract per glimpse.
    - s: scaling factor that controls the size of successive patches.
    - c: number of channels in each image.
    - x: the minibatch of images, handed to the retina unchanged.
    - l_t_prev: tensor with the glimpse coordinates for the previous
      timestep `t-1`; flattened to (B, -1) before use.
    - frame_index: forwarded to the retina.

    Returns
    -------
    - g_t: a 2D tensor of shape (B, h_g + h_l). The glimpse
      representation for the current timestep `t`.
    """
    def __init__(self, h_g, h_l, g, k, s, c):
        super().__init__()
        self.retina = retina(g, k, s)

        # ResNet-50 without its final fc layer serves as feature extractor.
        trunk_layers = list(resnet50(pretrained=True).children())[:-1]
        self.feature_extractor = nn.Sequential(*trunk_layers)

        # Detection model; built and set to eval here, not used in forward().
        det_backbone = torchvision.models.mobilenet_v2(
            pretrained=True).features
        det_backbone.out_channels = 1280
        det_anchors = AnchorGenerator(
            sizes=((32, 64, 128, 256, 512), ),
            aspect_ratios=((0.5, 1.0, 2.0), ),
        )
        det_pooler = torchvision.ops.MultiScaleRoIAlign(
            featmap_names=[0],
            output_size=7,
            sampling_ratio=2,
        )
        self.detection_model = FasterRCNN(
            det_backbone,
            num_classes=2,
            rpn_anchor_generator=det_anchors,
            box_roi_pool=det_pooler,
        )
        self.detection_model.eval()

        # glimpse layer: the raw glimpse size is computed but superseded by
        # the width of the feature extractor output (2048).
        raw_glimpse_size = k * g * g * c
        glimpse_in = 2048
        self.fc1 = nn.Linear(glimpse_in, h_g)

        # location layer: two coordinates per location
        location_in = 2
        self.fc2 = nn.Linear(location_in, h_l)

        # projections of both branches into the shared output space
        fused_size = h_g + h_l
        self.fc3 = nn.Linear(h_g, fused_size)
        self.fc4 = nn.Linear(h_l, fused_size)

    def forward(self, x, l_t_prev, frame_index):
        # extract the glimpse from the images at the previous location
        glimpse = self.retina.foveate(x, l_t_prev, frame_index)

        # the feature extractor is trained along (its output is not detached)
        features = self.feature_extractor(glimpse)
        features = features.view(features.size(0), -1)

        # flatten the location vector
        location = l_t_prev.view(l_t_prev.size(0), -1)

        # first fc layer of each branch
        what_hidden = F.relu(self.fc1(features))
        where_hidden = F.relu(self.fc2(location))

        # second fc layer of each branch, then fuse by rectified sum
        return F.relu(self.fc3(what_hidden) + self.fc4(where_hidden))
Exemple #12
0
def get_result_from_model(test_img, thresh,
                          image_folder='C:/Users/skyho/Desktop/test_image_folder/',
                          model_path='C:/Users/skyho/Desktop/final_model.pth'):
    """Run the detector on the first image of ``image_folder`` and save an
    annotated copy.

    A detection is drawn when all of the following hold:

    * it is the first box, or its IoU with the first box is below 0.6;
    * its score is at least ``thresh``;
    * it is not contained (with a 3 pixel tolerance) in another qualifying
      box of the same label.

    :param test_img: path used to derive the output file name
        (``<test_img without extension>_result.png``)
    :param thresh: minimum score for a detection to be drawn
    :param image_folder: ``ImageFolder`` root; only its first image is used
    :param model_path: checkpoint file holding a ``'model_state_dict'`` entry
    :return: path of the written result image
    :raises IOError: when the checkpoint cannot be read
    """
    import os  # local import: only needed to split the output file name

    test_data = torchvision.datasets.ImageFolder(image_folder,
                                                 loader=plt.imread,
                                                 transform=transforms.ToTensor())

    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 200),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)
    rcnn_v1 = FasterRCNN(backbone,
                         num_classes=32,
                         rpn_anchor_generator=anchor_generator,
                         box_roi_pool=roi_pooler)

    try:
        checkpoint = torch.load(model_path, map_location='cpu')
    except IOError:
        # Without weights the model cannot give meaningful detections (and
        # would still be in training mode), so report and propagate.
        print("Can't find saved model~")
        raise
    rcnn_v1.load_state_dict(checkpoint['model_state_dict'])
    rcnn_v1.eval()

    with torch.no_grad():
        detection = rcnn_v1([test_data[0][0]])[0]

    image = cv2.imread(test_data.imgs[0][0])

    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 1
    fontColor = (255, 255, 255)
    lineType = 2

    # Convert the tensors once instead of inside the nested loops.
    boxes = detection['boxes']
    scores = detection['scores'].tolist()
    labels = detection['labels'].tolist()
    corners = [[int(v) for v in box] for box in boxes.tolist()]

    # With no detections there is nothing to draw (the original raised
    # IndexError on boxes[0]); the unannotated image is still written.
    if corners:
        first_box = boxes[0].unsqueeze(0)
        overlaps = [
            jaccard(first_box, box.unsqueeze(0)).tolist()[0][0]
            for box in boxes
        ]
        # Boxes that may veto a smaller box nested inside them.
        rivals = [
            idx for idx in range(len(corners))
            if scores[idx] >= thresh and overlaps[idx] < 0.6
        ]

        for box_id, (x1, y1, x2, y2) in enumerate(corners):
            if box_id != 0 and not overlaps[box_id] < 0.6:
                continue
            if not scores[box_id] >= thresh:
                continue

            nested = False
            for other in rivals:
                if other == box_id or labels[other] != labels[box_id]:
                    continue
                o_x1, o_y1, o_x2, o_y2 = corners[other]
                if (x1 >= o_x1 - 3 and y1 >= o_y1 - 3
                        and x2 <= o_x2 + 3 and y2 <= o_y2 + 3):
                    nested = True
                    break

            # Draw only after ALL rivals were checked. The original drew
            # inside the scan loop, i.e. before a later containing box could
            # veto, and redrew the same box on every iteration.
            if not nested:
                image = cv2.rectangle(image, (x1, y1), (x2, y2),
                                      (0, 255, 0), 3)
                cv2.putText(image, classes[labels[box_id]],
                            (x1, y2),
                            font,
                            fontScale,
                            fontColor,
                            lineType)

    # splitext handles extensions of any length (the original cut a fixed
    # four characters).
    detection_result = os.path.splitext(test_img)[0] + '_result.png'
    cv2.imwrite(detection_result, image)
    return detection_result
Exemple #13
0
def main():
    """Run Faster R-CNN inference on the test set and write predictions to CSV.

    Command-line options (see the parser below for defaults):
        -d / --data_path   directory holding the test data
        -m / --model_path  checkpoint file with the trained weights
        -o / --output      name of the CSV file to write

    For each test image the raw detections are filtered with ``nms`` using
    ``NMS_THR``. Detections scoring below ``REJECT_THR`` are dropped; those
    scoring below ``REJECT_THR_KNOWN`` are written with label ``-1``; the
    rest are written with ``label - 1``.

    Each CSV row is: file name, the four box values returned by ``nms``,
    the label and the score.
    """
    parser = argparse.ArgumentParser(
        description='VISUM 2019 competition - baseline inference script',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-d',
                        '--data_path',
                        default='/home/master/dataset/test',
                        metavar='',
                        help='test data directory path')
    parser.add_argument('-m',
                        '--model_path',
                        default='./model.pth',
                        metavar='',
                        help='model file')
    parser.add_argument('-o',
                        '--output',
                        default='./predictions.csv',
                        metavar='',
                        help='output CSV file name')
    args = vars(parser.parse_args())

    NMS_THR = 0.1  # non-maximum suppression threshold passed to nms()
    # A kept detection scoring below this is written with label -1
    # (naive "unknown class" approach).
    REJECT_THR_KNOWN = 0.9
    # A detection scoring below this is not written at all.
    REJECT_THR = 0.17

    def get_transform(train):
        """Build the image transform pipeline; adds a random flip if *train*."""
        transforms = []
        # converts the image, a PIL image, into a PyTorch Tensor
        transforms.append(T.ToTensor())
        if train:
            # during training, randomly flip the training images
            # and ground-truth for data augmentation
            transforms.append(T.RandomHorizontalFlip(0.5))
        return T.Compose(transforms)

    # Load datasets
    test_data = VisumData(args['data_path'],
                          'rgb',
                          mode='test',
                          transforms=get_transform(False))

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Rebuild the architecture, then restore the trained weights into it.
    backbone = torchvision.models.detection.backbone_utils.resnet_fpn_backbone(
        'resnet50', True)
    backbone.out_channels = 256
    anchor_generator = AnchorGenerator(sizes=(8, 16, 32, 64, 128),
                                       aspect_ratios=(0.5, 1.0, 2.0))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)
    # put the pieces together inside a FasterRCNN model
    model = FasterRCNN(backbone,
                       num_classes=11,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    # load_state_dict() expects a mapping of tensors, not a file path, so the
    # checkpoint has to be read from disk first. map_location lets a
    # checkpoint saved on GPU be loaded on a CPU-only machine.
    checkpoint = torch.load(args['model_path'], map_location=device)
    if isinstance(checkpoint, torch.nn.Module):
        # The file may contain a whole pickled model rather than a state
        # dict; take the weights out of it in that case.
        checkpoint = checkpoint.state_dict()
    model.load_state_dict(checkpoint)

    # The inputs are moved to `device` below, so the model must live there too.
    model.to(device)
    # Evaluation mode only needs to be set once, not on every batch.
    model.eval()

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=4,
                                              collate_fn=utils.collate_fn)

    predictions = []
    for imgs, _, file_names in test_loader:
        with torch.no_grad():
            prediction = model([img.to(device) for img in imgs])

        # batch_size is 1, so only the first (and only) result is used.
        boxes = np.array(prediction[0]['boxes'].cpu())
        labels = list(prediction[0]['labels'].cpu())
        scores = list(prediction[0]['scores'].cpu())

        nms_boxes, nms_labels, nms_scores = nms(boxes, labels, scores, NMS_THR)

        for bb in range(len(nms_labels)):
            if nms_scores[bb] < REJECT_THR:
                continue
            # NOTE: file names are presumably strings, so concatenating them
            # with numbers yields an array of strings; the values are cast
            # back to numbers when the CSV is written.
            pred = np.concatenate(
                (list(file_names), list(nms_boxes[bb, :])))  # bounding box
            if nms_scores[bb] >= REJECT_THR_KNOWN:
                pred = np.concatenate(
                    (pred, [nms_labels[bb] - 1]))  # object label
            else:
                pred = np.concatenate((pred, [-1]))  # Rejects to classify
            pred = np.concatenate(
                (pred, [nms_scores[bb]]))  # BEST CLASS SCORE
            predictions.append(list(pred))

    with open(args['output'], 'w') as f:
        for pred in predictions:
            f.write("{},{},{},{},{},{},{}\n".format(pred[0], float(pred[1]),
                                                    float(pred[2]),
                                                    float(pred[3]),
                                                    float(pred[4]),
                                                    int(pred[5]),
                                                    float(pred[6])))
                          max_size=cfg.max_size)

    model_ft.load_state_dict(torch.load(cfg.model_name).state_dict())
    model_ft.to(device)

    with open(cfg.json_name, 'w', encoding='utf-8') as json_f:
        for file in allFileList:
            if os.path.isfile(cfg.test_path + file):
                print(file)
                output_dict = {}
                path = test_path + file
                img = Image.open(path).convert('RGB')
                img = data_transforms(img)
                img = img.unsqueeze(0)
                with torch.no_grad():
                    model_ft.eval()
                    img = img.to(device)
                    output = model_ft(img)

                    bbox = output[0]["boxes"].cpu().numpy()
                    label = output[0]["labels"].cpu().numpy()
                    score = output[0]["scores"].cpu().numpy()
                    bbox = bbox[score > score_threshold].astype('int')
                    label = label[score > score_threshold]
                    score = score[score > score_threshold]

                    # remove redundant bounding box
                    bbox, label, score = process_bbox_iou(
                        bbox, label, score, cfg.score_threshold,
                        cfg.IoU_threshold)