def predict_and_show(model: FasterRCNN,
                     im_pil: Image.Image,
                     im_tensor: Tensor,
                     score_th: float = 0.5) -> None:
    """Run `model` on one image and plot input, predictions and legend.

    Args:
        model: a torchvision FasterRCNN detector.
        im_pil: the input image as a PIL image (display only).
        im_tensor: the same image as a CHW float tensor fed to the model.
        score_th: minimum confidence score for a predicted box to be kept.
    """
    model.eval()

    pred = model([im_tensor])[0]

    # Keep boxes that survive NMS *and* clear the score threshold.
    # BUG FIX: the original used `[:0]` (an empty slice) instead of
    # `[:, 0]`, so the score filter selected no indices, and the sets were
    # unioned instead of intersected — the threshold was a complete no-op.
    keep_nms = set(nms(pred['boxes'], pred['scores'],
                       iou_threshold=0.5).tolist())
    keep_score = set(torch.nonzero(pred['scores'] > score_th)[:, 0].tolist())
    ii_select = sorted(keep_nms & keep_score)
    pred = {key: val[ii_select] for key, val in pred.items()}

    plt.figure(figsize=(14, 14))

    # 1) the raw input image
    plt.subplot(1, 3, 1)
    plt.imshow(im_pil)
    plt.axis('image')
    plt.axis('off')

    # 2) the image with surviving predicted boxes drawn on top
    plt.subplot(1, 3, 2)
    plt.imshow(draw_boxes(img=im_pil, annot=pred))
    plt.title('predict')
    plt.axis('off')

    # 3) legend mapping class names to box colours
    plt.subplot(1, 3, 3)
    show_legend(CLS_SELECT, COLORS)
    plt.show()
def test():
    """Smoke-test: build a FasterRCNN on a MobileNetV2 backbone and push
    two random images of different sizes through it on the GPU."""
    # Feature extractor only; FasterRCNN needs its output channel count.
    backbone = torchvision.models.mobilenet_v2(pretrained=False).features
    backbone.out_channels = 1280

    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512), ),
        aspect_ratios=((0.5, 1.0, 2.0), ),
    )

    detector = FasterRCNN(backbone,
                          num_classes=2,
                          rpn_anchor_generator=anchor_generator)
    detector.eval().cuda()

    # Two differently-sized inputs exercise the model's internal resizing.
    batch = [
        torch.rand(3, h, w).float().cuda()
        for h, w in ((300, 400), (500, 400))
    ]

    predictions = detector(batch)
    print(len(predictions))
    print(predictions[0].keys())
Beispiel #3
0
class PyTorchModel:
    """Faster R-CNN detector wrapper: loads weights from a local path (or,
    eventually, a byte stream) and turns raw predictions into `Label`s."""

    def __init__(self, f):
        """Build the detector, move it to the best device, load weights.

        Args:
            f: path to a saved state-dict file, or an `io.BytesIO` stream
               (stream loading is not implemented yet).
        """
        trainable_backbone_layers = 5
        backbone = resnet_fpn_backbone(
            'resnet50', True, trainable_layers=trainable_backbone_layers)
        # Very low NMS thresholds (0.01) aggressively suppress overlapping
        # boxes; 3840x2160 keeps full 4K frames unscaled.
        self.model = FasterRCNN(backbone,
                                num_classes=10,
                                max_size=3840,
                                min_size=2160,
                                rpn_pre_nms_top_n_train=2000,
                                rpn_pre_nms_top_n_test=2000,
                                rpn_post_nms_top_n_train=2000,
                                rpn_post_nms_top_n_test=2000,
                                box_detections_per_img=100,
                                rpn_nms_thresh=0.01,
                                box_nms_thresh=0.01)

        # Remember the device so predict() can move inputs onto it.
        self.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.model.to(self.device)
        if isinstance(f, str):  # local file
            print("Loading model from local file at {}".format(f))
            self.model.load_state_dict(torch.load(f, map_location=self.device))
        elif isinstance(f, io.BytesIO):  # stream
            print("Loading model from stream")
            pass

    def predict(self, image) -> List[Label]:
        """Run detection on a PIL image and return one Label per box."""
        frame = torchvision.transforms.ToTensor()(image)
        # BUG FIX: the input must live on the same device as the model;
        # previously it stayed on the CPU and inference failed whenever
        # CUDA was available. `[None, :, :]` adds the batch dimension.
        frame = frame[None, :, :].to(self.device)
        self.model.eval()
        with torch.no_grad():  # inference only — skip autograd bookkeeping
            prediction = self.model(frame)
        boxes = prediction[0]["boxes"]
        labels = prediction[0]["labels"]
        scores = prediction[0]["scores"]
        ret = []
        for i in range(len(boxes)):
            score: float = float(scores[i].item())
            xmin: int = int(boxes[i][0].item())
            ymin: int = int(boxes[i][1].item())
            xmax: int = int(boxes[i][2].item())
            ymax: int = int(boxes[i][3].item())
            # `classes` maps the stringified label id to display metadata.
            group: str = classes[str(labels[i].item())]["category"]
            color: str = classes[str(labels[i].item())]["color"]
            ret.append(Label(i, group, xmin, xmax, ymin, ymax, color, score))
        return ret
Beispiel #4
0
class LitFRCNN(pl.LightningModule):
    """Lightning wrapper around a fine-tunable Faster R-CNN detector."""

    def __init__(
        self,
        learning_rate: float = 0.0001,
        num_classes: int = 91,
        backbone: str = None,
        fpn: bool = True,
        pretrained_backbone: str = None,
        trainable_backbone_layers: int = 3,
        **kwargs,
    ):
        """
        Args:
            learning_rate: optimizer learning rate
            num_classes: number of detection classes (including background)
            backbone: backbone name; when None, a COCO-pretrained
                ResNet50-FPN detector is used
            fpn: whether the custom backbone gets an FPN on top
            pretrained_backbone (str): if "imagenet", backbone is
                pre-trained on ImageNet
            trainable_backbone_layers: trainable resnet layers, counted
                from the final block
        """
        super().__init__()
        self.learning_rate = learning_rate
        self.num_classes = num_classes
        self.backbone = backbone
        if backbone is None:
            # Default path: pretrained detector with its box head swapped
            # out so it predicts `num_classes` classes.
            detector = fasterrcnn_resnet50_fpn(
                pretrained=True,
                trainable_backbone_layers=trainable_backbone_layers,
            )
            head_in = detector.roi_heads.box_predictor.cls_score.in_features
            detector.roi_heads.box_predictor = FastRCNNPredictor(
                head_in, self.num_classes)
            self.model = detector
        else:
            # Custom backbone path: build the backbone, then the detector.
            self.model = FasterRCNN(
                create_fastercnn_backbone(
                    self.backbone,
                    fpn,
                    pretrained_backbone,
                    trainable_backbone_layers,
                    **kwargs,
                ),
                num_classes=num_classes,
                **kwargs,
            )

    def forward(self, x):
        # Inference only: eval mode makes the detector return predictions.
        self.model.eval()
        return self.model(x)

    def training_step(self, batch, batch_idx):
        images, targets = batch
        targets = [dict(t) for t in targets]

        # In train mode the detector consumes targets and yields a dict of
        # losses; their sum is the optimization objective.
        loss_dict = self.model(images, targets)
        total_loss = sum(loss_dict.values())
        return {"loss": total_loss, "log": loss_dict}

    def validation_step(self, batch, batch_idx):
        images, targets = batch
        # In eval mode the detector takes images only.
        preds = self.model(images)
        pairs = list(zip(targets, preds))
        iou = torch.stack([_evaluate_iou(t, p) for t, p in pairs]).mean()
        giou = torch.stack([_evaluate_giou(t, p) for t, p in pairs]).mean()
        return {"val_iou": iou, "val_giou": giou}

    def validation_epoch_end(self, outs):
        mean_iou = torch.stack([o["val_iou"] for o in outs]).mean()
        mean_giou = torch.stack([o["val_giou"] for o in outs]).mean()
        logs = {"val_iou": mean_iou, "val_giou": mean_giou}
        return {"avg_val_iou": mean_iou, "avg_val_giou": mean_giou,
                "log": logs}

    def configure_optimizers(self):
        # Plain SGD with momentum, as in the torchvision detection recipe.
        return torch.optim.SGD(
            self.model.parameters(),
            lr=self.learning_rate,
            momentum=0.9,
            weight_decay=0.005,
        )
Beispiel #5
0
import sys
import time

import torch
import torchvision
from PIL import Image
from torch.autograd import Variable
from torchvision import transforms
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.faster_rcnn import FasterRCNN

from class_labels import GROCERY_LIST_V0

# Build a Faster R-CNN on a ResNet50-FPN backbone (pretrained=False: the
# weights are loaded from the checkpoint below) with 64 output classes.
model = FasterRCNN(resnet_fpn_backbone("resnet50", False), num_classes=64)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The checkpoint is a dict; the state dict lives under the "model" key.
model_path = "synthdet_faster_rcnn.pth"
model_save = torch.load(model_path, map_location=device) # no CUDA on macOS   
model.load_state_dict(model_save["model"])
model.to(device)
model.eval()

# preprocess on test image (path comes from the command line)
image_path = sys.argv[1]
image = Image.open(image_path)
image_to_tensor = transforms.Compose([
    transforms.ToTensor()
])
tensor = image_to_tensor(image)

# inference
threshold = 0.5  # NOTE(review): presumably a score cutoff used below this chunk — not visible here

# Start timing
time_inference_start = time.time()