def predict_and_show(model: FasterRCNN, im_pil: Image.Image, im_tensor: Tensor, score_th: float = 0.5) -> None:
    """Run the detector on one image and plot the filtered detections.

    The figure has three panels: the input image, the image with the kept
    detections drawn by ``draw_boxes``, and the class legend drawn by
    ``show_legend``.

    Args:
        model: Detector; it is switched to eval mode before inference.
        im_pil: Image shown in the first panel and passed to ``draw_boxes``.
        im_tensor: Tensor fed to the model as a one-element batch
            (presumably the tensor form of ``im_pil`` -- confirm at call site).
        score_th: Detections whose score is not strictly greater than this
            value are discarded.
    """
    model.eval()
    # Inference only: skip autograd bookkeeping so the outputs carry no graph.
    with torch.no_grad():
        pred = model([im_tensor])[0]

    # filter predicted boxes by scores and nms
    # The previous code sliced the score-filtered indices with `[:0]`, which is
    # always empty, so `score_th` was silently ignored. Keep only the boxes
    # that survive NMS *and* exceed the score threshold.
    keep = nms(pred['boxes'], pred['scores'], iou_threshold=0.5)
    keep = keep[pred['scores'][keep] > score_th]
    pred = {key: val[keep] for key, val in pred.items()}

    plt.figure(figsize=(14, 14))

    plt.subplot(1, 3, 1)
    plt.imshow(im_pil)
    plt.axis('image')
    plt.axis('off')

    plt.subplot(1, 3, 2)
    plt.imshow(draw_boxes(img=im_pil, annot=pred))
    plt.title('predict')
    plt.axis('off')

    plt.subplot(1, 3, 3)
    show_legend(CLS_SELECT, COLORS)

    plt.show()
def test():
    """Smoke-test a Faster R-CNN built on MobileNetV2 features.

    Builds the model with untrained weights, runs two random images of
    different sizes through it in eval mode, and prints the number of
    predictions and the keys of the first prediction.
    """
    backbone = torchvision.models.mobilenet_v2(pretrained=False).features
    # FasterRCNN needs the backbone to expose its output channel count.
    backbone.out_channels = 1280
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))
    # NOTE: an explicit RoI pooler could be supplied via `box_roi_pool`, e.g.
    # torchvision.ops.MultiScaleRoIAlign(featmap_names=[0], output_size=7,
    # sampling_ratio=2); the FasterRCNN default is used here instead.
    model = FasterRCNN(backbone, num_classes=2, rpn_anchor_generator=anchor_generator)

    # Fall back to CPU so the smoke test also runs on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval().to(device)

    x = [
        torch.rand(3, 300, 400).float().to(device),
        torch.rand(3, 500, 400).float().to(device),
    ]
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        predictions = model(x)
    print(len(predictions))
    print(predictions[0].keys())
class PyTorchModel:
    """Faster R-CNN detector wrapper that loads weights and returns ``Label`` objects."""

    # create model by loading it in from Google Drive path
    def __init__(self, f):
        """Build the detector and load its weights.

        Args:
            f: Either a ``str`` path to a local checkpoint or an
                ``io.BytesIO`` stream containing one. Both are read with
                ``torch.load`` and applied with ``load_state_dict``. Any
                other value skips loading, so the model keeps the weights
                it was constructed with.
        """
        trainable_backbone_layers = 5
        # Second positional argument is the backbone's `pretrained` flag
        # (presumably ImageNet weights -- confirm for the installed
        # torchvision version).
        backbone = resnet_fpn_backbone(
            'resnet50', True, trainable_layers=trainable_backbone_layers)
        self.model = FasterRCNN(backbone,
                                num_classes=10,
                                max_size=3840,
                                min_size=2160,
                                rpn_pre_nms_top_n_train=2000,
                                rpn_pre_nms_top_n_test=2000,
                                rpn_post_nms_top_n_train=2000,
                                rpn_post_nms_top_n_test=2000,
                                box_detections_per_img=100,
                                rpn_nms_thresh=0.01,
                                box_nms_thresh=0.01)

        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        # Remembered so predict() can place its input next to the model.
        self.device = device
        self.model.to(device)

        if isinstance(f, str):  # local file
            print("Loading model from local file at {}".format(f))
            self.model.load_state_dict(torch.load(f, map_location=device))
        elif isinstance(f, io.BytesIO):  # stream
            print("Loading model from stream")
            # Previously this branch only printed and loaded nothing.
            # torch.load accepts file-like objects, so read the stream the
            # same way as a path.
            self.model.load_state_dict(torch.load(f, map_location=device))

    def predict(self, image) -> List[Label]:
        """Detect objects in ``image``.

        Args:
            image: Input accepted by ``torchvision.transforms.ToTensor``.

        Returns:
            One ``Label`` per detection, built as
            ``Label(index, category, xmin, xmax, ymin, ymax, color, score)``.
            Box coordinates are truncated to ``int``; category and color
            are looked up in ``classes`` by the stringified class id.
        """
        # The input must live on the same device as the model.
        frame = torchvision.transforms.ToTensor()(image).to(self.device)

        self.model.eval()
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            prediction = self.model([frame])
        print(prediction)

        detections = prediction[0]
        # Convert once to plain Python values instead of calling .item()
        # for every field of every detection.
        boxes = detections["boxes"].tolist()
        labels = detections["labels"].tolist()
        scores = detections["scores"].tolist()

        ret = []
        for i, (box, label, score) in enumerate(zip(boxes, labels, scores)):
            xmin, ymin, xmax, ymax = (int(coord) for coord in box)
            entry = classes[str(label)]
            ret.append(
                Label(i, entry["category"], xmin, xmax, ymin, ymax,
                      entry["color"], float(score)))
        return ret
class LitFRCNN(pl.LightningModule):
    """Lightning module wrapping a Faster R-CNN detector for fine-tuning."""

    def __init__(
        self,
        learning_rate: float = 0.0001,
        num_classes: int = 91,
        backbone: str = None,
        fpn: bool = True,
        pretrained_backbone: str = None,
        trainable_backbone_layers: int = 3,
        **kwargs,
    ):
        """
        Args:
            learning_rate: learning rate handed to the SGD optimizer
            num_classes: number of detection classes (including background)
            backbone: backbone name passed to ``create_fastercnn_backbone``;
                when ``None`` a pretrained ``fasterrcnn_resnet50_fpn`` is used
                and its box predictor is replaced to match ``num_classes``
            fpn: forwarded to ``create_fastercnn_backbone``
            pretrained_backbone: forwarded to ``create_fastercnn_backbone``
            trainable_backbone_layers: number of trainable backbone layers
            **kwargs: only used with a custom backbone, where they are passed
                to both ``create_fastercnn_backbone`` and ``FasterRCNN``
        """
        super().__init__()
        self.learning_rate = learning_rate
        self.num_classes = num_classes
        self.backbone = backbone

        if backbone is not None:
            # Custom backbone: assemble the detector around it.
            custom_backbone = create_fastercnn_backbone(
                self.backbone,
                fpn,
                pretrained_backbone,
                trainable_backbone_layers,
                **kwargs,
            )
            self.model = FasterRCNN(custom_backbone,
                                    num_classes=num_classes,
                                    **kwargs)
        else:
            # Default: pretrained ResNet-50 FPN detector with a new box
            # predictor sized for the requested number of classes.
            self.model = fasterrcnn_resnet50_fpn(
                pretrained=True,
                trainable_backbone_layers=trainable_backbone_layers,
            )
            predictor_inputs = self.model.roi_heads.box_predictor.cls_score.in_features
            self.model.roi_heads.box_predictor = FastRCNNPredictor(
                predictor_inputs, self.num_classes)

    def forward(self, x):
        """Switch the detector to eval mode and return its output for ``x``."""
        detector = self.model
        detector.eval()
        return detector(x)

    def training_step(self, batch, batch_idx):
        """Return the summed detector losses plus the loss dict under ``"log"``."""
        images, raw_targets = batch
        # Shallow-copy each target dict before handing it to the model.
        targets = [dict(target.items()) for target in raw_targets]

        # fasterrcnn takes both images and targets for training and returns
        # a dict of losses
        loss_dict = self.model(images, targets)
        total_loss = sum(loss_dict.values())
        return {"loss": total_loss, "log": loss_dict}

    def validation_step(self, batch, batch_idx):
        """Return the batch-mean IoU and GIoU between targets and outputs."""
        images, targets = batch
        # fasterrcnn takes only images for eval() mode
        outs = self.model(images)

        per_image_iou = [
            _evaluate_iou(target, out) for target, out in zip(targets, outs)
        ]
        per_image_giou = [
            _evaluate_giou(target, out) for target, out in zip(targets, outs)
        ]
        return {
            "val_iou": torch.stack(per_image_iou).mean(),
            "val_giou": torch.stack(per_image_giou).mean(),
        }

    def validation_epoch_end(self, outs):
        """Average the per-step ``val_iou`` / ``val_giou`` values over the epoch."""
        epoch_iou = torch.stack([step["val_iou"] for step in outs]).mean()
        epoch_giou = torch.stack([step["val_giou"] for step in outs]).mean()
        return {
            "avg_val_iou": epoch_iou,
            "avg_val_giou": epoch_giou,
            "log": {"val_iou": epoch_iou, "val_giou": epoch_giou},
        }

    def configure_optimizers(self):
        """Return an SGD optimizer over the detector's parameters."""
        optimizer = torch.optim.SGD(
            self.model.parameters(),
            lr=self.learning_rate,
            momentum=0.9,
            weight_decay=0.005,
        )
        return optimizer
import torchvision
from class_labels import GROCERY_LIST_V0
from PIL import Image
from torch.autograd import Variable
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

# NOTE(review): the statements below use `torch`, `sys` and `time`, none of
# which are imported in the lines visible here -- confirm they are imported
# earlier in the file.

# Build the detector (ResNet-50 FPN backbone, 64 classes) and restore the
# trained weights stored under the checkpoint's "model" key.
model = FasterRCNN(resnet_fpn_backbone("resnet50", False), num_classes=64)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_path = "synthdet_faster_rcnn.pth"
# map_location lets a checkpoint saved on GPU load on CPU-only hosts
# (no CUDA on macOS).
model_save = torch.load(model_path, map_location=device)
model.load_state_dict(model_save["model"])
model.to(device)
model.eval()

# preprocess on test image
image_path = sys.argv[1]
# Force three channels: grayscale or RGBA files would otherwise produce a
# 1- or 4-channel tensor that does not match the detector's 3-channel
# normalisation.
image = Image.open(image_path).convert("RGB")
image_to_tensor = transforms.Compose([
    transforms.ToTensor()
])
tensor = image_to_tensor(image)

# inference
threshold = 0.5

# Start timing
time_inference_start = time.time()