# Assumed imports for this snippet (not shown in the original):
from torchvision.models.detection import FasterRCNN as FRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import AnchorGenerator


class ResNet50_FasterRCNN:
    def __init__(self, pretrained=False):
        # Build a Faster R-CNN model for object detection on a ResNet-50 FPN backbone.
        backbone = resnet_fpn_backbone('resnet50', pretrained=pretrained)
        num_classes = 4 + 1  # 4 object classes + 1 background class
        anchor_generator = AnchorGenerator(sizes=(40, 60, 150, 200, 250),
                                           aspect_ratios=(0.7, 1.0, 1.3))
        self.model = FRCNN(backbone,
                           num_classes=num_classes,
                           rpn_anchor_generator=anchor_generator)

    def train(self):
        self.model.train()

    def to(self, device):
        self.model.to(device)

    def eval(self):
        self.model.eval()

    def parameters(self):
        return self.model.parameters()

    def get_state_dict(self):
        return self.model.state_dict()

    def set_state_dict(self, state_dict):
        self.model.load_state_dict(state_dict)

    def fit_batch(self, images, target):
        return self.model(images, target)

    def predict_batch(self, images):
        return self.model(images)
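
# --- Hedged usage sketch (added for illustration; not part of the original
# snippet). Relies on torchvision's convention that detection models return
# a loss dict in train mode and per-image prediction dicts in eval mode. ---
import torch

detector = ResNet50_FasterRCNN(pretrained=False)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
detector.to(device)

# One dummy training forward pass:
detector.train()
images = [torch.rand(3, 600, 800, device=device)]
targets = [{'boxes': torch.tensor([[10., 20., 200., 300.]], device=device),
            'labels': torch.tensor([1], device=device)}]
loss_dict = detector.fit_batch(images, targets)
total_loss = sum(loss_dict.values())

# Inference:
detector.eval()
with torch.no_grad():
    predictions = detector.predict_batch(images)  # list of {'boxes', 'labels', 'scores'}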
# Assumed imports (torchvision names; `timm_resnet_fpn_backbone` is
# project-specific and must come from this repo's own modules):
import torch
from torchvision.models.detection import FasterRCNN, MaskRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor


def get_model(
    backbone_name="resnet50",
    detector_name="fasterrcnn",
    trainable_layers=3,
    model_ckpt=None,
):
    """Constructs a fasterrcnn or maskrcnn detector with the given backbone."""
    num_classes = 2  # 1 class (wheat) + background
    if model_ckpt:
        # A checkpoint will be loaded, so skip downloading backbone weights.
        # backbone = resnet_fpn_backbone('resnet101', True)
        backbone = timm_resnet_fpn_backbone(backbone_name, False, trainable_layers)
    else:
        backbone = timm_resnet_fpn_backbone(backbone_name, True, trainable_layers)

    if detector_name == "fasterrcnn":
        model = FasterRCNN(backbone, num_classes)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    elif detector_name == "maskrcnn":
        model = MaskRCNN(backbone, num_classes)
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)
    else:
        raise Exception(f"{detector_name} is not supported")

    if model_ckpt is not None:
        model.load_state_dict(torch.load(model_ckpt)["model_state_dict"])
        print("loaded ckpt")
    return model
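
# --- Hedged usage sketch (added). Assumes get_model and its timm backbone
# helper are importable from this repo. ---
model = get_model(backbone_name="resnet50", detector_name="fasterrcnn")
model.eval()
with torch.no_grad():
    preds = model([torch.rand(3, 512, 512)])  # one prediction dict per image
print(preds[0]["boxes"].shape)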
# Assumed imports (the cfg dict is supplied by the caller):
import os

import torch
from torchvision.models import resnet
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import BackboneWithFPN
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import MultiScaleRoIAlign
from torchvision.ops import misc as misc_nn_ops


def fasterrcnn_resnetxx_fpnxx(cfg):
    backbone = resnet.__dict__[cfg['backbone_name']](
        pretrained=cfg['backbone_pretrained'],
        norm_layer=misc_nn_ops.FrozenBatchNorm2d)

    # Freeze every backbone layer except layer2, layer3 and layer4.
    for name, parameter in backbone.named_parameters():
        if 'layer2' not in name and 'layer3' not in name and 'layer4' not in name:
            parameter.requires_grad_(False)

    # Wrap the backbone with an FPN.
    return_layers = cfg['fpn']['return_layers']
    in_channels_stage2 = backbone.inplanes // 8
    in_channels_list = [in_channels_stage2 * 2 ** i
                        for i in range(len(return_layers))]
    out_channels = cfg['fpn']['out_channels']
    backbone_fpn = BackboneWithFPN(backbone, return_layers,
                                   in_channels_list, out_channels)

    anchor_generator = AnchorGenerator(**cfg['anchor_generator'])
    # print(anchor_generator.num_anchors_per_location())
    roi_pooler = MultiScaleRoIAlign(**cfg['box_roi_pool'])

    model = FasterRCNN(backbone_fpn,
                       num_classes=cfg['num_classes'],
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)

    if os.path.exists(cfg['fasterrcnn_pretrained']):
        state_dict = torch.load(cfg['fasterrcnn_pretrained'])
        model.load_state_dict(state_dict)
    return model
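
# --- Hedged example config (added). The keys mirror what
# fasterrcnn_resnetxx_fpnxx reads above; all values are illustrative.
# BackboneWithFPN appends a pooled level, hence five anchor sizes for the
# four returned layers. ---
example_cfg = {
    'backbone_name': 'resnet50',
    'backbone_pretrained': True,
    'fpn': {
        'return_layers': {'layer1': '0', 'layer2': '1',
                          'layer3': '2', 'layer4': '3'},
        'out_channels': 256,
    },
    'anchor_generator': {
        'sizes': ((32,), (64,), (128,), (256,), (512,)),
        'aspect_ratios': ((0.5, 1.0, 2.0),) * 5,
    },
    'box_roi_pool': {
        'featmap_names': ['0', '1', '2', '3'],
        'output_size': 7,
        'sampling_ratio': 2,
    },
    'num_classes': 2,
    'fasterrcnn_pretrained': '',  # optional checkpoint path; skipped if absent
}
model = fasterrcnn_resnetxx_fpnxx(example_cfg)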
# Assumed imports (`e.DetectedObject` and ObjectDetector are project-specific):
import time
from typing import List

import albumentations as A
import torch
from albumentations.pytorch import ToTensorV2
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone


class TorchDetector:
    """Torch object detector."""

    def __init__(self, config, logger):
        self._logger = logger
        self._threshold = config['threshold']
        modelfile = config['model']
        self._device = config['device']  # cpu, cuda, cuda:0
        backbone = resnet_fpn_backbone('resnet50', False)
        self._model = FasterRCNN(backbone, 8)  # 8 classes
        checkpoint = torch.load(modelfile, map_location=self._device)
        self._model.load_state_dict(checkpoint['model_state_dict'])
        device = torch.device(self._device)
        self._model.to(device)
        self._model.eval()

    def stop(self):
        """Destruction."""

    def detectObjects(self, img) -> List[e.DetectedObject]:
        """Implementation of the detector interface."""
        wsize = 1600
        hsize = 800
        _pretransform = A.Compose([
            A.Resize(hsize, wsize),
            A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
            ToTensorV2(),
        ])
        image_tensor = _pretransform(image=img)['image']
        tstart = time.time()
        outputs = self._model.forward(
            image_tensor.unsqueeze(0).float().to(device=self._device))
        classes = outputs[0]['labels'].detach().cpu().numpy()
        scores = outputs[0]['scores'].detach().cpu().numpy()
        boxes = outputs[0]['boxes'].detach().cpu().numpy()
        self._logger.debug(f'Torch model inferring time: {time.time() - tstart}')
        result = zip(classes, scores, boxes)
        # Scale factors to map boxes from the resized image back to the original.
        h, w, _ = img.shape
        wscale = w / wsize
        hscale = h / hsize
        # print(f'h,w:{h},{w}; wsc,hsc:{wscale},{hscale}')
        # print(list(result))
        return ObjectDetector.getDetectedObjectsCollection(
            result, hscale, wscale, self._threshold, False)
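
# --- Hedged usage sketch (added). The config keys follow the constructor
# above; the checkpoint path is hypothetical. ---
import logging

config = {
    'threshold': 0.5,
    'model': 'checkpoints/detector.pth',  # hypothetical path
    'device': 'cpu',
}
detector = TorchDetector(config, logging.getLogger('detector'))
# `img` should be an HxWx3 RGB numpy array:
# detections = detector.detectObjects(img)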
# Assumed imports:
import math

import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator


class Detect:
    def __init__(self):
        super().__init__()
        backbone = torchvision.models.vgg16(pretrained=False).features
        backbone.out_channels = 512
        anchor_sizes = ((8, 16, 32, 64, 128, 256, 512), )
        aspect_ratios = ((1 / 2, 1 / 3, 1 / 4, 1 / 5, 1 / 6,
                          1 / math.sqrt(2), 1, 2, math.sqrt(2),
                          3, 4, 5, 6, 7, 8), )
        anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                           aspect_ratios=aspect_ratios)
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3', '4'],
            output_size=7,
            sampling_ratio=2)
        self.model = FasterRCNN(backbone,
                                num_classes=7,
                                rpn_anchor_generator=anchor_generator,
                                box_roi_pool=roi_pooler)
        self.device = torch.device('cpu')
        self.model.load_state_dict(torch.load('2.pth'))
        self.model.to(self.device)
        self.model.eval()

    def forward(self, img):
        img = torch.tensor(img, dtype=torch.float32) / 255
        img = img.permute((2, 0, 1))
        output = self.model([img.to(self.device)])
        boxes = output[0]['boxes']
        labels = output[0]['labels']
        scores = output[0]['scores']
        # Track the best score seen per field so only the highest-scoring box
        # of each class is kept (initialized to 0 so the first comparison succeeds).
        last = {k: 0.0 for k in ('send', 'number', 'date', 'quote',
                                 'header', 'motto', 'secrete', 'sign')}
        result = {}
        for i, v in enumerate(labels):
            if v == 1 and scores[i] > last['send']:
                last['send'] = scores[i]
                result['send'] = boxes[i]
            elif v == 2 and scores[i] > last['number']:
                last['number'] = scores[i]
                result['number'] = boxes[i]
            elif v == 3 and scores[i] > last['date']:
                last['date'] = scores[i]
                result['date'] = boxes[i]
            elif v == 4 and scores[i] > last['quote']:
                last['quote'] = scores[i]  # (no box is stored for 'quote')
            elif v == 5 and scores[i] > last['header']:
                last['header'] = scores[i]
                result['header'] = boxes[i]
            elif v == 6 and scores[i] > last['motto']:
                last['motto'] = scores[i]
                result['motto'] = boxes[i]
            # elif v == 7 and scores[i] > last['secrete']:
            #     last['secrete'] = scores[i]
            #     result['secrete'] = boxes[i]
            # elif v == 8 and scores[i] > last['sign']:
            #     last['sign'] = scores[i]
            #     result['sign'] = boxes[i]
        return result
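
# --- Hedged usage sketch (added). Detect.forward expects an HxWx3 RGB array;
# the image path is hypothetical and the checkpoint '2.pth' from the snippet
# above must be present. ---
import cv2

letter_detector = Detect()
image = cv2.cvtColor(cv2.imread('letter.png'), cv2.COLOR_BGR2RGB)
fields = letter_detector.forward(image)
for name, box in fields.items():
    print(name, [round(c) for c in box.tolist()])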
def fasterrcnn_resnet_fpn(backbone_name, pretrained=False,
                          trainable_backbone_layers=None, num_classes=81,
                          pretrained_backbone=True, **kwargs):
    """
    Construct Faster R-CNN with a ResNet-FPN backbone

    Arguments:
        backbone_name(str): Name of the backbone.
            Refer to torchvision.models.resnet.__dict__ for details
        pretrained(bool, optional): If True, load weights for the detector
            pretrained on MS COCO. Only ResNet50-FPN is supported for the moment.
        trainable_backbone_layers(int, optional): Number of trainable (not frozen)
            resnet layers starting from the final block.
        num_classes(int, optional): Number of target classes.
        pretrained_backbone(bool, optional): If True, load weights for the
            backbone pre-trained on ImageNet

    Refer to torchvision.models.detection.FasterRCNN for kwargs
    """
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

    if pretrained and backbone_name == 'resnet50':
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False

    backbone = resnet_fpn_backbone(backbone_name, pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)

    if pretrained and backbone_name == 'resnet50':
        state_dict = models.utils.load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'])
        if num_classes == 81:
            # Remove the parameters for the additional classes (KEEP and
            # KEEPX4 are assumed to be index tensors, defined elsewhere in
            # this module, that select the classes to retain).
            state_dict['roi_heads.box_predictor.cls_score.weight'] = \
                state_dict['roi_heads.box_predictor.cls_score.weight'][KEEP]
            state_dict['roi_heads.box_predictor.cls_score.bias'] = \
                state_dict['roi_heads.box_predictor.cls_score.bias'][KEEP]
            state_dict['roi_heads.box_predictor.bbox_pred.weight'] = \
                state_dict['roi_heads.box_predictor.bbox_pred.weight'][KEEPX4.flatten()]
            state_dict['roi_heads.box_predictor.bbox_pred.bias'] = \
                state_dict['roi_heads.box_predictor.bbox_pred.bias'][KEEPX4.flatten()]
        model.load_state_dict(state_dict)
    elif pretrained:
        print("WARNING: No pretrained detector on MS COCO with {}.".format(backbone_name),
              "Proceed with only pretrained backbone on ImageNet.")
    return model
# Assumed imports (`model_urls` was exposed by
# torchvision.models.detection.faster_rcnn in older torchvision releases):
from collections import OrderedDict

import torch
import torch.nn as nn
from torch.hub import load_state_dict_from_url
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.faster_rcnn import model_urls


class FasterRCNN_Encoder(nn.Module):
    def __init__(self, out_dim=None, fine_tune=False):
        super(FasterRCNN_Encoder, self).__init__()
        backbone = resnet_fpn_backbone('resnet50', False)
        self.faster_rcnn = FasterRCNN(backbone,
                                      num_classes=91,
                                      rpn_post_nms_top_n_train=200,
                                      rpn_post_nms_top_n_test=100)
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'], progress=True)
        self.faster_rcnn.load_state_dict(state_dict)

        # Modify the last linear layer of the ROI box head if a specific
        # output size is required.
        if out_dim is not None:
            self.faster_rcnn.roi_heads.box_head.fc7 = nn.Linear(
                in_features=1024, out_features=out_dim)

        # In a captioning task we may not want to fine-tune the Faster R-CNN model.
        if not fine_tune:
            for param in self.faster_rcnn.parameters():
                param.requires_grad = False

    def forward(self, images, targets=None):
        '''
        Forward propagation of the Faster R-CNN encoder

        Args:
            images: List[Tensor], a list of image data
            targets: List[Tensor], a list of ground-truth bounding box data,
                used only when fine-tuning

        Returns:
            proposal features after ROI pooling, and the RPN loss
        '''
        images, targets = self.faster_rcnn.transform(images, targets)
        # The base features produced by the backbone network, i.e. resnet50 + FPN.
        features = self.faster_rcnn.backbone(images.tensors)
        if isinstance(features, torch.Tensor):
            features = OrderedDict([('0', features)])
        # Proposals produced by the RPN, i.e. the coordinates of bounding
        # boxes likely to contain foreground objects.
        proposals, proposal_losses = self.faster_rcnn.rpn(images, features, targets)
        # Gather the features of the RPN proposals and perform ROI pooling.
        box_features = self.faster_rcnn.roi_heads.box_roi_pool(
            features, proposals, images.image_sizes)
        # Project the features to shape (num_proposals, feature_dim).
        box_features = self.faster_rcnn.roi_heads.box_head(box_features)
        return box_features, proposal_losses
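
# --- Hedged usage sketch (added). In eval mode the RPN loss dict comes back
# empty; box_features has one row per kept proposal across the batch. ---
encoder = FasterRCNN_Encoder(out_dim=512, fine_tune=False)
encoder.eval()
images = [torch.rand(3, 480, 640), torch.rand(3, 512, 512)]
with torch.no_grad():
    box_features, rpn_losses = encoder(images)
print(box_features.shape)  # (num_proposals_total, 512)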
def fasterrcnn_resnet_fpn(pretrained=False, progress=True, resnet='resnet50',
                          num_classes=91, pretrained_backbone=True, **kwargs):
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone(resnet, pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        target_url = model_urls['fasterrcnn_' + resnet + '_fpn_coco']
        state_dict = load_state_dict_from_url(target_url, progress=progress)
        model.load_state_dict(state_dict)
    return model
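
# --- Hedged usage sketch (added). torchvision only publishes COCO detector
# weights for the resnet50 variant, so other backbones should be built with
# pretrained=False. ---
model = fasterrcnn_resnet_fpn(pretrained=False, resnet='resnet101',
                              num_classes=91, pretrained_backbone=True)
model.eval()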
def fasterrcnn_resnet101_fpn(pretrained=False, progress=False, num_classes=91,
                             pretrained_backbone=True, **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-101-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape
    ``[C, H, W]``, one for each image, and should be in ``0-1`` range. Different
    images can have different sizes.

    The behavior of the model changes depending on whether it is in training or
    evaluation mode.

    During training, the model expects both the input tensors, as well as targets
    (list of dictionary), containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with values between ``0`` and ``H`` and
          ``0`` and ``W``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the
    classification and regression losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the
    post-processed predictions as a ``List[Dict[Tensor]]``, one for each input
    image. The fields of the ``Dict`` are as follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in
          ``[x1, y1, x2, y2]`` format, with values between ``0`` and ``H`` and
          ``0`` and ``W``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Example::

        >>> model = torchvision.models.detection.fasterrcnn_resnet101_fpn(pretrained=False)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
            (there are currently no pre-trained resnet101 detector weights)
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    if pretrained:
        raise Exception("resnet101 cannot use pre-trained models")
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet101', pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:  # unreachable while the guard above raises
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet101_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict)
    return model
def fasterrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=91,
                            pretrained_backbone=True, **kwargs):
    """
    Constructs a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input to the model is expected to be a list of tensors, each of shape
    ``[C, H, W]``, one for each image, and should be in ``0-1`` range. Different
    images can have different sizes.

    The behavior of the model changes depending on whether it is in training or
    evaluation mode.

    During training, the model expects both the input tensors, as well as targets
    (list of dictionary), containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and ``W``
          and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the class label for each ground-truth box

    The model returns a ``Dict[Tensor]`` during training, containing the
    classification and regression losses for both the RPN and the R-CNN.

    During inference, the model requires only the input tensors, and returns the
    post-processed predictions as a ``List[Dict[Tensor]]``, one for each input
    image. The fields of the ``Dict`` are as follows:
        - boxes (``FloatTensor[N, 4]``): the predicted boxes in
          ``[x1, y1, x2, y2]`` format, with values of ``x`` between ``0`` and ``W``
          and values of ``y`` between ``0`` and ``H``
        - labels (``Int64Tensor[N]``): the predicted labels for each image
        - scores (``Tensor[N]``): the scores of each prediction

    Faster R-CNN is exportable to ONNX for a fixed batch size with input images
    of fixed size.

    Example::

        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        >>> # For training
        >>> images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
        >>> labels = torch.randint(1, 91, (4, 11))
        >>> images = list(image for image in images)
        >>> targets = []
        >>> for i in range(len(images)):
        >>>     d = {}
        >>>     d['boxes'] = boxes[i]
        >>>     d['labels'] = labels[i]
        >>>     targets.append(d)
        >>> output = model(images, targets)
        >>> # For inference
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
        >>>
        >>> # optionally, if you want to export the model to ONNX:
        >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version=11)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict)
    return model
data_loader_validation = torch.utils.data.DataLoader(
    dataset_validation, batch_size=1, shuffle=False, num_workers=0,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=0,
    collate_fn=utils.collate_fn)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# move model to the right device
model.to(device)

model_path = './model_mixed_epoch10.pt'
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))

'''
evaluate(model, data_loader_validation, device=device)
'''

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
# optimizer = torch.optim.Adam(params)
# torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08,
#                  weight_decay=0, amsgrad=False)
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
allFileList.sort()
allFileList.sort(key=lambda x: int(x[:-4]))

# Load model
backbone = backboneNet_efficient()  # use efficientnet as our backbone
backboneFPN = backboneWithFPN(backbone)  # add FPN
anchor_generator = AnchorGenerator(cfg.anchor_sizes, cfg.aspect_ratios)
model_ft = FasterRCNN(backboneFPN,
                      num_classes=cfg.num_classes,
                      rpn_anchor_generator=anchor_generator,
                      min_size=cfg.min_size,
                      max_size=cfg.max_size)
model_ft.load_state_dict(torch.load(cfg.model_name).state_dict())
model_ft.to(device)

with open(cfg.json_name, 'w', encoding='utf-8') as json_f:
    for file in allFileList:
        if os.path.isfile(cfg.test_path + file):
            print(file)
            output_dict = {}
            path = cfg.test_path + file
            img = Image.open(path).convert('RGB')
            img = data_transforms(img)
            img = img.unsqueeze(0)
            with torch.no_grad():
                model_ft.eval()
                img = img.to(device)
                output = model_ft(img)
# Assumed imports (`_model_urls` is defined elsewhere in this module):
from torch.hub import load_state_dict_from_url
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops.misc import FrozenBatchNorm2d


def get_fasterrcnn_model(arch_str, num_classes, pretrained=True,
                         pretrained_backbone=True, trainable_layers=5, **kwargs):
    """Creates a FasterRCNN model with a resnet backbone."""
    # if pretrained == True: pretrained_backbone = False
    backbone = resnet_fpn_backbone(arch_str,
                                   pretrained=pretrained_backbone,
                                   trainable_layers=trainable_layers)
    anchor_sizes = ((16, ), (32, ), (64, ), (128, ), (256, ))
    aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
    anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                       aspect_ratios=aspect_ratios)
    model = FasterRCNN(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_fg_iou_thresh=0.5,
        box_bg_iou_thresh=0.5,
        image_mean=[0.0, 0.0, 0.0],  # already normalized by fastai
        image_std=[1.0, 1.0, 1.0],
        # min_size=1,
        # box_score_thresh=0.6,
        **kwargs)
    if pretrained:
        try:
            pretrained_dict = load_state_dict_from_url(
                _model_urls['fasterrcnn_' + arch_str + '_fpn_coco'],
                progress=True)
            model_dict = model.state_dict()
            # Keep only the pretrained tensors whose names and shapes match.
            pretrained_dict = {
                k: v
                for k, v in pretrained_dict.items()
                if (k in model_dict) and (model_dict[k].shape == pretrained_dict[k].shape)
            }
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
            # overwrite_eps(model, 0.0)
            for module in model.modules():
                if isinstance(module, FrozenBatchNorm2d):
                    module.eps = 0.0
        except Exception as e:
            # print(e)
            print("No pretrained coco model found for fasterrcnn_" + arch_str)
            print("This does not affect the backbone.")
    return model.train()
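
# --- Hedged usage sketch (added). The function returns the model in train()
# mode, so switch to eval() before inference. ---
import torch

model = get_fasterrcnn_model('resnet50', num_classes=2, pretrained=True)
model.eval()
with torch.no_grad():
    preds = model([torch.rand(3, 256, 256)])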
# Assumed imports (VisumData, T, utils and nms are competition-specific helpers):
import argparse

import numpy as np
import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator


def main():
    parser = argparse.ArgumentParser(
        description='VISUM 2019 competition - baseline inference script',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-d', '--data_path', default='/home/master/dataset/test',
                        metavar='', help='test data directory path')
    parser.add_argument('-m', '--model_path', default='./model.pth',
                        metavar='', help='model file')
    parser.add_argument('-o', '--output', default='./predictions.csv',
                        metavar='', help='output CSV file name')
    args = vars(parser.parse_args())

    NMS_THR = 0.1  # non maximum suppression threshold
    REJECT_THR_KNOWN = 0.9  # rejection threshold to classify as unknown class (naive approach!)
    REJECT_THR = 0.17  # rejection threshold to classify as unknown class (naive approach!)

    def get_transform(train):
        transforms = []
        # converts the image, a PIL image, into a PyTorch Tensor
        transforms.append(T.ToTensor())
        if train:
            # during training, randomly flip the training images
            # and ground-truth for data augmentation
            transforms.append(T.RandomHorizontalFlip(0.5))
        return T.Compose(transforms)

    # Load datasets
    test_data = VisumData(args['data_path'], 'rgb', mode='test',
                          transforms=get_transform(False))

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # initial
    # model = torch.load(args['model_path'])

    # new
    backbone = torchvision.models.detection.backbone_utils.resnet_fpn_backbone(
        'resnet50', True)
    backbone.out_channels = 256
    anchor_generator = AnchorGenerator(sizes=(8, 16, 32, 64, 128),
                                       aspect_ratios=(0.5, 1.0, 2.0))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)

    # put the pieces together inside a FasterRCNN model
    model = FasterRCNN(backbone,
                       num_classes=11,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    model.load_state_dict(torch.load(args['model_path'], map_location=device))
    model.to(device)

    test_loader = torch.utils.data.DataLoader(test_data, batch_size=1,
                                              shuffle=False, num_workers=4,
                                              collate_fn=utils.collate_fn)

    predictions = list()
    for i, (imgs, _, file_names) in enumerate(test_loader):
        # set the model to evaluation mode
        model.eval()
        with torch.no_grad():
            prediction = model(list(img.to(device) for img in imgs))
        boxes = np.array(prediction[0]['boxes'].cpu())
        labels = list(prediction[0]['labels'].cpu())
        scores = list(prediction[0]['scores'].cpu())
        nms_boxes, nms_labels, nms_scores = nms(boxes, labels, scores, NMS_THR)

        for bb in range(len(nms_labels)):
            if nms_scores[bb] >= REJECT_THR:
                pred = np.concatenate(
                    (list(file_names), list(nms_boxes[bb, :])))  # bounding box
                if nms_scores[bb] >= REJECT_THR_KNOWN:
                    pred = np.concatenate((pred, [nms_labels[bb] - 1]))  # object label
                else:
                    pred = np.concatenate((pred, [-1]))  # rejects to classify
                pred = np.concatenate((pred, [nms_scores[bb]]))  # best class score
                pred = list(pred)
                predictions.append(pred)

    with open(args['output'], 'w') as f:
        for pred in predictions:
            f.write("{},{},{},{},{},{},{}\n".format(
                pred[0], float(pred[1]), float(pred[2]), float(pred[3]),
                float(pred[4]), int(float(pred[5])), float(pred[6])))
# Assumed imports (`logger` comes from elsewhere in this service):
import os

import torch
from flask_restx import Namespace, reqparse
from werkzeug.datastructures import FileStorage
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

api = Namespace('frcnn', description='Model related operations')

image_upload = reqparse.RequestParser()
image_upload.add_argument('image',
                          location='files',
                          type=FileStorage,
                          required=True,
                          help='Image')

categories = os.getenv('CATEGORIES').split(',')
device = int(os.getenv('GPU_DEVICE'))

logger.info('Loading model')
backbone = resnet_fpn_backbone(os.getenv('BACKBONE'), False)
model = FasterRCNN(backbone, len(categories))
state_dict = torch.load(os.getenv('MODEL_PATH'))
model.load_state_dict(state_dict['model'])
model.to(device)
model.eval()
logger.info('Model ready')

logger.info('Loading Detectron model')
cfg = get_cfg()
cfg.merge_from_file(os.getenv('DETECTRON_CONFIG'))
cfg.MODEL.WEIGHTS = os.getenv('DETECTRON_MODEL')
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
cfg.freeze()
predictor = DefaultPredictor(cfg)
logger.info('Detectron model ready')


@api.route('/vision')
# Assumed imports (`jaccard`, an IoU helper, and `classes`, the label names,
# come from elsewhere in this module):
import cv2
import matplotlib.pyplot as plt
import torch
import torchvision
from torchvision import transforms
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator


def get_result_from_model(test_img, thresh):
    test_data = torchvision.datasets.ImageFolder(
        'C:/Users/skyho/Desktop/test_image_folder/',
        loader=plt.imread,
        transform=transforms.ToTensor())
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 200), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)
    rcnn_v1 = FasterRCNN(backbone,
                         num_classes=32,
                         rpn_anchor_generator=anchor_generator,
                         box_roi_pool=roi_pooler)
    try:
        # model_path = "bs{0}_lr{1}_epoch{2}_checkpoint_{3}".format(8, 0.00005, 2, 15)
        checkpoint = torch.load('C:/Users/skyho/Desktop/final_model.pth',
                                map_location='cpu')
        rcnn_v1.load_state_dict(checkpoint['model_state_dict'])
        rcnn_v1.eval()
    except IOError:
        print("Can't find saved model~")

    # data = torchvision.datasets.ImageFolder(img_path, loader=plt.imread,
    #                                         transform=transforms.ToTensor())
    result = []
    with torch.no_grad():
        result.append(rcnn_v1([test_data[0][0]]))

    # Plot the boxes and labels on the result image, save it and return the path.
    # cv2_im = cv2.imread(test_img)
    font = cv2.FONT_HERSHEY_SIMPLEX
    fontScale = 1
    fontColor = (255, 255, 255)
    lineType = 2
    cv2_im = []
    # for i in range(len(test_data)):
    cv2_im.append(cv2.imread(test_data.imgs[0][0]))
    # for i in range(len(test_data)):
    i = 0
    first_box = result[i][0]['boxes'][0].unsqueeze(0)
    box_id = 0
    for box in result[i][0]['boxes']:
        if box_id == 0 or jaccard(first_box, box.unsqueeze(0)).tolist()[0][0] < 0.6:
            if result[i][0]['scores'].tolist()[box_id] >= thresh:
                x1 = int(box[0])
                y1 = int(box[1])
                x2 = int(box[2])
                y2 = int(box[3])
                # Skip this box if another confident box of the same label encloses it.
                flag = True
                for each in range(result[i][0]['boxes'].shape[0]):
                    if (each != box_id
                            and result[i][0]['scores'].tolist()[each] >= thresh
                            and jaccard(first_box,
                                        result[i][0]['boxes'][each].unsqueeze(0)).tolist()[0][0] < 0.6):
                        o_x1 = int(result[i][0]['boxes'][each][0])
                        o_y1 = int(result[i][0]['boxes'][each][1])
                        o_x2 = int(result[i][0]['boxes'][each][2])
                        o_y2 = int(result[i][0]['boxes'][each][3])
                        if (x1 >= o_x1 - 3 and y1 >= o_y1 - 3
                                and x2 <= o_x2 + 3 and y2 <= o_y2 + 3
                                and result[i][0]['labels'][box_id] == result[i][0]['labels'][each]):
                            flag = False
                            break
                if flag:
                    cv2_im[i] = cv2.rectangle(cv2_im[i], (x1, y1), (x2, y2),
                                              (0, 255, 0), 3)
                    cv2.putText(cv2_im[i], classes[result[i][0]['labels'][box_id]],
                                (x1, y2), font, fontScale, fontColor, lineType)
        box_id += 1

    detection_result = test_img[:-4] + '_result.png'
    # for i in range(len(test_data)):
    cv2.imwrite(detection_result, cv2_im[i])
    return detection_result
# (fragment: end of the training loop, followed by the prediction branch)
        optimizer.step()
        # model.eval()
        # prediction = model(images)
        # print(prediction[0])
    torch.save(model.state_dict(), "./weight_2/{}.pt".format(ep))
    # img_array = predictions.permute(1, 2, 0).detach().cpu().numpy().astype(uint8)
    # cv2.imshow("img", cv2.fromarray(img_array))

if cfg.predict:
    img_path = "../../data/SUMIT/rs_images_sampled/"
    dataset = os.listdir(img_path)
    indices = torch.randperm(len(dataset)).tolist()
    model.load_state_dict(torch.load("./weight_2/9.pt"), strict=False)
    model.to(device)
    model.eval()
    for idi in indices[-10:]:
        img = Image.open(img_path + dataset[idi]).convert("RGB")
        # print(np.array(img).shape)
        img = torch.tensor(np.array(img)).float().permute(2, 0, 1).unsqueeze(0).to(device)
        predict = model(img)
        boxes_list = predict[0]["boxes"].data.cpu().numpy()
        print(predict[0]["boxes"].data.cpu().numpy())
        iimg = cv2.imread(img_path + dataset[idi])
        for box in boxes_list:
            print(box)
device = torch.device('cuda:0')
backbone = torchvision.models.vgg16(pretrained=False).features
backbone.out_channels = 512
anchor_sizes = ((8, 16, 32, 64, 128, 256, 512), )
aspect_ratios = ((1 / 2, 1 / 3, 1 / 4, 1 / 5, 1 / 6, 1 / math.sqrt(2),
                  1, 2, math.sqrt(2), 3, 4, 5, 6, 7, 8), )
anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                   aspect_ratios=aspect_ratios)
roi_pooler = torchvision.ops.MultiScaleRoIAlign(
    featmap_names=['0', '1', '2', '3', '4'],
    output_size=7,
    sampling_ratio=2)
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)
model.load_state_dict(torch.load('1.pth'))
model.to(device)
model.eval()

# real_img = cv2.imread(
#     '/home/dung/DocData/cp/145/110.png')
real_img = cv2.imread(
    '/home/dung/Project/Python/keras-frcnn/result/0_19_0.png')
img = torch.tensor(real_img, dtype=torch.float32) / 255
img = img.permute((2, 0, 1))
output = model([img.to(device)])
boxes = output[0]['boxes']
a = output[0]['boxes'].detach().to('cpu').numpy()
a = np.round(a)
ears = ["right ear/", "left ear/"] transform_img = transforms.Compose([transforms.ToTensor()]) EPOCH = 250 CLASSES = 3 DEVICE = torch.device("cuda") BATCH_SIZE = 10 anchor_generator = AnchorGenerator(sizes=((32, 64), ), aspect_ratios=((0.6, 1.0, 1.6), )) backbone = torchvision.models.vgg19(pretrained=False).features backbone.out_channels = 512 model = FasterRCNN(backbone, num_classes=CLASSES, rpn_anchor_generator=anchor_generator) model.load_state_dict( torch.load('models_new/' + 'model_' + str(EPOCH) + '.pth')) model.to(DEVICE) model.eval() start_time = time.time() ear_count = 0 for T in types: for E in ears: CTs = os.listdir(data_path + dataset_name + T + E) for CT in CTs: print('current path:{}'.format(data_path + dataset_name + T + E + CT)) ear_count += 1 img_names = glob.glob(data_path + dataset_name + T + E + CT + '/*.jpg') sorted(img_names, key=lambda x: x.split('\\')[-1]) with torch.no_grad():
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)

# put the pieces together inside a FasterRCNN model
model = FasterRCNN(backbone,
                   num_classes=2,
                   rpn_anchor_generator=anchor_generator,
                   min_size=800,
                   max_size=1200,
                   box_roi_pool=roi_pooler,
                   box_detections_per_img=200)
model.cuda()
model.load_state_dict(torch.load('./checkpoint/efficient_model_L_7.pth'))
model.eval()

start = time.time()
print(img.size())
results = model([img.cuda()])

# `imge` is presumably the original PIL image the tensor `img` was built from.
open_cv_image = np.array(imge)
open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
for box in results[0]['boxes']:
    box = box[:4].tolist()
    # cv2.rectangle is called here with its (x, y, w, h) rect overload.
    cv2.rectangle(open_cv_image,
                  (int(box[0]), int(box[1]),
                   int(box[2]) - int(box[0]), int(box[3]) - int(box[1])),
                  (255, 225, 0), 2)
cv2.imshow("sd", open_cv_image)
cv2.imwrite("demo.jpg", open_cv_image)
cv2.waitKey(30000)