Example #1
    def __init__(self, hparams):
        super().__init__()

        # Model hyperparameters for pytorch_lightning
        self.hparams = hparams

        # Mask-RCNN
        self.mask_rcnn = maskrcnn_resnet50_fpn(pretrained_backbone=True,
                                               pretrained=True)

        # Update the output heads for license-plate prediction
        num_classes = 2
        in_features = self.mask_rcnn.roi_heads.box_predictor.cls_score.in_features
        self.mask_rcnn.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
        self.mask_rcnn.roi_heads.mask_predictor = MaskRCNNPredictor(
            256, 256, num_classes)

        # Allow only selected parameters to be updated
        for parameter in self.mask_rcnn.parameters():
            parameter.requires_grad = False
        for parameter in self.mask_rcnn.backbone.fpn.parameters():
            parameter.requires_grad = True
        for parameter in self.mask_rcnn.rpn.parameters():
            parameter.requires_grad = True
        for parameter in self.mask_rcnn.roi_heads.parameters():
            parameter.requires_grad = True
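
A minimal sketch of a matching training_step, assuming pytorch_lightning batches arrive as (images, targets) in the torchvision detection format; this step is an illustration, not part of the original module:

    def training_step(self, batch, batch_idx):
        images, targets = batch
        # in train mode, torchvision's Mask R-CNN returns a dict of losses
        loss_dict = self.mask_rcnn(images, targets)
        loss = sum(loss_dict.values())
        self.log('train_loss', loss)
        return loss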
Example #2
    def __init__(self, dictionary=None):
        super(MaskRCNN, self).__init__()

        self.dictionary = dictionary
        self.input_size = [512, 512]
        self.dummy_input = torch.zeros(1, 3, self.input_size[0],
                                       self.input_size[1])

        self.num_classes = len(self.dictionary)
        self.category = [v for d in self.dictionary for v in d.keys()]
        self.weight = [
            d[v] for d in self.dictionary for v in d.keys()
            if v in self.category
        ]

        # load an instance segmentation model pre-trained on COCO
        self.model = maskrcnn_resnet50_fpn(pretrained=True)

        # get number of input features for the classifier
        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        self.model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, self.num_classes)

        # now get the number of input features for the mask classifier
        in_features_mask = self.model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        self.model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, self.num_classes)
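
The comprehensions above assume dictionary is a list of single-key mappings from category name to class weight; a hypothetical instance (names and weights invented for illustration):

dictionary = [{'__background__': 1.0}, {'car': 1.0}, {'person': 2.0}]
model = MaskRCNN(dictionary=dictionary)
# model.num_classes == 3
# model.category    == ['__background__', 'car', 'person']
# model.weight      == [1.0, 1.0, 2.0]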
Example #3
def maskrcnn_resnet50_fpn(input_size=None, output_size=None):
    """with pretrained_backbone"""
    if import_error is not None:
        raise import_error

    if not isinstance(output_size, int):
        output_size = numpy.prod(input_size)  # numpy.product is deprecated

    return detection.maskrcnn_resnet50_fpn(num_classes=output_size)
Example #4
def make_model(cfg):
    """Initializes the model.

    Args:
        cfg (Config): pass in all configurations
    """

    if cfg.model_name == 'maskrcnn_resnet50_fpn':
        if cfg.coco_pretrained:
            model = maskrcnn_resnet50_fpn(pretrained=True)
        else:
            model = maskrcnn_resnet50_fpn(num_classes=cfg.num_classes,
                                          pretrained=False)
        pretrained_num_classes = (
            model.roi_heads.mask_predictor.mask_fcn_logits.out_channels)
        swap_predictors = ((cfg.num_classes != pretrained_num_classes)
                           or cfg.swap_model_predictors)
        if swap_predictors:
            # replace the pre-trained FasterRCNN head with a new one
            model.roi_heads.box_predictor = FastRCNNPredictor(
                # in_features
                model.roi_heads.box_predictor.cls_score.in_features,
                # num_classes
                cfg.num_classes)
            # replace the pre-trained MaskRCNN head with a new one
            model.roi_heads.mask_predictor = MaskRCNNPredictor(
                # in_features_mask
                model.roi_heads.mask_predictor.conv5_mask.in_channels,
                # hidden_layer
                model.roi_heads.mask_predictor.conv5_mask.out_channels,
                # num_classes
                cfg.num_classes)
    elif cfg.model_name == 'adjust_anchor':
        anchor_generator = AnchorGenerator(
            sizes=((16, ), (32, ), (64, ), (128, ), (256, )),
            aspect_ratios=((0.8, 1.0, 1.25), ) * 5)
        backbone = resnet_fpn_backbone('resnet50', pretrained=True)
        model = MaskRCNN(backbone=backbone,
                         num_classes=cfg.num_classes,
                         rpn_anchor_generator=anchor_generator)
    else:
        raise NotImplementedError
    return model
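
A minimal way to exercise make_model without the project's Config class; SimpleNamespace below is a stand-in carrying only the attributes the function reads, not the original Config:

from types import SimpleNamespace

cfg = SimpleNamespace(model_name='maskrcnn_resnet50_fpn',
                      coco_pretrained=True,
                      num_classes=2,
                      swap_model_predictors=False)
model = make_model(cfg)  # COCO weights with heads swapped to 2 classes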
Example #5
 def __init__(self):
     super(MaskRCNN, self).__init__()
     net = maskrcnn_resnet50_fpn(pretrained=True)
     net.eval()
     raw_layers = list(net.children())
     self.transform = raw_layers[0]
     self.backbone = raw_layers[1]
     self.rpn = raw_layers[2]
     self.roi_heads = raw_layers[3]
     self.interested_ids = [3, 6, 8]
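
For reference, the four sub-modules extracted above chain together roughly as in torchvision's GeneralizedRCNN.forward; a sketch of that flow (signatures follow recent torchvision and may differ between versions):

import torch
from torchvision.models.detection import maskrcnn_resnet50_fpn

net = maskrcnn_resnet50_fpn(pretrained=True).eval()
transform, backbone, rpn, roi_heads = list(net.children())

images = [torch.rand(3, 480, 640)]                 # dummy input
original_sizes = [img.shape[-2:] for img in images]
with torch.no_grad():
    image_list, _ = transform(images)              # resize + normalize
    features = backbone(image_list.tensors)        # FPN feature maps (dict)
    proposals, _ = rpn(image_list, features)       # region proposals per image
    detections, _ = roi_heads(features, proposals, image_list.image_sizes)
    detections = transform.postprocess(detections, image_list.image_sizes,
                                       original_sizes)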
Example #6
 def __init__(self, num_classes=2, hidden_size=256):
     super().__init__()
     self.model_ft = maskrcnn_resnet50_fpn(pretrained=True)
     in_features = self.model_ft.roi_heads.box_predictor.cls_score.in_features
     self.model_ft.roi_heads.box_predictor = FastRCNNPredictor(
         in_features, num_classes)
     in_features_mask = self.model_ft.roi_heads.mask_predictor.conv5_mask.in_channels
     self.model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(
         in_features_mask, hidden_size, num_classes)
     for param in self.model_ft.parameters():
         param.requires_grad = True
Example #7
def mask_rcnn(pretrained=False,
              num_classes=1 + 90,
              representation=1024,
              backbone=None,
              with_mask=True,
              **kwargs):
    if backbone is None:
        model = maskrcnn_resnet50_fpn(pretrained,
                                      pretrained_backbone=not pretrained,
                                      progress=True,
                                      **kwargs)
    else:
        model = maskrcnn_resnet50_fpn(pretrained,
                                      pretrained_backbone=False,
                                      progress=True,
                                      **kwargs)
        model.backbone = backbone

    in_features = model.roi_heads.box_predictor.cls_score.in_features
    out_features = model.roi_heads.box_predictor.cls_score.out_features
    if representation != in_features:
        logging.info(
            f"Replaced box_head with representation size of {representation}")
        out_channels = model.backbone.out_channels
        resolution = model.roi_heads.box_roi_pool.output_size[0]
        model.roi_heads.box_head = TwoMLPHead(out_channels * resolution**2,
                                              representation)

    if representation != in_features or num_classes != out_features:
        logging.info(
            f"Replaced box_predictor with (representation, num_classes) = ({representation}, {num_classes})"
        )
        model.roi_heads.box_predictor = FastRCNNPredictor(
            representation, num_classes)

    if not with_mask:
        model.roi_heads.mask_roi_pool = None
        model.roi_heads.mask_head = None
        model.roi_heads.mask_predictor = None

    return THDetector(model)
Example #8
 def __init__(self, batch, device):
     from torchvision.models.detection import maskrcnn_resnet50_fpn
     import torch
     # from torchvision import transforms
     super(MaskRCNNDetectorTorch, self).__init__()
     self.model = maskrcnn_resnet50_fpn(pretrained=True)
     self.model.eval()
     self.batch = batch
     self.device = torch.device(device)
     self.model.to(self.device)
     self.MEAN = np.array([.485, .456, .406])  # ImageNet mean
     self.STD = np.array([.229, .224, .225])   # ImageNet std
Example #9
def get_model_instance_segmentation(num_classes):
    model = maskrcnn_resnet50_fpn(pretrained=True)

    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    mask_predictor_in_channels = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        mask_predictor_in_channels,
        dim_reduced=256,  # the keyword is dim_reduced in torchvision
        num_classes=num_classes)
    return model
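
A sketch of how the returned model is typically driven, assuming the standard torchvision detection conventions (lists of CHW float images in [0, 1]; targets with boxes, labels and masks):

import torch

model = get_model_instance_segmentation(num_classes=2)

# train mode: images plus targets in, dict of losses out
images = [torch.rand(3, 256, 256)]
targets = [{'boxes': torch.tensor([[10., 20., 100., 120.]]),
            'labels': torch.tensor([1]),
            'masks': torch.zeros(1, 256, 256, dtype=torch.uint8)}]
loss_dict = model(images, targets)

# eval mode: images only in, per-image detections out
model.eval()
with torch.no_grad():
    predictions = model(images)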
Example #10
def get_torchvision_maskrcnn(
    num_classes: int = 91,
    trainable_backbone_layers: int = 3,
    anchor_sizes: list = [32, 64, 128, 256, 512],
    anchor_aspect_ratios: list = [0.5, 1.0, 2.0],
    rpn_pre_nms_top_n_train: int = 2000,
    rpn_pre_nms_top_n_test: int = 1000,
    rpn_post_nms_top_n_train: int = 2000,
    rpn_post_nms_top_n_test: int = 1000,
    rpn_nms_thresh: float = 0.7,
    rpn_fg_iou_thresh: float = 0.7,
    rpn_bg_iou_thresh: float = 0.3,
    box_detections_per_img: int = 100,
    pretrained: bool = False,
):
    # prepare anchor params
    anchor_sizes = tuple(
        [tuple((anchor_size, )) for anchor_size in anchor_sizes])
    aspect_ratios = tuple(anchor_aspect_ratios)
    aspect_ratios = (aspect_ratios, ) * len(anchor_sizes)

    # load an instance segmentation model pre-trained on COCO
    rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    model = maskrcnn_resnet50_fpn(
        trainable_backbone_layers=trainable_backbone_layers,
        pretrained=pretrained,
        pretrained_backbone=pretrained,
        rpn_anchor_generator=rpn_anchor_generator,
        rpn_pre_nms_top_n_train=rpn_pre_nms_top_n_train,
        rpn_pre_nms_top_n_test=rpn_pre_nms_top_n_test,
        rpn_post_nms_top_n_train=rpn_post_nms_top_n_train,
        rpn_post_nms_top_n_test=rpn_post_nms_top_n_test,
        rpn_nms_thresh=rpn_nms_thresh,
        rpn_fg_iou_thresh=rpn_fg_iou_thresh,
        rpn_bg_iou_thresh=rpn_bg_iou_thresh,
        box_detections_per_img=box_detections_per_img,
    )

    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model
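
For example, a two-class detector with smaller anchors could be requested as follows (the values are illustrative only):

model = get_torchvision_maskrcnn(num_classes=2,
                                 anchor_sizes=[16, 32, 64, 128, 256],
                                 anchor_aspect_ratios=[0.5, 1.0, 2.0],
                                 pretrained=True)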
Example #11
 def mcoco():
     """Common preparation routine:
     Obtain coco dataset handle for mask_r_cnn model"""
     # Dataset initialization:
     model_stump = ModelStump(
         model=maskrcnn_resnet50_fpn(pretrained=True),
         stump_head=TestCOCOConceptActivationDataset.LAYER_KEY)
     dataset: ConceptDataset = ConceptDataset(**default_coco_spec())
     coco = ActivationDatasetWrapper(
         act_map_gen=model_stump,
         dataset=dataset)
     yield coco
     TestCOCOConceptActivationDataset.cleanup(coco)
Example #12
def detect_test(args):
    print("Loading detector...")
    maskrcnn = maskrcnn_resnet50_fpn(pretrained=True)
    if torch.cuda.is_available():
        maskrcnn.cuda()
    maskrcnn.eval()
    print("Loaded !\n")

    for f in tqdm(os.listdir(args.data + '/test_images/mistery_category')):
        if 'jpg' in f:
            data = data_transforms['detect'](
                pil_loader(args.data + '/test_images/mistery_category/' + f))
            data = data.view(1, data.size(0), data.size(1), data.size(2))
            if torch.cuda.is_available():
                data = data.cuda()

            results = maskrcnn(data)

            for e, result in enumerate(results):
                boxes = result['boxes'].tolist()    # bounding boxes
                labels = result['labels'].tolist()  # class labels
                scores = result['scores'].tolist()  # confidence per box

                # Keep only bird labels and boxes (label 16 in COCO)
                only_bird_boxes = np.array(
                    [boxes[i] for i in range(len(boxes)) if labels[i] == 16])
                only_birds_scores = np.array(
                    [scores[i] for i in range(len(boxes)) if labels[i] == 16])

                # if low confidence -> hard image
                if only_bird_boxes.size == 0 or only_birds_scores.max() < 0.85:
                    shutil.copy(
                        args.data + '/test_images/mistery_category/' + f,
                        args.data + '/test_images/hard_test_images')
                else:
                    try:
                        i = np.argmax(only_birds_scores)
                        box = only_bird_boxes[i]

                        a, b, c, d = int(box[0]), int(box[1]), int(
                            box[2]), int(box[3])

                        # Crop image on bird
                        cropped = data[e, :, b:d, a:c]

                        shutil.copy(
                            args.data + '/test_images/mistery_category/' + f,
                            args.data + '/test_images/easy_test_images')
                    except ValueError:
                        # Bounding box outside image (very rare)
                        pass
Example #13
 def __init__(self, num_classes=2, hidden_size=256):
     super().__init__()
     # load an instance segmentation model pre-trained on COCO
     self.model_ft = maskrcnn_resnet50_fpn(pretrained=True)
     # get number of input features for the classifier
     in_features = self.model_ft.roi_heads.box_predictor.cls_score.in_features
     # replace the pre-trained head with a new one
     self.model_ft.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
     # now get the number of input features for the mask classifier
     in_features_mask = self.model_ft.roi_heads.mask_predictor.conv5_mask.in_channels
     # and replace the mask predictor with a new one
     self.model_ft.roi_heads.mask_predictor = MaskRCNNPredictor(
         in_features_mask, hidden_size, num_classes
     )
Example #14
    def connect(self, model_name='model'):
        # the argument is overridden here, so the scripted (quantized)
        # model branch below is always taken
        model_name = 'model_fbgemm'
        cached_file = self.get_checkpoint(model_urls[model_name])
        if model_name == 'model':
            model = maskrcnn_resnet50_fpn(pretrained=False,
                                          pretrained_backbone=False)
            model.load_state_dict(torch.load(cached_file))
        else:  # scripted model loading...
            model = torch.jit.load(cached_file)
        model.transform.max_size = 800
        model.transform.min_size = (640, )
        model.eval()

        return model
Example #15
    def __init__(self, calibration_file):
        """
		calibration_file: [str] path to calibration.txt file
		"""
        self.maskrnn = maskrcnn_resnet50_fpn(pretrained=True)
        self.toTensor = transforms.ToTensor()

        _ = self.maskrnn.eval()

        with open(calibration_file, 'r') as f:
            calib = f.readlines()
        fx = float(calib[3].split()[0])
        fy = float(calib[3].split()[1])
        cx = float(calib[3].split()[2])
        cy = float(calib[3].split()[3])
        self.intrinsics = [fx, fy, cx, cy]
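
The parser above assumes a calibration.txt whose fourth line holds the pinhole intrinsics as four whitespace-separated numbers (fx fy cx cy); a hypothetical file, written here purely for illustration:

# hypothetical calibration.txt: the first three lines are skipped,
# line four carries "fx fy cx cy"
with open('calibration.txt', 'w') as f:
    f.write('ignored\nignored\nignored\n721.5 721.5 609.6 172.9\n')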
Example #16
    def __init__(self,
                 n_channels=3,
                 n_classes=21,
                 softmax_out=False,
                 resnet_type=101,
                 pretrained=False):
        super(MaskRCNN, self).__init__()

        self.resnet_type = resnet_type
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.pretrained = pretrained

        # Input conv is applied to convert the input to 3 ch depth
        self.inconv = None
        if n_channels != 3:
            self.inconv = FwdConv(n_channels, 3, kernel_size=1, padding=0)
        # Pre-trained model needs to be an identical network
        if pretrained:
            self.body = maskrcnn_resnet50_fpn(pretrained=pretrained,
                                              num_classes=91,
                                              min_size=512)
            # Reset output
            if n_classes != 91:
                self.body.roi_heads.box_predictor.cls_score = nn.Linear(
                    in_features=1024, out_features=n_classes, bias=True)
                self.body.roi_heads.box_predictor.bbox_pred = nn.Linear(
                    in_features=1024, out_features=4 * n_classes, bias=True)

                self.body.roi_heads.mask_predictor.mask_fcn_logits = nn.Conv2d(
                    256, n_classes, kernel_size=(1, 1), stride=(1, 1))

        else:
            self.body = fasterrcnn_resnet50_fpn(pretrained=pretrained,
                                                num_classes=n_classes,
                                                min_size=512)

        # Softmax alternative
        self.has_softmax = softmax_out
        if softmax_out:
            self.softmax = nn.Softmax2d()
        else:
            self.softmax = None
Example #17
 def test_maskrcnn_resnet50_fpn_frozen_layers(self):
     # we know how many initial layers and parameters of the maskrcnn should
     # be frozen for each trainable_backbone_layers parameter value,
     # i.e. all 53 params are frozen if trainable_backbone_layers=0
     # and the first 24 params are frozen if trainable_backbone_layers=2
     expected_frozen_params = {0: 53, 1: 43, 2: 24, 3: 11, 4: 1, 5: 0}
     for train_layers, exp_froz_params in expected_frozen_params.items():
         model = maskrcnn_resnet50_fpn(
             pretrained=True,
             progress=False,
             num_classes=91,
             pretrained_backbone=False,
             trainable_backbone_layers=train_layers)
         # boolean list that is true if the parameter at that index is frozen
         is_frozen = [
             not parameter.requires_grad
             for _, parameter in model.named_parameters()
         ]
         # check that expected initial number of layers in maskrcnn are frozen
         self.assertTrue(all(is_frozen[:exp_froz_params]))
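
The same bookkeeping can be reproduced outside the test harness, e.g. to verify a model before fine-tuning; a small sketch using the table above:

from torchvision.models.detection import maskrcnn_resnet50_fpn

model = maskrcnn_resnet50_fpn(pretrained=True, pretrained_backbone=False,
                              trainable_backbone_layers=3)
n_frozen = sum(1 for p in model.parameters() if not p.requires_grad)
print(n_frozen)  # expected 11, per expected_frozen_params above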
Example #18
def do_main(model,
            data=None,
            output_path=None,
            priorities=None,
            th_mask=0.5,
            th_scores=0.75,
            class_num=91):
    src_img = None
    if data is not None:
        src_img = data
        if output_path is None:
            output_path = os.path.join(os.getcwd(), "temp_output.png")
    else:
        raise RuntimeError("invalid data input")

    if model is None:
        model = detection.maskrcnn_resnet50_fpn(num_classes=91,
                                                pretrained=True)
        model.eval()
    # in_features = model.roi_heads.box_predictor.cls_score.in_features
    # model.roi_heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(in_features, class_num)

    masks = []
    for idx in range(len(src_img)):
        if idx == priorities[0]:
            masks.append(None)
            continue
        input_img = []
        # prepare ndarray with normalization and switch channel

        # print("index for model: ", idx)
        img = torch.from_numpy(np.expand_dims(src_img[idx] / 255.,
                                              2)).permute(2, 0, 1).float()
        input_img.append(img)
        prediction = model(input_img)
        mask = get_mask_gray(prediction, src_img[idx], th_mask, th_scores)
        masks.append(mask)
    return masks
Example #19
    def __init__(self, categories):
        super().__init__()
        logging.info(f'creating model with categories: {categories}')

        # todo(will.brennan) - find a nicer way of saving the categories in the state dict...
        self._categories = nn.ParameterDict(
            {i: nn.Parameter(torch.Tensor(0))
             for i in categories})
        num_categories = len(self._categories)

        self.model = detection.maskrcnn_resnet50_fpn(pretrained=True)

        logging.debug('changing num_categories for bbox predictor')

        in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(
            in_features, num_categories)

        logging.debug('changing num_categories for mask predictor')

        in_features_mask = self.model.roi_heads.mask_predictor.conv5_mask.in_channels
        self.model.roi_heads.mask_predictor = detection.mask_rcnn.MaskRCNNPredictor(
            in_features_mask, 256, num_categories)
Example #20
def main():
    anchor_generator = AnchorGenerator(sizes=tuple([(16, 24, 32, 48, 96)
                                                    for _ in range(5)]),
                                       aspect_ratios=tuple([
                                           (0.5, 1.0, 2.0) for _ in range(5)
                                       ]))
    rpnhead = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    model = maskrcnn_resnet50_fpn(num_classes=2,
                                  pretrained_backbone=True,
                                  max_size=MAX_SIZE,
                                  rpn_head=rpnhead,
                                  rpn_anchor_generator=anchor_generator,
                                  rpn_pre_nms_top_n_train=12000,
                                  rpn_pre_nms_top_n_test=6000,
                                  rpn_post_nms_top_n_train=2000,
                                  rpn_post_nms_top_n_test=300,
                                  rpn_fg_iou_thresh=0.5,
                                  rpn_bg_iou_thresh=0.3,
                                  rpn_positive_fraction=0.7,
                                  bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
                                  box_batch_size_per_image=32)
    model.load_state_dict(
        torch.load('saved_models' + os.sep + '0_deeplesion.pth',
                   map_location='cpu'))
    data_transforms = {
        'train':
        T.Compose([
            T.ToOriginalHU(INTENSITY_OFFSET),
            T.IntensityWindowing(WINDOWING),
            T.SpacingResize(NORM_SPACING, MAX_SIZE),
            T.ToTensor()
        ]),
        'val':
        T.Compose([
            T.ToOriginalHU(INTENSITY_OFFSET),
            T.IntensityWindowing(WINDOWING),
            T.SpacingResize(NORM_SPACING, MAX_SIZE),
            T.ToTensor()
        ]),
        'test':
        T.Compose([
            T.ToOriginalHU(INTENSITY_OFFSET),
            T.IntensityWindowing(WINDOWING),
            T.SpacingResize(NORM_SPACING, MAX_SIZE),
            T.ToTensor()
        ])
    }
    image_datasets = {
        x: DeepLesion(DIR_IN + os.sep + x, GT_FN_DICT[x], data_transforms[x])
        for x in ['train', 'val', 'test']
    }

    dataloaders = {
        x: DataLoader(image_datasets[x],
                      batch_size=3,
                      shuffle=True,
                      num_workers=0,
                      collate_fn=BatchCollator)
        for x in ['train', 'val', 'test']
    }
    for batch_id, (inputs, targets) in enumerate(dataloaders['test']):
        outputs = test_model(model, inputs)
        outputs = remove_overlapping(outputs, 0.655)
        for image, target, output in zip(inputs, targets, outputs):
            img_copy = image.squeeze().numpy()
            images = [img_copy] * 3
            images = [im.astype(float) for im in images]
            img_copy = cv2.merge(images)
            for bbox, pseudo_mask in zip(target["boxes"], target["masks"]):
                bbox = bbox.squeeze().numpy()
                bbox = np.int16(bbox)
                mask = pseudo_mask.squeeze().numpy()
                cv2.rectangle(img_copy, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                              (0, 255, 0), 1)
                msk_idx = np.where(mask == 1)
                img_copy[msk_idx[0], msk_idx[1], 0] = 255
            for predbox, predmask, score in zip(output['boxes'],
                                                output['masks'],
                                                output['scores']):
                if score < 0.655:
                    break
                predbox = predbox.numpy()
                predmask = predmask.squeeze().numpy()
                score = score.numpy()
                predmask = np.where(predmask > 0.5, 1, 0)
                cv2.rectangle(img_copy, (predbox[0], predbox[1]),
                              (predbox[2], predbox[3]), (0, 0, 255), 1)
                pmsk_idx = np.where(predmask == 1)
                img_copy[pmsk_idx[0], pmsk_idx[1], 2] = 255
                cv2.putText(img_copy, str(score),
                            (int(predbox[0]), int(predbox[1] - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1,
                            cv2.LINE_AA)
            # cv2.imshow(str(target['image_id']), img_copy)
            cv2.imwrite(
                'simple_test' + os.sep +
                str(target['image_id']).replace(os.sep, '_') + '_pred.jpg',
                img_copy * 255)
Example #21
 def __init__(self):
     self.model = maskrcnn_resnet50_fpn(pretrained=True)
     self.model.eval()
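
A sketch of running the wrapped model on one image, assuming the usual torchvision input convention (a list of 3xHxW float tensors in [0, 1]):

import torch
from torchvision.models.detection import maskrcnn_resnet50_fpn

model = maskrcnn_resnet50_fpn(pretrained=True).eval()
with torch.no_grad():
    out = model([torch.rand(3, 480, 640)])[0]
# out['boxes'], out['labels'], out['scores'], out['masks'] per detection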
Example #22
    def __init__(self, config=None, torchvision_init=True, lidar=False):
        '''
        Handles everything
        - training, validation testing
        - checkpoint loading and saving
        - logging | tensorboard summaries

        Accordingly everything is specified here
        - model
        - loss
        - optimizer
        - lr scheduling

        Arguments:
            torchvision_init: boolean
                - True:     load densenet state dict from torchvision
                - False:    load checkpoint; if no checkpoint just normal init
        '''

        self.logger = logging.getLogger('Agent')

        # model and config if lazy
        self.model = maskrcnn_resnet50_fpn(pretrained=True,
                                           progress=True,
                                           num_classes=91,  # must stay 91 when pretrained=True
                                           pretrained_backbone=True,
                                           trainable_backbone_layers=3)  # 0 trains none, 5 trains all

        '''
        # get number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
        
        # now get the number of input features for the mask classifier
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                            hidden_layer,
                                                            num_classes)
        '''
        self.lidar = lidar
        if self.lidar:
            # add one channel to first layer
            self.model.backbone.body.conv1 = nn.Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3),
                                                       bias=False)
        # replace final layer to 4 classes: background, vehicle, pedestrian, cyclist
        self.model.roi_heads.mask_predictor.mask_fcn_logits = nn.Conv2d(256, 4, kernel_size=(1, 1),
                                                                        stride=(1, 1))

        # in case config is empty it is created in model
        if config is None:
            self.config = utils.get_config()
        else:
            self.config = config

        # dataloader
        self.data_loader = WaymoDataset_Loader(self.config)

        # pixel-wise binary cross-entropy loss
        self.loss = torch.nn.BCEWithLogitsLoss(reduction='none').cuda()

        # optimizer
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.config.optimizer.learning_rate,
                                          betas=(self.config.optimizer.beta1, self.config.optimizer.beta2),
                                          eps=self.config.optimizer.eps,
                                          weight_decay=self.config.optimizer.weight_decay,
                                          amsgrad=self.config.optimizer.amsgrad)

        # learning rate decay scheduler
        if self.config.optimizer.lr_scheduler.want:
            self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                                step_size=self.config.optimizer.lr_scheduler.every_n_epochs,
                                                                gamma=self.config.optimizer.lr_scheduler.gamma)

        # initialize counters; updated in load_checkpoint
        self.current_epoch = 0
        self.current_train_iteration = 0
        self.current_val_iteration = 0
        self.best_val_iou = 0

        # if cuda is available export model to gpu
        self.cuda = torch.cuda.is_available()
        if self.cuda:
            self.device = torch.device('cuda')
            torch.cuda.manual_seed_all(self.config.agent.seed)
            self.logger.info('Operation will be on *****GPU-CUDA***** ')
        else:
            self.device = torch.device('cpu')
            torch.manual_seed(self.config.agent.seed)
            self.logger.info('Operation will be on *****CPU***** ')
        self.model = self.model.to(self.device)
        self.loss = self.loss.to(self.device)

        if not torchvision_init:
            self.load_checkpoint()

        # Tensorboard Writers
        Path(self.config.dir.current_run.summary).mkdir(exist_ok=True, parents=True)
        self.train_summary_writer = SummaryWriter(log_dir=self.config.dir.current_run.summary,
                                                  comment='FasterRCNNResNet50')
        self.val_summary_writer = SummaryWriter(log_dir=self.config.dir.current_run.summary,
                                                comment='FasterRCNNResNet50')
Example #23
plt.show()
# fig = plt.figure()
# for i in range(10):
#     fig.add_subplot(1, 10, i + 1)
#     plt.imshow(d[22551 + i * 42, 22551 + i * 42][0].permute((1, 2, 0)))
# plt.show()
# fig = plt.figure()
# for i in range(10):
#     fig.add_subplot(1, 10, i+1)
#     plt.imshow(d[1178494 + i * 42, 1178494 + i * 42][0].permute((1, 2, 0)))
# plt.show()

#%%
from torchvision.models.detection import maskrcnn_resnet50_fpn

m = maskrcnn_resnet50_fpn(pretrained=True).eval().cuda(2)

#%%
min_length = 423
prestine_video_start = 22551
neural_textures_start = 1178494

from torchvision.transforms import ToTensor
from tqdm import tqdm
import torch


def get_label_scores(dataset, start, end):
    label_scores = []
    i = 0
    for idx in tqdm(range(start, end)):
Example #24
    def evaluate(cls, env, model, r_idx, resnet, traj_data, args, lock,
                 successes, failures, results):
        # reset model
        model.reset()

        # setup scene
        reward_type = 'dense'
        cls.setup_scene(env, traj_data, r_idx, args, reward_type=reward_type)

        # extract language features
        feat = model.featurize([(traj_data, False)], load_mask=False)

        # goal instr
        goal_instr = traj_data['turk_annotations']['anns'][r_idx]['task_desc']

        maskrcnn = maskrcnn_resnet50_fpn(num_classes=119)
        maskrcnn.eval()
        maskrcnn.load_state_dict(torch.load('weight_maskrcnn.pt'))
        maskrcnn = maskrcnn.cuda()

        prev_image = None
        prev_action = None
        nav_actions = [
            'MoveAhead_25', 'RotateLeft_90', 'RotateRight_90', 'LookDown_15',
            'LookUp_15'
        ]

        prev_class = 0
        prev_center = torch.zeros(2)

        done, success = False, False
        fails = 0
        t = 0
        reward = 0
        while not done:
            # break if max_steps reached
            if t >= args.max_steps:
                break

            # extract visual features
            curr_image = Image.fromarray(np.uint8(env.last_event.frame))
            feat['frames'] = resnet.featurize([curr_image],
                                              batch=1).unsqueeze(0)

            # forward model
            m_out = model.step(feat)
            m_pred = model.extract_preds(m_out, [(traj_data, False)],
                                         feat,
                                         clean_special_tokens=False)
            m_pred = list(m_pred.values())[0]

            # action prediction
            action = m_pred['action_low']
            if (prev_image == curr_image and prev_action == action
                    and prev_action in nav_actions and action in nav_actions
                    and action == 'MoveAhead_25'):
                dist_action = m_out['out_action_low'][0][0].detach().cpu()
                idx_rotateR = model.vocab['action_low'].word2index(
                    'RotateRight_90')
                idx_rotateL = model.vocab['action_low'].word2index(
                    'RotateLeft_90')
                action = ('RotateLeft_90'
                          if dist_action[idx_rotateL] > dist_action[idx_rotateR]
                          else 'RotateRight_90')

            if action == cls.STOP_TOKEN:
                print("\tpredicted STOP")
                break

            # mask prediction
            mask = None
            if model.has_interaction(action):
                class_dist = m_pred['action_low_mask'][0]
                pred_class = np.argmax(class_dist)

                # mask generation
                with torch.no_grad():
                    out = maskrcnn([to_tensor(curr_image).cuda()])[0]
                    for k in out:
                        out[k] = out[k].detach().cpu()

                if sum(out['labels'] == pred_class) == 0:
                    mask = np.zeros(
                        (constants.SCREEN_WIDTH, constants.SCREEN_HEIGHT))
                else:
                    masks = out['masks'][out['labels'] ==
                                         pred_class].detach().cpu()
                    scores = out['scores'][out['labels'] ==
                                           pred_class].detach().cpu()

                    # Instance selection based on the minimum distance between the prev. and cur. instance of a same class.
                    if prev_class != pred_class:
                        scores, indices = scores.sort(descending=True)
                        masks = masks[indices]
                        prev_class = pred_class
                        prev_center = masks[0].squeeze(
                            dim=0).nonzero().double().mean(dim=0)
                    else:
                        cur_centers = torch.stack([
                            m.nonzero().double().mean(dim=0)
                            for m in masks.squeeze(dim=1)
                        ])
                        distances = ((cur_centers - prev_center)**2).sum(dim=1)
                        distances, indices = distances.sort()
                        masks = masks[indices]
                        prev_center = cur_centers[0]

                    mask = np.squeeze(masks[0].numpy(), axis=0)

            # print action
            if args.debug:
                print(action)

            # use predicted action and mask (if available) to interact with the env
            t_success, _, _, err, _ = env.va_interact(
                action,
                interact_mask=mask,
                smooth_nav=args.smooth_nav,
                debug=args.debug)

            if not t_success:
                fails += 1
                if fails >= args.max_fails:
                    print("Interact API failed %d times" % fails +
                          "; latest error '%s'" % err)
                    break

            # next time-step
            t_reward, t_done = env.get_transition_reward()
            reward += t_reward
            t += 1

            prev_image = curr_image
            prev_action = action

        # check if goal was satisfied
        goal_satisfied = env.get_goal_satisfied()
        if goal_satisfied:
            print("Goal Reached")
            success = True

        # goal_conditions
        pcs = env.get_goal_conditions_met()
        goal_condition_success_rate = pcs[0] / float(pcs[1])

        # SPL
        path_len_weight = len(traj_data['plan']['low_actions'])
        s_spl = (1 if goal_satisfied else 0) * min(
            1., path_len_weight / (float(t) + 1e-4))
        pc_spl = goal_condition_success_rate * min(
            1., path_len_weight / (float(t) + 1e-4))

        # path length weighted SPL
        plw_s_spl = s_spl * path_len_weight
        plw_pc_spl = pc_spl * path_len_weight

        # log success/fails
        lock.acquire()
        log_entry = {
            'trial': traj_data['task_id'],
            'type': traj_data['task_type'],
            'repeat_idx': int(r_idx),
            'goal_instr': goal_instr,
            'completed_goal_conditions': int(pcs[0]),
            'total_goal_conditions': int(pcs[1]),
            'goal_condition_success': float(goal_condition_success_rate),
            'success_spl': float(s_spl),
            'path_len_weighted_success_spl': float(plw_s_spl),
            'goal_condition_spl': float(pc_spl),
            'path_len_weighted_goal_condition_spl': float(plw_pc_spl),
            'path_len_weight': int(path_len_weight),
            'reward': float(reward)
        }
        if success:
            successes.append(log_entry)
        else:
            failures.append(log_entry)

        # overall results
        results['all'] = cls.get_metrics(successes, failures)

        print("-------------")
        print("SR: %d/%d = %.5f" % (results['all']['success']['num_successes'],
                                    results['all']['success']['num_evals'],
                                    results['all']['success']['success_rate']))
        print("PLW SR: %.5f" %
              (results['all']['path_length_weighted_success_rate']))
        print(
            "GC: %d/%d = %.5f" %
            (results['all']['goal_condition_success']
             ['completed_goal_conditions'],
             results['all']['goal_condition_success']['total_goal_conditions'],
             results['all']['goal_condition_success']
             ['goal_condition_success_rate']))
        print(
            "PLW GC: %.5f" %
            (results['all']['path_length_weighted_goal_condition_success_rate']
             ))
        print("-------------")

        # task type specific results
        task_types = [
            'pick_and_place_simple', 'pick_clean_then_place_in_recep',
            'pick_heat_then_place_in_recep', 'pick_cool_then_place_in_recep',
            'pick_two_obj_and_place', 'look_at_obj_in_light',
            'pick_and_place_with_movable_recep'
        ]
        for task_type in task_types:
            task_successes = [
                s for s in (list(successes)) if s['type'] == task_type
            ]
            task_failures = [
                f for f in (list(failures)) if f['type'] == task_type
            ]
            if len(task_successes) > 0 or len(task_failures) > 0:
                results[task_type] = cls.get_metrics(task_successes,
                                                     task_failures)
            else:
                results[task_type] = {}

        lock.release()
Example #25
def get_model(pre_trained, pretrained_backbone, numclasses):
    anchor_generator = AnchorGenerator(sizes=tuple([(16, 24, 32, 48, 96)
                                                    for _ in range(5)]),
                                       aspect_ratios=tuple([
                                           (0.5, 1.0, 2.0) for _ in range(5)
                                       ]))
    rpnhead = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    if pre_trained:
        # dl_model = maskrcnn_resnet50_fpn(pretrained=pre_trained, max_size=MAX_SIZE, rpn_head=rpnhead
        #                                  , rpn_anchor_generator=anchor_generator, rpn_pre_nms_top_n_train=12000
        #                                  , rpn_pre_nms_top_n_test=6000, rpn_post_nms_top_n_train=2000
        #                                  , rpn_post_nms_top_n_test=300, rpn_fg_iou_thresh=0.5, rpn_bg_iou_thresh=0.3
        #                                  , rpn_positive_fraction=0.7, bbox_reg_weights=(1.0, 1.0, 1.0, 1.0)
        #                                  , box_batch_size_per_image=32)
        dl_model = maskrcnn_resnet50_fpn(pretrained=pre_trained,
                                         max_size=MAX_SIZE,
                                         rpn_pre_nms_top_n_train=12000,
                                         rpn_pre_nms_top_n_test=6000,
                                         rpn_post_nms_top_n_train=2000,
                                         rpn_post_nms_top_n_test=300,
                                         rpn_fg_iou_thresh=0.5,
                                         rpn_bg_iou_thresh=0.3,
                                         rpn_positive_fraction=0.7,
                                         bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
                                         box_batch_size_per_image=32)
        # dl_model = maskrcnn_resnet50_fpn(pretrained=pre_trained, max_size=MAX_SIZE)

        # del dl_model.state_dict()["roi_heads.box_predictor.bbox_pred.weight"]
        # del dl_model.state_dict()["roi_heads.box_predictor.cls_score.weight"]
        # del dl_model.state_dict()["roi_heads.box_predictor.cls_score.bias"]
        # del dl_model.state_dict()["roi_heads.box_predictor.bbox_pred.bias"]

        # Remove incompatible parameters
        # newdict = removekey(dl_model.state_dict(), ['roi_heads.box_predictor.cls_score.bias'
        #                                             , 'roi_heads.box_predictor.cls_score.weight'
        #                                             , 'roi_heads.box_predictor.bbox_pred.bias'
        #                                             , 'roi_heads.box_predictor.bbox_pred.weight'])
        # dl_model.state_dict = newdict
        # dl_model.load_state_dict(newdict)
        for param in dl_model.parameters():
            param.requires_grad = False

        # replace the classifier with a new one, that has
        # num_classes which is user-defined
        num_classes = numclasses  # 1 class (lesion) + background

        # get number of input features for the classifier
        in_features = dl_model.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        dl_model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)

        # now get the number of input features for the mask classifier
        in_features_mask = dl_model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        dl_model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)
    else:
        dl_model = maskrcnn_resnet50_fpn(
            num_classes=numclasses,
            pretrained_backbone=pretrained_backbone,
            max_size=MAX_SIZE,
            rpn_head=rpnhead,
            rpn_anchor_generator=anchor_generator,
            rpn_pre_nms_top_n_train=12000,
            rpn_pre_nms_top_n_test=6000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=300,
            rpn_fg_iou_thresh=0.5,
            rpn_bg_iou_thresh=0.3,
            rpn_positive_fraction=0.7,
            bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
            box_batch_size_per_image=32)
    return dl_model
Example #26
 def __init__(self, backbone='fasterrcnn', use_pretrained=True, num_classes=91):
     super(Detection, self).__init__()
     if backbone == 'fasterrcnn':
         self.detection_backbone = fasterrcnn_resnet50_fpn(pretrained=use_pretrained, num_classes=num_classes).cuda()
     else:
         self.detection_backbone = maskrcnn_resnet50_fpn(pretrained=use_pretrained, num_classes=num_classes).cuda()
Example #27
"""
Uses the PyTorch Mask R-CNN ResNet-50 model to identify the child, then
applies binary image segmentation with the predicted mask, representing
each child pixel as '1' and each background pixel as '0'. Finally, the
mask area and the percentage of body pixels relative to the total image
pixels are calculated.
"""
import time

from imgseg.predict import predict

import numpy as np

from torchvision.models.detection import maskrcnn_resnet50_fpn

model = maskrcnn_resnet50_fpn(pretrained=True)


def predict_by_resize(image, factor=10):
    """Applied MaskRCNN on downscaled image, by default the factor is 10x."""
    print("Resizing image by", factor, "x")
    newsize = (int(image.size[0] / factor), int(image.size[1] / factor))
    print("Resized Dimension", newsize)
    start_time = time.time()
    out = predict(image.resize(newsize), model)
    print("Time: %s s" % (time.time() - start_time))

    # Binary Image Segmentation
    threshold = 0.5
    masks = out['masks'][0][0]
    masks = masks > threshold
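
The snippet stops before the area computation described in the header; a minimal sketch of that final step, assuming masks is the boolean tensor produced above:

    # mask area and share of body pixels (continuation sketch)
    body_pixels = masks.sum().item()
    body_percentage = 100.0 * body_pixels / masks.numel()
    print("Mask area: %d px (%.2f%% of image)" % (body_pixels, body_percentage))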
Example #28
#
#     We will here describe the output of a Mask-RCNN model. The models in
#     :ref:`object_det_inst_seg_pers_keypoint_det` all have a similar output
#     format, but some of them may have extra info like keypoints for
#     :func:`~torchvision.models.detection.keypointrcnn_resnet50_fpn`, and some
#     of them may not have masks, like
#     :func:`~torchvision.models.detection.fasterrcnn_resnet50_fpn`.

from torchvision.models.detection import maskrcnn_resnet50_fpn, MaskRCNN_ResNet50_FPN_Weights

weights = MaskRCNN_ResNet50_FPN_Weights.DEFAULT
transforms = weights.transforms()

batch = transforms(batch_int)

model = maskrcnn_resnet50_fpn(weights=weights, progress=False)
model = model.eval()

output = model(batch)
print(output)
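
#####################################
# As an aside, the weights enum carries category metadata, so the integer
# labels can be mapped back to COCO class names; a short sketch assuming the
# ``output`` computed above:

categories = weights.meta["categories"]
names = [categories[label] for label in output[0]["labels"].tolist()]
print(list(zip(names, output[0]["scores"].tolist()))[:5])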

#####################################
# Let's break this down. For each image in the batch, the model outputs some
# detections (or instances). The number of detections varies for each input
# image. Each instance is described by its bounding box, its label, its score
# and its mask.
#
# The way the output is organized is as follows: the output is a list of length
# ``batch_size``. Each entry in the list corresponds to an input image, and it
# is a dict with keys 'boxes', 'labels', 'scores', and 'masks'. Each value
# associated to those keys has ``num_instances`` elements in it.  In our case
Example #29
    for img, mask in zip(batch_int, all_classes_masks)
]
show(dogs_with_masks)

#####################################
# Instance segmentation models
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Instance segmentation models have a significantly different output from the
# semantic segmentation models. We will see here how to plot the masks for such
# models. Let's start by analyzing the output of a Mask-RCNN model. Note that
# these models don't require the images to be normalized, so we don't need to
# use the normalized batch.

from torchvision.models.detection import maskrcnn_resnet50_fpn
model = maskrcnn_resnet50_fpn(pretrained=True, progress=False)
model = model.eval()

output = model(batch)
print(output)

#####################################
# Let's break this down. For each image in the batch, the model outputs some
# detections (or instances). The number of detections varies for each input
# image. Each instance is described by its bounding box, its label, its score
# and its mask.
#
# The way the output is organized is as follows: the output is a list of length
# ``batch_size``. Each entry in the list corresponds to an input image, and it
# is a dict with keys 'boxes', 'labels', 'scores', and 'masks'. Each value
# associated to those keys has ``num_instances`` elements in it.  In our case
Example #30
print(f'Memory after server started: {mem()}')
model_name = 'model_fbgemm_bool'
cached_file = load_model(model_urls[model_name])
print(f'Memory after weights loaded: {mem()}')
torch.set_grad_enabled(False)
#print('Supported engines: ', torch.backends.quantized.supported_engines)
torch._C._jit_set_profiling_executor(False)
torch._C._jit_set_profiling_mode(False)
torch.jit.optimized_execution(False)
#torch.backends.quantized.engine = 'qnnpack'

if model_name == 'model':
    from torchvision.models.detection import maskrcnn_resnet50_fpn
    checkpoint = torch.load(cached_file)
    if 'model' in checkpoint.keys(): checkpoint = checkpoint['model']
    model = maskrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)
    sys.stderr.write('Torchvision model loading...\n')
    model.load_state_dict(checkpoint)
else:  # scripted model loading...
    sys.stderr.write('Scripted model loading...\n')
    model = torch.jit.load(cached_file)
print(f'Memory after model loaded: {mem()}')
model.transform.max_size = 800
model.transform.min_size = (640, )
model.eval()

# model warm-up
'''
t = time.time()
with torch.jit.optimized_execution(True), torch.no_grad():
    for i in range(1):