def maskrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=91,
                          pretrained_backbone=True, config=None, **kwargs):
    if pretrained:
        # no need to download the backbone if pretrained is set
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone, config)
    sizes = (32, 64, 128, 256, 512)
    # roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
    #                                                 output_size=7,
    #                                                 sampling_ratio=2)
    anchor_generator = AnchorGenerator(sizes=sizes,
                                       aspect_ratios=(0.5, 1.0, 2.0))
    model = MaskRCNN(backbone, num_classes,
                     rpn_anchor_generator=anchor_generator, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['maskrcnn_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict)
    return model
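# A minimal smoke test for the factory above (an illustrative sketch; it
# assumes the custom resnet_fpn_backbone(name, pretrained, config) helper used
# there is in scope). pretrained_backbone=False avoids any weight download.
model = maskrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)
model.eval()
with torch.no_grad():
    predictions = model([torch.rand(3, 300, 400)])
print(predictions[0]['boxes'].shape)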
def get_model(num_classes):
    # load an instance segmentation model pre-trained on COCO
    # m = timm.create_model('cspresnet50', pretrained=True, num_classes=0, global_pool='')
    # backbone = TimmToVision(m)
    m = timm.create_model('cspresnet50', features_only=True, pretrained=True)
    backbone = TimmToVisionFPN(m)
    # backbone = resnet50_fpn()
    model = MaskRCNN(backbone, num_classes)
    '''
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    # ["0"] rather than [0]
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=["0"],
                                                    output_size=7,
                                                    sampling_ratio=2)
    model = MaskRCNN(backbone,
                     num_classes=num_classes,
                     rpn_anchor_generator=anchor_generator,
                     box_roi_pool=roi_pooler)
    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    '''
    return model
def get_model(
    backbone_name="resnet50",
    detector_name="fasterrcnn",
    trainable_layers=3,
    model_ckpt=None,
):
    """Constructs a fasterrcnn or maskrcnn detector with the given backbone."""
    num_classes = 2  # 1 class (wheat) + background
    if model_ckpt:
        # backbone = resnet_fpn_backbone('resnet101', True)
        backbone = timm_resnet_fpn_backbone(backbone_name, False,
                                            trainable_layers)
    else:
        backbone = timm_resnet_fpn_backbone(backbone_name, True,
                                            trainable_layers)
    if detector_name == "fasterrcnn":
        model = FasterRCNN(backbone, num_classes)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
    elif detector_name == "maskrcnn":
        model = MaskRCNN(backbone, num_classes)
        in_features_mask = (
            model.roi_heads.mask_predictor.conv5_mask.in_channels)
        hidden_layer = 256
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)
    else:
        raise Exception(f"{detector_name} is not supported")
    if model_ckpt is not None:
        model.load_state_dict(torch.load(model_ckpt)["model_state_dict"])
        print("loaded ckpt")
    return model
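# Hedged usage sketch for get_model above: build the maskrcnn variant and run
# one dummy forward pass (assumes the custom timm_resnet_fpn_backbone helper
# is defined in this scope).
model = get_model(backbone_name="resnet50", detector_name="maskrcnn")
model.eval()
with torch.no_grad():
    out = model([torch.rand(3, 512, 512)])
print(out[0].keys())  # dict_keys(['boxes', 'labels', 'scores', 'masks'])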
def get_mask_rcnn(num_classes, max_instances, backbone="resnet101"):
    # load an instance segmentation model pre-trained on COCO
    if backbone == "resnet50":
        print("**************Adding Resnet 50 backbone***************")
        model = torchvision.models.detection.maskrcnn_resnet50_fpn(
            pretrained=True, box_detections_per_img=max_instances)
    else:
        bb = resnet_fpn_backbone(backbone, False)
        model = MaskRCNN(bb, num_classes=91,
                         box_detections_per_img=max_instances)
    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model
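# Example call for get_mask_rcnn above (illustrative only): the resnet101
# branch builds an uninitialized FPN backbone, so nothing is downloaded.
model = get_mask_rcnn(num_classes=3, max_instances=50, backbone="resnet101")
model.eval()
with torch.no_grad():
    out = model([torch.rand(3, 400, 400)])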
def get_model_instance_segmentation_v2(num_classes,
                                       architecture: str = 'resnet18'):
    """
    By modifying this function we can use a large variety of pretrained
    backbones, but nothing beyond the backbone will be pretrained. A better
    solution seems to be to load a pre-trained detection model and then
    replace its box and mask predictors.
    """
    # Pretrained for num_classes=1000, but we do not use the final layers anyway.
    model = pretrainedmodels.__dict__[architecture](num_classes=1000,
                                                    pretrained='imagenet')
    my_backbone = MyBackbone(model.features, 512)
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    # featmap_names must be strings (['0']) on recent torchvision versions
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                         output_size=14,
                                                         sampling_ratio=2)
    model = MaskRCNN(my_backbone,
                     num_classes=num_classes,
                     rpn_anchor_generator=anchor_generator,
                     box_roi_pool=roi_pooler,
                     mask_roi_pool=mask_roi_pooler)
    return model
def get_instance_segmentation_model(num_classes,
                                    model_name='maskrcnn_resnet50_fpn'):
    # load a pre-trained model for classification
    # and return only the features
    if model_name.startswith('efficientnet'):
        backbone = EfficientNet.from_pretrained(model_name,
                                                num_classes=num_classes,
                                                include_top=False)
        # number of output channels
        backbone.out_channels = int(
            round_filters(1280, backbone._global_params))
        model = MaskRCNN(backbone, num_classes)
    else:
        # load an instance segmentation model pre-trained on COCO
        model = torchvision.models.detection.__dict__[model_name](
            pretrained=True)
        # get the number of input features for the classifier
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features,
                                                          num_classes)
    if model_name.startswith('mask') or model_name.startswith('efficientnet'):
        # now get the number of input features for the mask classifier
        in_features_mask = \
            model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)
    return model
class MaskNet():
    def __init__(self):
        backbone = torchvision.models.mobilenet_v2(pretrained=True).features
        backbone.out_channels = 1280
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256),),
                                           aspect_ratios=((0.5, 1.0, 2.0),))
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=7,
                                                        sampling_ratio=2)
        mask_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                         output_size=14,
                                                         sampling_ratio=2)
        self.net = MaskRCNN(backbone, 5,
                            rpn_anchor_generator=anchor_generator,
                            box_roi_pool=roi_pooler,
                            mask_roi_pool=mask_pooler)
        # freeze the backbone and train only the detection heads
        for p in self.net.backbone.parameters():
            p.requires_grad = False
        params = [p for p in self.net.parameters() if p.requires_grad]
        self.optim = torch.optim.SGD(params, lr=0.001, momentum=0.9,
                                     weight_decay=0.0005)
        self.lr_scheduler = torch.optim.lr_scheduler.StepLR(self.optim,
                                                            step_size=3,
                                                            gamma=0.1)

    def cuda(self):
        self.net = self.net.cuda()

    def train(self, images, targets):
        loss_dict = self.net(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        print(losses.item())
        self.optim.zero_grad()
        losses.backward()
        self.optim.step()

    def save(self):
        torch.save(self.net, os.getcwd() + '/mymodel.pth')

    def load(self):
        self.net = torch.load(os.getcwd() + '/mymodel.pth')
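# A rough one-step training sketch for MaskNet above; the box, label and mask
# below are dummy placeholders, not real annotations.
net = MaskNet()
images = [torch.rand(3, 256, 256)]
masks = torch.zeros((1, 256, 256), dtype=torch.uint8)
masks[0, 20:120, 20:120] = 1  # dummy instance region matching the box
targets = [{'boxes': torch.tensor([[20., 20., 120., 120.]]),
            'labels': torch.tensor([1]),
            'masks': masks}]
net.train(images, targets)
net.lr_scheduler.step()
net.save()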
def get_backbone(num_classes):
    # get the backbone
    backbone = torchvision.models.resnet50(pretrained=True)
    # remove the avgpool and fc layers
    new_backbone = torch.nn.Sequential(*list(backbone.children())[:-2])
    new_backbone.out_channels = 2048
    model = MaskRCNN(new_backbone, num_classes)
    return model
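# Caveat for get_backbone above (hedged): MaskRCNN's default anchor generator
# assumes five FPN feature maps, while this backbone returns a single Tensor.
# On recent torchvision versions an explicit one-level generator is safer,
# e.g. this variant sketch:
def get_backbone_single_level(num_classes):
    backbone = torch.nn.Sequential(
        *list(torchvision.models.resnet50(pretrained=True).children())[:-2])
    backbone.out_channels = 2048
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    return MaskRCNN(backbone, num_classes,
                    rpn_anchor_generator=anchor_generator)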
def get_model_instance_segmentation4(num_classes):
    # load an instance segmentation model pre-trained on COCO
    # model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)
    # model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)
    backbone = torchvision.models.squeezenet1_1(pretrained=False).features
    # backbone.out_channels = 1
    backbone.out_channels = 512
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                         output_size=14,
                                                         sampling_ratio=2)
    model = MaskRCNN(backbone,
                     num_classes=num_classes,
                     rpn_anchor_generator=anchor_generator,
                     box_roi_pool=roi_pooler,
                     mask_roi_pool=mask_roi_pooler)
    # parameter-count notes from earlier runs:
    # print("squeezenet1_0 call2 - out_channels :1280, 18,052,473 / 72M")
    # print("squeezenet1_0 call2 - out_channels :516, 4,862,777 / 19.5M")
    # print("squeezenet1_1 call2 - out_channels :516, 4,849,849 4,862,777 / 19.5M")
    print("squeezenet1_1 call2 - out_channels :256, 2,757,369 / 11M (15,000,000 / 15,000,000)")
    print("squeezenet1_1 call2 - out_channels :512, 4,808,441 / 19.2M (15,000,000)")
    print("squeezenet1_1 call2 - out_channels :512, 33,192,463 33,161,683 / 172M (15,000,000)")
    # get the number of input features for the classifier
    # in_features = backbone
    # replace the pre-trained head with a new one
    # model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    # hidden_layer = 1
    # and replace the mask predictor with a new one
    # model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
    #                                                    hidden_layer,
    #                                                    num_classes)
    return model
def get_model(num_classes):
    # load an instance segmentation model pre-trained on COCO
    # m = timm.create_model('cspresnet50', pretrained=True, num_classes=0, global_pool='')
    # backbone = TimmToVision(m)
    # m = timm.create_model('cspresnet50', features_only=True, pretrained=True)
    m = timm.create_model('cspresnet50', features_only=True, pretrained=True,
                          pretrained_strict=False)
    backbone = TimmToVisionFPN(m)
    # m = timm.create_model('cspresnet50', pretrained=True, num_classes=0, global_pool='')
    # backbone = TimmToVision(m, 1024)
    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                       aspect_ratios=aspect_ratios)
    # ["0"] rather than [0]
    out_channels = backbone.out_channels
    num_anchors = anchor_generator.num_anchors_per_location()[0]
    # CascadeRPN (used only by the commented-out FasterRCNN variants below)
    rpn_head = CascadeRPNHead(out_channels,
                              feat_channels=out_channels,
                              num_anchors=num_anchors,
                              stage=2)
    # model = FasterRCNN(backbone, num_classes=num_classes, rpn_head=rpn_head)
    model = MaskRCNN(backbone, num_classes=num_classes)
    # IA branch
    # model = FasterRCNNIA(backbone, num_classes=num_classes, rpn_head=rpn_head)
    # Box head branch
    model.roi_heads.box_head = RoIFeatureExtractor(num_inputs=256,
                                                   resolution=7)
    model.roi_heads.box_predictor = RoIBoxPredictor(num_classes)
    return model
def get_model_instance_segmentation_mn2(num_classes):
    # load a pre-trained model for classification and return
    # only the features
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    # MaskRCNN needs to know the number of
    # output channels in a backbone. For mobilenet_v2, it's 1280
    # so we need to add it here
    backbone.out_channels = 1280
    # let's make the RPN generate 5 x 3 anchors per spatial
    # location, with 5 different sizes and 3 different aspect
    # ratios. We have a Tuple[Tuple[int]] because each feature
    # map could potentially have different sizes and
    # aspect ratios
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    # let's define what are the feature maps that we will
    # use to perform the region of interest cropping, as well as
    # the size of the crop after rescaling.
    # if your backbone returns a Tensor, featmap_names is expected to
    # be ['0']. More generally, the backbone should return an
    # OrderedDict[Tensor], and in featmap_names you can choose which
    # feature maps to use.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                         output_size=14,
                                                         sampling_ratio=2)
    # put the pieces together inside a MaskRCNN model
    model = MaskRCNN(backbone,
                     num_classes=num_classes,
                     rpn_anchor_generator=anchor_generator,
                     box_roi_pool=roi_pooler,
                     mask_roi_pool=mask_roi_pooler)
    return model
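# Quick smoke test for the tutorial-style constructor above (illustrative
# sketch; downloads mobilenet_v2 weights on first run).
model = get_model_instance_segmentation_mn2(num_classes=2)
model.eval()
with torch.no_grad():
    predictions = model([torch.rand(3, 300, 400)])
print(predictions[0]['masks'].shape)  # (N, 1, 300, 400)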
class MaskRCNN(Model, metaclass=ModelType):
    """
    MaskRCNN model for Document Layout Analysis with different backbones:
    - ResNet
    - EfficientNet
    - FishNet
    """

    def __init__(self, backbone=None, architecture=None, detector=None,
                 num_classes=None, device='cpu', *args, **kwargs):
        assert backbone is not None, ValueError('backbone can not be None')
        assert architecture is not None, ValueError(
            'architecture can not be None')
        assert detector is not None, ValueError('detector can not be None')
        assert num_classes is not None, ValueError(
            'num_classes can not be None')
        assert device is not None, ValueError('device can not be None.')
        self.device = device
        super().__init__()
        if backbone == 'efficientnet':
            backbone = EfficientNet.from_pretrained(architecture)
            backbone.out_channels = 1280
        elif backbone == 'fishnet':
            if architecture == 'fishnet99':
                backbone = fishnet99()
            elif architecture == 'fishnet150':
                backbone = fishnet150()
            else:
                backbone = fishnet201()
            backbone.out_channels = 1000
        elif backbone == 'resnet':
            backbone = resnet_fpn_backbone(architecture, pretrained=True)
        # torchvision's MaskRCNN is assumed imported under an alias
        # (TorchvisionMaskRCNN) so it is not shadowed by this class
        self.model = TorchvisionMaskRCNN(backbone, num_classes=num_classes)
        self.model.to(device)

    def load(self, path=None, *args, **kwargs):
        assert path is not None, ValueError('path can not be None.')
        if self.device == 'cuda':
            self.model.load_state_dict(torch.load(path))
        else:
            self.model.load_state_dict(
                torch.load(path, map_location=lambda storage, loc: storage))
        if 'nn_parallel_to_cpu' in kwargs:
            state_dict = torch.load(
                path, map_location=lambda storage, loc: storage)
            state_dict_without_nnparallel = OrderedDict()
            for key, item in state_dict.items():
                # strip the 'module.' prefix added by nn.DataParallel
                state_dict_without_nnparallel[key[7:]] = item
            self.model.load_state_dict(state_dict_without_nnparallel)

    def _analyze(self, img=None, *args, **kwargs):
        assert img is not None, ValueError('img can not be None')
        img = F.to_tensor(img)
        output = self.model([img])[0]
        for key, item in output.items():
            if self.device == 'cuda':
                item = item.cpu()
            output[key] = item.detach().numpy()
        boxes = [[x1, y1, x2 - x1, y2 - y1]
                 for x1, y1, x2, y2 in output['boxes']]
        scores = output['scores']
        rects = nms.boxes(rects=boxes, scores=scores, nms_threshold=0.25)
        output['boxes'] = [
            output['boxes'][id] for id in rects
            if output['scores'][id] > 0.5
        ]
        output['labels'] = [
            output['labels'][id] for id in rects
            if output['scores'][id] > 0.5
        ]
        output['scores'] = [
            output['scores'][id] for id in rects
            if output['scores'][id] > 0.5
        ]
        return output

    def batch_analyze(self, images=None, *args, **kwargs):
        """
        Analyze a batch of images.

        :param images:
        :return:
        """
        assert images is not None, ValueError('images can not be None')
        with torch.no_grad():
            if self.device == 'cuda':
                torch.cuda.synchronize()
            _images = []
            for image in images:
                _images.append(F.to_tensor(image).to(self.device))
                del image
            l_images = len(images)
            del images
            output = self.model(_images)
            _images = []
            del _images
            if 'use_listmemmap' in kwargs:
                f_out = ListMemMap()
            else:
                f_out = List()
            for id in range(len(output)):
                for key, item in output[id].items():
                    if self.device == 'cuda':
                        item = item.cpu()
                    output[id][key] = item.detach().numpy()
                    del item
                boxes = [[x1, y1, x2 - x1, y2 - y1]
                         for x1, y1, x2, y2 in output[id]['boxes']]
                scores = output[id]['scores']
                rects = nms.boxes(rects=boxes, scores=scores,
                                  nms_threshold=0.25)
                tmp = list()
                tmp.append([
                    output[id]['boxes'][idx] for idx in rects
                    if output[id]['scores'][idx] > 0.5
                ])
                tmp.append([
                    output[id]['labels'][idx] for idx in rects
                    if output[id]['scores'][idx] > 0.5
                ])
                tmp.append([
                    output[id]['scores'][idx] for idx in rects
                    if output[id]['scores'][idx] > 0.5
                ])
                f_out.append(tmp)
                del tmp
            del output, l_images, boxes, scores, rects
            if self.device == 'cuda':
                torch.cuda.empty_cache()
            gc.collect()
        return f_out

    def analyze(self, img=None, *args, **kwargs):
        """
        :param img: PIL.Image
        :return:
        """
        assert img is not None, ValueError('img can not be None')
        with torch.no_grad():
            img = F.to_tensor(img)
            output = self.model([img])[0]
            for key, item in output.items():
                if self.device == 'cuda':
                    item = item.cpu()
                output[key] = item.detach().numpy()
            boxes = [[x1, y1, x2 - x1, y2 - y1]
                     for x1, y1, x2, y2 in output['boxes']]
            scores = output['scores']
            rects = nms.boxes(rects=boxes, scores=scores, nms_threshold=0.25)
            output['boxes'] = [
                output['boxes'][id] for id in rects
                if output['scores'][id] > 0.5
            ]
            output['labels'] = [
                output['labels'][id] for id in rects
                if output['scores'][id] > 0.5
            ]
            output['scores'] = [
                output['scores'][id] for id in rects
                if output['scores'][id] > 0.5
            ]
            del boxes, scores, rects, img
        return output

    def analyze_pdf(self, pdf_file=None, *args, **kwargs):
        assert pdf_file is not None, ValueError('pdf_file can not be None')
        images = pdf2image.convert_from_path(pdf_file)
        for idx, image in enumerate(images):
            out = self.analyze(image)
            img = self.box_display(image, out)
            cv2.imwrite(
                os.path.join(
                    'test', 'show',
                    pdf_file.split('/')[-1] + '_' + str(idx) + '.png'), img)

    def box_display(self, image, output, *args, **kwargs):
        assert image is not None, ValueError('image can not be None')
        assert output is not None, ValueError('output can not be None')
        image = np.array(image)
        for idx, box in enumerate(output[0]):
            x1, y1, x2, y2 = box
            cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)),
                          (0, 20, 200), 10)
            font = cv2.FONT_HERSHEY_DUPLEX
            cv2.putText(
                image, '%s: %.2f' %
                (self.class_names[output[1][idx]], output[2][idx]),
                (int(x1) + 10, int(y1) + 35), font, 1, (0, 0, 255), 2,
                cv2.LINE_AA)
        return image

    def mask_display(self, image, output):
        assert image is not None, ValueError('image can not be None')
        assert output is not None, ValueError('output can not be None')
        image = np.array(image)
        masks = output['masks']
        _masks = masks.argmax(axis=0)
        _masks = np.reshape(_masks, (_masks.shape[1], _masks.shape[2]))
        for i in range(_masks.shape[0]):
            for j in range(_masks.shape[1]):
                if (_masks[i][j] > 0) and (_masks[i][j] < 8):
                    image[i][j] = self.colours[_masks[i][j]]
        return image

    def train(self, train_set=None, valid_set=None, epoch=None,
              optimizer=None, lr_scheduler=None, *args, **kwargs):
        """
        Default training loop.
        """
        assert train_set is not None, ValueError('train_set can not be None')
        assert valid_set is not None, ValueError('valid_set can not be None')
        assert epoch is not None, ValueError('epoch can not be None')
        self.model.train()
        params = [p for p in self.model.parameters() if p.requires_grad]
        if optimizer is None:
            optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9,
                                        weight_decay=0.0005)
        if lr_scheduler is None:
            lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                           step_size=1000,
                                                           gamma=0.978)
        for ep in range(epoch):
            metric_logger = MetricLogger(delimiter=' ')
            metric_logger.add_meter(
                'lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
            header = 'Epoch: [{}]'.format(ep)
            for images, targets in metric_logger.log_every(
                    train_set, 10, header):
                images = list(image.to(self.device) for image in images)
                targets = [{k: v.to(self.device) for k, v in t.items()}
                           for t in targets]
                loss_dict = self.model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                # reduce losses over all GPUs for logging purposes
                loss_dict_reduced = reduce_dict(loss_dict)
                losses_reduced = sum(
                    loss for loss in loss_dict_reduced.values())
                loss_value = losses_reduced.item()
                if not math.isfinite(loss_value):
                    print("Loss is {}, stopping training".format(loss_value))
                    print(loss_dict_reduced)
                    sys.exit(1)
                optimizer.zero_grad()
                losses.backward()
                optimizer.step()
                if lr_scheduler is not None:
                    lr_scheduler.step()
                metric_logger.update(loss=losses_reduced,
                                     **loss_dict_reduced)
                metric_logger.update(lr=optimizer.param_groups[0]["lr"])
            # validation, adapted from the torchvision reference engine;
            # valid_set replaces the undefined data_loader of the original
            n_threads = torch.get_num_threads()
            cpu_device = torch.device('cpu')
            self.model.eval()
            coco = get_coco_api_from_dataset(valid_set.dataset)
            iou_types = _get_iou_types(self.model)
            coco_evaluator = CocoEvaluator(coco, iou_types)
            for image, targets in metric_logger.log_every(
                    valid_set, 100, header):
                image = list(img.to(self.device) for img in image)
                targets = [{k: v.to(self.device) for k, v in t.items()}
                           for t in targets]
                if self.device == 'cuda':
                    torch.cuda.synchronize()
                model_time = time.time()
                outputs = self.model(image)
                outputs = [{k: v.to(cpu_device) for k, v in t.items()}
                           for t in outputs]
                model_time = time.time() - model_time
                res = {
                    target["image_id"].item(): output
                    for target, output in zip(targets, outputs)
                }
                evaluator_time = time.time()
                coco_evaluator.update(res)
                evaluator_time = time.time() - evaluator_time
                metric_logger.update(model_time=model_time,
                                     evaluator_time=evaluator_time)
            # gather the stats from all processes
            metric_logger.synchronize_between_processes()
            print("Averaged stats: ", metric_logger)
            coco_evaluator.synchronize_between_processes()
            # accumulate predictions from all images
            coco_evaluator.accumulate()
            coco_evaluator.summarize()
            torch.set_num_threads(n_threads)
            self.model.train()
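# Hedged usage sketch for the wrapper class above; the checkpoint path, class
# count and page image are placeholders, and PIL.Image is assumed imported.
layout_model = MaskRCNN(backbone='resnet', architecture='resnet50',
                        detector='maskrcnn', num_classes=6, device='cpu')
layout_model.load(path='checkpoints/layout.pth')  # hypothetical checkpoint
layout_model.model.eval()
output = layout_model.analyze(Image.open('page.png').convert('RGB'))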
import torch
import torchvision
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.anchor_utils import AnchorGenerator

backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                     output_size=14,
                                                     sampling_ratio=2)
MaskRCNN_mobile_model = MaskRCNN(backbone,
                                 num_classes=2,
                                 rpn_anchor_generator=anchor_generator,
                                 box_roi_pool=roi_pooler,
                                 mask_roi_pool=mask_roi_pooler)

if __name__ == '__main__':
    model = MaskRCNN_mobile_model
    model.eval()
    x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    predictions = model(x)
# backbone definition restored from the mobilenet_v2 snippets above, so that
# out_channels = 1280 has something to attach to
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                     output_size=14,
                                                     sampling_ratio=2)
model = MaskRCNN(backbone,
                 num_classes=4,
                 rpn_anchor_generator=anchor_generator,
                 box_roi_pool=roi_pooler,
                 mask_roi_pool=mask_roi_pooler)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device(
    'cpu')
dataSet = BrainTumor('/content/drive/My Drive/Colab Notebooks/BrainTumor/',
                     transforms=None)
data_loader = torch.utils.data.DataLoader(dataSet,
                                          batch_size=4,
                                          shuffle=True,
                                          num_workers=4,
                                          collate_fn=utils.collate_fn)
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
# the original snippet is truncated here; the SGD hyperparameters below
# mirror the other training snippets in this file
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9,
                            weight_decay=0.0005)
import torch
import torchvision
from torch import optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.anchor_utils import AnchorGenerator

backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                     output_size=14,
                                                     sampling_ratio=2)
model = MaskRCNN(backbone,
                 num_classes=2,
                 rpn_anchor_generator=anchor_generator,
                 box_roi_pool=roi_pooler,
                 mask_roi_pool=mask_roi_pooler)
optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9,
                      weight_decay=0.0005)
dataDir = "../ChemLabScapeDataset/TrainAnnotations"
# dataset = ChemScapeDataset(dataDir, None, "Vessel", False)
d = datasets.CocoDetection(
    root="../coco/train2014",
    annFile="../coco/annotations/instances_train2014.json",
    transform=transforms.ToTensor())
# the original snippet is truncated here; loader arguments below mirror the
# BrainTumor snippet above, with the standard detection collate inlined
dataLoader = torch.utils.data.DataLoader(d,
                                         batch_size=4,
                                         shuffle=True,
                                         num_workers=4,
                                         collate_fn=lambda batch: tuple(
                                             zip(*batch)))