def get_model(model_name: str, device, num_classes: int, pretrained: bool = False) -> torch.nn.Module:
    """
    Gets the model requested from the config file.

    Only the requested architecture is instantiated; the other supported
    networks are never built or moved to ``device``.

    :param model_name: name of model used in the program
    :param device: name of device model is hosted on
    :param num_classes: number of classes in the model (including background 0,0,0)
    :param pretrained: boolean of starting on a pretrained model
    :return: the requested model, moved to ``device`` and switched to eval mode
    :raises ValueError: if ``model_name`` is not one of the supported names
    """
    print(pretrained)

    supported_names = (
        'fcn_resnet50',
        'fcn_resnet101',
        'deeplabv3_resnet50',
        'deeplabv3_resnet101',
        'deeplabv3_mobilenet_v3_large',
        'lraspp_mobilenet_v3_large',
    )
    # Fail early with an actionable message instead of returning/using None.
    if model_name not in supported_names:
        raise ValueError(
            f"model_name {model_name!r} is not valid, allowable names are: "
            f"{list(supported_names)}"
        )

    # Each supported name is also the name of its constructor in `models`,
    # so look the builder up lazily and construct exactly one network.
    builder = getattr(models, model_name)
    model = builder(pretrained=pretrained, num_classes=num_classes)
    return model.to(device).eval()
def __init__(self, learning_rate=1e-3, *args, **kwargs):
    """Initialise the module and build its segmentation network.

    ``learning_rate`` and any extra positional/keyword arguments are not
    read directly in this method; ``save_hyperparameters()`` is invoked
    with no arguments right after the base-class initialiser
    (presumably that call records them -- confirm against the base class).
    """
    super().__init__()
    self.save_hyperparameters()

    # FCN-ResNet50 requested with pretrained weights and 21 output classes.
    self.model = fcn_resnet50(pretrained=True, num_classes=21)

    # Both slots start out empty.
    self.preds = self.targets = None
def resume(self, file, test=False):
    """Restore weights from a checkpoint, or fall back to a pretrained FCN.

    :param file: path of a checkpoint holding a ``'model_state_dict'`` entry;
        a falsy value means "no checkpoint".
    :param test: when true and no ``file`` is given, ``self.fcn`` is replaced
        by a pretrained 21-class ``fcn_resnet50`` instead.
    :return: None
    """
    import torch

    if not file:
        if test:
            self.fcn = fcn_resnet50(pretrained=True, num_classes=21)
        return

    # f-string so that path-like objects are accepted as well as str.
    print(f'Loading checkpoint from: {file}')

    # NOTE(review): torch.load unpickles the file -- only load trusted checkpoints.
    # map_location='cpu' lets a checkpoint saved on a GPU be opened on a host
    # without CUDA; load_state_dict copies the values into the existing
    # parameters, so the module stays on whatever device it already lives on.
    checkpoint = torch.load(file, map_location='cpu')
    self.load_state_dict(checkpoint['model_state_dict'])
def create_fcn_resnet(**kwargs) -> nn.Module:
    """Build a 2-class FCN-ResNet50 whose first convolution takes 1-channel images.

    Extra keyword arguments are forwarded to ``fcn_resnet50``; no pretrained
    weights are requested.
    """
    net = fcn_resnet50(pretrained=False, num_classes=2, **kwargs)

    # Swap the backbone's registered 'conv1' for a single-input-channel conv.
    single_channel_stem = nn.Conv2d(
        1, 64, kernel_size=7, stride=2, padding=3, bias=False
    )
    net.backbone._modules['conv1'] = single_channel_stem
    return net
def make_model(args):
    """Build the segmentation model selected by ``args.model``.

    Attributes read from ``args``: ``model``, ``num_classes``, ``aux_lr``,
    ``no_dropout``, ``pretrained``, ``activation``, ``freeze_backbone`` and
    ``dp``.

    :param args: namespace-like object carrying the options listed above
    :return: the configured model
    :raises SystemExit: if ``args.model`` is not a recognised name
    :raises AssertionError: if ``args.dp`` is set and the converted model
        does not pass ``DPModelInspector.validate``
    """
    if args.model in ('fcn_mobilenetv2', 'deeplabv3_mobilenetv2'):
        # Convolutional neural networks
        builder = fcn_mobilenetv2 if args.model == 'fcn_mobilenetv2' else deeplabv3_mobilenetv2
        global_model = builder(num_classes=args.num_classes,
                               aux_loss=bool(args.aux_lr))
        if args.no_dropout:
            global_model.classifier[3].p = 0
            # The aux head is only requested when aux_lr is truthy, so it may
            # be absent (assumption about the builders -- TODO confirm).
            if getattr(global_model, 'aux_classifier', None) is not None:
                global_model.aux_classifier[3].p = 0
    elif args.model == 'deeplabv3_mobilenetv3':
        global_model = deeplabv3_mobilenet_v3_large(
            num_classes=args.num_classes,
            aux_loss=bool(args.aux_lr),
            pretrained=args.pretrained)
        # fix batchnorm channels divisible by 8
        # (skipped when the model was built without an auxiliary head)
        if global_model.aux_classifier is not None:
            in_channels = global_model.aux_classifier[0].in_channels
            global_model.aux_classifier = FCNHead(in_channels, args.num_classes)
        if args.no_dropout:
            global_model.classifier[0].project[3].p = 0
    elif args.model == 'lraspp_mobilenetv3':
        global_model = lraspp_mobilenet_v3_large(num_classes=args.num_classes,
                                                 pretrained=args.pretrained)
        # no aux classifier, no dropout layer
        global_model.aux_classifier = None
    elif args.model == 'fcn_resnet50':
        # resnet for test only as too many params
        # NOTE(review): pretrained is hard-coded to True here and
        # args.pretrained is ignored -- confirm this is intended.
        global_model = fcn_resnet50(num_classes=args.num_classes,
                                    pretrained=True)
    else:
        # Same effect as the interactive-only `exit(...)` helper, without
        # depending on the `site` module having installed it.
        raise SystemExit('Error: unrecognized model')

    if args.activation == 'tanh':
        # test tanh for DP-SGD
        global_model = convert_relu_tanh(global_model)

    if args.freeze_backbone:
        # test for DP-SGD
        for p in global_model.backbone.parameters():
            p.requires_grad = False

    # change model architecture from batch_norm to group_norm for DP
    if args.dp:
        global_model = convert_batchnorm_modules(global_model)
        inspector = DPModelInspector()
        # Explicit raise so the check survives `python -O`.
        if not inspector.validate(global_model):
            raise AssertionError('model failed DPModelInspector validation')

    return global_model
def __init__(self, n_channels=3, n_classes=21, softmax_out=True, resnet_type=101, pretrained=False):
    """Wrap a torchvision FCN-ResNet body with optional input conv and softmax.

    :param n_channels: number of input channels; anything other than 3 adds
        a 1x1 ``FwdConv`` that maps the input to 3 channels
    :param n_classes: number of output classes
    :param softmax_out: when true, ``self.softmax`` is an ``nn.Softmax2d``
    :param resnet_type: 50 selects ``fcn_resnet50``; any other value selects
        ``fcn_resnet101``
    :param pretrained: request pretrained weights (only honoured for the
        ResNet-101 body; the ResNet-50 body is always built untrained)
    """
    super(FCN, self).__init__()
    self.resnet_type = resnet_type
    self.n_channels = n_channels
    self.n_classes = n_classes
    self.pretrained = pretrained

    # Input conv is applied to convert the input to 3 ch depth
    self.inconv = None
    if n_channels != 3:
        self.inconv = FwdConv(n_channels, 3, kernel_size=1, padding=0)

    # Pre-trained model needs to be an identical network
    mid_classes = 21 if pretrained else n_classes

    # Main body
    if resnet_type == 50:
        self.fcn_body = fcn_resnet50(pretrained=False, num_classes=mid_classes)
        self.pretrained = False
    else:
        self.fcn_body = fcn_resnet101(pretrained=pretrained, num_classes=mid_classes)

    if n_classes != 21:
        heads = [self.fcn_body.classifier]
        if self.fcn_body.aux_classifier is not None:
            heads.append(self.fcn_body.aux_classifier)
        for head in heads:
            # Take the input width from the layer being replaced: the
            # auxiliary head is narrower than the main head, so a hard-coded
            # 512 would give the aux branch a channel mismatch in forward.
            head[-1] = nn.Conv2d(head[-1].in_channels, n_classes,
                                 kernel_size=(1, 1), stride=(1, 1))

    # Softmax alternative
    self.has_softmax = softmax_out
    self.softmax = nn.Softmax2d() if softmax_out else None
def main():
    """Unpack the challenge archive and train an FCN-ResNet50 for three epochs.

    Does nothing unless the module is being run as a script.
    """
    if __name__ != "__main__":
        return

    # Extracting data from the given file
    with ZipFile("coding_challenge.zip", 'r') as archive:
        archive.extractall()

    root = "coding_challenge"
    train_dataset = Dataset(root + "/images", root + "/masks")

    model = fcn_resnet50(pretrained=False, progress=True, num_classes=3, aux_loss=None)
    optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)

    # Three passes; only the loss returned by `train` is reported.
    for i in range(3):
        loss, _ = train(train_dataset, model, optimizer)
        print(f"Training loss at epoch {i+1} : {loss}")
def __init__(self):
    """Build a pretrained FCN-ResNet50 with frozen weights and new 1-channel / COLORS layers."""
    super().__init__()
    self.layers = fcn_resnet50(pretrained=True)

    # Turn off gradients for every parameter registered so far.
    for weight in self.parameters():
        weight.requires_grad = False

    # The layers assigned below are created after the freeze above.
    net = self.layers
    net.backbone.conv1 = nn.Conv2d(
        1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
    )
    net.classifier[-1] = nn.Conv2d(512, COLORS, kernel_size=(1, 1), stride=(1, 1))
    net.aux_classifier[-1] = nn.Conv2d(256, COLORS, kernel_size=(1, 1), stride=(1, 1))

    self.loss_criterion = nn.MSELoss()
def make_model(args):
    """Build the segmentation model selected by ``args.model``.

    Attributes read from ``args``: ``model``, ``num_classes``, ``aux_lr``,
    ``pretrained``, ``activation`` and ``freeze_backbone``.

    :param args: namespace-like object carrying the options listed above
    :return: the configured model
    :raises SystemExit: if ``args.model`` is not a recognised name
    """
    if args.model == 'fcn_mobilenetv2':
        # Convolutional neural networks
        global_model = fcn_mobilenetv2(num_classes=args.num_classes,
                                       aux_loss=bool(args.aux_lr))
    elif args.model == 'deeplabv3_mobilenetv2':
        global_model = deeplabv3_mobilenetv2(num_classes=args.num_classes,
                                             aux_loss=bool(args.aux_lr))
    elif args.model == 'deeplabv3_mobilenetv3':
        global_model = deeplabv3_mobilenet_v3_large(
            num_classes=args.num_classes,
            aux_loss=bool(args.aux_lr),
            pretrained=args.pretrained)
    elif args.model == 'lraspp_mobilenetv3':
        global_model = lraspp_mobilenet_v3_large(num_classes=args.num_classes,
                                                 pretrained=args.pretrained)
        # no aux classifier, no dropout layer
        global_model.aux_classifier = None
    elif args.model == 'fcn_resnet50':
        # resnet for test only as too many params
        # NOTE(review): pretrained is hard-coded to True here and
        # args.pretrained is ignored -- confirm this is intended.
        global_model = fcn_resnet50(num_classes=args.num_classes,
                                    pretrained=True)
    else:
        # Same effect as the interactive-only `exit(...)` helper, without
        # depending on the `site` module having installed it.
        raise SystemExit('Error: unrecognized model')

    if args.activation == 'tanh':
        # test tanh for DP-SGD
        global_model = convert_relu_tanh(global_model)

    if args.freeze_backbone:
        # test for DP-SGD
        for p in global_model.backbone.parameters():
            p.requires_grad = False

    return global_model
# Semantic segmentation models
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# We will see how to use it with torchvision's FCN Resnet-50, loaded with
# :func:`~torchvision.models.segmentation.fcn_resnet50`. You can also try using
# DeepLabv3 (:func:`~torchvision.models.segmentation.deeplabv3_resnet50`) or
# lraspp mobilenet models
# (:func:`~torchvision.models.segmentation.lraspp_mobilenet_v3_large`).
#
# Let's start by looking at the output of the model. Remember that in general,
# images must be normalized before they're passed to a semantic segmentation
# model.

from torchvision.models.segmentation import fcn_resnet50

# ``eval()`` returns the module itself, so it can be chained onto construction.
model = fcn_resnet50(pretrained=True, progress=False).eval()

normalized_batch = F.normalize(
    batch,
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
)
output = model(normalized_batch)['out']
print(output.shape, output.min().item(), output.max().item())

#####################################
# As we can see above, the output of the segmentation model is a tensor of shape
# ``(batch_size, num_classes, H, W)``. Each value is a non-normalized score, and
# we can normalize them into ``[0, 1]`` by using a softmax. After the softmax,
# we can interpret each value as a probability indicating how likely a given
# pixel is to belong to a given class.
#
def __init__(self, n_class=21):
    """Create the wrapped FCN-ResNet50, built without pretrained weights.

    :param n_class: forwarded to ``fcn_resnet50`` as ``num_classes``
    """
    super(FCN, self).__init__()
    segmentation_net = fcn_resnet50(pretrained=False, num_classes=n_class)
    self.fcn = segmentation_net
def fcn_resnet50(num_classes):
    """Build torchvision's fcn_resnet50 (with ``aux_loss=True``) wrapped in SingleOutNet."""
    base_net = seg_models.fcn_resnet50(num_classes=num_classes, aux_loss=True)
    return SingleOutNet(base_net)
return loss


# for debug
# Smoke test run only when the file is executed as a script: pushes one random
# batch through a 10-class FCN-ResNet50, evaluates BoundaryLoss on the 'out'
# logits and performs a single Adam step.
if __name__ == "__main__":
    import torch.optim as optim
    from torchvision.models import segmentation

    # Use the GPU when one is available, otherwise stay on the CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Random inputs: 8 images of 3x224x224 and integer labels drawn from [0, 10).
    img = torch.randn(8, 3, 224, 224).to(device)
    gt = torch.randint(0, 10, (8, 224, 224)).to(device)
    print(img.shape, gt.shape)

    model = segmentation.fcn_resnet50(num_classes=10).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    criterion = BoundaryLoss()

    # The model returns a dict; the loss is computed on its 'out' entry.
    y = model(img)
    loss = criterion(y['out'], gt)

    # One optimisation step.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(loss)
# We will see how to use it with torchvision's FCN Resnet-50, loaded with
# :func:`~torchvision.models.segmentation.fcn_resnet50`. You can also try using
# DeepLabv3 (:func:`~torchvision.models.segmentation.deeplabv3_resnet50`) or
# lraspp mobilenet models
# (:func:`~torchvision.models.segmentation.lraspp_mobilenet_v3_large`).
#
# Let's start by looking at the output of the model. Remember that in general,
# images must be normalized before they're passed to a semantic segmentation
# model.

from torchvision.models.segmentation import fcn_resnet50, FCN_ResNet50_Weights

weights = FCN_ResNet50_Weights.DEFAULT
# Preprocessing that ships with the weights, requested with ``resize_size=None``.
transforms = weights.transforms(resize_size=None)

# ``eval()`` returns the module itself, so it can be chained onto construction.
model = fcn_resnet50(weights=weights, progress=False).eval()

normalized_batch = transforms(batch)
output = model(normalized_batch)['out']
print(output.shape, output.min().item(), output.max().item())

#####################################
# As we can see above, the output of the segmentation model is a tensor of shape
# ``(batch_size, num_classes, H, W)``. Each value is a non-normalized score, and
# we can normalize them into ``[0, 1]`` by using a softmax. After the softmax,
# we can interpret each value as a probability indicating how likely a given
# pixel is to belong to a given class.
#
# Let's plot the masks that have been detected for the dog class and for the
# boat class:
def _build_segmentation_model(architecture, num_classes=4):
    """Return a pretrained torchvision segmentation model with a fresh ``num_classes`` head."""
    name = architecture.lower()
    if name in ("deeplabv3_resnet50", "deeplabv3_resnet101"):
        model = getattr(models, name)(pretrained=True, progress=True)
        model.classifier = models.deeplabv3.DeepLabHead(2048, num_classes)
    elif name in ("fcn_resnet50", "fcn_resnet101"):
        model = getattr(models, name)(pretrained=True, progress=True)
        model.classifier = models.fcn.FCNHead(2048, num_classes)
    else:
        raise ValueError(f"Unsupported architecture: {architecture!r}")
    return model


def _build_optimizer(optim_name, parameters, lr):
    """Return the Adam or SGD optimizer selected by ``optim_name`` (case-insensitive)."""
    name = optim_name.lower()
    if name == "adam":
        return torch.optim.Adam(parameters, lr=lr)
    if name == "sgd":
        return torch.optim.SGD(parameters, lr=lr)
    raise ValueError(f"Unsupported optimizer: {optim_name!r}")


def _pixel_weighted_f1(metric, logits, masks, threshold=0.1):
    """Return the per-class F1 of thresholded logits, weighted by each class's pixel count."""
    # assumes logits is (batch, classes, H, W) and masks is (batch, H, W) of
    # class ids, as required by the CrossEntropyLoss call in `train` -- TODO confirm
    num_classes = logits.shape[1]
    y_true = masks.detach().cpu().numpy().ravel()
    # Move the class axis to the front before flattening so that row c lines
    # up element-wise with y_true for any batch size.
    class_scores = (
        logits.detach().cpu().transpose(0, 1).reshape(num_classes, -1).numpy()
    )

    f1_per_class = np.zeros(num_classes)
    pixels_per_class = np.zeros(num_classes)
    for class_id in range(num_classes):
        is_class = y_true == class_id
        # Use a classification threshold of 0.1
        # NOTE(review): the threshold is applied to raw (non-softmaxed) scores.
        f1_per_class[class_id] = metric(is_class, class_scores[class_id] > threshold)
        pixels_per_class[class_id] = np.count_nonzero(is_class)

    class_weights = pixels_per_class / np.sum(pixels_per_class)
    return np.matmul(f1_per_class, class_weights)


def train(opt):
    """Fine-tune a segmentation model and log per-epoch metrics to ``log.csv``.

    Attributes read from ``opt``: ``architecture``, ``optim``, ``lr``,
    ``data_path``, ``batch_size``, ``num_workers`` and ``num_epochs``.

    After the last epoch the weights of the epoch with the lowest logged
    validation loss are loaded back into the model.

    :param opt: namespace-like object carrying the options listed above
    :return: None
    :raises ValueError: if ``opt.architecture`` or ``opt.optim`` is not supported
    """
    # Specify the Model Architecture
    model = _build_segmentation_model(opt.architecture)

    # Define Optimizer
    modelOptim = _build_optimizer(opt.optim, model.parameters(), opt.lr)

    # Define Loss Function
    lossFnc = torch.nn.CrossEntropyLoss()

    # Set Training and Validation Datasets
    dataTransforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    segdata = {
        x: Dataset(root=Path(opt.data_path) / x,
                   imageFolder="images",
                   maskFolder="masks",
                   transforms=dataTransforms)
        for x in ["train", "valid"]
    }
    dataLoaders = {
        x: DataLoader(segdata[x],
                      batch_size=opt.batch_size,
                      shuffle=True,
                      num_workers=opt.num_workers)
        for x in ["train", "valid"]
    }

    # Set Training Device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Create and Initialize Training Log File
    perfMetrics = {"f1-score": f1_score}
    fieldnames = ['epoch', 'train_loss', 'valid_loss'] + \
        [f'train_{m}' for m in perfMetrics] + \
        [f'valid_{m}' for m in perfMetrics]
    with open(os.path.join('log.csv'), 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

    # Start from the initial weights so there is always something to restore,
    # even when no epoch runs.
    best_loss = float('inf')
    best_model_wts = copy.deepcopy(model.state_dict())

    # Train
    startTimer = time.time()
    for epoch in range(1, opt.num_epochs + 1):
        print('-' * 60)
        print("Epoch: {}/{}".format(epoch, opt.num_epochs))
        # Metric fields collect one value per batch; epoch/loss fields are
        # overwritten with scalars below.
        batchsummary = {a: [] for a in fieldnames}

        for phase in ["train", "valid"]:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            # Iterate over data.
            for sample in tqdm(iter(dataLoaders[phase]), file=sys.stdout):
                inputs = sample['image'].to(device)
                masks = sample['mask'].to(device)

                # zero the parameter gradients
                modelOptim.zero_grad()

                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = lossFnc(outputs['out'], masks)

                    for name, metric in perfMetrics.items():
                        if name == 'f1-score':
                            score = _pixel_weighted_f1(metric, outputs['out'], masks)
                        else:
                            y_pred = outputs['out'].detach().cpu().numpy().ravel()
                            y_true = masks.detach().cpu().numpy().ravel()
                            score = metric(y_true.astype('uint8'), y_pred)
                        batchsummary[f'{phase}_{name}'].append(score)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        modelOptim.step()

            batchsummary['epoch'] = epoch
            # NOTE(review): this logs the loss of the phase's last batch, not
            # an average over the epoch.
            epoch_loss = loss
            batchsummary[f'{phase}_loss'] = epoch_loss.item()

        for field in fieldnames[3:]:
            values = batchsummary[field]
            # Average only real batch values; report 0.0 if a phase had none.
            batchsummary[field] = np.mean(values) if values else 0.0

        print((f'train loss: {batchsummary["train_loss"]: .4f}, '
               f'valid loss: {batchsummary["valid_loss"]: .4f}, '
               f'train f1-score: {batchsummary["train_f1-score"]: .4f}, '
               f'valid f1-score: {batchsummary["valid_f1-score"]: .4f}, '))

        with open(os.path.join('log.csv'), 'a', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writerow(batchsummary)

        # deep copy the model, but only when validation actually improved
        valid_loss = batchsummary['valid_loss']
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - startTimer
    print('-' * 60)
    print('Training completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print(f'Lowest validation loss: {best_loss: .4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
import torchvision.models.segmentation as seg

# Load the pretrained FCN-ResNet50 and print its layer structure.
model = seg.fcn_resnet50(pretrained=True)
# To inspect the ResNet-101 variant instead, build it here and print that:
# model2 = seg.fcn_resnet101(pretrained=True)
# Print the model that actually exists; `model2` is only defined in the
# disabled line above, so printing it raised NameError.
print(model)