def __init__(self,
                 mname,
                 outtoken,
                 hidden=128,
                 enc_layers=1,
                 dec_layers=1,
                 nhead=1,
                 dropout=0.1,
                 pretrained=False):
        super(TransformerModel, self).__init__()
        self.backbone = models.__getattribute__(mname)(pretrained=pretrained)
        self.backbone.fc = nn.Conv2d(2048, hidden // 4, 1)

        self.pos_encoder = PositionalEncoding(hidden, dropout)
        self.decoder = nn.Embedding(outtoken, hidden)
        self.pos_decoder = PositionalEncoding(hidden, dropout)
        self.transformer = nn.Transformer(d_model=hidden,
                                          nhead=nhead,
                                          num_encoder_layers=enc_layers,
                                          num_decoder_layers=dec_layers,
                                          dim_feedforward=hidden * 4,
                                          dropout=dropout,
                                          activation='relu')

        self.fc_out = nn.Linear(hidden, outtoken)
        self.src_mask = None
        self.trg_mask = None
        self.memory_mask = None
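A minimal sketch (not from the original) of the causal mask that fields like trg_mask typically hold when a model of this kind calls nn.Transformer: position i is blocked from attending to later positions.
import torch

def make_causal_mask(length, device=None):
    # Additive attention mask: 0 on and below the diagonal, -inf above it,
    # so each target position can only attend to earlier positions.
    return torch.triu(torch.full((length, length), float('-inf'), device=device),
                      diagonal=1)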
Example #2
    def __init__(self,
                 base_model_name: str,
                 config: Config,
                 pretrained=False,
                 num_classes=264):
        super(ResNet, self).__init__()

        base_model = models.__getattribute__(base_model_name)(
            pretrained=pretrained)
        layers = list(base_model.children())[:-2]
        layers.append(nn.AdaptiveMaxPool2d(1))
        self.encoder = nn.Sequential(*layers)
        in_features = base_model.fc.in_features
        self.config = config
        if config.cbr:
            self.cbr1 = CBR(1, 3, 5, 1)
        if config.loss_type == 'bce' or config.loss_type == 'focal':
            self.classifier = nn.Sequential(nn.Linear(in_features, 1024),
                                            nn.ReLU(), nn.Dropout(p=0.2),
                                            nn.Linear(1024, 1024), nn.ReLU(),
                                            nn.Dropout(p=0.2),
                                            nn.Linear(1024, num_classes))
        elif config.loss_type == 'arcface':
            self.classifier = nn.Sequential(
                nn.Linear(in_features, 1024), nn.ReLU(), nn.Dropout(p=0.2),
                nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(p=0.2),
                ArcMarginProduct(1024, num_classes))
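A hedged sketch of how the encoder and classifier built above are typically composed in forward(); the example's real forward() and its CBR/ArcMarginProduct helpers are not shown, so resnet18 and a plain linear head stand in here.
import torch
import torch.nn as nn
from torchvision import models

backbone = models.resnet18(pretrained=False)
# Drop avgpool and fc, then pool the feature map to a single vector per image.
encoder = nn.Sequential(*list(backbone.children())[:-2], nn.AdaptiveMaxPool2d(1))
classifier = nn.Linear(backbone.fc.in_features, 264)

x = torch.randn(2, 3, 224, 224)
features = encoder(x).flatten(1)   # (2, 512)
logits = classifier(features)      # (2, 264)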
Example #3
def create_model(arch):
    """
    Creates an NN model from a pretrained torchvision architecture
    Parameters:
    - arch: name of a torchvision VGG variant (the 25088-input classifier
      below matches the flattened VGG feature map)
    Returns:
    - NN model with frozen features and a new, untrained classifier
    """
    model = models.__getattribute__(arch)(pretrained=True)

    # Freeze parameters so we don't backprop through them
    for param in model.parameters():
        param.requires_grad = False

    from collections import OrderedDict
    classifier = nn.Sequential(
        OrderedDict([('fc1', nn.Linear(25088, 4096)), ('relu1', nn.ReLU()),
                     ('dropout', nn.Dropout(p=0.5)),
                     ('fc2', nn.Linear(4096, 102)),
                     ('output', nn.LogSoftmax(dim=1))]))

    model.classifier = classifier
    return model
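A hedged usage sketch: because the backbone is frozen above, only the new classifier's parameters need to be handed to the optimizer.
import torch.optim as optim

model = create_model('vgg11')
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)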
Example #4
def load_checkpoint(filepath):
    # Rebuild the saved architecture, freeze its parameters, and restore the
    # custom head and weights from the checkpoint.
    checkpoint = torch.load(filepath)
    arch = checkpoint['arch']
    loaded_model = models.__getattribute__(arch)(pretrained=True)
    for param in loaded_model.parameters():
        param.requires_grad = False
    loaded_model.fc = checkpoint['fc']
    loaded_model.load_state_dict(checkpoint['state_dict'])
    loaded_model.class_to_idx = checkpoint['class_to_idx']
    optimizer = checkpoint['optimizer_dict']  # the optimizer's state_dict, not an optimizer instance
    epochs = checkpoint['epochs']
    criterion = checkpoint['criterion']
    return loaded_model, optimizer, criterion, epochs
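A hedged sketch (not from the original) of a save routine that writes exactly the keys this load_checkpoint reads back.
def save_checkpoint(model, arch, optimizer, criterion, epochs, class_to_idx, filepath):
    torch.save({
        'arch': arch,
        'fc': model.fc,
        'state_dict': model.state_dict(),
        'class_to_idx': class_to_idx,
        'optimizer_dict': optimizer.state_dict(),
        'epochs': epochs,
        'criterion': criterion,
    }, filepath)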
Example #5
    def __init__(self, name, num_classes=265):
        super(BirdcallNet, self).__init__()
        self.model = models.__getattribute__(name)(pretrained=True)
        # Swap the classification head for the requested number of classes;
        # the input width depends on the backbone family.
        if name in ["resnet50", "resnext50_32x4d"]:
            self.model.fc = nn.Linear(2048, num_classes)
        elif name in ["resnet18", "resnet34"]:
            self.model.fc = nn.Linear(512, num_classes)
        elif name == "densenet121":
            self.model.classifier = nn.Linear(1024, num_classes)
        elif name in ["alexnet", "vgg16"]:
            self.model.classifier[-1] = nn.Linear(4096, num_classes)
        elif name == "mobilenet_v2":
            self.model.classifier[1] = nn.Linear(1280, num_classes)
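A hedged alternative (not from the original) that reads the input width off the existing head instead of hard-coding 2048/512/1024/4096/1280 per architecture; it covers the same backbone families listed above.
import torch.nn as nn
from torchvision import models

def replace_head(model, num_classes):
    if isinstance(getattr(model, 'fc', None), nn.Linear):            # resnet / resnext
        model.fc = nn.Linear(model.fc.in_features, num_classes)
    elif isinstance(getattr(model, 'classifier', None), nn.Linear):  # densenet
        model.classifier = nn.Linear(model.classifier.in_features, num_classes)
    elif isinstance(getattr(model, 'classifier', None), nn.Sequential):
        # alexnet / vgg / mobilenet_v2: the last classifier entry is the Linear head
        model.classifier[-1] = nn.Linear(model.classifier[-1].in_features, num_classes)
    return model

model = replace_head(models.__getattribute__('resnet34')(pretrained=False), 265)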
Example #6
def main():
    # Only the lowercase factory functions (e.g. resnet18) can be constructed
    # with pretrained=...; the ResNet base class itself cannot.
    allowed_models = ['resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152']
    parser = argparse.ArgumentParser(description='Train NN')
    parser.add_argument('data_dir', help='directory containing sub-folders with data')
    parser.add_argument('--save_dir', help='directory for saving checkpoint', default='checkpoints')
    parser.add_argument('--arch', help='pre-trained model architecture', default='resnet18', choices=allowed_models)
    parser.add_argument('--learning_rate', help='learning rate during learning', type=float, default=0.01)
    parser.add_argument('--dropout', help='dropout during learning', type=float, default=0.05)
    parser.add_argument('--hidden_units', help='List of number of nodes in hidden layers', nargs='+', type=int, default=[256, 128])
    parser.add_argument('--epochs', help='Number of epochs for training', default=3, type=int)
    parser.add_argument('--gpu', help='Enable GPU', action='store_true')

    args = parser.parse_args()

    # Describe directories relative to working directory
    data_dir = args.data_dir
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'
    save_dir = args.save_dir
    model_arch = args.arch
    model_hidden_units = args.hidden_units
    learning_rate = args.learning_rate
    drop = args.dropout
    print('Data directory: ' + data_dir)
    print('hidden units: ' + str(args.hidden_units))
    print('Save directory: ' + save_dir)
    print('Architecture: ' + args.arch)
    fu.create_directory(save_dir)
    model = models.__getattribute__(model_arch)(pretrained=True)

    for param in model.parameters():
        param.requires_grad = False

    model.fc = fu.Network(model.fc.in_features, 102, model_hidden_units, drop)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.fc.parameters(), lr=learning_rate)
    device = torch.device('cuda' if torch.cuda.is_available() and args.gpu else 'cpu')
    print('device: ', device)

    epochs = args.epochs
    print_every = 50
    running_loss = 0
    steps = 0

    train_loader, test_loader, valid_loader, train_data, test_data, valid_data = load_transform.load_transform(data_dir, train_dir, valid_dir, test_dir)

    fu.train(device, model, epochs, criterion, optimizer, print_every, train_loader, test_loader, valid_loader)
    fu.save_checkpoint(model, model_arch, epochs, criterion, optimizer, train_data, save_dir)
    
    return model, test_loader, criterion
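fu.Network is not shown on this page; below is a hedged sketch of an MLP head with the same call signature (input size, output size, list of hidden sizes, dropout) that would pair with the NLLLoss criterion by ending in LogSoftmax.
import torch.nn as nn

class Network(nn.Module):
    # Hypothetical stand-in for fu.Network, built only from how it is called above.
    def __init__(self, in_features, out_features, hidden_units, drop):
        super().__init__()
        sizes = [in_features] + list(hidden_units)
        layers = []
        for a, b in zip(sizes[:-1], sizes[1:]):
            layers += [nn.Linear(a, b), nn.ReLU(), nn.Dropout(drop)]
        layers += [nn.Linear(sizes[-1], out_features), nn.LogSoftmax(dim=1)]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)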
Example #7
    def __init__(self, base_model_name: str, pretrained=False,
                 num_classes=6):
        super().__init__()
        base_model = models.__getattribute__(base_model_name)(
            pretrained=pretrained)
        layers = list(base_model.children())[:-2]
        layers.append(nn.AdaptiveMaxPool2d(1))
        self.encoder = nn.Sequential(*layers)

        in_features = base_model.fc.in_features
        self.classifier = nn.Sequential(
            nn.Linear(in_features, 1024), nn.ReLU(), nn.Dropout(p=0.2),
            nn.Linear(1024, 1024), nn.ReLU(), nn.Dropout(p=0.2),
            nn.Linear(1024, num_classes))
Example #8
    def __init__(self,
                 base_model_name: str,
                 pretrained=False,
                 num_classes=400,
                 embed=False):
        super().__init__()
        base_model = models.__getattribute__(base_model_name)(
            pretrained=pretrained)
        layers = list(base_model.children())[:-1]
        layers.append(nn.AdaptiveMaxPool2d(1))
        self.encoder = nn.Sequential(*layers)
        self.fc1 = nn.Sequential(nn.Linear(1280, 512), nn.ReLU(),
                                 nn.Dropout(0.2))
        self.fc2 = nn.Sequential(nn.Linear(512, num_classes))
        self.embed = embed
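A hedged sketch of a forward() consistent with the modules above; the real one is not shown. It assumes the encoder's pooled output has 1280 channels (matching fc1) and returns the 512-d embedding when embed is set.
    def forward(self, x):
        # hypothetical forward: pooled features -> 512-d embedding -> logits
        features = self.encoder(x).flatten(1)   # (N, 1280)
        embedding = self.fc1(features)          # (N, 512)
        if self.embed:
            return embedding
        return self.fc2(embedding)              # (N, num_classes)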
Example #9
def load_checkpoint(filename, device):
    checkpoint = torch.load(filename)

    model_name = checkpoint['model_name']

    model = models.__getattribute__(model_name)(pretrained=True)
    model.to(device)

    for param in model.parameters():
        param.requires_grad = False

    model.classifier = checkpoint['classifier']
    model.class_to_idx = checkpoint['class_to_idx']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer = checkpoint['optimizer']

    model.eval()
    return model
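A hedged usage sketch (not from the original): run the loaded model on one image tensor and map the top prediction back to its class label via class_to_idx. The path 'checkpoint.pth' is a placeholder for a file saved with the matching keys.
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = load_checkpoint('checkpoint.pth', device)
idx_to_class = {v: k for k, v in model.class_to_idx.items()}

image = torch.randn(1, 3, 224, 224).to(device)  # placeholder input
with torch.no_grad():
    top_idx = model(image).argmax(dim=1).item()
print(idx_to_class[top_idx])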
Example #10
    def __init__(self, params):
        super().__init__()
        self.__class__.__name__ = 'ResNet50'
        base_model_name = params['base_model']
        pretrained = params['pretrained']
        num_classes = params['n_classes']
        base_model = models.__getattribute__(base_model_name)(
            pretrained=pretrained)
        layers = list(base_model.children())[:-2]
        layers.append(nn.AdaptiveMaxPool2d(1))
        self.encoder = nn.Sequential(*layers)

        in_features = base_model.fc.in_features
        self.classifier = nn.Sequential(nn.Linear(in_features, 1024),
                                        nn.ReLU(), nn.Dropout(p=0.2),
                                        nn.Linear(1024, 1024), nn.ReLU(),
                                        nn.Dropout(p=0.2),
                                        nn.Linear(1024, num_classes))
Example #11
def load_checkpoint(chkpoint_filepath):
    # Loads a deep-learning model checkpoint.
    # Load the saved file onto the GPU if available, otherwise onto the CPU.
    if torch.cuda.is_available():  # Move tensors to GPU if available
        checkpoint = torch.load(chkpoint_filepath)
        print('model on gpu')
    else:
        # map_location is needed to remap GPU-saved tensors onto the CPU
        print('model on cpu')
        checkpoint = torch.load(chkpoint_filepath,
                                map_location=lambda storage, loc: storage)
        # checkpoint = torch.load(chkpoint_filepath)
    # model, input_size = get_model(arch)
    arch = checkpoint['arch']
    model = models.__getattribute__(arch)(pretrained=True)

    for param in model.parameters():
        param.requires_grad = False

    model.classifier = checkpoint['classifier']

    # read the layer sizes back from the classifier
    input_size = model.classifier.fc1.in_features
    output_size = model.classifier.fc2.out_features
    try:  # older checkpoints were saved without an optimizer key
        optimizer = checkpoint['optimizer']
    except KeyError:
        print('optimizer key missing; creating a default Adam optimizer')
        learning_rate = 0.001
        optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)
    criterion = checkpoint['criterion']
    # load the class names from the idx
    model.class_to_idx = checkpoint['class_to_idx']
    model.load_state_dict(checkpoint['state_dict'])
    epochs = checkpoint['epochs']

    return model, arch, criterion, epochs, optimizer
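The in_features/out_features lookups above require the classifier's submodules to be named fc1 and fc2; a hedged sketch of such a head (mirroring the OrderedDict classifier built in the create_model example earlier on this page) is:
from collections import OrderedDict
import torch.nn as nn

classifier = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(25088, 4096)),
    ('relu1', nn.ReLU()),
    ('dropout', nn.Dropout(p=0.5)),
    ('fc2', nn.Linear(4096, 102)),
    ('output', nn.LogSoftmax(dim=1)),
]))
assert classifier.fc1.in_features == 25088 and classifier.fc2.out_features == 102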
Example #12
    def __init__(self,
                 name,
                 outtoken,
                 hidden=128,
                 enc_layers=1,
                 dec_layers=1,
                 nhead=1,
                 dropout=0.1,
                 pretrained=False):
        super(TransformerModelDnet, self).__init__()
        self.backbone = models.__getattribute__(name)(pretrained=pretrained)
        self.backbone.fc = nn.Conv2d(2048, hidden // 4, 1)

        # Rewire every dense layer in denseblock4 so that layers 1-23 each add
        # 41 feature maps and layer 24 adds 49, growing the block from its
        # 1056 input channels (these sizes match a densenet161 backbone) to the
        # 2048 channels expected by norm5 and the classifier below.
        denseblock4 = self.backbone.features.denseblock4
        in_channels = 1056
        for i in range(1, 25):
            layer = getattr(denseblock4, f'denselayer{i}')
            growth = 49 if i == 24 else 41
            layer.norm1 = nn.BatchNorm2d(in_channels,
                                         eps=1e-05,
                                         momentum=0.1,
                                         affine=True,
                                         track_running_stats=True)
            layer.conv1 = nn.Conv2d(in_channels,
                                    192,
                                    kernel_size=(1, 1),
                                    stride=(1, 1),
                                    bias=False)
            layer.conv2 = nn.Conv2d(192,
                                    growth,
                                    kernel_size=(3, 3),
                                    stride=(1, 1),
                                    padding=(1, 1),
                                    bias=False)
            in_channels += growth

        self.backbone.features._modules['norm5'] = nn.BatchNorm2d(
            2048,
            eps=1e-05,
            momentum=0.1,
            affine=True,
            track_running_stats=True)
        self.backbone.classifier = nn.Conv2d(2048, hidden // 4, 1)

        self.pos_encoder = PositionalEncoding(hidden, dropout)
        self.decoder = nn.Embedding(outtoken, hidden)
        self.pos_decoder = PositionalEncoding(hidden, dropout)
        self.transformer = nn.Transformer(d_model=hidden,
                                          nhead=nhead,
                                          num_encoder_layers=enc_layers,
                                          num_decoder_layers=dec_layers,
                                          dim_feedforward=hidden * 4,
                                          dropout=dropout,
                                          activation='relu')

        self.fc_out = nn.Linear(hidden, outtoken)
        self.src_mask = None
        self.trg_mask = None
        self.memory_mask = None
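A quick sanity check (not from the original) of the channel bookkeeping behind the rewiring above: 23 layers add 41 channels each and the last layer adds 49, taking denseblock4 from 1056 channels to the 2048 expected by norm5 and the classifier.
assert 1056 + 23 * 41 + 49 == 2048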