Ejemplo n.º 1
0
    def __init__(self, cnn_model, input_dim, hidden_dim, lstm_layers,
                 embedding_dim, batch_size, sequence_len):
        """Build the CNN feature extractor, projection layers and LSTM.

        Args:
            cnn_model: Backbone selector, either ``"vgg"`` or ``"resnet"``.
            input_dim: Output size of ``fc1`` and input size of the LSTM.
            hidden_dim: LSTM hidden size and input size of ``fc2``.
            lstm_layers: Number of stacked LSTM layers.
            embedding_dim: Output size of ``fc2``.
            batch_size: Stored unchanged as ``self.batch_size``.
            sequence_len: Stored unchanged as ``self.sequence_len``.

        Raises:
            ValueError: If ``cnn_model`` names an unsupported backbone.
        """
        super(Encoder, self).__init__()

        # Convolutions (pre-trained)
        if cnn_model == "vgg":
            self.cnn = models.vgg16(pretrained=True).features
            self.cnn_embedding_dim = 25088
        elif cnn_model == "resnet":
            # NOTE(review): torchvision ResNet classes do not appear to expose
            # a `.features` attribute -- confirm that `models.resnet` is a
            # project wrapper that provides one.
            self.cnn = models.resnet(pretrained=True).features
            self.cnn_embedding_dim = 1024
        else:
            # Fail early with a clear message instead of letting nn.Linear
            # choke on a ``None`` input size below.
            raise ValueError(
                "Unsupported cnn_model {!r}; expected 'vgg' or 'resnet'".format(
                    cnn_model))

        # Projections: CNN features -> LSTM input, LSTM hidden -> embedding.
        self.fc1 = nn.Linear(self.cnn_embedding_dim, input_dim)
        self.fc2 = nn.Linear(hidden_dim, embedding_dim)

        # Activations
        self.relu = nn.ReLU()
        self.leaky_relu = nn.LeakyReLU(0.2)

        # LSTM; batch_first=True means tensors are laid out as
        # (batch, time_step, input_size).
        self.LSTM = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=lstm_layers,
            batch_first=True,
        )

        self.sequence_len = sequence_len
        self.batch_size = batch_size
def create_model(arch, hidden_units, learning_rate):
    """Create a frozen pretrained backbone with a new trainable classifier.

    Args:
        arch: One of ``'vgg16'``, ``'resnet'`` or ``'densenet121'``.
        hidden_units: Width of the hidden layer of the new classifier.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        Tuple ``(model, criterion, optimizer, scheduler, in_features)`` where
        ``in_features`` is the input width of the backbone's original head.

    Raises:
        ValueError: If ``arch`` is not one of the supported names.
    """
    if arch == 'vgg16':
        model = models.vgg16(pretrained=True)
        in_features = model.classifier[0].in_features
    elif arch == 'resnet':
        model = models.resnet(pretrained=True)
        # ``in_features`` is a plain int attribute, not a callable.
        in_features = model.fc.in_features
    elif arch == 'densenet121':
        model = models.densenet121(pretrained=True)
        in_features = model.classifier.in_features
    else:
        # Previously this only printed and then crashed on the unbound
        # ``in_features``; fail explicitly instead.
        raise ValueError(
            'The required architecture cannot be recognised: {!r}'.format(arch))

    # Freeze the pretrained weights; only the new classifier is trained.
    for param in model.parameters():
        param.requires_grad = False

    classifier = nn.Sequential(
        OrderedDict([('fc1', nn.Linear(in_features, hidden_units)),
                     ('drop', nn.Dropout(p=0.5)), ('relu', nn.ReLU()),
                     ('fc2', nn.Linear(hidden_units, 102)),
                     ('output', nn.LogSoftmax(dim=1))]))
    # NOTE(review): for the 'resnet' branch the head read above is
    # ``model.fc``; assigning ``model.classifier`` presumably does not replace
    # the layer used in the forward pass -- confirm against the model class.
    model.classifier = classifier

    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=4,
                                    gamma=0.1,
                                    last_epoch=-1)

    return model, criterion, optimizer, scheduler, in_features
Ejemplo n.º 3
0
	def __init__(self,embed_size):
		"""Build a pretrained ResNet feature extractor plus a linear embedding.

		Args:
			embed_size: Output size of the embedding layer and of the batch norm.
		"""
		super(CNN_Encoder,self).__init__()
		# Load the pretrained ResNet and keep every child except the last one
		# (the top fc layer), which is replaced by ``self.linear`` below.
		resnet = models.resnet(pretrained=True)
		# Was ``list[resnet.children()[:,-1]]``: that subscripts the ``list``
		# type and indexes a generator instead of slicing the list of children.
		modules = list(resnet.children())[:-1]
		self.resnet = nn.Sequential(*modules)
		self.linear = nn.Linear(resnet.fc.in_features,embed_size)
		# ``nn.BatchNorm2D`` does not exist.
		# NOTE(review): BatchNorm1d assumes ``bn`` is applied to the 2-D
		# (batch, embed_size) output of ``self.linear`` -- confirm in forward().
		self.bn = nn.BatchNorm1d(embed_size,momentum = 0.01)
Ejemplo n.º 4
0
    def test_resnet_parsing(self):
        """Parse a ResNet with ``bonsai_parser`` and save the resulting cfg.

        The body only runs when this module is executed as a script; when the
        module is imported the method returns immediately.
        """
        if __name__ != '__main__':
            return

        net = resnet()
        sample_input = torch.rand((1, 3, 512, 512))

        # Run the parser on the ResNet instance with one random input.
        parsed = bonsai_parser(net, sample_input)
        parsed.summary()  # prints model cfg summary
        parsed.save_cfg('example_models/configs/resnet50_from_pytorch.cfg')
Ejemplo n.º 5
0
 def __init__(self, img_path, plot_res=True, to_onnx=False):
     """Load the ResNet weights onto the GPU and prepare the input image.

     Args:
         img_path: Path of the image; it is read with ``cv2.imread`` and also
             passed through ``image_normalize`` / ``input_dim_3to4``.
         plot_res: Stored as ``self.plot``.
         to_onnx: Stored as ``self.to_onnx``.
     """
     # Plain configuration values.
     self.device = "cuda:0"
     self.to_onnx = to_onnx
     self.plot = plot_res

     # Build the network and restore its weights from <model_folder>/pth/.
     self.model = resnet()
     self.model_name = "resnet{}.pth".format(num)
     weights_path = os.path.join(model_folder,
                                 "pth/{}".format(self.model_name))
     state = torch.load(weights_path)
     self.model.load_state_dict(state)
     self.model.cuda()

     # Network input tensor and the raw image as loaded by OpenCV.
     self.input_tensor = input_dim_3to4(image_normalize(img_path))
     self.img = cv2.imread(img_path)
Ejemplo n.º 6
0
def load_build(filepath):
    """Rebuild a model from a checkpoint file.

    The backbone is chosen from the module-level ``in_arg.arch``; the
    classifier module, weights and ``class_to_idx`` mapping come from the
    checkpoint.

    Args:
        filepath: Path of a checkpoint written with ``torch.save``.

    Returns:
        The model with the checkpoint's classifier, weights and
        ``class_to_idx`` attached.

    Raises:
        KeyError: If the checkpoint holds neither ``'state_dict'`` nor
            ``'classifier_state_dict'``, or lacks ``'classifier'`` /
            ``'class_to_idx'``.
    """
    # NOTE(security): torch.load unpickles arbitrary objects; only load
    # checkpoints from a trusted source.
    checkpoint = torch.load(filepath)

    if in_arg.arch == 'vgg16':
        model = models.vgg16(pretrained=True)
    else:
        model = models.resnet(pretrained=True)

    model.classifier = checkpoint['classifier']

    # Accept both a full-model state dict and the classifier-only state dict
    # that the training ``main`` in this source stores under
    # 'classifier_state_dict'.
    if 'state_dict' in checkpoint:
        model.load_state_dict(checkpoint['state_dict'])
    elif 'classifier_state_dict' in checkpoint:
        model.classifier.load_state_dict(checkpoint['classifier_state_dict'])
    else:
        raise KeyError(
            "checkpoint has neither 'state_dict' nor 'classifier_state_dict'")

    model.class_to_idx = checkpoint['class_to_idx']

    return model
Ejemplo n.º 7
0
    def __init__(self, class_num=62):
        """Build the truncated ResNet backbone, attention layers and heads.

        The method was previously spelled ``__int__`` (and called
        ``super().__int__()``), so it never ran as the constructor and no
        layer was ever created.

        Args:
            class_num: Output size of each of the four classification heads.
        """
        super(Res18, self).__init__()
        model_ft = resnet(BasicBlock, [2, 2, 2, 2])
        # Keep every child of the backbone except the last three.
        self.base_model = nn.Sequential(*list(model_ft.children())[:-3])

        # attention
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        # NOTE(review): named "maxpool" but built as an *average* pool; if max
        # pooling was intended this should be nn.AdaptiveMaxPool2d -- left
        # unchanged pending confirmation.
        self.maxpool = nn.AdaptiveAvgPool2d(1)
        self.sign = nn.Sigmoid()
        in_planes = 256
        ratio = 8
        # 1x1 convolutions: squeeze to in_planes // ratio channels, then expand.
        self.a_fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
        self.a_relu = nn.ReLU()
        self.a_fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # NOTE(review): same naming/type mismatch as ``self.maxpool`` above.
        self.max_pool = nn.AdaptiveAvgPool2d(1)
        self.reduce_layer = nn.Conv2d(512, 256, 1)

        # Four independent dropout + linear heads over 256 features.
        self.fc1 = nn.Sequential(nn.Dropout(0.5), nn.Linear(256, class_num))
        self.fc2 = nn.Sequential(nn.Dropout(0.5), nn.Linear(256, class_num))
        self.fc3 = nn.Sequential(nn.Dropout(0.5), nn.Linear(256, class_num))
        self.fc4 = nn.Sequential(nn.Dropout(0.5), nn.Linear(256, class_num))
Ejemplo n.º 8
0
    def __init__(self, cnn_model, input_dim, hidden_dim, lstm_layers,
                 embedding_dim, sequence_len):
        """Build the CNN extractor, projection, upsampling and LSTM layers.

        Args:
            cnn_model: Backbone selector, either ``"vgg"`` or ``"resnet"``.
            input_dim: Output size of ``fc1`` and input size of the LSTM.
            hidden_dim: LSTM hidden size and input size of ``fc2``.
            lstm_layers: Number of stacked LSTM layers.
            embedding_dim: Output size of ``fc2``.
            sequence_len: Stored unchanged as ``self.sequence_len``.

        Raises:
            ValueError: If ``cnn_model`` names an unsupported backbone.
        """
        super(Encoder, self).__init__()

        # Convolutions (pre-trained)
        if cnn_model == "vgg":
            self.cnn = models.vgg16(pretrained=True).features
            self.cnn_embedding_dim = 25088
        elif cnn_model == "resnet":
            # NOTE(review): torchvision ResNet classes do not appear to expose
            # a `.features` attribute -- confirm that `models.resnet` is a
            # project wrapper that provides one.
            self.cnn = models.resnet(pretrained=True).features
            self.cnn_embedding_dim = 1024
        else:
            # Fail early with a clear message instead of letting nn.Linear
            # choke on a ``None`` input size below.
            raise ValueError(
                "Unsupported cnn_model {!r}; expected 'vgg' or 'resnet'".format(
                    cnn_model))

        # Projections: CNN features -> LSTM input, LSTM hidden -> embedding.
        self.fc1 = nn.Linear(self.cnn_embedding_dim, input_dim)
        self.fc2 = nn.Linear(hidden_dim, embedding_dim)

        # Upsampling path: x5 bilinear upsample, then an 8 -> 3 channel
        # transposed convolution with a 200x200 kernel.
        self.unpool_1 = nn.Upsample(scale_factor=5, mode='bilinear')
        self.deconv_1 = nn.ConvTranspose2d(in_channels=8,
                                           out_channels=3,
                                           kernel_size=200,
                                           stride=1)

        # Activations
        self.relu = nn.ReLU()
        self.leaky_relu = nn.LeakyReLU(0.2)

        # LSTM; batch_first=True means tensors are laid out as
        # (batch, time_step, input_size).
        self.LSTM = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            num_layers=lstm_layers,
            batch_first=True,
        )

        self.sequence_len = sequence_len
Ejemplo n.º 9
0
 def __init__(self, num_classes=4):
     """Create the pre-processing block and a ResNet with a resized head.

     Args:
         num_classes: Output size of the replacement ``fc`` layer.
     """
     super().__init__()
     self.pre = ClassicCNN.classic_cnn_block(1, 3)

     # Swap the pretrained head for a linear layer with ``num_classes`` outputs.
     backbone = resnet(pretrained=True)
     head_inputs = backbone.fc.in_features
     backbone.fc = nn.Linear(head_inputs, num_classes)
     self.resnet = backbone
Ejemplo n.º 10
0
def main():
    """Train a custom classifier on a frozen pretrained network and save it.

    Settings come from the module-level ``in_arg`` (``arch``, ``lr``, ``gpu``,
    ``epoch``). Only ``model.classifier`` is optimised. Every ``print_every``
    steps the test set is evaluated with ``testing_pass`` and the metrics are
    printed. A checkpoint is finally written to ``checkpoint.pth``.
    """
    # Used for all three loaders and recorded in the checkpoint.
    batch_size = 102
    output_size = 102  #this size would be different for a different image dataset but we assume Flowers

    #load a pre trained network (VGG16 or resnet)
    if in_arg.arch == 'vgg16':
        model = models.vgg16(pretrained=True)
    else:
        model = models.resnet(pretrained=True)

    # NOTE(review): this reads ``model.classifier[0]``, which the vgg16 branch
    # provides; the resnet branch presumably has no indexable ``classifier``
    # and would fail here -- confirm before relying on that path.
    num_ftrs = model.classifier[0].in_features
    print(num_ftrs)  #used as first layer of classifier

    #Build the model (pretrained model whose classifier is replaced with the custom classifier)
    input_size = num_ftrs
    classifier = Network(input_size, output_size, hidden_layers)

    model.classifier = classifier  #replace pretrained model classifer with custom
    print(model)  #print the model to make sure it is correct

    # Freeze model parameters so we don't backpropagate through them, then
    # re-enable gradients for the new classifier only.
    for param in model.parameters():
        param.requires_grad = False
    for paramc in model.classifier.parameters():
        paramc.requires_grad = True

    #load the dataset from utilities.py
    train_dataset = load_data('train')
    trainloader = torch.utils.data.DataLoader(train_dataset,
                                              batch_size=batch_size,
                                              shuffle=True)

    test_dataset = load_data('test')
    testloader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=batch_size,
                                             shuffle=True)

    # The validation loader is built as before but is not consumed below.
    valid_dataset = load_data('valid')
    validloader = torch.utils.data.DataLoader(valid_dataset,
                                              batch_size=batch_size,
                                              shuffle=True)

    #set up parameters for forward pass through the network
    print_every = 2
    steps = 0
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), in_arg.lr)

    #FORWARD PROPAGATION
    #send images and model to CPU or GPU depending on GPU available and input argument
    if in_arg.gpu == 'gpu' and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    model.to(device)

    for e in range(in_arg.epoch):
        running_loss = 0
        model.train()  #make sure model is in train mode
        for images, labels in trainloader:
            steps += 1

            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()  #clear the gradients
            # Call the module itself rather than .forward() so that any
            # registered hooks also run.
            output = model(images)  #forward pass for training
            loss = criterion(output, labels)  #compute pass loss
            loss.backward()  #backward pass
            optimizer.step()  #optimize

            running_loss += loss.item()
            #during training, run test images through model and get accuracy
            if steps % print_every == 0:
                with torch.no_grad():  #turn off gradients for validation
                    test_loss, accuracy = testing_pass(model, testloader,
                                                       criterion)

                print(
                    'epoch: ', e,
                    'Training Loss: {:.4f}'.format(running_loss / print_every),
                    'Testing Loss: {:.4f}'.format(test_loss / len(testloader)),
                    'Testing Accuracy: {:.4f}'.format(accuracy /
                                                      len(testloader)))
                running_loss = 0
                #turn training mode back on for next forward train pass
                model.train()

    # Save the checkpoint
    model.class_to_idx = train_dataset.class_to_idx

    checkpoint = {
        'input_size': input_size,
        'output_size': output_size,
        'learning_rate': in_arg.lr,
        'batch_size': batch_size,
        'classifier': classifier,
        'epochs': in_arg.epoch,
        'classifier_state_dict': model.classifier.state_dict(),
        'class_to_idx': model.class_to_idx
    }

    torch.save(checkpoint, 'checkpoint.pth')
Ejemplo n.º 11
0
from torchvision.models import resnet50 as resnet

# Benchmark script: times ResNet instantiation and the first CUDA inference
# for a zero-filled batch whose size may be given as the first CLI argument.

# torch.backends.cudnn.benchmark = True
# Set before any CUDA work below; presumably so kernel launches run
# synchronously and the wall-clock timings cover the actual GPU work.
environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Batch size defaults to 1 and can be overridden by argv[1].
minibatch_size = 1
if len(argv) > 1:
    minibatch_size = int(argv[1])

# if True:
# Everything below runs with autograd disabled.
with torch.no_grad():
    x = torch.zeros(minibatch_size, 3, 224, 224)
    print("input shape:", x.shape)

    # Time model construction (no pretrained weights) in eval mode.
    t0 = time()
    model = resnet(pretrained=False).eval()
    t1 = time()
    print("instantiation time:", (t1 - t0) * 1000, "ms")
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("parameters:", num_params)
    # Blocks until Enter is pressed -- presumably a pause for inspecting the
    # process (e.g. memory use) between stages; confirm intent.
    input()

    # Time moving model and input to the GPU plus the first forward pass.
    t0 = time()
    model = model.cuda()
    x = x.cuda()
    model(x)
    t1 = time()
    print("1st inference time:", (t1 - t0) * 1000, "ms")
    input()

    # Presumably filled by timing code that follows this excerpt.
    times = []
Ejemplo n.º 12
0
    def __init__(self,
                 output,
                 resnet_depth    = 34,
                 imChannels      = 1,
                 alphabet_length = 11,
                 labels          = "_1234567890 ",
                 last_epoch      = 0
                 ):
        """Build the ResNet-based recognition model and its utilities.

        Args:
            output: Stored as ``self.out_path`` and in ``self.modelparams``.
            resnet_depth: ResNet variant to use; 18, 34 or 50.
            imChannels: Number of channels of the input images.
            alphabet_length: Number of output channels of the final conv.
            labels: Label string handed to ``GreedyDecoder``.
            last_epoch: Stored as ``self.last_epoch``.

        Raises:
            NotImplementedError: If ``resnet_depth`` is not 18, 34 or 50.
        """
        super(Model, self).__init__()

        self.out_path = output
        self.last_epoch = last_epoch

        # save model parameters for checkpointer
        self.modelparams = {"output"          : output,
                            "resnet_depth"    : resnet_depth,
                            "imChannels"      : imChannels,
                            "alphabet_length" : alphabet_length,
                            "labels"          : labels}

        # ----- MODEL ------

        # same as resnet input layer, but with support for grayscale input
        self.input = nn.Conv2d(imChannels, 64, kernel_size=(7, 7),
                               stride=(2, 2), padding=(3, 3), bias=False)

        self.bn1 = nn.BatchNorm2d(64)
        self.bn2 = nn.BatchNorm2d(alphabet_length)

        # leave first maxpool out to get more output dimensions, since
        # output string maximum length is limited by this
        self.relu = nn.ReLU(inplace=True)

        if resnet_depth == 18:
            from torchvision.models import resnet18 as resnet
            resnet_output_size = 256

        elif resnet_depth == 34:
            from torchvision.models import resnet34 as resnet
            resnet_output_size = 256

        elif resnet_depth == 50:
            from torchvision.models import resnet50 as resnet
            resnet_output_size = 1024

        else:
            raise NotImplementedError("Only resnet depths of 18, 34, and 50 are supported.")

        # The first four children are skipped (the input stage is replaced by
        # the layers above). Some layers are also removed from the end, since
        # small images result in negative filter sizes on those layers, and
        # training crashes.
        resnet_orig = resnet()
        resnet_no_top = nn.Sequential(*list(resnet_orig.children())[4:-3])

        self.resnet = resnet_no_top

        self.dropout = nn.Dropout2d()

        # Layers are created in the original order even when a branch below
        # does not use all of them, so random initialisation is unchanged.
        conv1 = nn.Conv2d(resnet_output_size,
                          resnet_output_size // 2, 2)
        conv2 = nn.Conv2d(resnet_output_size // 2,
                          resnet_output_size // 4, 2)
        conv3 = nn.Conv2d(resnet_output_size // 4,
                          alphabet_length, 1)

        # svhn and coco-text datasets are supported, you might have to change
        # these layers if using other datasets
        if alphabet_length == 11: #svhn parameters
            self.conv_out = nn.Sequential(conv1, self.relu, self.dropout,
                                          conv2, self.relu, self.dropout,
                                          conv3, self.bn2, self.relu)
        else: # coco-text parameters
            conv3 = nn.Conv2d(int(resnet_output_size), alphabet_length, 1)
            self.conv_out = nn.Sequential(self.dropout, conv3, self.bn2, self.relu)

        n_param = self.count_parameters()
        print("\nModel initialized, {} trainable parameters".format(n_param))

        # ------ UTILITIES ------

        self.labels = labels
        self.decoder = GreedyDecoder(self.labels)

        self.loss_fcn = CTCLoss()

        # ------ INIT DEVICES -------

        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        # Compare the device *type*: testing a torch.device against the plain
        # string "cpu" is not a reliable equality check.
        if self.device.type == "cpu":
            print("GPU not found, using CPU ...")
        else:
            if torch.cuda.device_count() > 1:
                print("Using {} cuda devices ...".format(torch.cuda.device_count()))

                # Wrap each stage in DataParallel. ``self.input`` used to be
                # wrapped around ``self.conv_out`` by mistake, which replaced
                # the input convolution with the output head.
                self.input     = nn.DataParallel(self.input)
                self.resnet    = nn.DataParallel(self.resnet)
                self.relu      = nn.DataParallel(self.relu)
                self.dropout   = nn.DataParallel(self.dropout)
                self.conv_out  = nn.DataParallel(self.conv_out)

            else:
                print("using {} ...".format(self.device))
# Choose the dataset location: the NSML-provided path when running on NSML,
# otherwise a fixed local directory.
if nsml.IS_ON_NSML:
    DATASET_PATH = os.path.join(nsml.DATASET_PATH)
    print('start using nsml...!')
    print('DATASET_PATH: ', DATASET_PATH)

    use_nsml = True
else:
    DATASET_PATH = os.path.join('/home/kwpark_mk2/airush2_temp')
    DATASET_NAME = 'airush2_temp'
    print('use local gpu...!')

    use_nsml = False

from torch.autograd import Variable

# Frozen feature extractor: the pretrained network with its last child removed
# (presumably the final fully connected layer) and gradients disabled for
# every remaining parameter.
modules = list(models.resnet(pretrained=True).children())[:-1]
resnet50 = nn.Sequential(*modules)
for p in resnet50.parameters():
    p.requires_grad = False

import glob


def main(args):
    """Move the module-level ``resnet50`` to the GPU and build its optimizer.

    Args:
        args: Object providing at least ``lr``, the Adam learning rate.
    """
    model = resnet50

    model = model.cuda()

    # NOTE(review): every parameter of ``resnet50`` has ``requires_grad``
    # set to False at module level, so this optimizer only receives frozen
    # parameters -- confirm that is intended.
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
Ejemplo n.º 14
0
 def __init__(self, cuda):
     """Build a pretrained ResNet encoder and a five-stage upsampling decoder.

     Args:
         cuda: Stored unchanged as ``self.cuda``.
     """
     super(res_net, self).__init__()
     # NOTE(review): this attribute shadows the ``cuda`` method name on the
     # instance; kept as-is because callers may read it.
     self.cuda = cuda

     def bottleneck_up(in_channels, out_channels):
         # 1x1 reduce to 64 channels, stride-2 5x5 transposed conv
         # (upsampling), then 1x1 expand to ``out_channels``; a batch norm
         # follows each convolution.
         return nn.Sequential(
             nn.ConvTranspose2d(in_channels, 64, kernel_size=1, stride=1,
                                padding=0),
             nn.BatchNorm2d(64),
             nn.ConvTranspose2d(64, 64, kernel_size=5, stride=2, padding=2,
                                output_padding=1),
             nn.BatchNorm2d(64),
             nn.ConvTranspose2d(64, out_channels, kernel_size=1, stride=1,
                                padding=0),
             nn.BatchNorm2d(out_channels),
         )

     # Encoder: pretrained network without its last two children
     # (the avg pooling and fc layer).
     backbone = resnet(pretrained=True)
     self.net = nn.Sequential(*list(backbone.children())[:-2])
     # self.net.requires_grad = False
     print(self.net)

     # Decoder
     # Block 5
     self.decoder_block0 = bottleneck_up(512, 512)
     # Block 6
     self.decoder_block1 = bottleneck_up(512, 256)
     # Block 7
     self.decoder_block2 = bottleneck_up(256, 256)
     # Block 8
     self.decoder_block3 = bottleneck_up(256, 128)
     # Block 9: a final upsampling step, then a 1x1 projection to one channel.
     self.decoder_block4 = nn.Sequential(
         nn.ConvTranspose2d(128, 64, kernel_size=5, stride=2, padding=2,
                            output_padding=1),
         nn.BatchNorm2d(64),
         nn.ConvTranspose2d(64, 1, kernel_size=1, stride=1),
     )