def load_checkpoint(filepath):
    """Rebuild an ``fc_model.Network`` from a saved checkpoint.

    The checkpoint is read as a dict that must hold ``'input_size'``,
    ``'output_size'`` and ``'state_dict'``. A ``'hidden_layers'`` entry is
    optional: when present it is forwarded to ``fc_model.Network``, when
    absent the network is built from the two sizes alone.

    Args:
        filepath: Location of the checkpoint, passed straight to
            ``torch.load``.

    Returns:
        The reconstructed network with the saved weights loaded.

    Raises:
        KeyError: If a required checkpoint entry is missing.
    """
    # Deserialize onto the CPU so a checkpoint written from CUDA tensors can
    # still be opened on a machine without a GPU. load_state_dict copies the
    # values into the model's own parameters, so this does not decide where
    # the returned model lives.
    checkpoint = torch.load(filepath, map_location='cpu')

    network_args = [checkpoint['input_size'], checkpoint['output_size']]
    # Some checkpoints store only the input/output sizes; only pass the
    # hidden layer sizes along when they were actually saved.
    if 'hidden_layers' in checkpoint:
        network_args.append(checkpoint['hidden_layers'])

    model = fc_model.Network(*network_args)
    model.load_state_dict(checkpoint['state_dict'])

    return model
import torch
from torch import  nn, optim
import torch.nn.functional as F
from torchvision import datasets, transforms

import fc_model

# Every image is only converted to a tensor; no further preprocessing.
transform = transforms.Compose([transforms.ToTensor()])

# Training split first, then the test split, both read from the local
# 'FashionMNIST_data/' directory with downloading disabled.
train_set, test_set = (
    datasets.FashionMNIST('FashionMNIST_data/',
                          download=False,
                          train=is_train,
                          transform=transform)
    for is_train in (True, False)
)

# One shuffled loader per split, 64 samples per batch.
train_loader, test_loader = (
    torch.utils.data.DataLoader(split, batch_size=64, shuffle=True)
    for split in (train_set, test_set)
)

# Pull a single batch from the training loader.
image, label = next(iter(train_loader))

# Model, loss criterion and Adam optimizer over the model's parameters.
model = fc_model.Network(784, 10, [512, 256, 128])
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# fc_model.train(model, train_loader, test_loader, criterion, optimizer, epochs=2)
print("Our model: \n\n", model, '\n')
print("The state dict keys: \n\n", model.state_dict().keys())
# Example #3
0
# Validate the user-supplied paths before doing any work. Explicit raises are
# used instead of ``assert`` so the checks still run under ``python -O``.
image_path = results.image_path
if not os.path.exists(image_path):
    raise FileNotFoundError("image_path must be exist")

checkpoint_path = results.checkpoint_path
if not os.path.exists(checkpoint_path):
    raise FileNotFoundError("checkpoint_path must be exist")

# Load cat_to_name
with open(results.category_names, 'r') as f:
    cat_to_name = json.load(f)

# --------------------------------

# --- Create model from the checkpoint data
# Deserialize onto the CPU so a checkpoint saved from CUDA tensors also opens
# on a machine without a GPU; load_state_dict below copies the values into
# the model's own parameters wherever those live.
data_checkpoint = torch.load(checkpoint_path, map_location='cpu')

# Rebuild the network from the architecture settings stored in the checkpoint.
network = fc_model.Network(data_checkpoint['arch'],
                           cat_to_name,
                           hidden_units=data_checkpoint['hidden_units'],
                           gpu=results.gpu)

loaded_model = network.model
loaded_model.load_state_dict(data_checkpoint['state_dict'])

# Show prediction info
utils.view_classify(image_path,
                    loaded_model,
                    cat_to_name,
                    data_checkpoint['class_to_idx'],
                    topk=results.top_k,
                    show_plot=results.show_plot)
# %% [markdown]
# Here we can see one of the images.

# %%
# Take one batch from the training loader and show the first image in it.
image, label = next(iter(trainloader))
helper.imshow(image[0, :])

# %% [markdown]
# # Train a network
#
# To make things more concise here, I moved the model architecture and training code from the last part to a file called `fc_model`. Importing this, we can easily create a fully-connected network with `fc_model.Network`, and train the network using `fc_model.train`. I'll use this model (once it's trained) to demonstrate how we can save and load models.

# %%
# Create the network, define the criterion and optimizer

model = fc_model.Network(784, 10, [512, 256, 128])
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# %%
# Train for two epochs; the test loader is handed to the training routine as well.
fc_model.train(model, trainloader, testloader, criterion, optimizer, epochs=2)

# %% [markdown]
# ## Saving and loading networks
#
# As you can imagine, it's impractical to train a network every time you need to use it. Instead, we can save trained networks then load them later to train more or use them for predictions.
#
# The parameters for PyTorch networks are stored in a model's `state_dict`. We can see the state dict contains the weight and bias matrices for each of our layers.

# %%
# Print the model's string representation.
print("Our model: \n\n", model, '\n')
def _build_arg_parser():
    """Create the command-line parser with every option ``main`` accepts."""
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.5,
                        metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--checkpoint',
                        type=str,
                        default="./model_checkpoint/checkpoint.pth",
                        help='Path to save Check point')
    parser.add_argument('--verify-model',
                        action="store_true",
                        default=False,
                        help='use for to verify model file')
    parser.add_argument('--debug',
                        action="store_true",
                        default=False,
                        help='use for to print debug logs')
    return parser


def main():
    """Train a network on MNIST, save a checkpoint, and optionally verify it.

    Steps:
      1. Parse the command line, seed torch, and pick the CUDA or CPU device.
      2. Build the MNIST train/test loaders.
      3. If no file exists at ``--checkpoint``, train a fresh
         ``fc_model.Network(784, 10)`` and save its sizes and state dict
         there; an existing file is never overwritten.
      4. With ``--verify-model``, reload the checkpoint through
         ``load_checkpoint`` and print the predictions and per-sample
         correctness for one test batch.

    ``--momentum`` and ``--log-interval`` are accepted but not used here.
    """
    args = _build_arg_parser().parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    devname = "cuda" if use_cuda else "cpu"
    device = torch.device(devname)
    print("\n\n----------------\nDevice Used to process:", devname)

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # MNIST images have a single channel, so Normalize gets exactly one mean
    # and one std; three-channel statistics are rejected by newer torchvision.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        './data', train=True, download=True, transform=transform),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)

    test_loader = torch.utils.data.DataLoader(datasets.MNIST(
        './data', train=False, download=True, transform=transform),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    # Resolve against the working directory; abspath also handles a
    # --checkpoint value that is already absolute.
    checkpoint_path = os.path.abspath(args.checkpoint)
    if args.debug:
        print("\n\nPath for checkpoint file :{}\n\n".format(checkpoint_path))

    if os.path.isfile(checkpoint_path):
        print(
            "File Check Status: File is already present \nRetry with different file name\n----------------\n"
        )
    else:
        print(
            "\n----------------\nFile Check Status: File is not present!!!\nCreating new with name:{}\n----------------\n\n"
            .format(checkpoint_path))
        # Create the target directory before training so an unwritable
        # location fails now rather than after the training run.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

        model = fc_model.Network(784, 10)
        criterion = nn.NLLLoss()
        # Honor --lr (its default matches the previously hard-coded 0.001).
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        # Train and validation
        fc_model.train(model, train_loader, test_loader, criterion, optimizer,
                       device, args.epochs)

        print("\n----------------\nOur model: \n\n", model,
              "\n----------------\n")
        if args.debug:
            print("The state dict keys: \n\n",
                  model.state_dict().keys(), "\n----------------\n")

        # NOTE(review): no 'hidden_layers' entry is stored, so whatever loads
        # this file has to rebuild the network with fc_model.Network's default
        # hidden layers -- TODO confirm the loader does.
        checkpoint = {
            'input_size': 784,
            'output_size': 10,
            'state_dict': model.state_dict()
        }

        torch.save(checkpoint, checkpoint_path)

    if args.verify_model:
        model1 = load_checkpoint(checkpoint_path)
        model1.to(device)
        print("\n\nloaded model\n\n", model1)
        # Test out your network!
        model1.eval()
        # Take one batch; iterator objects are advanced with next(), the
        # .next() method no longer exists on DataLoader iterators.
        images, labels = next(iter(test_loader))
        # Move the batch to the device the model lives on.
        img, labels = images.to(device), labels.to(device)
        # Forward pass without tracking gradients.
        with torch.no_grad():
            output = model1.forward(img)

        # Presumably the network emits log-probabilities (it is trained with
        # NLLLoss), so exp() turns them into probabilities -- TODO confirm.
        ps = torch.exp(output)
        # True wherever the highest-scoring class equals the label.
        equality = (labels.data == ps.max(1)[1])

        print(ps)
        print(equality)
### LOAD

# Load the previously saved state dict with all trained parameters
state_dict = torch.load('checkpoint.pth')
# Show the names of the entries stored in it
print(state_dict.keys())

# The state dict then also has to be loaded into the network itself
model.load_state_dict(state_dict)

'''ACHTUNG:
     Loading the state dict works only if the model architecture is exactly 
     the same as the checkpoint architecture. 
     If I create a model with a different architecture, this fails. 
     This means we need to rebuild the model exactly as it was when trained. '''
# Try this: rebuild the network with hidden layers [400, 200, 100]
model = fc_model.Network(784, 10, [400, 200, 100])
# This will throw an error because the tensor sizes are wrong!
model.load_state_dict(state_dict)

'''Information about the model architecture needs to be saved in the checkpoint,
along with the state dict. To do this, you build a dictionary with all the
information you need to completely rebuild the model.'''

### LOAD with FUNCTION
'''The following is just a demo function. You need to build your own load
function for each of the models you want to learn. '''

def load_checkpoint(filepath):
    checkpoint = torch.load(filepath)
    model = fc_model.Network(checkpoint['input_size'],
                             checkpoint['output_size'],
results = parser.parse_args()

# --------------------------------
# Check to ensure that data_dir is present first before proceeding.
# An explicit raise is used instead of ``assert`` so the check still runs
# under ``python -O``.
data_dir = results.data_dir
if not os.path.exists(data_dir):
    raise FileNotFoundError("data_dir must be exist")

# Load cat_to_name
with open(results.category_names, 'r') as f:
    cat_to_name = json.load(f)

#---------------------------------

# Build the network from the command-line architecture settings.
network = fc_model.Network(results.arch,
                           cat_to_name,
                           hidden_units=results.hidden_units,
                           gpu=results.gpu)

# Train on the data directory with the requested learning rate and epochs.
network.train(results.data_dir,
              learning_rate=results.learning_rate,
              epochs=results.epochs)

# Save the checkpoint
checkpoint = {
    'arch': results.arch,
    'data_dir': results.data_dir,
    'cat_to_name': cat_to_name,
    'gpu': results.gpu,
    'hidden_units': results.hidden_units,
    'epochs': results.epochs,
    'learning_rate': results.learning_rate,