Ejemplo n.º 1
0
def cnn_finetune(device, dataloaders, dataset_sizes, class_names):
    """Fine-tune all layers of a pretrained ResNet-18 with a new 2-output head.

    The pretrained network's final fully connected layer is replaced by a
    fresh ``nn.Linear`` with two outputs, the whole network is handed to
    ``train_model`` for 25 epochs, and the result is passed to
    ``visualize_model``.

    Args:
        device: Target passed to ``Module.to`` and forwarded to the helpers.
        dataloaders: Forwarded unchanged to ``train_model`` and
            ``visualize_model``.
        dataset_sizes: Forwarded unchanged to ``train_model``.
        class_names: Forwarded unchanged to ``visualize_model``.

    Returns:
        None. The trained model is only used for visualization.
    """
    net = models.resnet18(pretrained=True)

    # Swap the original classifier for a freshly initialised two-output layer.
    net.fc = nn.Linear(net.fc.in_features, 2)
    net.to(device)

    loss_fn = nn.CrossEntropyLoss()

    # Every parameter of the network is handed to the optimizer.
    sgd = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    # Multiply the learning rate by 0.1 every 7 epochs.
    step_decay = lr_scheduler.StepLR(sgd, step_size=7, gamma=0.1)

    net = train_model(net, loss_fn, sgd, step_decay, device, dataloaders,
                      dataset_sizes, num_epochs=25)

    visualize_model(net, device, dataloaders, class_names)
Ejemplo n.º 2
0
def cnn_feature_extractor(device, dataloaders, dataset_sizes, class_names):
    """Train only a new final layer on top of a frozen pretrained ResNet-18.

    Every parameter of the pretrained network gets ``requires_grad = False``
    so that no gradients are computed for it in ``backward()``. The final
    fully connected layer is then replaced by a fresh two-output
    ``nn.Linear`` (whose parameters have ``requires_grad=True`` by default),
    and only that layer's parameters are given to the optimizer.

    Args:
        device: Target passed to ``Module.to`` and forwarded to the helpers.
        dataloaders: Forwarded unchanged to ``train_model`` and
            ``visualize_model``.
        dataset_sizes: Forwarded unchanged to ``train_model``.
        class_names: Forwarded unchanged to ``visualize_model``.

    Returns:
        None. The trained model is only used for visualization.
    """
    model_conv = models.resnet18(pretrained=True)

    # Freeze the pretrained backbone. The flag must be set on each parameter
    # object; assigning to a bare local name would have no effect.
    for param in model_conv.parameters():
        param.requires_grad = False

    # Parameters of newly constructed modules have requires_grad=True by
    # default, so the replacement head stays trainable.
    num_ftrs = model_conv.fc.in_features
    model_conv.fc = nn.Linear(num_ftrs, 2)

    model_conv.to(device)
    criterion = nn.CrossEntropyLoss()

    # Optimize only the parameters of the final layer.
    optimizer_conv = optim.SGD(model_conv.fc.parameters(),
                               lr=0.001,
                               momentum=0.9)

    # Multiply the learning rate by 0.1 every 7 epochs.
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv,
                                           step_size=7,
                                           gamma=0.1)

    model_conv = train_model(model_conv,
                             criterion,
                             optimizer_conv,
                             exp_lr_scheduler,
                             device,
                             dataloaders,
                             dataset_sizes,
                             num_epochs=25)

    visualize_model(model_conv, device, dataloaders, class_names)
Ejemplo n.º 3
0
def main():
    """Run two transfer-learning scenarios on ResNet-18 with a 6-output head.

    Scenario 1 fine-tunes every parameter of a pretrained network and then
    visualizes it. Scenario 2 freezes the pretrained parameters and trains
    only a newly attached final layer. Both use ``nn.MSELoss`` and SGD, and
    both rely on the module-level ``use_gpu`` and ``exp_lr_scheduler``
    (scenario 1 additionally on ``dset_loaders``).

    Returns:
        The model returned by ``train_model`` for the second (frozen
        backbone) scenario.
    """
    # ------------------------------------------------------------------
    # Scenario 1: fine-tune the whole network.
    # ------------------------------------------------------------------
    finetune_net = models.resnet18(pretrained=True)
    head_inputs = finetune_net.fc.in_features
    finetune_net.fc = nn.Linear(head_inputs, 6)
    if use_gpu:
        finetune_net = finetune_net.cuda()

    loss_fn = nn.MSELoss()

    # All parameters of the network are optimized here.
    full_sgd = optim.SGD(finetune_net.parameters(), lr=0.001, momentum=0.9)

    finetune_net = train_model(finetune_net, loss_fn, full_sgd,
                               exp_lr_scheduler, num_epochs=25)
    visualize_model(finetune_net, dset_loaders)

    # ------------------------------------------------------------------
    # Scenario 2: fixed feature extractor.
    #
    # Setting ``requires_grad = False`` on the pretrained parameters freezes
    # them, so gradients are not computed for them in ``backward()``. See
    # http://pytorch.org/docs/notes/autograd.html#excluding-subgraphs-from-backward
    # ------------------------------------------------------------------
    frozen_net = torchvision.models.resnet18(pretrained=True)
    for weight in frozen_net.parameters():
        weight.requires_grad = False

    # A newly constructed module has requires_grad=True on its parameters by
    # default, so the replacement head remains trainable.
    head_inputs = frozen_net.fc.in_features
    frozen_net.fc = nn.Linear(head_inputs, 6)
    if use_gpu:
        frozen_net = frozen_net.cuda()

    loss_fn = nn.MSELoss()

    # In contrast to scenario 1, only the final layer's parameters are
    # passed to the optimizer.
    head_sgd = optim.SGD(frozen_net.fc.parameters(), lr=0.001, momentum=0.9)

    # Train and evaluate. On CPU this is expected to take about half the time
    # of scenario 1: gradients are not needed for most of the network,
    # although the forward pass still has to be computed.
    frozen_net = train_model(frozen_net, loss_fn, head_sgd, exp_lr_scheduler,
                             num_epochs=25)
    return frozen_net
Ejemplo n.º 4
0
def main(argv):
    """Fit an ``MFA`` mixture model with EM and write its artifacts to disk.

    The dataset name is taken from ``argv[1]`` when ``argv`` has exactly two
    entries; otherwise ``'celeba'`` is used. Per-dataset hyper-parameters are
    chosen, the model is fitted with ``batch_fit`` (optionally followed by
    SGD training when ``mfa_sgd_epochs > 0``), and then the following files
    are written:

    * ``./models/<dataset>/model_<name>.pth`` - the model ``state_dict``
    * ``./figures/<dataset>/model_<name>.jpg`` - output of ``visualize_model``
    * ``./figures/<dataset>/samples_<name>.jpg`` - mosaic of 100 samples
    * ``./figures/<dataset>/training_graph_<name>.jpg`` - plot of ``ll_log``

    Args:
        argv: Command-line style argument list (``argv[0]`` is ignored).

    Raises:
        AssertionError: If the installed torch version is older than 1.2.0
            (when assertions are enabled), or if the dataset name is not
            ``'celeba'`` or ``'mnist'``.
    """
    assert version.parse(torch.__version__) >= version.parse('1.2.0')

    dataset = argv[1] if len(argv) == 2 else 'celeba'
    print('Preparing dataset and parameters for', dataset, '...')

    # NOTE: the datasets come from project helper functions; the torchvision
    # CelebA / MNIST loaders with explicit transforms were used previously.
    if dataset == 'celeba':
        image_shape = [64, 64, 3]  # The input image shape
        n_components = 300  # Number of components in the mixture model
        n_factors = 10  # Number of factors - the latent dimension (same for all components)
        batch_size = 1000  # The EM batch size
        num_iterations = 30  # Number of EM iterations (=epochs)
        feature_sampling = 0.2  # For faster responsibilities calculation, randomly sample the coordinates (or False)
        mfa_sgd_epochs = 0  # Perform additional training with diagonal (per-pixel) covariance, using SGD
        init_method = 'rnd_samples'  # Initialize each component from few random samples using PPCA
        train_set, test_set = celeba_train_val_datasets(with_mask=False)
    elif dataset == 'mnist':
        image_shape = [28, 28]  # The input image shape
        n_components = 50  # Number of components in the mixture model
        n_factors = 6  # Number of factors - the latent dimension (same for all components)
        batch_size = 1000  # The EM batch size
        num_iterations = 30  # Number of EM iterations (=epochs)
        feature_sampling = False  # For faster responsibilities calculation, randomly sample the coordinates (or False)
        mfa_sgd_epochs = 0  # Perform additional training with diagonal (per-pixel) covariance, using SGD
        init_method = 'kmeans'  # Initialize by using k-means clustering
        train_set, test_set = mnist_train_val_datasets(with_mask=False)
    else:
        # Raise explicitly instead of ``assert False``: asserts are stripped
        # under ``python -O``, which would let an unknown dataset fall through
        # and fail later with an unrelated NameError after directories have
        # already been created. The exception type is kept for compatibility.
        raise AssertionError('Unknown dataset: ' + dataset)

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model_dir = './models/' + dataset
    os.makedirs(model_dir, exist_ok=True)
    figures_dir = './figures/' + dataset
    os.makedirs(figures_dir, exist_ok=True)
    # Shared stem for every output file name of this run.
    model_name = 'c_{}_l_{}_init_{}'.format(n_components, n_factors,
                                            init_method)

    print('Defining the MFA model...')
    # The model sees each image as a flat vector of prod(image_shape) features.
    model = MFA(n_components=n_components,
                n_features=np.prod(image_shape),
                n_factors=n_factors,
                init_method=init_method).to(device=device)

    print('EM fitting: {} components / {} factors / batch size {} ...'.format(
        n_components, n_factors, batch_size))
    ll_log = model.batch_fit(train_set,
                             test_set,
                             batch_size=batch_size,
                             max_iterations=num_iterations,
                             feature_sampling=feature_sampling)

    if mfa_sgd_epochs > 0:
        print(
            'Continuing training using SGD with diagonal (instead of isotropic) noise covariance...'
        )
        model.isotropic_noise = False
        ll_log_sgd = model.sgd_mfa_train(train_set,
                                         test_size=256,
                                         max_epochs=mfa_sgd_epochs,
                                         feature_sampling=feature_sampling)
        # Append the SGD-phase log so the plot covers both training phases.
        ll_log += ll_log_sgd

    print('Saving the model...')
    torch.save(model.state_dict(),
               os.path.join(model_dir, 'model_' + model_name + '.pth'))

    print('Visualizing the trained model...')
    model_image = visualize_model(model,
                                  image_shape=image_shape,
                                  end_component=10)
    imwrite(os.path.join(figures_dir, 'model_' + model_name + '.jpg'),
            model_image)

    print('Generating random samples...')
    rnd_samples, _ = model.sample(100, with_noise=False)
    mosaic = samples_to_mosaic(rnd_samples, image_shape=image_shape)
    imwrite(os.path.join(figures_dir, 'samples_' + model_name + '.jpg'),
            mosaic)

    print('Plotting test log-likelihood graph...')
    plt.plot(ll_log,
             label='c{}_l{}_b{}'.format(n_components, n_factors, batch_size))
    plt.grid(True)
    plt.savefig(
        os.path.join(figures_dir, 'training_graph_' + model_name + '.jpg'))
    print('Done')
Ejemplo n.º 5
0
                                       'step%d.read_rnn' % (step + 1))
        write_rnn = utils.layer_by_name(canvas_next,
                                        'step%d.write_rnn' % (step + 1))
        sample = utils.layer_by_name(canvas_next, 'step%d.sample' % (step + 1))

    output = ll.NonlinearityLayer(canvas_next, ln.sigmoid, name='output')

    return output


if __name__ == '__main__':
    mnist = utils.load_mnist(process=lambda x: (x > 0.8).astype('float32'))
    model = make_model()

    logger.info('visualize model to model.svg')
    utils.visualize_model(model, 'model.svg')

    image = utils.layer_by_name(model, 'step1.image').input_var

    output_layers = [
        utils.layer_by_name(model, name) for name in (
            ['output'] +
            ['step%d.sample_mean' % j for j in range(1, TIME_ROUNDS + 1)] +
            ['step%d.sample_logvar2' % j for j in range(1, TIME_ROUNDS + 1)])
    ]

    output_tensors = ll.get_output(output_layers)
    output = output_tensors[0]
    mean = output_tensors[1:1 + TIME_ROUNDS]
    logvar2 = output_tensors[1 + TIME_ROUNDS:1 + 2 * TIME_ROUNDS]