Ejemplo n.º 1
0
    def __str__(self) -> str:
        """Return a printable summary of the model and solver parameters.

        If the real model/solver have not been constructed yet, throwaway
        instances are built from the stored hyperparameters purely so their
        string representations can be rendered.
        """
        if self.model is not None:
            model = self.model
        else:
            # No model yet - build a temporary one just for printing
            model = convnet.ConvNetLayer(input_dim=self.model_input_dim,
                                         hidden_dims=self.model_hidden_dims,
                                         num_filters=self.model_num_filters)

        if self.solv is not None:
            solv = self.solv
        else:
            # Likewise, build a temporary solver for display purposes
            solv = solver.Solver(
                model,
                None,
                num_epochs=self.solver_num_epochs,
                batch_size=self.solver_batch_size,
                update_rule=self.solver_update_rule,
                #optim_config={'learning_rate': learning_rate},
                verbose=self.verbose,
                print_every=self.solver_print_every,
                checkpoint_name=self.solver_checkpoint_name,
                checkpoint_dir=self.solver_checkpoint_dir)

        parts = ["Model parameters\n", str(model), "\n",
                 "Solver parameters\n", str(solv), "\n"]
        return ''.join(parts)
Ejemplo n.º 2
0
    def test_gradient_check_2conv_layers(self):
        """Numerically gradient-check a default ConvNetLayer on random data.

        Evaluates each parameter's numerical gradient exactly once and
        caches the relative errors, then prints all of them before running
        any assertion so that every error is visible on the console even
        when one parameter fails.  (The original code re-ran the very slow
        numerical-gradient evaluation in a second identical loop.)
        """
        print("\n======== TestConvNet.test_gradient_check_conv:")

        num_inputs = 2
        input_dim = (3, 32, 32)
        num_classes = 10

        X = np.random.randn(num_inputs, *input_dim)
        y = np.random.randint(num_classes, size=num_inputs)

        # TODO ; Modify this to be L Layer net
        model = convnet.ConvNetLayer(reg=0.0)
        loss, grads = model.loss(X, y)

        # Single pass: compute and cache each parameter's relative error
        f = lambda _: model.loss(X, y)[0]
        errs = {}
        for p in sorted(grads):
            param_grad_num = check_gradient.eval_numerical_gradient(f, model.params[p], verbose=False, h=1e-6)
            errs[p] = error.rel_error(param_grad_num, grads[p])
            print("%s max relative error: %e" % (p, errs[p]))

        # Assertions in a separate pass so that all errors are printed
        # to console before we invoke the assertions
        for p in sorted(errs):
            self.assertLessEqual(errs[p], self.eps)

        print("======== TestConvNet.test_gradient_check_conv: <END> ")
Ejemplo n.º 3
0
 def init_model(self, weight_scale: float, reg: float) -> None:
     """Build a fresh ConvNetLayer from the stored hyperparameters.

     Args:
         weight_scale: std-dev/scale used for weight initialization.
         reg: L2 regularization strength.
     """
     model_args = {
         'input_dim': self.model_input_dim,
         'hidden_dims': self.model_hidden_dims,
         'num_filters': self.model_num_filters,
         'use_batchnorm': self.model_use_batchnorm,
         'reg': reg,
         'weight_scale': weight_scale,
         'verbose': self.verbose,
     }
     self.model = convnet.ConvNetLayer(**model_args)
Ejemplo n.º 4
0
def train_xavier(verbose=True, draw_plots=False):
    """Compare weight-initialization schemes on CIFAR-10.

    Trains one ConvNetLayer per scheme in ('gauss', 'gauss_sqrt', 'xavier')
    with otherwise identical hyperparameters, pickles each trained solver,
    and optionally plots the training curves side by side.

    Args:
        verbose: print each model before training and pass the flag through
            to the model constructor.
        draw_plots: when True, show a comparison plot of all solvers.
    """
    data_dir = 'datasets/cifar-10-batches-py'
    dataset = load_data(data_dir)

    # Hyperparams
    input_dim = (3, 32, 32)
    hidden_dims = [256, 256]
    num_filters = [16, 32, 64]
    reg = 2e-2
    weight_scale = 1e-3
    learning_rate = 1e-3
    num_epochs = 600
    batch_size = 50
    update_rule = 'adam'

    weight_init = ['gauss', 'gauss_sqrt', 'xavier']
    model_dict = {}

    for w in weight_init:
        # BUG FIX: the model's verbose flag was hard-coded to True,
        # ignoring this function's verbose parameter
        model = convnet.ConvNetLayer(input_dim=input_dim,
                                     hidden_dims=hidden_dims,
                                     num_filters=num_filters,
                                     weight_scale=weight_scale,
                                     weight_init=w,
                                     reg=reg,
                                     verbose=verbose)
        model_dict[w] = model

    solver_dict = {}

    for k, m in model_dict.items():
        if verbose:
            print(m)
        solv = solver.Solver(m,
                             dataset,
                             print_every=10,
                             num_epochs=num_epochs,
                             batch_size=batch_size,
                             update_rule=update_rule,
                             optim_config={'learning_rate': learning_rate})
        solv.train()
        fname = '%s-solver-%d-epochs.pkl' % (k, int(num_epochs))
        solv.save(fname)
        skey = '%s-%s' % (m.__repr__(), k)
        solver_dict[skey] = solv

    # Plot results
    if draw_plots is True:
        fig, ax = vis_solver.get_train_fig()
        vis_solver.plot_solver_compare(ax, solver_dict)
        plt.show()
Ejemplo n.º 5
0
    def test_overfit_3layer(self):
        """Train a small batchnorm convnet on 500 examples to check it can overfit."""
        print("\n======== TestConvNet.test_overfit_3layer:")
        dataset = load_data(self.data_dir, self.verbose)
        num_train = 500

        # Slice out a small subset of the dataset for overfitting
        subset_keys = ('X_train', 'y_train', 'X_val', 'y_val')
        small_data = {k: dataset[k][:num_train] for k in subset_keys}
        if self.verbose:
            print("Size of training dataset :")
            for k, v in small_data.items():
                print("%s : %s " % (k, v.shape))

        # Hyperparameters (hand-tuned; earlier values kept for reference)
        #weight_scale = 1e-2
        #learning_rate = 1e-3
        weight_scale = 0.06
        learning_rate = 0.077
        batch_size = 50
        update_rule = 'adam'

        # Model under test: one conv layer, one hidden FC layer, batchnorm on
        model = convnet.ConvNetLayer(weight_scale=weight_scale,
                                     num_filters=[32],
                                     hidden_dims=[100],
                                     use_batchnorm=True,
                                     reg=0.0)
        if self.verbose:
            print(model)
        # Solver that drives the training
        solv = solver.Solver(model,
                             small_data,
                             num_epochs=self.num_epochs,
                             batch_size=batch_size,
                             update_rule=update_rule,
                             optim_config={'learning_rate': learning_rate},
                             print_every=self.print_every,
                             verbose=self.verbose)
        solv.train()
        # Render the training curves if requested
        if self.draw_plots is True:
            fig, ax = get_figure_handles()
            plot_test_result(ax, {"convnet": solv}, self.num_epochs)
            fig.set_size_inches(8,8)
            fig.tight_layout()
            plt.show()

        print("======== TestConvNet.test_overfit_3layer: <END> ")
Ejemplo n.º 6
0
    def test_loss_2conv_layers(self):
        """Sanity-check the initial loss with and without L2 regularization."""
        print("\n======== TestConvNet.test_loss_3layer_conv:")

        N = 10       # Because the naive implementation is VERY slow
        X = np.random.randn(N, 3, 32, 32)
        y = np.random.randint(10, size=N)
        net = convnet.ConvNetLayer()
        # Regularization should only ever add to the loss, so evaluate
        # the same data twice with reg toggled off and on
        for reg_strength, label in ((0.0, "no regularization"),
                                    (0.5, "with regularization")):
            net.reg = reg_strength
            loss, _ = net.loss(X, y)
            print("Initial loss (%s) : %f" % (label, loss))

        print("======== TestConvNet.test_loss_3layer_conv: <END> ")
Ejemplo n.º 7
0
def train_cifar10_conv():
    """Train a 4-conv / 2-FC layer network on the full CIFAR-10 dataset,
    checkpointing to the examples/ directory."""
    data_dir = 'datasets/cifar-10-batches-py'
    data = load_data(data_dir)

    verbose = True
    # Model hyperparams
    weight_scale = 0.05
    filter_size = 3
    reg = 0.05
    input_dim = (3, 32, 32)
    num_filters = [16, 32, 64, 128]
    hidden_dims = [256, 256]
    # Solver hyperparams
    update_rule = 'sgd_momentum'
    learning_rate = 1e-3
    num_epochs = 2000

    # Build the network
    model = convnet.ConvNetLayer(input_dim=input_dim,
                                 hidden_dims=hidden_dims,
                                 num_filters=num_filters,
                                 weight_scale=weight_scale,
                                 reg=reg,
                                 filter_size=filter_size,
                                 verbose=verbose)
    if verbose:
        print(model)
    # Checkpoint name encodes the architecture and key hyperparameters
    checkpoint_name = 'c4-16-32-64-128-f2-256-256-lr=%f-ws=%f' % (
        learning_rate, weight_scale)
    solv = solver.Solver(model,
                         data,
                         num_epochs=num_epochs,
                         batch_size=50,
                         update_rule=update_rule,
                         optim_config={'learning_rate': learning_rate},
                         verbose=verbose,
                         print_every=50,
                         checkpoint_name=checkpoint_name,
                         checkpoint_dir='examples')
    if verbose:
        print("Training %d layer net" % model.num_layers)
    solv.train()
Ejemplo n.º 8
0
def overfit():
    """Deliberately overfit small CIFAR-10 subsets of increasing size.

    Trains the same small convnet on progressively larger training subsets
    and plots the resulting solver histories for comparison.
    """
    # Data
    dataset = data_utils.get_CIFAR10_data('datasets/cifar-10-batches-py')
    # Hyperparameters
    # for now we just some random params, not found by search
    reg = 1e-2
    weight_scale = 2e-3
    learning_rate = 1e-3
    # Training parameters
    num_epochs = 40
    #train_sizes = [50, 100, 150, 200]
    train_sizes = [200, 400, 800, 1000, 1500]

    solv_dict = {}
    subset_keys = ('X_train', 'y_train', 'X_val', 'y_val')
    for size in train_sizes:
        # Take the first `size` examples from each split
        overfit_data = {k: dataset[k][:size] for k in subset_keys}
        model = convnet.ConvNetLayer(hidden_dims=[256],
                                     num_filters=[16],
                                     filter_size=5,
                                     reg=reg,
                                     weight_scale=weight_scale)
        solv = solver.Solver(model,
                             overfit_data,
                             num_epochs=num_epochs,
                             optim_config={'learning_rate': learning_rate})
        print("Overfitting on %d examples in %d epochs using the following network" % (size, num_epochs))
        print(model)
        solv.train()
        solv_dict['size_%d' % size] = solv
        # Check that we can actually overfit

    # Plot the results
    fig, ax = vis_solver.get_train_fig()
    vis_solver.plot_solver_compare(ax, solv_dict)
    plt.show()
Ejemplo n.º 9
0
def learn_random_data():
    """Train a small convnet on random data and plot its training history.

    Fixes over the original:
      * removed the unused local ``ws`` (a weight scale that was defined
        but never passed to the model, so it had no effect);
      * added the missing ``plt.show()`` — the plots were fully built and
        labeled but never displayed, making the visualization code dead.
    """
    # Some trial hyperparameters
    reg = 1e-4
    lr = 1e-3
    num_epochs = 10

    #data = load_data('datasets/cifar-10-batches-py', verbose=True)
    #rand_data = convert_data_random(data, int(np.max(data['X_train'])))
    rand_data = gen_random_data()
    # Get model
    model = convnet.ConvNetLayer(hidden_dims=[256],
                                 reg=reg)
    # Get solver
    solv = solver.Solver(model,
                         rand_data,
                         optim_config={'learning_rate': lr},
                         num_epochs=num_epochs)
    solv.train()

    # Show some plots
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = []
    for i in range(3):
        subax = fig.add_subplot(3, 1, (i+1))
        ax.append(subax)

    ax[0].plot(solv.loss_history, 'o')
    ax[0].set_title("Loss")
    ax[1].plot(solv.train_acc_history)
    ax[1].set_title("Training accuracy")
    ax[2].plot(solv.val_acc_history)
    ax[2].set_title("Validation accuracy")

    for i in range(3):
        ax[i].set_xlabel("Epochs")
        ax[i].set_xticks(range(num_epochs))

    # Actually render the figure (missing in the original)
    plt.show()
Ejemplo n.º 10
0
def scale_network(draw_plots=False):
    """Grow a convnet one conv layer at a time and train each variant.

    Each loop iteration APPENDS another filter count to ``num_filters``
    (so the network deepens across iterations: [16], [16, 32], ...), and a
    second 256-unit hidden layer is added once the 64-filter stage is
    reached.  Each variant is trained on a 5000-example CIFAR-10 subset
    with checkpoints written to examples/, and its curves optionally
    plotted.

    Args:
        draw_plots: when True, show loss/accuracy plots after each variant.
    """
    # Some trial hyperparameters
    reg = 1e-4
    ws = 0.05               # weight scale for model init
    lr = 1e-3               # learning rate
    fsizes = [16, 32, 64, 128]   # filter counts added one per iteration
    hdims = 256             # size of the hidden layer added at the 64-filter stage

    # NOTE: these two lists are mutated across loop iterations below, so
    # each model is strictly larger than the previous one.
    num_filters = []
    hidden_dims = [256]
    num_epochs = 100

    # prep data
    num_train = 5000
    dataset = load_data('datasets/cifar-10-batches-py')
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val':   dataset['X_val'][:num_train],
        'y_val':   dataset['y_val'][:num_train]
    }

    for s in fsizes:
        # Deepen the conv stack by one layer of s filters
        num_filters.append(s)
        if s == 64:
            # From the 64-filter variant onward, use two hidden FC layers
            hidden_dims.append(hdims)
        model = convnet.ConvNetLayer(hidden_dims=hidden_dims,
                                     num_filters=num_filters,
                                     reg=reg,
                                     weight_scale=ws,
                                     verbose=True)
        print(model)
        # The model repr doubles as the checkpoint filename stem
        cname = model.__repr__()
        print("Saving checkpoints to examples/%s.pkl" % cname)
        solv = solver.Solver(model, small_data,
                             optim_config={'learning_rate': lr},
                             update_rule='sgd_momentum',
                             num_epochs=num_epochs,
                             checkpoint_dir='examples',
                             checkpoint_name=cname,
                             batch_size=50,
                             # presumably early-stops when the loss change over
                             # 400 iterations drops below 1e-5 — TODO confirm
                             # against the Solver implementation
                             loss_window_len=400,
                             loss_window_eps=1e-5)
        solv.train()

        # Show results
        if draw_plots is True:
            import matplotlib.pyplot as plt
            fig = plt.figure()
            ax = []
            for i in range(3):
                subax = fig.add_subplot(3, 1, (i+1))
                ax.append(subax)

            ax[0].plot(solv.loss_history, 'o')
            ax[0].set_title("Loss")
            ax[1].plot(solv.train_acc_history)
            ax[1].set_title("Training accuracy")
            ax[2].plot(solv.val_acc_history)
            ax[2].set_title("Validation accuracy")

            for i in range(3):
                ax[i].set_xlabel("Epochs")
                #ax[i].set_xticks(range(num_epochs))
            plt.show()
Ejemplo n.º 11
0
    def test_3layer_nets(self):
        """Train a ThreeLayerConvNet and an equivalent ConvNetLayer side by
        side on a tiny subset, optionally plotting a comparison."""
        print("\n======== TestSolverCompare.test_3layer_nets:")
        dataset = load_data(self.data_dir, self.verbose)
        num_train = 50
        small_data = {k: dataset[k][:num_train]
                      for k in ('X_train', 'y_train', 'X_val', 'y_val')}

        filter_size = 7
        num_filters = 32
        hidden_dims = 100
        weight_scale = 1e-2
        learning_rate = 1e-3
        reg = 0.0
        batch_size = 50
        # NOTE(review): update_rule is assigned but never passed to the
        # Solver below — confirm whether the default rule is intended
        update_rule = 'adam'

        # TODO : Save this for a Xavier test
        #for i in range(2):
        #    if i == 0:
        #        use_xavier = False
        #    else:
        #        use_xavier = True

        from pymllib.classifiers import convnet

        l3_net = convnet.ThreeLayerConvNet(hidden_dim=hidden_dims,
                                           num_filters=num_filters,
                                           filter_size=filter_size,
                                           weight_scale=weight_scale,
                                           reg=reg)
        if self.verbose:
            print("L3 net:")
            print(l3_net)
        fc_net = convnet.ConvNetLayer(hidden_dims=[hidden_dims],
                                      num_filters=[num_filters],
                                      filter_size=filter_size,
                                      weight_scale=weight_scale,
                                      reg=reg)

        # Train each network with an identically-configured solver
        solver_dict = {}
        for name, net in {'l3_net': l3_net, 'fc_net': fc_net}.items():
            solv = solver.Solver(net,
                                 small_data,
                                 optim_config={'learning_rate': learning_rate},
                                 num_epochs=self.num_epochs,
                                 batch_size=batch_size,
                                 print_every=self.print_every,
                                 verbose=True)
            solv.train()
            solver_dict[name] = solv

        # Make some plots
        if self.draw_plots:
            fig, ax = vis_solver.get_train_fig()
            vis_solver.plot_solver_compare(ax, solver_dict)
            plt.show()

        print("======== TestSolverCompare.test_3layer_nets : <END> ")
Ejemplo n.º 12
0
def LLayerConv(verbose=True, show_plots=False, solver_filename=None):
    """Train an L-layer convnet on a 1000-example CIFAR-10 subset.

    Runs two training passes over the same model (see NOTE below), saves
    the solver state, and optionally plots the training curves.

    Args:
        verbose: print the model and progress information.
        show_plots: show training-result plots after the first pass.
        solver_filename: optional path to save the first solver to.

    Fix over the original: ``conv_solver.save(solver_filename)`` was called
    unconditionally, so the default ``solver_filename=None`` was passed
    straight into save(); the save is now skipped when no filename is given.
    """
    data_dir = 'datasets/cifar-10-batches-py'

    # Get data
    num_train = 1000
    dataset = load_data(data_dir, verbose)
    train_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    # Set params
    input_dim = (3, 32, 32)
    weight_scale = 0.06
    learning_rate = 0.07
    #reg = 1e-2
    reg = 0.05
    filter_size = 5
    num_filters = [16, 32, 64]
    hidden_dim = [256, 128]
    num_epochs = 100
    # Get a convnet
    conv_model = convnet.ConvNetLayer(input_dim=input_dim,
                                      hidden_dims=hidden_dim,
                                      num_filters=num_filters,
                                      weight_scale=weight_scale,
                                      reg=reg,
                                      filter_size=filter_size,
                                      use_batchnorm=True,
                                      verbose=verbose)
    if verbose:
        print(conv_model)
    # Get a solver
    conv_solver = solver.Solver(conv_model,
                                train_data,
                                num_epochs=num_epochs,
                                batch_size=10,
                                update_rule='adam',
                                optim_config={'learning_rate': learning_rate},
                                verbose=verbose,
                                print_every=50,
                                checkpoint_name='c2-32-32-f2-256-256',
                                checkpoint_dir='examples')
    if verbose is True:
        print("Training %d layer net" % conv_model.num_layers)
    conv_solver.train()
    # BUG FIX: only save when a filename was actually supplied
    if solver_filename is not None:
        conv_solver.save(solver_filename)

    if show_plots is True:
        # The training loss, accuracy, etc
        tfig, tax = get_figure_handles()
        solver_dict = {'convnet': conv_solver}
        plot_test_result(tax, solver_dict, num_epochs=num_epochs)
        plt.show()
    if verbose:
        print(conv_model)
    # NOTE(review): everything below trains the SAME (already-trained) model
    # a second time with a different batch size and learning rate — this
    # looks like accidental duplication; confirm intent before removing.
    conv_solver = solver.Solver(conv_model,
                                train_data,
                                num_epochs=num_epochs,
                                batch_size=50,
                                update_rule='adam',
                                optim_config={'learning_rate': 1e-3},
                                verbose=verbose,
                                print_every=50)
    conv_solver.train()
    # Plot results
    #fig, ax = get_one_figure_handle()
    #grid = vis_weights.vis_grid_img(weight_dict['W1'].transpose(0, 2, 3, 1))
    #ax.imshow(grid)
    #fig.set_size_inches(5,5)
    # save the data
    solver_file = "examples/conv_solver_%d_epochs.pkl" % num_epochs
    conv_solver.save(solver_file)

    # The training loss, accuracy, etc
    tfig, tax = get_figure_handles()
    solver_dict = {'convnet': conv_solver}
    plot_test_result(tax, solver_dict, num_epochs=num_epochs)
    plt.show()

    print("done")