Ejemplo n.º 1
0
    def __str__(self) -> str:
        """Return a printable summary of the model and solver parameters.

        If no model/solver has been instantiated yet, a temporary one is
        constructed from the stored hyperparameters purely for printing.
        """
        # Use the real model when present, otherwise build a throwaway
        # one from the saved hyperparameters so it can be stringified.
        if self.model is not None:
            model = self.model
        else:
            model = convnet.ConvNetLayer(
                input_dim=self.model_input_dim,
                hidden_dims=self.model_hidden_dims,
                num_filters=self.model_num_filters)

        # Same idea for the solver
        if self.solv is not None:
            solv = self.solv
        else:
            solv = solver.Solver(
                model,
                None,
                num_epochs=self.solver_num_epochs,
                batch_size=self.solver_batch_size,
                update_rule=self.solver_update_rule,
                verbose=self.verbose,
                print_every=self.solver_print_every,
                checkpoint_name=self.solver_checkpoint_name,
                checkpoint_dir=self.solver_checkpoint_dir)

        parts = [
            "Model parameters\n",
            str(model),
            "\n",
            "Solver parameters\n",
            str(solv),
            "\n",
        ]
        return ''.join(parts)
Ejemplo n.º 2
0
    def test_weight_init(self):
        """Train identical deep FCNets under several weight-init schemes
        and (optionally) plot the training curves side by side."""
        print("======== TestFCNet.test_weight_init:")

        dataset = load_data(self.data_dir, self.verbose)
        num_train = 1500
        small_data = {k: dataset[k][:num_train]
                      for k in ('X_train', 'y_train', 'X_val', 'y_val')}
        input_dim = 32 * 32 * 3
        hidden_dims = [100] * 9
        weight_scale = 2e-2
        reg = 2e-2
        learning_rate = 1e-3
        batch_size = 50
        update_rule = 'adam'

        # One model per initialization scheme
        model_dict = {
            w: fcnet.FCNet(input_dim=input_dim,
                           hidden_dims=hidden_dims,
                           weight_scale=weight_scale,
                           reg=reg,
                           weight_init=w)
            for w in ['gauss', 'gauss_sqrt2', 'xavier']
        }

        solver_dict = {}
        for name, net in model_dict.items():
            if self.verbose:
                print(net)

            solv = solver.Solver(
                net,
                small_data,
                print_every=self.print_every,
                num_epochs=self.num_epochs,
                batch_size=batch_size,
                update_rule=update_rule,
                optim_config={'learning_rate': learning_rate})
            solv.train()
            solver_dict[name] = solv

        if self.draw_plots:
            fig, ax = vis_solver.get_train_fig()
            vis_solver.plot_solver_compare(ax, solver_dict)
            plt.show()

        print("======== TestFCNet.test_weight_init: <END> ")
Ejemplo n.º 3
0
    def test_all_optim_fcnet_5layer(self):
        """Train a 5-layer FCNet with each supported update rule and
        (optionally) plot the results for comparison."""
        print("\n======== TestSolverFCNet.test_all_optim_fcnet_5layer:")

        dataset = load_data(self.data_dir, self.verbose)
        num_train = 50
        small_data = {k: dataset[k][:num_train]
                      for k in ('X_train', 'y_train', 'X_val', 'y_val')}
        input_dim = 3 * 32 * 32
        hidden_dims = [100] * 5
        weight_scale = 5e-2
        reg = 1e-1
        batch_size = 50
        solvers = {}

        # Per-rule learning rates
        lr = {'rmsprop': 1e-4, 'adam': 1e-3, 'sgd': 1e-3, 'sgd_momentum': 1e-3}

        for update_rule in ['rmsprop', 'sgd_momentum', 'adam', 'sgd']:
            print("Using update rule %s" % update_rule)
            # Fresh model per rule so each run starts from scratch
            model = fcnet.FCNet(input_dim=input_dim,
                                hidden_dims=hidden_dims,
                                weight_scale=weight_scale,
                                reg=reg,
                                dtype=np.float64)
            if self.verbose:
                print(model)
            model_solver = solver.Solver(
                model,
                small_data,
                print_every=self.print_every,
                num_epochs=self.num_epochs,
                batch_size=batch_size,
                update_rule=update_rule,
                optim_config={'learning_rate': lr[update_rule]})
            solvers[update_rule] = model_solver
            model_solver.train()

        # Get some figure handles and plot the data
        if self.draw_plots:
            fig, ax = get_figure_handles()
            plot_test_result(ax, solvers, self.num_epochs)
            fig.set_size_inches(8, 8)
            fig.tight_layout()
            plt.show()

        print("======== TestSolverFCNet.test_all_optim_fcnet_5layer: <END> ")
Ejemplo n.º 4
0
def ex_vis_solver_compare(ax, path, fname, epoch_num, prefix=None):
    """
    EX_VIS_SOLVER_COMPARE
    Visualize a series of solutions superimposed on a single plot.
    Each solution checkpoint is read in turn and plotted on a single
    graph. The legend is created using the __repr__() result for each
    solver object.

    Inputs
        ax:
            A matplotlib axes onto which to draw the visualization
        path:
            Directory containing solver files. This may be a list of multiple
            directories, in which case the method iterates over each of them in turn.
        fname:
            The name of a given solver file, without the '_epoch_%d.pkl' suffix
        epoch_num:
            Which epoch to load.
        prefix:
            A prefix that is prepended to the filename. This allows, for example, a
            group of subfolders to be traversed that all have the same root.
            Default = None
    """

    # Helper function for loading solver objects
    def load_solver(fname):
        solv = solver.Solver(None, None)
        solv.load_checkpoint(fname)
        return solv

    # Normalize arguments so we can always iterate
    if type(path) is not list:
        path = [path]

    if type(fname) is not list:
        fname = [fname]

    # Loop-invariant suffix: hoisted out of the nested loops
    epoch_str = '_epoch_%d.pkl' % epoch_num

    # Load one solver per (path, fname) pair. NOTE: the dict is keyed on
    # fname alone, so a repeated fname in a later path overwrites the
    # earlier entry (same as the original behavior).
    solver_dict = {}
    for p in path:
        for f in fname:
            if prefix is not None:
                cname = str(prefix) + '/' + str(p) + '/' + str(f) + str(
                    epoch_str)
            else:
                cname = str(p) + '/' + str(f) + str(epoch_str)
            # FIX: use the load_solver helper (it was defined but never
            # called; the loading logic was duplicated inline here).
            solver_dict[f] = load_solver(cname)
    vis_solver.plot_solver_compare(ax, solver_dict)
Ejemplo n.º 5
0
def train_xavier(verbose=True, draw_plots=False):
    """Train identical convnets on CIFAR-10 under several weight-init
    schemes, save each trained solver, and optionally plot a comparison."""
    data_dir = 'datasets/cifar-10-batches-py'
    dataset = load_data(data_dir)

    # Hyperparameters shared by every run
    input_dim = (3, 32, 32)
    hidden_dims = [256, 256]
    num_filters = [16, 32, 64]
    reg = 2e-2
    weight_scale = 1e-3
    learning_rate = 1e-3
    num_epochs = 600
    batch_size = 50
    update_rule = 'adam'

    # One model per initialization scheme
    model_dict = {
        w: convnet.ConvNetLayer(input_dim=input_dim,
                                hidden_dims=hidden_dims,
                                num_filters=num_filters,
                                weight_scale=weight_scale,
                                weight_init=w,
                                reg=reg,
                                verbose=True)
        for w in ['gauss', 'gauss_sqrt', 'xavier']
    }

    solver_dict = {}
    for name, net in model_dict.items():
        if verbose:
            print(net)
        solv = solver.Solver(net,
                             dataset,
                             print_every=10,
                             num_epochs=num_epochs,
                             batch_size=batch_size,
                             update_rule=update_rule,
                             optim_config={'learning_rate': learning_rate})
        solv.train()
        # Persist the trained solver to disk
        fname = '%s-solver-%d-epochs.pkl' % (name, int(num_epochs))
        solv.save(fname)
        skey = '%s-%s' % (net.__repr__(), name)
        solver_dict[skey] = solv

    # Plot results
    if draw_plots is True:
        fig, ax = vis_solver.get_train_fig()
        vis_solver.plot_solver_compare(ax, solver_dict)
        plt.show()
Ejemplo n.º 6
0
def ex_plot_solver_weights(ax, fname, title=None):
    """
    EX_PLOT_SOLVER_WEIGHTS
    Example showing how to plot the first layer weights
    in a solver object

    Inputs
        ax    : matplotlib axes onto which to draw
        fname : checkpoint filename to load the solver from
        title : optional plot title (default: "Layer 1 weights")
    """

    if title is None:
        title = "Layer 1 weights"
    solv = solver.Solver(None, None)
    solv.load_checkpoint(fname)
    # FIX: the original passed the undefined name 'cname' here, which
    # raised NameError; the loaded checkpoint filename is 'fname'.
    vis_solver.plot_model_first_layer(ax, solv.model, fname)
    ax.set_title(title)
Ejemplo n.º 7
0
    def test_overfit_3layer(self):
        """Check that a small batchnorm convnet can overfit 500 examples."""
        print("\n======== TestConvNet.test_overfit_3layer:")
        dataset = load_data(self.data_dir, self.verbose)
        num_train = 500

        small_data = {k: dataset[k][:num_train]
                      for k in ('X_train', 'y_train', 'X_val', 'y_val')}
        if self.verbose:
            print("Size of training dataset :")
            for name, arr in small_data.items():
                print("%s : %s " % (name, arr.shape))

        # Hand-picked hyperparameters
        weight_scale = 0.06
        learning_rate = 0.077
        batch_size = 50
        update_rule = 'adam'

        # Get a model
        model = convnet.ConvNetLayer(weight_scale=weight_scale,
                                     num_filters=[32],
                                     hidden_dims=[100],
                                     use_batchnorm=True,
                                     reg=0.0)
        if self.verbose:
            print(model)
        # Get a solver and train
        conv_solver = solver.Solver(model,
                                    small_data,
                                    num_epochs=self.num_epochs,
                                    batch_size=batch_size,
                                    update_rule=update_rule,
                                    optim_config={'learning_rate': learning_rate},
                                    print_every=self.print_every,
                                    verbose=self.verbose)
        conv_solver.train()
        conv_dict = {"convnet": conv_solver}
        # Plot figures
        if self.draw_plots is True:
            fig, ax = get_figure_handles()
            plot_test_result(ax, conv_dict, self.num_epochs)
            fig.set_size_inches(8, 8)
            fig.tight_layout()
            plt.show()

        print("======== TestConvNet.test_overfit_3layer: <END> ")
Ejemplo n.º 8
0
    def test_adam_vs_rmsprop_fcnet(self):
        """Train the same 5-layer FCNet with adam and rmsprop and compare."""
        print("\n======== TestSolverFCNet.test_adam_vs_rmsprop:")
        dataset = load_data(self.data_dir, self.verbose)
        num_train = 50
        small_data = {k: dataset[k][:num_train]
                      for k in ('X_train', 'y_train', 'X_val', 'y_val')}
        input_dim = 3 * 32 * 32
        hidden_dims = [100] * 5
        weight_scale = 5e-2
        batch_size = 50
        reg = 1e-1
        # Per-rule learning rates
        lr = {'rmsprop': 1e-4, 'adam': 1e-3}

        solvers = {}
        for rule in ['rmsprop', 'adam']:
            # Fresh model per rule so each run starts from scratch
            model = fcnet.FCNet(input_dim=input_dim,
                                hidden_dims=hidden_dims,
                                weight_scale=weight_scale,
                                reg=reg,
                                dtype=np.float64)
            if self.verbose:
                print(model)
            model_solver = solver.Solver(
                model,
                small_data,
                print_every=self.print_every,
                num_epochs=self.num_epochs,
                batch_size=batch_size,
                update_rule=rule,
                optim_config={'learning_rate': lr[rule]})
            solvers[rule] = model_solver
            model_solver.train()

        if self.draw_plots is True:
            fig, ax = get_figure_handles()
            plot_test_result(ax, solvers, self.num_epochs)
            fig.set_size_inches(8, 8)
            fig.tight_layout()
            plt.show()

        print("======== TestSolverFCNet.test_adam_vs_rmsprop: <END> ")
Ejemplo n.º 9
0
def ex_plot_sequence(ax,
                     path,
                     fname,
                     num_checkpoints,
                     prefix=None,
                     step=1,
                     pause_time=0.01):
    """
    EX_PLOT_SEQUENCE
    Example wrapper for vis_solver.plot_model_first_layer showing a possible
    inner loop for a weight visualization animation

    Inputs
        ax              : matplotlib axes onto which to draw
        path            : directory (or list of directories) with checkpoints
        fname           : base checkpoint name (or list), without the
                          '_epoch_%d.pkl' suffix
        num_checkpoints : int upper epoch bound, or a (min, max) tuple
        prefix          : optional root directory prepended to each path
        step            : epoch stride between frames
        pause_time      : seconds to pause between frames
    """

    # Resolve the epoch range: accept a plain int (meaning 1..n) or a
    # (min, max) tuple.
    if type(num_checkpoints) is tuple:
        # FIX: the original only rejected tuples longer than 2; a 1-tuple
        # fell through and raised IndexError at num_checkpoints[1].
        if len(num_checkpoints) != 2:
            raise ValueError(
                "num_checkpoints tuple must contain exactly 2 limits")
        if num_checkpoints[0] == 0:
            n_min = 1
        else:
            n_min = int(num_checkpoints[0])
        n_max = int(num_checkpoints[1])
    else:
        n_min = 1
        n_max = int(num_checkpoints)

    # Check input arguments
    if type(path) is not list:
        path = [path]

    if type(fname) is not list:
        fname = [fname]

    # Iterate over all files and generate animations
    for p in path:
        for f in fname:
            for n in range(n_min, n_max, step):
                if prefix is not None:
                    cname = '%s/%s/%s_epoch_%d.pkl' % (prefix, p, f, int(n))
                else:
                    cname = '%s/%s_epoch_%d.pkl' % (p, f, int(n))
                solv = solver.Solver(None, None)
                solv.load_checkpoint(cname)
                title = '%s (epoch %d)' % (f, n)
                vis_solver.plot_model_first_layer(ax, solv.model, title=title)
                plt.pause(pause_time)
                plt.draw()
Ejemplo n.º 10
0
 def init_solver(self,
                 data: Dict[str, Any],
                 learning_rate: float = 1e-3,
                 num_epochs: Union[None, int] = None) -> None:
     """Construct a Solver for self.model and attach it as self.solv.

     Args:
         data: Dataset passed straight through to solver.Solver
             (presumably the usual X_train/y_train/X_val/y_val dict -
             TODO confirm against the Solver implementation).
         learning_rate: Placed into the solver's optim_config.
         num_epochs: Epoch count; falls back to self.solver_num_epochs
             when None.
     """
     # Default the epoch count from the stored solver configuration
     if num_epochs is None:
         num_epochs = self.solver_num_epochs
     # Remaining solver options come from attributes configured elsewhere
     # on this object (batch size, update rule, checkpoint settings).
     self.solv = solver.Solver(
         self.model,
         data,
         num_epochs=num_epochs,
         batch_size=self.solver_batch_size,
         update_rule=self.solver_update_rule,
         optim_config={'learning_rate': learning_rate},
         verbose=self.verbose,
         print_every=self.solver_print_every,
         checkpoint_name=self.solver_checkpoint_name,
         checkpoint_dir=self.solver_checkpoint_dir)
Ejemplo n.º 11
0
    def test_rmsprop_fcnet(self):
        """Train a small FCNet with the rmsprop update rule."""
        print("\n======== TestSolverFCNet.test_rmsprop_fcnet:")
        dataset = load_data(self.data_dir, self.verbose)
        num_train = 50
        small_data = {k: dataset[k][:num_train]
                      for k in ('X_train', 'y_train', 'X_val', 'y_val')}
        input_dim = 3 * 32 * 32
        hidden_dims = [100, 50, 10]  # just some random dims
        weight_scale = 5e-2
        learning_rate = 1e-2
        batch_size = 50
        update_rule = 'rmsprop'

        model = fcnet.FCNet(input_dim=input_dim,
                            hidden_dims=hidden_dims,
                            weight_scale=weight_scale,
                            dtype=np.float64)
        if self.verbose:
            print(model)
        model_solver = solver.Solver(
            model,
            small_data,
            print_every=self.print_every,
            num_epochs=self.num_epochs,
            batch_size=batch_size,
            update_rule=update_rule,
            optim_config={'learning_rate': learning_rate})
        model_solver.train()

        if self.draw_plots is True:
            fig, ax = get_figure_handles()
            plot_test_result(ax, {'rmsprop': model_solver}, self.num_epochs)
            fig.set_size_inches(8, 8)
            fig.tight_layout()
            plt.show()

        print("======== TestSolverFCNet.test_rmsprop_fcnet: <END> ")
Ejemplo n.º 12
0
    def test_fcnet_3layer_overfit(self):
        """Overfit a 3-layer FCNet on 50 examples and plot the loss curve."""
        print("\n======== TestFCNet.test_fcnet_3layer_overfit:")

        dataset = load_data(self.data_dir, self.verbose)
        num_train = 50

        small_data = {k: dataset[k][:num_train]
                      for k in ('X_train', 'y_train', 'X_val', 'y_val')}
        input_dim = 3 * 32 * 32
        hidden_dims = [100, 100]
        # Hand-tuned values known to overfit this subset
        weight_scale = 0.079564
        learning_rate = 0.003775

        # Get model and solver
        model = fcnet.FCNet(input_dim=input_dim,
                            hidden_dims=hidden_dims,
                            weight_scale=weight_scale,
                            dtype=np.float64,
                            verbose=True)
        print(model)
        model_solver = solver.Solver(
            model,
            small_data,
            print_every=self.print_every,
            num_epochs=self.num_epochs,
            batch_size=50,
            update_rule='sgd',
            optim_config={'learning_rate': learning_rate})
        model_solver.train()

        # Plot results
        if self.draw_plots is True:
            plt.plot(model_solver.loss_history, 'o')
            plt.title('Training loss history (3 layers)')
            plt.xlabel('Iteration')
            plt.ylabel('Training loss')
            plt.show()

        print("======== TestFCNet.test_fcnet_3layer_overfit: <END> ")
Ejemplo n.º 13
0
    def test_fcnet_2layer_dropout(self):
        """Train a 2-layer FCNet at several dropout probabilities."""
        print("\n======== TestFCNetDropout.test_fcnet_2layer_dropout :")
        dataset = load_data(self.data_dir, self.verbose)
        num_train = 10

        small_data = {k: dataset[k][:num_train]
                      for k in ('X_train', 'y_train', 'X_val', 'y_val')}
        input_dim = 3 * 32 * 32
        batch_size = 100
        solvers = {}

        for prob in [0.0, 0.3, 0.5, 0.7]:
            model = fcnet.FCNet(hidden_dims=[500],
                                input_dim=input_dim,
                                num_classes=10,
                                dropout=prob,
                                weight_scale=2e-2)
            s = solver.Solver(model,
                              small_data,
                              num_epochs=self.num_epochs,
                              batch_size=batch_size,
                              update_rule='adam',
                              optim_config={'learning_rate': 5e-4},
                              verbose=True,
                              print_every=self.print_every)
            print("Training with dropout %f" % prob)
            s.train()
            solvers['p=' + str(prob)] = s

        if self.draw_plots:
            fig, ax = get_figure_handles()
            plot_test_result(ax, solvers, self.num_epochs)
            fig.set_size_inches(8, 8)
            fig.tight_layout()
            plt.show()

        print("======== TestFCNetDropout.test_fcnet_2layer_dropout: <END> ")
Ejemplo n.º 14
0
def train_cifar10_conv():
    """Train a deep convnet on CIFAR-10 with periodic checkpointing."""
    data_dir = 'datasets/cifar-10-batches-py'
    data = load_data(data_dir)

    verbose = True
    # Model hyperparameters
    weight_scale = 0.05
    filter_size = 3
    reg = 0.05
    input_dim = (3, 32, 32)
    num_filters = [16, 32, 64, 128]
    hidden_dims = [256, 256]
    # Solver hyperparameters
    update_rule = 'sgd_momentum'
    learning_rate = 1e-3
    num_epochs = 2000

    # Build the model
    conv_model = convnet.ConvNetLayer(input_dim=input_dim,
                                      hidden_dims=hidden_dims,
                                      num_filters=num_filters,
                                      weight_scale=weight_scale,
                                      reg=reg,
                                      filter_size=filter_size,
                                      verbose=verbose)
    if verbose:
        print(conv_model)

    # Checkpoint name encodes the architecture and key hyperparameters
    checkpoint_name = 'c4-16-32-64-128-f2-256-256-lr=%f-ws=%f' % (
        learning_rate, weight_scale)
    conv_solver = solver.Solver(conv_model,
                                data,
                                num_epochs=num_epochs,
                                batch_size=50,
                                update_rule=update_rule,
                                optim_config={'learning_rate': learning_rate},
                                verbose=verbose,
                                print_every=50,
                                checkpoint_name=checkpoint_name,
                                checkpoint_dir='examples')
    if verbose is True:
        print("Training %d layer net" % conv_model.num_layers)
    conv_solver.train()
Ejemplo n.º 15
0
def inspect_checkpoint(checkpoint_fname, verbose=False):
    """Load a solver checkpoint and display its first-layer conv filters."""
    csolver = solver.Solver(None, None)
    # TODO : Check file exists
    csolver.load(checkpoint_fname)
    weight_dict = cutils.get_conv_layer_dict(csolver.model)

    # For now, get the first layer weights out and show those;
    # transpose to (N, H, W, C) for image display
    grid = vis_grid_img(weight_dict['W1'].transpose(0, 2, 3, 1))

    # Create a single-axes figure and draw the grid
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.imshow(grid)
Ejemplo n.º 16
0
def convert_checkpoint(fname, verbose=False):
    """
    Convert a checkpoint to the newest version.
    This method is designed to convert old checkpoints to the
    current format, which unifies the save and load methods so that
    there is no difference between saving a solver and saving a
    checkpoint. Because there may be checkpoints from previous
    versions lying around that may break when loaded, this tool will
    convert old checkpoints so that this doesn't occur

    Inputs
        fname   : path to the (old-format) checkpoint pickle
        verbose : verbosity flag copied onto the returned solver
    """
    solv = solver.Solver(None, None)
    solv.verbose = verbose

    # Attribute name -> default used when the old checkpoint lacks the key.
    # Data-driven replacement for the original's 17 near-identical
    # solv.<attr> = cpoint.get(...) lines.
    defaults = {
        # Model data
        'model': None,
        # Solver params
        'update_rule': 'sgd',
        'lr_decay': 0.95,
        'optim_config': {'learning_rate': 1e-3},
        'batch_size': 100,
        'epoch': 0,
        'num_epochs': 0,
        # Solution data
        'loss_history': None,
        'train_acc_history': None,
        'val_acc_history': None,
        # Loss window
        'enable_loss_window': False,
        'loss_window_len': 500,
        'loss_window_eps': 1e-3,
        'loss_converge_window': 1e4,
        # Checkpoint info
        'checkpoint_name': None,
        'checkpoint_dir': None,
    }

    # NOTE(review): pickle.load on untrusted files is unsafe; this assumes
    # the checkpoint comes from a trusted local training run.
    with open(fname, 'rb') as fp:
        cpoint = pickle.load(fp)

    for attr, default in defaults.items():
        setattr(solv, attr, cpoint.get(attr, default))

    # This solver has now been 'converted'
    return solv
Ejemplo n.º 17
0
def overfit():
    """Overfit CIFAR-10 subsets of increasing size and compare the runs."""
    # Data
    dataset = data_utils.get_CIFAR10_data('datasets/cifar-10-batches-py')
    # Hyperparameters (hand-picked, not found by search)
    reg = 1e-2
    weight_scale = 2e-3
    learning_rate = 1e-3
    # Training parameters
    num_epochs = 40
    train_sizes = [200, 400, 800, 1000, 1500]

    solv_dict = {}
    for size in train_sizes:
        overfit_data = {key: dataset[key][:size]
                        for key in ('X_train', 'y_train', 'X_val', 'y_val')}
        model = convnet.ConvNetLayer(hidden_dims=[256],
                                     num_filters=[16],
                                     filter_size=5,
                                     reg=reg,
                                     weight_scale=weight_scale)
        solv = solver.Solver(model,
                             overfit_data,
                             num_epochs=num_epochs,
                             optim_config={'learning_rate': learning_rate})
        print("Overfitting on %d examples in %d epochs using the following network" % (size, num_epochs))
        print(model)
        solv.train()
        solv_dict['size_%d' % size] = solv
        # Check that we can actually overfit

    # Plot the results
    fig, ax = vis_solver.get_train_fig()
    vis_solver.plot_solver_compare(ax, solv_dict)
    plt.show()
Ejemplo n.º 18
0
    def test_overfit_3layer(self):
        """Check that a ThreeLayerConvNet can overfit 500 training examples."""
        print("\n======== Test3LayerConvNet.test_overfit_3layer:")
        dataset = load_data(self.data_dir, self.verbose)
        num_train = 500

        small_data = {k: dataset[k][:num_train]
                      for k in ('X_train', 'y_train', 'X_val', 'y_val')}
        # Hand-picked hyperparameters
        weight_scale = 0.07
        learning_rate = 0.007
        batch_size = 50
        update_rule = 'adam'

        # Get a model
        model = convnet.ThreeLayerConvNet(weight_scale=weight_scale,
                                          reg=0.0)
        if self.verbose:
            print(model)
        # Get a solver and train
        conv_solver = solver.Solver(model,
                                    small_data,
                                    num_epochs=self.num_epochs,
                                    batch_size=batch_size,
                                    update_rule=update_rule,
                                    optim_config={'learning_rate': learning_rate},
                                    print_every=self.print_every,
                                    verbose=self.verbose)
        conv_solver.train()
        conv_dict = {"convnet": conv_solver}
        # Plot figures
        if self.draw_plots is True:
            fig, ax = get_figure_handles()
            plot_test_result(ax, conv_dict)
            fig.set_size_inches(8, 8)
            fig.tight_layout()
            plt.show()

        print("======== Test3LayerConvNet.test_overfit_3layer: <END> ")
Ejemplo n.º 19
0
def learn_random_data():
    """Train a small convnet on random data and plot loss/accuracy curves."""
    # Some trial hyperparameters
    reg = 1e-4
    ws = 0.05  # NOTE(review): defined but unused, kept as in the original
    lr = 1e-3
    num_epochs = 10

    rand_data = gen_random_data()
    # Get model
    model = convnet.ConvNetLayer(hidden_dims=[256],
                                 reg=reg)
    # Get solver and train
    solv = solver.Solver(model,
                         rand_data,
                         optim_config={'learning_rate': lr},
                         num_epochs=num_epochs)
    solv.train()

    # Show some plots: three stacked axes for loss / train acc / val acc
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = [fig.add_subplot(3, 1, i + 1) for i in range(3)]

    ax[0].plot(solv.loss_history, 'o')
    ax[0].set_title("Loss")
    ax[1].plot(solv.train_acc_history)
    ax[1].set_title("Training accuracy")
    ax[2].plot(solv.val_acc_history)
    ax[2].set_title("Validation accuracy")

    for subax in ax:
        subax.set_xlabel("Epochs")
        subax.set_xticks(range(num_epochs))
Ejemplo n.º 20
0
    def test_fcnet_5layer_loss(self):
        """Train a 5-layer FCNet on a tiny subset to sanity-check the loss."""
        print("\n======== TestFCNet.test_fcnet_5layer_loss:")

        dataset = load_data(self.data_dir, self.verbose)
        num_train = 50

        small_data = {k: dataset[k][:num_train]
                      for k in ('X_train', 'y_train', 'X_val', 'y_val')}
        input_dim = 3 * 32 * 32
        hidden_dims = [100] * 4
        weight_scale = 1e-2
        learning_rate = 1e-2

        # Get model and solver
        model = fcnet.FCNet(input_dim=input_dim,
                            hidden_dims=hidden_dims,
                            weight_scale=weight_scale,
                            reg=0.0,
                            dtype=np.float64)
        print(model)
        model_solver = solver.Solver(
            model,
            small_data,
            print_every=self.print_every,
            num_epochs=self.num_epochs,
            batch_size=50,
            update_rule='sgd',
            optim_config={'learning_rate': learning_rate})
        model_solver.train()

        print("======== TestFCNet.test_fcnet_5layer_loss: <END> ")
Ejemplo n.º 21
0
    def test_3layer_nets(self):
        """Compare a ThreeLayerConvNet against an equivalent ConvNetLayer."""
        print("\n======== TestSolverCompare.test_3layer_nets:")
        dataset = load_data(self.data_dir, self.verbose)
        num_train = 50
        small_data = {k: dataset[k][:num_train]
                      for k in ('X_train', 'y_train', 'X_val', 'y_val')}

        filter_size = 7
        num_filters = 32
        hidden_dims = 100
        weight_scale = 1e-2
        learning_rate = 1e-3
        reg = 0.0
        batch_size = 50
        # NOTE(review): defined but never passed to the Solver below -
        # verify whether the solver default was intended.
        update_rule = 'adam'

        from pymllib.classifiers import convnet

        # Fixed 3-layer architecture
        l3_net = convnet.ThreeLayerConvNet(hidden_dim=hidden_dims,
                                           num_filters=num_filters,
                                           filter_size=filter_size,
                                           weight_scale=weight_scale,
                                           reg=reg)
        if self.verbose:
            print("L3 net:")
            print(l3_net)
        # Generic layered convnet configured to match
        fc_net = convnet.ConvNetLayer(hidden_dims=[hidden_dims],
                                      num_filters=[num_filters],
                                      filter_size=filter_size,
                                      weight_scale=weight_scale,
                                      reg=reg)

        solver_dict = {}
        for key, net in {'l3_net': l3_net, 'fc_net': fc_net}.items():
            solv = solver.Solver(net,
                                 small_data,
                                 optim_config={'learning_rate': learning_rate},
                                 num_epochs=self.num_epochs,
                                 batch_size=batch_size,
                                 print_every=self.print_every,
                                 verbose=True)
            solv.train()
            solver_dict[key] = solv

        # Make some plots
        if self.draw_plots:
            fig, ax = vis_solver.get_train_fig()
            vis_solver.plot_solver_compare(ax, solver_dict)
            plt.show()

        print("======== TestSolverCompare.test_3layer_nets : <END> ")
Ejemplo n.º 22
0
 def load_solver(fname):
     """Load a Solver from the checkpoint file *fname* and return it."""
     loaded = solver.Solver(None, None)
     loaded.load_checkpoint(fname)
     return loaded
Ejemplo n.º 23
0
def LLayerConv(verbose=True, show_plots=False, solver_filename=None):
    """Train an L-layer convolutional network on a small CIFAR-10 subset.

    Two training passes are run on the same model: a first pass with batch
    size 10 and an aggressive learning rate (checkpointed under
    ``examples/``), then a second pass with batch size 50 and learning rate
    1e-3.  The final solver is always pickled to
    ``examples/conv_solver_<num_epochs>_epochs.pkl``.

    Args:
        verbose: Print model structure and training progress.
        show_plots: If True, display training curves after the first pass.
        solver_filename: Optional path to save the first-pass solver to.
            When None (the default) the first-pass save is skipped.
    """
    data_dir = 'datasets/cifar-10-batches-py'

    # Get data - train on a small subset for speed
    num_train = 1000
    dataset = load_data(data_dir, verbose)
    train_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    # Hyperparameters for the first (exploratory) pass
    input_dim = (3, 32, 32)
    weight_scale = 0.06
    learning_rate = 0.07
    reg = 0.05
    filter_size = 5
    num_filters = [16, 32, 64]
    hidden_dim = [256, 128]
    num_epochs = 100

    # Build the convnet
    conv_model = convnet.ConvNetLayer(input_dim=input_dim,
                                      hidden_dims=hidden_dim,
                                      num_filters=num_filters,
                                      weight_scale=weight_scale,
                                      reg=reg,
                                      filter_size=filter_size,
                                      use_batchnorm=True,
                                      verbose=verbose)
    if verbose:
        print(conv_model)

    # First training pass: tiny batches, large learning rate, checkpointed
    conv_solver = solver.Solver(conv_model,
                                train_data,
                                num_epochs=num_epochs,
                                batch_size=10,
                                update_rule='adam',
                                optim_config={'learning_rate': learning_rate},
                                verbose=verbose,
                                print_every=50,
                                checkpoint_name='c2-32-32-f2-256-256',
                                checkpoint_dir='examples')
    if verbose:
        print("Training %d layer net" % conv_model.num_layers)
    conv_solver.train()
    # BUG FIX: the original called save(solver_filename) unconditionally,
    # which passed None through when no filename was supplied.
    if solver_filename is not None:
        conv_solver.save(solver_filename)

    if show_plots:
        # The training loss, accuracy, etc
        tfig, tax = get_figure_handles()
        solver_dict = {'convnet': conv_solver}
        plot_test_result(tax, solver_dict, num_epochs=num_epochs)
        plt.show()
    if verbose:
        print(conv_model)

    # Second training pass on the same model: larger batches, smaller
    # learning rate (fine-tuning the weights from the first pass).
    conv_solver = solver.Solver(conv_model,
                                train_data,
                                num_epochs=num_epochs,
                                batch_size=50,
                                update_rule='adam',
                                optim_config={'learning_rate': 1e-3},
                                verbose=verbose,
                                print_every=50)
    conv_solver.train()

    # Save the final solver state
    solver_file = "examples/conv_solver_%d_epochs.pkl" % num_epochs
    conv_solver.save(solver_file)

    # The training loss, accuracy, etc
    tfig, tax = get_figure_handles()
    solver_dict = {'convnet': conv_solver}
    plot_test_result(tax, solver_dict, num_epochs=num_epochs)
    plt.show()

    print("done")
Ejemplo n.º 24
0
def ThreeLayerNet(verbose=True, show_plots=False):
    """Train a three-layer convnet on CIFAR-10 and inspect its weights.

    Optionally resumes from a pickled solver, trains, saves the result,
    prints weight statistics, and (optionally) visualizes the first-layer
    filters and the training curves.

    Args:
        verbose: Print model structure and training progress.
        show_plots: If True, display first-layer filters and training curves.
    """
    save_convnet = True
    load_convnet = False
    data_dir = 'datasets/cifar-10-batches-py'
    convnet_path = 'examples/convnet_expr.pkl'

    # Get data
    data = load_data(data_dir, verbose)
    # Set params
    weight_scale = 1e-2
    reg = 1e-3

    # Get a convnet
    # TODO: more flexible convnet
    conv_model = convnet.ThreeLayerConvNet(weight_scale=weight_scale,
                                           hidden_dim=500,
                                           reg=reg)
    if verbose:
        print(conv_model)
    # Get a solver
    conv_solver = solver.Solver(conv_model,
                                data,
                                num_epochs=20,
                                batch_size=50,
                                update_rule='adam',
                                optim_config={'learning_rate': 1e-3},
                                verbose=verbose,
                                checkpoint_dir='examples',
                                checkpoint_name='3ln',
                                print_every=50)
    if load_convnet:  # FIXME : load data.
        print("Loading convnet from file %s" % convnet_path)
        conv_solver.load(convnet_path)

    # BUG FIX: training was previously nested inside the load branch, so
    # with load_convnet hard-coded to False the model was saved and its
    # weights inspected without ever being trained.
    if verbose:
        print("Training %d layer net" % conv_model.num_layers)
    conv_solver.train()

    if save_convnet:
        conv_solver.save(convnet_path)

    # Inspect the trained weights
    weight_dict = {
        'W1': conv_solver.model.params['W1'],
        'W2': conv_solver.model.params['W2'],
        'W3': conv_solver.model.params['W3']
    }
    # Sizes
    print("Layer weight sizes: ")
    for k, v in weight_dict.items():
        print("%s : %s" % (k, v.shape))
    # Max, min
    print("Layer weight max, min")
    for k, v in weight_dict.items():
        print("%s : max = %f, min = %f" % (k, np.max(v), np.min(v)))

    if show_plots:
        # First-layer filters rendered as an image grid
        fig, ax = get_one_figure_handle()
        grid = vis_weights.vis_grid_img(weight_dict['W1'].transpose(
            0, 2, 3, 1))
        ax.imshow(grid)
        fig.set_size_inches(5, 5)

        # The training loss, accuracy, etc
        tfig, tax = get_figure_handles()
        solver_dict = {'convnet': conv_solver}
        plot_test_result(tax, solver_dict, num_epochs=None)
        plt.show()

    print("done")
Ejemplo n.º 25
0
def scale_network(draw_plots=False):
    """Train progressively deeper ConvNetLayer models on a CIFAR-10 subset.

    Starting from a single conv stage, one filter bank from ``fsizes`` is
    added per iteration (plus an extra hidden FC layer once the 64-filter
    stage is reached), and each configuration is trained from scratch.
    Checkpoints are written under ``examples/`` using the model's repr as
    the checkpoint name.

    Args:
        draw_plots: If True, show loss/accuracy curves after each run.
    """

    # Some trial hyperparameters
    reg = 1e-4
    ws = 0.05
    lr = 1e-3
    fsizes = [16, 32, 64, 128]
    hdims = 256

    # num_filters / hidden_dims are grown in-place across loop iterations
    num_filters = []
    hidden_dims = [256]
    num_epochs = 100

    # prep data - train on a 5000-sample subset
    num_train = 5000
    dataset = load_data('datasets/cifar-10-batches-py')
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val':   dataset['X_val'][:num_train],
        'y_val':   dataset['y_val'][:num_train]
    }

    for s in fsizes:
        # Grow the network: append the next conv filter bank
        num_filters.append(s)
        if s == 64:
            # Once the net reaches 64 filters, add a second hidden FC layer
            hidden_dims.append(hdims)
        model = convnet.ConvNetLayer(hidden_dims=hidden_dims,
                                     num_filters=num_filters,
                                     reg=reg,
                                     weight_scale=ws,
                                     verbose=True)
        print(model)
        # The model's repr doubles as the checkpoint file name
        cname = model.__repr__()
        print("Saving checkpoints to examples/%s.pkl" % cname)
        # loss_window_* presumably enable early stopping on a flat loss
        # window - TODO confirm against the Solver implementation
        solv = solver.Solver(model, small_data,
                             optim_config={'learning_rate': lr},
                             update_rule='sgd_momentum',
                             num_epochs=num_epochs,
                             checkpoint_dir='examples',
                             checkpoint_name=cname,
                             batch_size=50,
                             loss_window_len=400,
                             loss_window_eps=1e-5)
        solv.train()

        # Show results
        if draw_plots is True:
            import matplotlib.pyplot as plt
            fig = plt.figure()
            ax = []
            # Three stacked subplots: loss, train accuracy, val accuracy
            for i in range(3):
                subax = fig.add_subplot(3, 1, (i+1))
                ax.append(subax)

            ax[0].plot(solv.loss_history, 'o')
            ax[0].set_title("Loss")
            ax[1].plot(solv.train_acc_history)
            ax[1].set_title("Training accuracy")
            ax[2].plot(solv.val_acc_history)
            ax[2].set_title("Validation accuracy")

            for i in range(3):
                ax[i].set_xlabel("Epochs")
                #ax[i].set_xticks(range(num_epochs))
            plt.show()