def __str__(self) -> str:
    """Return a printable summary of the model and solver parameters.

    If no model/solver has been constructed yet, a temporary one is
    built from the stored hyperparameters purely so that its __str__
    output can be included in the summary.
    """
    s = []
    s.append("Model parameters\n")
    if self.model is None:
        # Make a model to simplify printing
        model = convnet.ConvNetLayer(input_dim=self.model_input_dim,
                                     hidden_dims=self.model_hidden_dims,
                                     num_filters=self.model_num_filters)
    else:
        model = self.model
    s.append(str(model))
    s.append("\n")
    s.append("Solver parameters\n")
    if self.solv is None:
        # Build a throwaway solver (no data) just for its string form
        solv = solver.Solver(
            model, None,
            num_epochs=self.solver_num_epochs,
            batch_size=self.solver_batch_size,
            update_rule=self.solver_update_rule,
            #optim_config={'learning_rate': learning_rate},
            verbose=self.verbose,
            print_every=self.solver_print_every,
            checkpoint_name=self.solver_checkpoint_name,
            checkpoint_dir=self.solver_checkpoint_dir)
    else:
        solv = self.solv
    s.append(str(solv))
    s.append("\n")
    return ''.join(s)
def test_weight_init(self):
    """Train identical FCNets under each weight-init scheme and compare.

    Builds one 10-layer net per init mode ('gauss', 'gauss_sqrt2',
    'xavier'), trains each on a 1500-sample CIFAR-10 subset, and
    optionally plots the training curves side-by-side.
    """
    print("======== TestFCNet.test_weight_init:")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 1500
    # Slice out a small subset so the test runs quickly
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    input_dim = 32 * 32 * 3  # flattened CIFAR-10 image
    hidden_dims = [100, 100, 100, 100, 100, 100, 100, 100, 100]
    weight_scale = 2e-2
    reg = 2e-2
    learning_rate = 1e-3
    batch_size = 50
    update_rule = 'adam'
    weight_init = ['gauss', 'gauss_sqrt2', 'xavier']
    # One model per init scheme; everything else held constant
    model_dict = {}
    for w in weight_init:
        model = fcnet.FCNet(input_dim=input_dim,
                            hidden_dims=hidden_dims,
                            weight_scale=weight_scale,
                            reg=reg,
                            weight_init=w)
        model_dict[w] = model
    solver_dict = {}
    for k, m in model_dict.items():
        if self.verbose:
            print(m)
        solv = solver.Solver(
            m,
            small_data,
            print_every=self.print_every,
            num_epochs=self.num_epochs,
            batch_size=batch_size,  # previously 25
            update_rule=update_rule,
            optim_config={'learning_rate': learning_rate})
        solv.train()
        #skey = '%s-%s' % (m.__repr__(), k)
        skey = '%s' % k
        solver_dict[skey] = solv
    if self.draw_plots:
        fig, ax = vis_solver.get_train_fig()
        vis_solver.plot_solver_compare(ax, solver_dict)
        #vis_solver.plot_solver(ax, solv)
        plt.show()
    print("======== TestFCNet.test_weight_init: <END> ")
def test_all_optim_fcnet_5layer(self):
    """Train a 5-hidden-layer FCNet with each update rule and compare.

    Uses a tiny 50-sample subset so each of the four optimizers
    ('rmsprop', 'sgd_momentum', 'adam', 'sgd') trains quickly; each
    rule gets its own per-rule learning rate from the `lr` table.
    """
    print("\n======== TestSolverFCNet.test_all_optim_fcnet_5layer:")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 50
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    #input_dim = small_data['X_train'].shape[0]
    input_dim = 3 * 32 * 32
    hidden_dims = [100, 100, 100, 100, 100]
    #hidden_dims = [100, 50, 10]  # just some random dims
    weight_scale = 5e-2
    reg = 1e-1
    batch_size = 50
    solvers = {}
    # Solver params: one learning rate per update rule
    optim_list = ['rmsprop', 'sgd_momentum', 'adam', 'sgd']
    lr = {'rmsprop': 1e-4, 'adam': 1e-3, 'sgd': 1e-3, 'sgd_momentum': 1e-3}
    for update_rule in optim_list:
        print("Using update rule %s" % update_rule)
        # Fresh model per rule so optimizers don't share weights
        model = fcnet.FCNet(input_dim=input_dim,
                            hidden_dims=hidden_dims,
                            weight_scale=weight_scale,
                            reg=reg,
                            dtype=np.float64)
        if self.verbose:
            print(model)
        model_solver = solver.Solver(
            model,
            small_data,
            print_every=self.print_every,
            num_epochs=self.num_epochs,
            batch_size=batch_size,  # previously 25
            update_rule=update_rule,
            optim_config={'learning_rate': lr[update_rule]})
        solvers[update_rule] = model_solver
        model_solver.train()
    # get some figure handles and plot the data
    if self.draw_plots:
        fig, ax = get_figure_handles()
        plot_test_result(ax, solvers, self.num_epochs)
        fig.set_size_inches(8, 8)
        fig.tight_layout()
        plt.show()
    print("======== TestSolverFCNet.test_all_optim_fcnet_5layer: <END> ")
def ex_vis_solver_compare(ax, path, fname, epoch_num, prefix=None):
    """
    EX_VIS_SOLVER_COMPARE
    Visualize a series of solutions superimposed on a single plot. Each
    solution checkpoint is read in turn and plotted on a single graph.
    The legend is created using the __repr__() result for each solver
    object.

    Inputs
        ax: A matplotlib axes onto which to draw the visualization
        path: Directory containing solver files. This may be a list of
            multiple directories, in which case the method iterates over
            each of them in turn.
        fname: The name of a given solver file, without the
            '_epoch_%d.pkl' suffix. May also be a list of names.
        epoch_num: Which epoch to load.
        prefix: A prefix that is prepended to the filename. This allows,
            for example, a group of subfolders to be traversed that all
            have the same root. Default = None
    """
    # Helper function for loading solver objects.
    # FIX: the original defined this helper but never called it,
    # duplicating the load logic inline; the helper is now used.
    def load_solver(cname):
        solv = solver.Solver(None, None)
        solv.load_checkpoint(cname)
        return solv

    # Check input arguments
    if type(path) is not list:
        path = [path]
    if type(fname) is not list:
        fname = [fname]

    # The suffix does not depend on the loop variables; build it once
    epoch_str = '_epoch_%d.pkl' % epoch_num

    # Iterate over all files and collect one solver per fname
    solver_dict = {}
    for p in path:
        for f in fname:
            if prefix is not None:
                cname = str(prefix) + '/' + str(p) + '/' + str(f) + str(
                    epoch_str)
            else:
                cname = str(p) + '/' + str(f) + str(epoch_str)
            solver_dict[f] = load_solver(cname)
            #vis_solver.plot_model_first_layer(ax, solv.model, cname)
    vis_solver.plot_solver_compare(ax, solver_dict)
def train_xavier(verbose=True, draw_plots=False):
    """Train one ConvNetLayer per weight-init scheme and compare results.

    Trains 'gauss', 'gauss_sqrt' and 'xavier' initialized convnets on
    the full CIFAR-10 dataset, saves each trained solver to a pickle,
    and optionally draws a comparison plot of the training curves.
    """
    data_dir = 'datasets/cifar-10-batches-py'
    dataset = load_data(data_dir)
    # Hyperparams
    input_dim = (3, 32, 32)
    hidden_dims = [256, 256]
    num_filters = [16, 32, 64]
    reg = 2e-2
    weight_scale = 1e-3
    learning_rate = 1e-3
    num_epochs = 600
    batch_size = 50
    update_rule = 'adam'
    # NOTE(review): a sibling test uses 'gauss_sqrt2' here — confirm
    # which init-mode string the classifier actually accepts.
    weight_init = ['gauss', 'gauss_sqrt', 'xavier']
    model_dict = {}
    for w in weight_init:
        model = convnet.ConvNetLayer(input_dim=input_dim,
                                     hidden_dims=hidden_dims,
                                     num_filters=num_filters,
                                     weight_scale=weight_scale,
                                     weight_init=w,
                                     reg=reg,
                                     verbose=True)
        model_dict[w] = model
    solver_dict = {}
    for k, m in model_dict.items():
        if verbose:
            print(m)
        solv = solver.Solver(m, dataset,
                             print_every=10,
                             num_epochs=num_epochs,
                             batch_size=batch_size,
                             update_rule=update_rule,
                             optim_config={'learning_rate': learning_rate})
        solv.train()
        # Persist each trained solver to its own file
        fname = '%s-solver-%d-epochs.pkl' % (k, int(num_epochs))
        solv.save(fname)
        skey = '%s-%s' % (m.__repr__(), k)
        solver_dict[skey] = solv
    # Plot results
    if draw_plots is True:
        fig, ax = vis_solver.get_train_fig()
        vis_solver.plot_solver_compare(ax, solver_dict)
        plt.show()
def ex_plot_solver_weights(ax, fname, title=None):
    """
    EX_PLOT_SOLVER_WEIGHTS
    Example showing how to plot the first layer weights in a solver
    object.

    Inputs
        ax: A matplotlib axes to draw onto
        fname: Checkpoint filename to load the solver from
        title: Optional axes title. Default = "Layer 1 weights"
    """
    if title is None:
        title = "Layer 1 weights"
    solv = solver.Solver(None, None)
    solv.load_checkpoint(fname)
    # BUG FIX: the original passed the undefined name 'cname' here,
    # which raised NameError; the loaded checkpoint file is 'fname'.
    vis_solver.plot_model_first_layer(ax, solv.model, fname)
    ax.set_title(title)
def test_overfit_3layer(self):
    """Overfit a small batchnorm ConvNetLayer on 500 CIFAR-10 samples.

    Sanity check: a net with enough capacity should be able to drive
    training accuracy very high on a tiny dataset.
    """
    print("\n======== TestConvNet.test_overfit_3layer:")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 500
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    if self.verbose:
        print("Size of training dataset :")
        for k, v in small_data.items():
            print("%s : %s " % (k, v.shape))
    #weight_scale = 1e-2
    #learning_rate = 1e-3
    weight_scale = 0.06
    learning_rate = 0.077
    batch_size = 50
    update_rule = 'adam'
    # Get a model (1 conv layer, 1 hidden fc layer, batchnorm on)
    model = convnet.ConvNetLayer(weight_scale=weight_scale,
                                 num_filters=[32],
                                 hidden_dims=[100],
                                 use_batchnorm=True,
                                 reg=0.0)
    if self.verbose:
        print(model)
    # Get a solver
    conv_solver = solver.Solver(model, small_data,
                                num_epochs=self.num_epochs,
                                batch_size=batch_size,
                                update_rule=update_rule,
                                optim_config={'learning_rate': learning_rate},
                                print_every=self.print_every,
                                verbose=self.verbose)
    conv_solver.train()
    conv_dict = {"convnet": conv_solver}
    # Plot figures
    if self.draw_plots is True:
        fig, ax = get_figure_handles()
        plot_test_result(ax, conv_dict, self.num_epochs)
        fig.set_size_inches(8, 8)
        fig.tight_layout()
        plt.show()
    print("======== TestConvNet.test_overfit_3layer: <END> ")
def test_adam_vs_rmsprop_fcnet(self):
    """Compare adam vs rmsprop on the same 6-layer FCNet architecture.

    Trains a fresh model per update rule on a 50-sample subset and
    optionally plots the two training runs together.
    """
    print("\n======== TestSolverFCNet.test_adam_vs_rmsprop:")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 50
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    #input_dim = small_data['X_train'].shape[0]
    input_dim = 3 * 32 * 32
    #hidden_dims = [100, 100, 100, 100, 100]
    hidden_dims = [100, 100, 100, 100, 100]
    weight_scale = 5e-2
    batch_size = 50
    reg = 1e-1
    # Per-rule learning rates
    lr = {'rmsprop': 1e-4, 'adam': 1e-3}
    update_rule = ['rmsprop', 'adam']
    solvers = {}
    for u in update_rule:
        # Fresh model per rule so the comparison is fair
        model = fcnet.FCNet(input_dim=input_dim,
                            hidden_dims=hidden_dims,
                            weight_scale=weight_scale,
                            reg=reg,
                            dtype=np.float64)
        if self.verbose:
            print(model)
        model_solver = solver.Solver(
            model,
            small_data,
            print_every=self.print_every,
            num_epochs=self.num_epochs,
            batch_size=batch_size,  # previously 25
            update_rule=u,
            optim_config={'learning_rate': lr[u]})
        solvers[u] = model_solver
        model_solver.train()
    if self.draw_plots is True:
        fig, ax = get_figure_handles()
        plot_test_result(ax, solvers, self.num_epochs)
        fig.set_size_inches(8, 8)
        fig.tight_layout()
        plt.show()
    print("======== TestSolverFCNet.test_adam_vs_rmsprop: <END> ")
def ex_plot_sequence(ax, path, fname, num_checkpoints, prefix=None, step=1, pause_time=0.01):
    """
    EX_PLOT_SEQUENCE
    Animate the first-layer weights of a solver over a range of epochs.

    Loads checkpoint files '<path>/<fname>_epoch_<n>.pkl' (optionally
    under a common prefix directory) for each epoch n in the requested
    range and draws the first-layer weights onto ax, pausing between
    frames. num_checkpoints may be a single upper bound or a
    (first, last) tuple.
    """
    # Resolve the epoch range to animate
    if type(num_checkpoints) is tuple:
        if len(num_checkpoints) > 2:
            raise ValueError(
                "Cannot accept more than 2 limits for num_checkpoints")
        lo, hi = num_checkpoints
        first_epoch = 1 if lo == 0 else int(lo)
        last_epoch = int(hi)
    else:
        first_epoch = 1
        last_epoch = int(num_checkpoints)

    # Normalize scalar arguments to lists
    if type(path) is not list:
        path = [path]
    if type(fname) is not list:
        fname = [fname]

    # Walk every (directory, basename, epoch) combination and draw it
    for folder in path:
        for base in fname:
            for epoch in range(first_epoch, last_epoch, step):
                if prefix is not None:
                    cname = '%s/%s/%s_epoch_%d.pkl' % (prefix, folder, base, int(epoch))
                else:
                    cname = '%s/%s_epoch_%d.pkl' % (folder, base, int(epoch))
                chk = solver.Solver(None, None)
                chk.load_checkpoint(cname)
                frame_title = '%s (epoch %d)' % (base, epoch)
                vis_solver.plot_model_first_layer(ax, chk.model, title=frame_title)
                plt.pause(pause_time)
                plt.draw()
def init_solver(self, data: Dict[str, Any], learning_rate: float = 1e-3, num_epochs: Union[None, int] = None) -> None:
    """Create and attach a Solver for this object's model.

    Args:
        data: Training/validation data dict handed straight to Solver.
        learning_rate: Learning rate placed into the optim config.
        num_epochs: Epoch count; falls back to self.solver_num_epochs
            when None.
    """
    epochs = self.solver_num_epochs if num_epochs is None else num_epochs
    self.solv = solver.Solver(
        self.model,
        data,
        num_epochs=epochs,
        batch_size=self.solver_batch_size,
        update_rule=self.solver_update_rule,
        optim_config={'learning_rate': learning_rate},
        verbose=self.verbose,
        print_every=self.solver_print_every,
        checkpoint_name=self.solver_checkpoint_name,
        checkpoint_dir=self.solver_checkpoint_dir)
def test_rmsprop_fcnet(self):
    """Train a small FCNet with the rmsprop update rule.

    Smoke test on a 50-sample subset; optionally plots the run.
    """
    print("\n======== TestSolverFCNet.test_rmsprop_fcnet:")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 50
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    #input_dim = small_data['X_train'].shape[0]
    input_dim = 3 * 32 * 32
    #hidden_dims = [100, 100, 100, 100, 100]
    hidden_dims = [100, 50, 10]  # just some random dims
    weight_scale = 5e-2
    learning_rate = 1e-2
    batch_size = 50
    update_rule = 'rmsprop'
    model = fcnet.FCNet(input_dim=input_dim,
                        hidden_dims=hidden_dims,
                        weight_scale=weight_scale,
                        dtype=np.float64)
    if self.verbose:
        print(model)
    model_solver = solver.Solver(
        model,
        small_data,
        print_every=self.print_every,
        num_epochs=self.num_epochs,
        batch_size=batch_size,  # previously 25
        update_rule=update_rule,
        optim_config={'learning_rate': learning_rate})
    model_solver.train()
    if self.draw_plots is True:
        solvers = {'rmsprop': model_solver}
        fig, ax = get_figure_handles()
        plot_test_result(ax, solvers, self.num_epochs)
        fig.set_size_inches(8, 8)
        fig.tight_layout()
        plt.show()
    print("======== TestSolverFCNet.test_rmsprop_fcnet: <END> ")
def test_fcnet_3layer_overfit(self):
    """Overfit a 3-layer FCNet on 50 CIFAR-10 samples with plain SGD.

    Sanity check that the net can memorize a tiny dataset; optionally
    plots the loss history.
    """
    print("\n======== TestFCNet.test_fcnet_3layer_overfit:")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 50
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    #input_dim = small_data['X_train'].shape[0]
    input_dim = 3 * 32 * 32
    hidden_dims = [100, 100]
    # Hand-tuned values that reliably overfit this subset
    weight_scale = 0.079564
    learning_rate = 0.003775
    # Get model and solver
    model = fcnet.FCNet(input_dim=input_dim,
                        hidden_dims=hidden_dims,
                        weight_scale=weight_scale,
                        dtype=np.float64,
                        verbose=True)
    print(model)
    model_solver = solver.Solver(
        model,
        small_data,
        print_every=self.print_every,
        num_epochs=self.num_epochs,
        batch_size=50,  # previously 25
        update_rule='sgd',
        optim_config={'learning_rate': learning_rate})
    model_solver.train()
    # Plot results
    if self.draw_plots is True:
        plt.plot(model_solver.loss_history, 'o')
        plt.title('Training loss history (3 layers)')
        plt.xlabel('Iteration')
        plt.ylabel('Training loss')
        plt.show()
    print("======== TestFCNet.test_fcnet_3layer_overfit: <END> ")
def test_fcnet_2layer_dropout(self):
    """Train 2-layer FCNets with several dropout probabilities.

    One model per dropout value in {0.0, 0.3, 0.5, 0.7}, trained on a
    10-sample subset; optionally plots all runs for comparison.
    """
    print("\n======== TestFCNetDropout.test_fcnet_2layer_dropout :")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 10
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    #input_dim = small_data['X_train'].shape[0]
    input_dim = 3 * 32 * 32
    #hidden_dims = [100, 100, 100, 100]
    batch_size = 100
    solvers = {}
    dropout_probs = [0.0, 0.3, 0.5, 0.7]
    for d in dropout_probs:
        model = fcnet.FCNet(hidden_dims=[500],
                            input_dim=input_dim,
                            num_classes=10,
                            dropout=d,
                            weight_scale=2e-2)
        s = solver.Solver(model, small_data,
                          num_epochs=self.num_epochs,
                          batch_size=batch_size,
                          update_rule='adam',
                          optim_config={'learning_rate': 5e-4},
                          verbose=True,
                          print_every=self.print_every)
        print("Training with dropout %f" % d)
        s.train()
        # Key results by dropout probability for the plot legend
        solvers['p=' + str(d)] = s
    if self.draw_plots:
        fig, ax = get_figure_handles()
        plot_test_result(ax, solvers, self.num_epochs)
        fig.set_size_inches(8, 8)
        fig.tight_layout()
        plt.show()
    print("======== TestFCNetDropout.test_fcnet_2layer_dropout: <END> ")
def train_cifar10_conv():
    """Train a deep convnet (4 conv + 2 fc layers) on CIFAR-10.

    Checkpoints are written to 'examples' under a name that encodes the
    architecture, learning rate and weight scale.
    """
    data_dir = 'datasets/cifar-10-batches-py'
    data = load_data(data_dir)
    verbose = True

    # Model hyperparams
    weight_scale = 0.05
    filter_size = 3
    reg = 0.05
    input_dim = (3, 32, 32)
    num_filters = [16, 32, 64, 128]
    hidden_dims = [256, 256]

    # Solver hyperparams
    update_rule = 'sgd_momentum'
    learning_rate = 1e-3
    num_epochs = 2000

    # Build the network
    cnn = convnet.ConvNetLayer(input_dim=input_dim,
                               hidden_dims=hidden_dims,
                               num_filters=num_filters,
                               weight_scale=weight_scale,
                               reg=reg,
                               filter_size=filter_size,
                               verbose=verbose)
    if verbose:
        print(cnn)

    # Checkpoint name encodes architecture + key hyperparams
    checkpoint_name = 'c4-16-32-64-128-f2-256-256-lr=%f-ws=%f' % (
        learning_rate, weight_scale)
    trainer = solver.Solver(cnn,
                            data,
                            num_epochs=num_epochs,
                            batch_size=50,
                            update_rule=update_rule,
                            optim_config={'learning_rate': learning_rate},
                            verbose=verbose,
                            print_every=50,
                            checkpoint_name=checkpoint_name,
                            checkpoint_dir='examples')
    if verbose is True:
        print("Training %d layer net" % cnn.num_layers)
    trainer.train()
def inspect_checkpoint(checkpoint_fname, verbose=False):
    """Load a saved solver and display its first conv-layer weights.

    Restores a solver from checkpoint_fname, extracts the conv layer
    weights from its model, and shows the W1 filters as an image grid.
    """
    # TODO : Check file exists
    restored = solver.Solver(None, None)
    restored.load(checkpoint_fname)
    layer_weights = cutils.get_conv_layer_dict(restored.model)
    # For now, only the first layer weights are visualized
    w1_grid = vis_grid_img(layer_weights['W1'].transpose(0, 2, 3, 1))

    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.imshow(w1_grid)
def convert_checkpoint(fname, verbose=False):
    """
    Convert a checkpoint to the newest version.

    This method is designed to convert old checkpoints to the current
    format, which unifies the save and load methods so that there is no
    difference between saving a solver and saving a checkpoint. Because
    there may be checkpoints from previous versions lying around that
    may break when loaded, this tool converts old checkpoints so that
    this doesn't occur.

    Returns a new Solver populated from the pickled checkpoint dict,
    with defaults supplied for any missing fields.
    """
    solv = solver.Solver(None, None)
    solv.verbose = verbose
    #solv.load(fname)
    # Read the raw checkpoint dict directly rather than via load()
    with open(fname, 'rb') as fp:
        cpoint = pickle.load(fp)
    # Model data
    solv.model = cpoint.get('model', None)
    # Solver params (defaults match the Solver constructor's behavior
    # for fields absent in old checkpoints)
    solv.update_rule = cpoint.get('update_rule', 'sgd')
    solv.lr_decay = cpoint.get('lr_decay', 0.95)
    solv.optim_config = cpoint.get('optim_config', {'learning_rate': 1e-3})
    solv.batch_size = cpoint.get('batch_size', 100)
    solv.epoch = cpoint.get('epoch', 0)
    solv.num_epochs = cpoint.get('num_epochs', 0)
    # Solution data
    solv.loss_history = cpoint.get('loss_history', None)
    solv.train_acc_history = cpoint.get('train_acc_history', None)
    solv.val_acc_history = cpoint.get('val_acc_history', None)
    # Loss window
    solv.enable_loss_window = cpoint.get('enable_loss_window', False)
    solv.loss_window_len = cpoint.get('loss_window_len', 500)
    solv.loss_window_eps = cpoint.get('loss_window_eps', 1e-3)
    solv.loss_converge_window = cpoint.get('loss_converge_window', 1e4)
    # Checkpoint info
    solv.checkpoint_name = cpoint.get('checkpoint_name', None)
    solv.checkpoint_dir = cpoint.get('checkpoint_dir', None)
    # This solver has now been 'converted'
    return solv
def overfit():
    """Overfit a small convnet on progressively larger CIFAR-10 subsets.

    Trains one model per subset size in train_sizes and plots all the
    training curves together to verify the net can memorize small data.
    """
    # Data
    dataset = data_utils.get_CIFAR10_data('datasets/cifar-10-batches-py')
    # Hyperparameters
    # for now we use some random params, not found by search
    reg = 1e-2
    weight_scale = 2e-3
    learning_rate = 1e-3
    # Training parameters
    num_epochs = 40
    #train_sizes = [50, 100, 150, 200]
    train_sizes = [200, 400, 800, 1000, 1500]
    solv_dict = {}
    for size in train_sizes:
        overfit_data = {
            'X_train': dataset['X_train'][:size],
            'y_train': dataset['y_train'][:size],
            'X_val': dataset['X_val'][:size],
            'y_val': dataset['y_val'][:size]
        }
        model = convnet.ConvNetLayer(hidden_dims=[256],
                                     num_filters=[16],
                                     filter_size=5,
                                     reg=reg,
                                     weight_scale=weight_scale)
        solv = solver.Solver(model, overfit_data,
                             num_epochs=num_epochs,
                             optim_config={'learning_rate': learning_rate})
        print("Overfitting on %d examples in %d epochs using the following network" % (size, num_epochs))
        print(model)
        solv.train()
        dkey = 'size_%d' % size
        solv_dict[dkey] = solv
    # Check that we can actually overfit
    # Plot the results
    fig, ax = vis_solver.get_train_fig()
    vis_solver.plot_solver_compare(ax, solv_dict)
    plt.show()
def test_overfit_3layer(self):
    """Overfit a ThreeLayerConvNet on 500 CIFAR-10 samples with adam.

    Sanity check that the fixed 3-layer architecture can memorize a
    small dataset; optionally plots the run.
    """
    print("\n======== Test3LayerConvNet.test_overfit_3layer:")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 500
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    weight_scale = 0.07
    learning_rate = 0.007
    batch_size = 50
    update_rule = 'adam'
    # Get a model
    model = convnet.ThreeLayerConvNet(weight_scale=weight_scale, reg=0.0)
    if self.verbose:
        print(model)
    # Get a solver
    conv_solver = solver.Solver(model, small_data,
                                num_epochs=self.num_epochs,
                                batch_size=batch_size,
                                update_rule=update_rule,
                                optim_config={'learning_rate': learning_rate},
                                print_every=self.print_every,
                                verbose=self.verbose)
    conv_solver.train()
    conv_dict = {"convnet": conv_solver}
    # Plot figures
    if self.draw_plots is True:
        fig, ax = get_figure_handles()
        # NOTE(review): the sibling TestConvNet test also passes
        # self.num_epochs here — confirm plot_test_result's default
        # behaves as intended without it.
        plot_test_result(ax, conv_dict)
        fig.set_size_inches(8, 8)
        fig.tight_layout()
        plt.show()
    print("======== Test3LayerConvNet.test_overfit_3layer: <END> ")
def learn_random_data():
    """Train a convnet on randomly generated data and plot the curves.

    Useful as a smoke test of the training loop: loss should fall while
    train/val accuracy stay near chance on random labels.
    """
    # Some trial hyperparameters
    reg = 1e-4
    # NOTE(review): ws and lr look like weight scale / learning rate;
    # ws is never used below — confirm whether it should be passed to
    # the model constructor.
    ws = 0.05
    lr = 1e-3
    num_epochs = 10
    #data = load_data('datasets/cifar-10-batches-py', verbose=True)
    #rand_data = convert_data_random(data, int(np.max(data['X_train'])))
    rand_data = gen_random_data()
    # Get model
    model = convnet.ConvNetLayer(hidden_dims=[256], reg=reg)
    # Get solver
    solv = solver.Solver(model, rand_data,
                         optim_config={'learning_rate': lr},
                         num_epochs=num_epochs)
    solv.train()
    # Show some plots: loss, train accuracy, val accuracy stacked
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = []
    for i in range(3):
        subax = fig.add_subplot(3, 1, (i+1))
        ax.append(subax)
    ax[0].plot(solv.loss_history, 'o')
    ax[0].set_title("Loss")
    ax[1].plot(solv.train_acc_history)
    ax[1].set_title("Training accuracy")
    ax[2].plot(solv.val_acc_history)
    ax[2].set_title("Validation accuracy")
    for i in range(3):
        ax[i].set_xlabel("Epochs")
        ax[i].set_xticks(range(num_epochs))
def test_fcnet_5layer_loss(self):
    """Smoke-test training of a 5-layer FCNet with plain SGD.

    Trains on a 50-sample CIFAR-10 subset; the test only verifies that
    training runs to completion without error.
    """
    print("\n======== TestFCNet.test_fcnet_5layer_loss:")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 50
    # Take a small slice of every split
    small_data = {
        key: dataset[key][:num_train]
        for key in ('X_train', 'y_train', 'X_val', 'y_val')
    }
    #input_dim = small_data['X_train'].shape[0]
    input_dim = 3 * 32 * 32
    hidden_dims = [100, 100, 100, 100]
    weight_scale = 1e-2
    learning_rate = 1e-2

    # Build the network and its trainer
    net = fcnet.FCNet(input_dim=input_dim,
                      hidden_dims=hidden_dims,
                      weight_scale=weight_scale,
                      reg=0.0,
                      dtype=np.float64)
    print(net)
    trainer = solver.Solver(
        net,
        small_data,
        print_every=self.print_every,
        num_epochs=self.num_epochs,
        batch_size=50,  # previously 25
        update_rule='sgd',
        optim_config={'learning_rate': learning_rate})
    trainer.train()
    print("======== TestFCNet.test_fcnet_5layer_loss: <END> ")
def test_3layer_nets(self):
    """Compare ThreeLayerConvNet against an equivalent ConvNetLayer.

    Builds both architectures with matching hyperparameters, trains
    each on a 50-sample subset, and optionally plots the two runs.
    """
    print("\n======== TestSolverCompare.test_3layer_nets:")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 50
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    filter_size = 7
    num_filters = 32
    hidden_dims = 100
    weight_scale = 1e-2
    learning_rate = 1e-3
    reg = 0.0
    batch_size = 50
    # NOTE(review): update_rule is set but never passed to the Solver
    # below, so the Solver's default rule is used — confirm intent.
    update_rule = 'adam'
    # TODO : Save this for a Xavier test
    #for i in range(2):
    #    if i == 0:
    #        use_xavier = False
    #    else:
    #        use_xavier = True
    from pymllib.classifiers import convnet
    l3_net = convnet.ThreeLayerConvNet(hidden_dim=hidden_dims,
                                       num_filters=num_filters,
                                       filter_size=filter_size,
                                       weight_scale=weight_scale,
                                       reg=reg)
    if self.verbose:
        print("L3 net:")
        print(l3_net)
    fc_net = convnet.ConvNetLayer(hidden_dims=[hidden_dims],
                                  num_filters=[num_filters],
                                  filter_size=filter_size,
                                  weight_scale=weight_scale,
                                  reg=reg)
    model_dict = {'l3_net': l3_net, 'fc_net': fc_net}
    solver_dict = {}
    for k, m in model_dict.items():
        solv = solver.Solver(m, small_data,
                             optim_config={'learning_rate': learning_rate},
                             num_epochs=self.num_epochs,
                             batch_size=batch_size,
                             print_every=self.print_every,
                             verbose=True)
        solv.train()
        solver_dict[k] = solv
    # Make some plots
    if self.draw_plots:
        fig, ax = vis_solver.get_train_fig()
        vis_solver.plot_solver_compare(ax, solver_dict)
        plt.show()
    print("======== TestSolverCompare.test_3layer_nets : <END> ")
def load_solver(fname):
    """Restore a Solver from the checkpoint file fname and return it."""
    restored = solver.Solver(None, None)
    restored.load_checkpoint(fname)
    return restored
def LLayerConv(verbose=True, show_plots=False, solver_filename=None):
    """Train an L-layer batchnorm convnet on a 1000-sample CIFAR-10 subset.

    Trains the model twice (see NOTE below), saving checkpoints and the
    final solver state, and optionally plots the training results.

    NOTE(review): the second training pass below (batch_size=50,
    lr=1e-3) repeats the first on the same already-trained model and
    looks like leftover duplication — confirm whether it is intended.
    NOTE(review): solver_filename defaults to None but is passed
    straight to conv_solver.save() — confirm save() handles None.
    """
    data_dir = 'datasets/cifar-10-batches-py'
    # Get data
    num_train = 1000
    dataset = load_data(data_dir, verbose)
    train_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    # Set params
    input_dim = (3, 32, 32)
    weight_scale = 0.06
    learning_rate = 0.07
    #reg = 1e-2
    reg = 0.05
    filter_size = 5
    num_filters = [16, 32, 64]
    hidden_dim = [256, 128]
    num_epochs = 100
    # Get a convnet
    conv_model = convnet.ConvNetLayer(input_dim=input_dim,
                                      hidden_dims=hidden_dim,
                                      num_filters=num_filters,
                                      weight_scale=weight_scale,
                                      reg=reg,
                                      filter_size=filter_size,
                                      use_batchnorm=True,
                                      verbose=verbose)
    if verbose:
        print(conv_model)
    # Get a solver
    conv_solver = solver.Solver(conv_model, train_data,
                                num_epochs=num_epochs,
                                batch_size=10,
                                update_rule='adam',
                                optim_config={'learning_rate': learning_rate},
                                verbose=verbose,
                                print_every=50,
                                checkpoint_name='c2-32-32-f2-256-256',
                                checkpoint_dir='examples')
    if verbose is True:
        print("Training %d layer net" % conv_model.num_layers)
    conv_solver.train()
    conv_solver.save(solver_filename)
    if show_plots is True:
        # The training loss, accuracy, etc
        tfig, tax = get_figure_handles()
        solver_dict = {'convnet': conv_solver}
        plot_test_result(tax, solver_dict, num_epochs=num_epochs)
        plt.show()
    if verbose:
        print(conv_model)
    # Get a solver (second pass — see NOTE in docstring)
    conv_solver = solver.Solver(conv_model, train_data,
                                num_epochs=num_epochs,
                                batch_size=50,
                                update_rule='adam',
                                optim_config={'learning_rate': 1e-3},
                                verbose=verbose,
                                print_every=50)
    conv_solver.train()
    # Plot results
    #fig, ax = get_one_figure_handle()
    #grid = vis_weights.vis_grid_img(weight_dict['W1'].transpose(0, 2, 3, 1))
    #ax.imshow(grid)
    #fig.set_size_inches(5,5)
    # save the data
    solver_file = "examples/conv_solver_%d_epochs.pkl" % num_epochs
    conv_solver.save(solver_file)
    # The training loss, accuracy, etc
    tfig, tax = get_figure_handles()
    solver_dict = {'convnet': conv_solver}
    plot_test_result(tax, solver_dict, num_epochs=num_epochs)
    plt.show()
    print("done")
def ThreeLayerNet(verbose=True, show_plots=False):
    """Train a ThreeLayerConvNet on CIFAR-10 and inspect its weights.

    Optionally loads/saves the trained solver from/to a pickle, prints
    per-layer weight shapes and ranges, and optionally visualizes the
    first-layer filters plus the training curves.
    """
    save_convnet = True
    load_convnet = False
    data_dir = 'datasets/cifar-10-batches-py'
    convnet_path = 'examples/convnet_expr.pkl'
    # Get data
    data = load_data(data_dir, verbose)
    # Set params
    weight_scale = 1e-2
    reg = 1e-3
    # Get a convnet
    # TODO: more flexible convnet
    conv_model = convnet.ThreeLayerConvNet(weight_scale=weight_scale,
                                           hidden_dim=500,
                                           reg=reg)
    if verbose:
        print(conv_model)
    # Get a solver
    conv_solver = solver.Solver(conv_model, data,
                                num_epochs=20,
                                batch_size=50,
                                update_rule='adam',
                                optim_config={'learning_rate': 1e-3},
                                verbose=verbose,
                                checkpoint_dir='examples',
                                checkpoint_name='3ln',
                                print_every=50)
    if load_convnet:
        # FIXME : load data.
        print("Loading convnet from file %s" % convnet_path)
        conv_solver.load(convnet_path)
    if verbose is True:
        print("Training %d layer net" % conv_model.num_layers)
    conv_solver.train()
    if save_convnet:
        conv_solver.save(convnet_path)
    # Time to try and visualize what is happening...
    print("break here")
    weight_dict = {
        'W1': conv_solver.model.params['W1'],
        'W2': conv_solver.model.params['W2'],
        'W3': conv_solver.model.params['W3']
    }
    # Sizes
    print("Layer weight sizes: ")
    for k, v, in weight_dict.items():
        print("%s : %s" % (k, v.shape))
    # Max, min
    print("Layer weight max, min")
    for k, v in weight_dict.items():
        print("%s : max = %f, min = %f" % (k, np.max(v), np.min(v)))
    if show_plots is True:
        # First-layer filters as an image grid
        fig, ax = get_one_figure_handle()
        grid = vis_weights.vis_grid_img(weight_dict['W1'].transpose(
            0, 2, 3, 1))
        ax.imshow(grid)
        fig.set_size_inches(5, 5)
        # The training loss, accuracy, etc
        tfig, tax = get_figure_handles()
        solver_dict = {'convnet': conv_solver}
        plot_test_result(tax, solver_dict, num_epochs=None)
        plt.show()
    print("done")
def scale_network(draw_plots=False):
    """Train progressively deeper convnets on a CIFAR-10 subset.

    Each pass through the loop appends one more conv filter stage (and
    one extra hidden layer once the 64-filter stage is reached), builds
    a fresh model, and trains it with checkpointing. If draw_plots is
    True the curves for the LAST trained model are shown.
    """
    # Some trial hyperparameters
    reg = 1e-4
    ws = 0.05
    lr = 1e-3
    fsizes = [16, 32, 64, 128]
    hdims = 256
    num_filters = []
    hidden_dims = [256]
    num_epochs = 100
    # prep data
    num_train = 5000
    dataset = load_data('datasets/cifar-10-batches-py')
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    for s in fsizes:
        # Grow the architecture before building this pass's model
        num_filters.append(s)
        if s == 64:
            hidden_dims.append(hdims)
        model = convnet.ConvNetLayer(hidden_dims=hidden_dims,
                                     num_filters=num_filters,
                                     reg=reg,
                                     weight_scale=ws,
                                     verbose=True)
        print(model)
        cname = model.__repr__()
        print("Saving checkpoints to examples/%s.pkl" % cname)
        solv = solver.Solver(model, small_data,
                             optim_config={'learning_rate': lr},
                             update_rule='sgd_momentum',
                             num_epochs=num_epochs,
                             checkpoint_dir='examples',
                             checkpoint_name=cname,
                             batch_size=50,
                             loss_window_len=400,
                             loss_window_eps=1e-5)
        solv.train()
    # Show results (for the last solver trained in the loop)
    if draw_plots is True:
        import matplotlib.pyplot as plt
        fig = plt.figure()
        ax = []
        for i in range(3):
            subax = fig.add_subplot(3, 1, (i+1))
            ax.append(subax)
        ax[0].plot(solv.loss_history, 'o')
        ax[0].set_title("Loss")
        ax[1].plot(solv.train_acc_history)
        ax[1].set_title("Training accuracy")
        ax[2].plot(solv.val_acc_history)
        ax[2].set_title("Validation accuracy")
        for i in range(3):
            ax[i].set_xlabel("Epochs")
            #ax[i].set_xticks(range(num_epochs))
        plt.show()