def __str__(self) -> str:
    s = []
    s.append("Model parameters\n")
    if self.model is None:
        # Make a model to simplify printing
        model = convnet.ConvNetLayer(input_dim=self.model_input_dim,
                                     hidden_dims=self.model_hidden_dims,
                                     num_filters=self.model_num_filters)
    else:
        model = self.model
    s.append(str(model))
    s.append("\n")
    s.append("Solver parameters\n")
    if self.solv is None:
        solv = solver.Solver(model, None,
                             num_epochs=self.solver_num_epochs,
                             batch_size=self.solver_batch_size,
                             update_rule=self.solver_update_rule,
                             #optim_config={'learning_rate': learning_rate},
                             verbose=self.verbose,
                             print_every=self.solver_print_every,
                             checkpoint_name=self.solver_checkpoint_name,
                             checkpoint_dir=self.solver_checkpoint_dir)
    else:
        solv = self.solv
    s.append(str(solv))
    s.append("\n")
    return ''.join(s)
def test_gradient_check_2conv_layers(self):
    print("\n======== TestConvNet.test_gradient_check_2conv_layers:")
    num_inputs = 2
    input_dim = (3, 32, 32)
    num_classes = 10
    X = np.random.randn(num_inputs, *input_dim)
    y = np.random.randint(num_classes, size=num_inputs)
    # TODO: Modify this to be an L-layer net
    model = convnet.ConvNetLayer(reg=0.0)
    loss, grads = model.loss(X, y)
    for p in sorted(grads):
        f = lambda _: model.loss(X, y)[0]
        param_grad_num = check_gradient.eval_numerical_gradient(
            f, model.params[p], verbose=False, h=1e-6)
        err = error.rel_error(param_grad_num, grads[p])
        print("%s max relative error: %e" % (p, err))
    # This is in a separate pass so that we can see all errors
    # printed to the console before we invoke the assertions
    for p in sorted(grads):
        f = lambda _: model.loss(X, y)[0]
        param_grad_num = check_gradient.eval_numerical_gradient(
            f, model.params[p], verbose=False, h=1e-6)
        err = error.rel_error(param_grad_num, grads[p])
        self.assertLessEqual(err, self.eps)
    print("======== TestConvNet.test_gradient_check_2conv_layers: <END> ")
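# For reference: eval_numerical_gradient estimates each gradient entry with a
# centered difference, (f(x+h) - f(x-h)) / (2h), and rel_error compares it to
# the analytic gradient. A minimal sketch of both techniques; the helper names
# below are illustrative, not the actual check_gradient / error module APIs.
def numerical_gradient_sketch(f, x, h=1e-6):
    """Centered-difference gradient of f at x, one element at a time."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        ix = it.multi_index
        old_val = x[ix]
        x[ix] = old_val + h
        fxph = f(x)                  # f(x + h)
        x[ix] = old_val - h
        fxmh = f(x)                  # f(x - h)
        x[ix] = old_val              # restore the original value
        grad[ix] = (fxph - fxmh) / (2.0 * h)
        it.iternext()
    return grad

def rel_error_sketch(x, y, eps=1e-8):
    """Max relative error, guarded against division by zero."""
    return np.max(np.abs(x - y) / np.maximum(eps, np.abs(x) + np.abs(y)))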
def init_model(self, weight_scale: float, reg: float) -> None:
    self.model = convnet.ConvNetLayer(
        input_dim=self.model_input_dim,
        hidden_dims=self.model_hidden_dims,
        num_filters=self.model_num_filters,
        use_batchnorm=self.model_use_batchnorm,
        reg=reg,
        weight_scale=weight_scale,
        verbose=self.verbose)
def train_xavier(verbose=True, draw_plots=False):
    data_dir = 'datasets/cifar-10-batches-py'
    dataset = load_data(data_dir)
    # Hyperparams
    input_dim = (3, 32, 32)
    hidden_dims = [256, 256]
    num_filters = [16, 32, 64]
    reg = 2e-2
    weight_scale = 1e-3
    learning_rate = 1e-3
    num_epochs = 600
    batch_size = 50
    update_rule = 'adam'
    weight_init = ['gauss', 'gauss_sqrt', 'xavier']

    model_dict = {}
    for w in weight_init:
        model = convnet.ConvNetLayer(input_dim=input_dim,
                                     hidden_dims=hidden_dims,
                                     num_filters=num_filters,
                                     weight_scale=weight_scale,
                                     weight_init=w,
                                     reg=reg,
                                     verbose=True)
        model_dict[w] = model

    solver_dict = {}
    for k, m in model_dict.items():
        if verbose:
            print(m)
        solv = solver.Solver(m, dataset,
                             print_every=10,
                             num_epochs=num_epochs,
                             batch_size=batch_size,
                             update_rule=update_rule,
                             optim_config={'learning_rate': learning_rate})
        solv.train()
        fname = '%s-solver-%d-epochs.pkl' % (k, int(num_epochs))
        solv.save(fname)
        skey = '%s-%s' % (repr(m), k)
        solver_dict[skey] = solv

    # Plot results
    if draw_plots is True:
        fig, ax = vis_solver.get_train_fig()
        vis_solver.plot_solver_compare(ax, solver_dict)
        plt.show()
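# For context: the three weight_init modes compared above usually differ only
# in how W is scaled at construction time. A hedged sketch of the conventional
# formulas (n_in is fan-in, n_out is fan-out); this mirrors the common
# convention, not necessarily ConvNetLayer's exact implementation.
def init_weight_sketch(scheme, n_in, n_out, weight_scale=1e-3):
    """Illustrative weight initializers for the schemes compared above."""
    if scheme == 'gauss':
        # Plain Gaussian scaled by a fixed constant
        return weight_scale * np.random.randn(n_in, n_out)
    elif scheme == 'gauss_sqrt':
        # Gaussian scaled by 1/sqrt(fan-in)
        return np.random.randn(n_in, n_out) / np.sqrt(n_in)
    elif scheme == 'xavier':
        # Xavier/Glorot: uniform in [-a, a] with a = sqrt(6 / (fan_in + fan_out))
        a = np.sqrt(6.0 / (n_in + n_out))
        return np.random.uniform(-a, a, size=(n_in, n_out))
    raise ValueError("Unknown scheme: %s" % scheme)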
def test_overfit_3layer(self):
    print("\n======== TestConvNet.test_overfit_3layer:")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 500
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    if self.verbose:
        print("Size of training dataset:")
        for k, v in small_data.items():
            print("%s : %s" % (k, v.shape))
    #weight_scale = 1e-2
    #learning_rate = 1e-3
    weight_scale = 0.06
    learning_rate = 0.077
    batch_size = 50
    update_rule = 'adam'

    # Get a model
    model = convnet.ConvNetLayer(weight_scale=weight_scale,
                                 num_filters=[32],
                                 hidden_dims=[100],
                                 use_batchnorm=True,
                                 reg=0.0)
    if self.verbose:
        print(model)
    # Get a solver
    conv_solver = solver.Solver(model, small_data,
                                num_epochs=self.num_epochs,
                                batch_size=batch_size,
                                update_rule=update_rule,
                                optim_config={'learning_rate': learning_rate},
                                print_every=self.print_every,
                                verbose=self.verbose)
    conv_solver.train()
    conv_dict = {"convnet": conv_solver}

    # Plot figures
    if self.draw_plots is True:
        fig, ax = get_figure_handles()
        plot_test_result(ax, conv_dict, self.num_epochs)
        fig.set_size_inches(8, 8)
        fig.tight_layout()
        plt.show()
    print("======== TestConvNet.test_overfit_3layer: <END> ")
def test_loss_2conv_layers(self):
    print("\n======== TestConvNet.test_loss_2conv_layers:")
    N = 10     # Keep N small because the naive implementation is VERY slow
    X = np.random.randn(N, 3, 32, 32)
    y = np.random.randint(10, size=N)
    model = convnet.ConvNetLayer()
    model.reg = 0.0
    loss, grads = model.loss(X, y)
    print("Initial loss (no regularization) : %f" % loss)
    model.reg = 0.5
    loss, grads = model.loss(X, y)
    print("Initial loss (with regularization) : %f" % loss)
    print("======== TestConvNet.test_loss_2conv_layers: <END> ")
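# Reference value for the losses printed above: with random weights and no
# regularization, softmax loss over C classes should be close to log(C)
# (about 2.3026 for C = 10), and enabling regularization should only increase
# it. A small sketch of that sanity check (the helper name is illustrative):
def expected_softmax_loss_sketch(num_classes=10):
    """Expected unregularized initial loss for a randomly initialized net."""
    return np.log(num_classes)   # ~2.3026 for 10 classes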
def train_cifar10_conv():
    data_dir = 'datasets/cifar-10-batches-py'
    data = load_data(data_dir)
    verbose = True
    # Model hyperparams
    weight_scale = 0.05
    filter_size = 3
    reg = 0.05
    input_dim = (3, 32, 32)
    num_filters = [16, 32, 64, 128]
    hidden_dims = [256, 256]
    # Solver hyperparams
    update_rule = 'sgd_momentum'
    learning_rate = 1e-3
    num_epochs = 2000

    # Get a model
    conv_model = convnet.ConvNetLayer(input_dim=input_dim,
                                      hidden_dims=hidden_dims,
                                      num_filters=num_filters,
                                      weight_scale=weight_scale,
                                      reg=reg,
                                      filter_size=filter_size,
                                      verbose=verbose)
    if verbose:
        print(conv_model)
    # Get a solver
    checkpoint_name = 'c4-16-32-64-128-f2-256-256-lr=%f-ws=%f' % (
        learning_rate, weight_scale)
    conv_solver = solver.Solver(conv_model, data,
                                num_epochs=num_epochs,
                                batch_size=50,
                                update_rule=update_rule,
                                optim_config={'learning_rate': learning_rate},
                                verbose=verbose,
                                print_every=50,
                                checkpoint_name=checkpoint_name,
                                checkpoint_dir='examples')
    if verbose is True:
        print("Training %d layer net" % conv_model.num_layers)
    conv_solver.train()
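# For reference: the 'sgd_momentum' update rule selected above is
# conventionally a velocity-damped SGD step. A minimal sketch of that rule
# (illustrative only; the solver's own optim module is authoritative):
def sgd_momentum_sketch(w, dw, config=None):
    """SGD with momentum: v = mu * v - lr * dw; w += v."""
    if config is None:
        config = {}
    config.setdefault('learning_rate', 1e-2)
    config.setdefault('momentum', 0.9)
    v = config.get('velocity', np.zeros_like(w))
    v = config['momentum'] * v - config['learning_rate'] * dw
    next_w = w + v
    config['velocity'] = v       # persist the velocity between steps
    return next_w, config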
def overfit():
    # Data
    dataset = data_utils.get_CIFAR10_data('datasets/cifar-10-batches-py')
    # Hyperparameters
    # For now we just use some random params, not found by search
    reg = 1e-2
    weight_scale = 2e-3
    learning_rate = 1e-3
    # Training parameters
    num_epochs = 40
    #train_sizes = [50, 100, 150, 200]
    train_sizes = [200, 400, 800, 1000, 1500]

    solv_dict = {}
    for size in train_sizes:
        overfit_data = {
            'X_train': dataset['X_train'][:size],
            'y_train': dataset['y_train'][:size],
            'X_val': dataset['X_val'][:size],
            'y_val': dataset['y_val'][:size]
        }
        model = convnet.ConvNetLayer(hidden_dims=[256],
                                     num_filters=[16],
                                     filter_size=5,
                                     reg=reg,
                                     weight_scale=weight_scale)
        solv = solver.Solver(model, overfit_data,
                             num_epochs=num_epochs,
                             optim_config={'learning_rate': learning_rate})
        print("Overfitting on %d examples in %d epochs using the following network"
              % (size, num_epochs))
        print(model)
        solv.train()
        dkey = 'size_%d' % size
        solv_dict[dkey] = solv

    # Check that we can actually overfit
    # Plot the results
    fig, ax = vis_solver.get_train_fig()
    vis_solver.plot_solver_compare(ax, solv_dict)
    plt.show()
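# A quick numeric complement to the plot above: a model that truly overfits
# should reach near-perfect training accuracy while validation accuracy lags.
# A sketch, assuming the Solver records train_acc_history / val_acc_history
# as it does elsewhere in these examples (the helper name is illustrative):
def report_overfit_gaps_sketch(solv_dict):
    """Print final train/val accuracy and their gap for each solver."""
    for dkey, solv in sorted(solv_dict.items()):
        train_acc = solv.train_acc_history[-1]
        val_acc = solv.val_acc_history[-1]
        print("%s: train acc %.3f, val acc %.3f (gap %.3f)" %
              (dkey, train_acc, val_acc, train_acc - val_acc))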
def learn_random_data():
    # Some trial hyperparameters
    reg = 1e-4
    ws = 0.05
    lr = 1e-3
    num_epochs = 10
    #data = load_data('datasets/cifar-10-batches-py', verbose=True)
    #rand_data = convert_data_random(data, int(np.max(data['X_train'])))
    rand_data = gen_random_data()

    # Get model
    model = convnet.ConvNetLayer(hidden_dims=[256], reg=reg, weight_scale=ws)
    # Get solver
    solv = solver.Solver(model, rand_data,
                         optim_config={'learning_rate': lr},
                         num_epochs=num_epochs)
    solv.train()

    # Show some plots
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = []
    for i in range(3):
        subax = fig.add_subplot(3, 1, (i + 1))
        ax.append(subax)
    ax[0].plot(solv.loss_history, 'o')
    ax[0].set_title("Loss")
    ax[1].plot(solv.train_acc_history)
    ax[1].set_title("Training accuracy")
    ax[2].plot(solv.val_acc_history)
    ax[2].set_title("Validation accuracy")
    for i in range(3):
        ax[i].set_xlabel("Epochs")
        ax[i].set_xticks(range(num_epochs))
    plt.show()
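# gen_random_data() is not defined in this file. A minimal sketch of a
# generator with the dict layout the Solver expects, with shapes matching the
# CIFAR-10 inputs used elsewhere; the name, defaults, and sizes here are
# assumptions, not the actual helper.
def gen_random_data_sketch(num_train=100, num_val=50, num_classes=10,
                           input_dim=(3, 32, 32)):
    """Random images and labels in the same dict layout as load_data()."""
    return {
        'X_train': np.random.randn(num_train, *input_dim),
        'y_train': np.random.randint(num_classes, size=num_train),
        'X_val': np.random.randn(num_val, *input_dim),
        'y_val': np.random.randint(num_classes, size=num_val),
    }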
def scale_network(draw_plots=False):
    # Some trial hyperparameters
    reg = 1e-4
    ws = 0.05
    lr = 1e-3
    fsizes = [16, 32, 64, 128]
    hdims = 256
    num_filters = []
    hidden_dims = [256]
    num_epochs = 100

    # Prep data
    num_train = 5000
    dataset = load_data('datasets/cifar-10-batches-py')
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }

    for s in fsizes:
        num_filters.append(s)
        if s == 64:
            hidden_dims.append(hdims)
        model = convnet.ConvNetLayer(hidden_dims=hidden_dims,
                                     num_filters=num_filters,
                                     reg=reg,
                                     weight_scale=ws,
                                     verbose=True)
        print(model)
        cname = repr(model)
        print("Saving checkpoints to examples/%s.pkl" % cname)
        solv = solver.Solver(model, small_data,
                             optim_config={'learning_rate': lr},
                             update_rule='sgd_momentum',
                             num_epochs=num_epochs,
                             checkpoint_dir='examples',
                             checkpoint_name=cname,
                             batch_size=50,
                             loss_window_len=400,
                             loss_window_eps=1e-5)
        solv.train()

        # Show results
        if draw_plots is True:
            import matplotlib.pyplot as plt
            fig = plt.figure()
            ax = []
            for i in range(3):
                subax = fig.add_subplot(3, 1, (i + 1))
                ax.append(subax)
            ax[0].plot(solv.loss_history, 'o')
            ax[0].set_title("Loss")
            ax[1].plot(solv.train_acc_history)
            ax[1].set_title("Training accuracy")
            ax[2].plot(solv.val_acc_history)
            ax[2].set_title("Validation accuracy")
            for i in range(3):
                ax[i].set_xlabel("Epochs")
                #ax[i].set_xticks(range(num_epochs))
            plt.show()
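# The loss_window_len / loss_window_eps arguments above suggest the solver can
# stop early when the loss plateaus. A hedged sketch of one such convergence
# test (illustrative; the Solver's actual stopping criterion may differ):
def loss_plateaued_sketch(loss_history, window_len=400, eps=1e-5):
    """True if the mean loss improvement over the window falls below eps."""
    if len(loss_history) < 2 * window_len:
        return False
    prev = np.mean(loss_history[-2 * window_len:-window_len])
    curr = np.mean(loss_history[-window_len:])
    return (prev - curr) < eps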
def test_3layer_nets(self):
    print("\n======== TestSolverCompare.test_3layer_nets:")
    dataset = load_data(self.data_dir, self.verbose)
    num_train = 50
    small_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    filter_size = 7
    num_filters = 32
    hidden_dims = 100
    weight_scale = 1e-2
    learning_rate = 1e-3
    reg = 0.0
    batch_size = 50
    update_rule = 'adam'

    # TODO: Save this for a Xavier test
    #for i in range(2):
    #    if i == 0:
    #        use_xavier = False
    #    else:
    #        use_xavier = True

    from pymllib.classifiers import convnet
    l3_net = convnet.ThreeLayerConvNet(hidden_dim=hidden_dims,
                                       num_filters=num_filters,
                                       filter_size=filter_size,
                                       weight_scale=weight_scale,
                                       reg=reg)
    if self.verbose:
        print("L3 net:")
        print(l3_net)
    fc_net = convnet.ConvNetLayer(hidden_dims=[hidden_dims],
                                  num_filters=[num_filters],
                                  filter_size=filter_size,
                                  weight_scale=weight_scale,
                                  reg=reg)
    model_dict = {'l3_net': l3_net, 'fc_net': fc_net}
    solver_dict = {}
    for k, m in model_dict.items():
        solv = solver.Solver(m, small_data,
                             optim_config={'learning_rate': learning_rate},
                             num_epochs=self.num_epochs,
                             batch_size=batch_size,
                             print_every=self.print_every,
                             verbose=True)
        solv.train()
        solver_dict[k] = solv

    # Make some plots
    if self.draw_plots:
        fig, ax = vis_solver.get_train_fig()
        vis_solver.plot_solver_compare(ax, solver_dict)
        plt.show()
    print("======== TestSolverCompare.test_3layer_nets: <END> ")
def LLayerConv(verbose=True, show_plots=False, solver_filename=None):
    data_dir = 'datasets/cifar-10-batches-py'
    # Get data
    num_train = 1000
    dataset = load_data(data_dir, verbose)
    train_data = {
        'X_train': dataset['X_train'][:num_train],
        'y_train': dataset['y_train'][:num_train],
        'X_val': dataset['X_val'][:num_train],
        'y_val': dataset['y_val'][:num_train]
    }
    # Set params
    input_dim = (3, 32, 32)
    weight_scale = 0.06
    learning_rate = 0.07
    #reg = 1e-2
    reg = 0.05
    filter_size = 5
    num_filters = [16, 32, 64]
    hidden_dims = [256, 128]
    num_epochs = 100

    # Get a convnet
    conv_model = convnet.ConvNetLayer(input_dim=input_dim,
                                      hidden_dims=hidden_dims,
                                      num_filters=num_filters,
                                      weight_scale=weight_scale,
                                      reg=reg,
                                      filter_size=filter_size,
                                      use_batchnorm=True,
                                      verbose=verbose)
    if verbose:
        print(conv_model)
    # Get a solver
    conv_solver = solver.Solver(conv_model, train_data,
                                num_epochs=num_epochs,
                                batch_size=10,
                                update_rule='adam',
                                optim_config={'learning_rate': learning_rate},
                                verbose=verbose,
                                print_every=50,
                                checkpoint_name='c2-32-32-f2-256-256',
                                checkpoint_dir='examples')
    if verbose is True:
        print("Training %d layer net" % conv_model.num_layers)
    conv_solver.train()
    # Guard the save: solver_filename defaults to None
    if solver_filename is not None:
        conv_solver.save(solver_filename)

    if show_plots is True:
        # The training loss, accuracy, etc.
        tfig, tax = get_figure_handles()
        solver_dict = {'convnet': conv_solver}
        plot_test_result(tax, solver_dict, num_epochs=num_epochs)
        plt.show()
    print("done")
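# A usage sketch for the function above; the output path here is illustrative.
if __name__ == "__main__":
    # Train the L-layer convnet, save the solver state, and show the
    # training curves.
    LLayerConv(verbose=True, show_plots=True,
               solver_filename='examples/llayer-conv-solver.pkl')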