import matplotlib.pyplot as plt
import numpy as np

# Assumption: the helper functions and teacher classes live in project modules;
# the module names below are guesses, not confirmed by the source.
from utils import load_data, get_network_shape, create_validation_set, normalize
import teacher as t


def initialize_teacher_parameters(network_shape, max_nets=1):
    # Header reconstructed from the call sites below; the max_nets default is
    # an assumption (the kin8nm section calls this with only network_shape).
    parameters = {}
    parameters['learning_rate'] = 0.0001 # was 0.001
    parameters['batch_size'] = 32 # was 1000
    parameters['epochs'] = 200 # was 10
    parameters['ensemble_nets'] = max_nets
    parameters['network_shape'] = network_shape
    return parameters

#------------------------------------------------------------------------------

# -- Load dataset --
(x_train, y_train), (x_test, y_test) = load_data(42, 0.1, 'cifar10')  # seed, test_split, name
# Flatten the label arrays from shape (N, 1) to shape (N,)
y_train = y_train.reshape(y_train.shape[0])
y_test = y_test.reshape(y_test.shape[0])
network_shape = get_network_shape('cifar10')

# Create validation set: hold out 10000 training examples
(x_train, y_train), (x_val, y_val) = create_validation_set(x_train, y_train, 10000)

# -- Create and train ensemble model --
num_nets_teacher = 1
teacher_parameters = initialize_teacher_parameters(network_shape, num_nets_teacher)
teacher = t.ClassificationTeacherModel(teacher_parameters)
teacher_history = teacher.train(x_train, y_train, x_val, y_val)

# Plot training and validation loss curves
plt.figure(1)
plt.plot(teacher_history.history['loss'])
plt.plot(teacher_history.history['val_loss'])
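
# A minimal sketch of what create_validation_set is assumed to do: split the
# last `size` training examples off as a held-out validation set. The split
# policy (and the underscored name) are assumptions for illustration, not the
# project's actual implementation.
def _create_validation_set_sketch(x, y, size):
    # Keep the first len(x) - size examples for training...
    train = (x[:-size], y[:-size])
    # ...and hold out the last `size` examples for validation.
    val = (x[-size:], y[-size:])
    return train, val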
#------------------------------------------------------------------------------

def initialize_student_parameters(teacher_parameters):
    parameters = {}
    parameters['temperature'] = 1
    parameters['learning_rate'] = 0.01 # was 0.001; those gave 2.88 val NLL
    parameters['batch_size'] = 100 # was 32
    parameters['epochs'] = 40 # was 1000
    # (learning_rate, batch_size, epochs) => validation NLL:
    # 0.1, 32, 1000 => 2.15 (converged a lot faster than 1000)
    # 0.5, 32, 1000 => 2.01?
    # 0.1, 1, 40 => 2.01 too, a bit noisy though
    parameters['network_shape'] = network_shape
    return parameters

dataset_name = 'kin8nm'
network_shape = get_network_shape(dataset_name)
teacher_parameters = initialize_teacher_parameters(network_shape)
M = teacher_parameters['ensemble_nets']
student_parameters = initialize_student_parameters(teacher_parameters)

# TODO: Potentially do another optimization with bs=1 but not as high epochs
batch_sizes = [64, 128, 256, 512, 1024]
loss_weights = [0.1, 0.5, 0.9]

# Lists to store the sampled parameters and their NLLs
nlls = list()
parameters = list()

# Optimization settings
num_subiterations = 5
num_parameters = 40

# Get all the 5 different datasets for the inner loop
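
# A sketch of the search loop these settings are assumed to drive: sample
# num_parameters random (batch_size, loss_weight) configurations, evaluate
# each over num_subiterations train/validation splits, and keep the mean
# validation NLL. evaluate_student is a hypothetical stand-in for training a
# student with one configuration on one split and returning its validation
# NLL; the project's actual loop may differ.
import random

def _random_search_sketch(evaluate_student):
    sampled, mean_nlls = [], []
    for _ in range(num_parameters):
        config = {'batch_size': random.choice(batch_sizes),
                  'loss_weight': random.choice(loss_weights)}
        # Inner loop: average the validation NLL over the different splits.
        split_nlls = [evaluate_student(config, split)
                      for split in range(num_subiterations)]
        sampled.append(config)
        mean_nlls.append(sum(split_nlls) / num_subiterations)
    # Best configuration = lowest mean validation NLL.
    return sampled[mean_nlls.index(min(mean_nlls))]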
#------------------------------------------------------------------------------

# Hyperparameter sweep notes, (learning_rate, batch_size, epochs[, loss_weight])
# => validation NLL:
# 0.05, 20, 2000 gave 4.41 val_NLL_gaussians
# 0.1, 20, 20000 gave 4.04
# 0.01, 1, 2000, 0.0 => 4.44
# 0.01, 1, 2000, 0.05 => 4.37
# 0.01, 1, 2000, 0.1 => 5.22
# 0.01, 1, 2000, 0.5 => 5.55
# 0.01, 1, 2000, 0.9 => 8.22
# return parameters  # tail of a truncated function; commented out so the file parses

# --- Create training data for Toy example ---
# seed = 6 is pretty good
(x_train, y_train), (x_test, y_test) = load_data(seed=7, test_split=0.1, name='toy')
network_shape = get_network_shape('toy')

# --- Normalize training and true input values ---
x_true = np.arange(-20.0, 20.0, 0.01)
y_true = np.power(x_true, 3)  # ground-truth function: y = x^3
(x_train_n, y_train), (x_true_n, y_true) = normalize(x_train, y_train, x_true, y_true)

# --- Train teacher ---
teacher_parameters = initialize_teacher_parameters(network_shape)
teacher = t.RegressionTeacherModel(teacher_parameters)
teacher_history = teacher.train(x_train_n, y_train, x_test, y_test)

#np.random.seed(42)
#rn.seed(12345)
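
# A minimal sketch of what normalize is assumed to do: standardize the input
# arrays with the training inputs' mean and standard deviation, returning the
# targets unchanged (the variable names x_train_n / x_true_n above suggest
# only x is transformed). This is an assumption, not the helper's actual code.
def _normalize_sketch(x_train, y_train, x_other, y_other):
    # Statistics come from the training inputs only, then apply to both arrays.
    mu = x_train.mean(axis=0)
    sigma = x_train.std(axis=0)
    return ((x_train - mu) / sigma, y_train), ((x_other - mu) / sigma, y_other)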