Example #1
import matplotlib.pyplot as plt  # needed for the loss plots further down


# NOTE: the excerpt starts inside this function; the header and dict initialization
# below are inferred from the call site and the body, not the original signature.
def initialize_teacher_parameters(network_shape, max_nets):
    parameters = {}
    parameters['learning_rate'] = 0.0001  # was 0.001
    parameters['batch_size'] = 32  # was 1000
    parameters['epochs'] = 200  # was 10
    parameters['ensemble_nets'] = max_nets
    parameters['network_shape'] = network_shape

    return parameters


#------------------------------------------------------------------------------

# -- Load dataset --
(x_train, y_train), (x_test, y_test) = load_data(42, 0.1, 'cifar10')
y_train = y_train.reshape(y_train.shape[0])  # flatten labels to shape (N,)
y_test = y_test.reshape(y_test.shape[0])
network_shape = get_network_shape('cifar10')

# Create validation set
(x_train, y_train), (x_val, y_val) = create_validation_set(x_train, y_train, 10000)
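
# create_validation_set is a project-specific helper not shown in this excerpt. A minimal,
# hypothetical sketch of the presumed behaviour (splitting the last n_val examples off as a
# validation set); the name and exact split rule are assumptions, not the project's code:
def _create_validation_set_sketch(x, y, n_val):
    x_tr, x_val = x[:-n_val], x[-n_val:]
    y_tr, y_val = y[:-n_val], y[-n_val:]
    return (x_tr, y_tr), (x_val, y_val)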

# -- Create and train ensemble model --
num_nets_teacher = 1
teacher_parameters = initialize_teacher_parameters(network_shape,
                                                   num_nets_teacher)
teacher = t.ClassificationTeacherModel(teacher_parameters)
teacher_history = teacher.train(x_train, y_train, x_val, y_val)

plt.figure(1)
plt.plot(teacher_history.history['loss'])
plt.plot(teacher_history.history['val_loss'])
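# The excerpt stops mid-plot; a hedged completion that labels and shows the two curves
# above (the axis and legend text are assumptions, not taken from the project):
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['training loss', 'validation loss'])
plt.show()
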
Example #2
import numpy as np  # needed for the toy-data section further down

def initialize_student_parameters(teacher_parameters):
    parameters = {}
    parameters['temperature']   = 1
    parameters['learning_rate'] = 0.01  # was 0.001, which gave 2.88 val NLL
    parameters['batch_size']    = 100   # was 32
    parameters['epochs']        = 40    # was 1000
    # (learning rate, batch size, epochs) => validation NLL:
    # 0.1, 32, 1000 => 2.15 (converged well before 1000 epochs)
    # 0.5, 32, 1000 => 2.01?
    # 0.1, 1,  40   => 2.01 too, a bit noisy though
    # network_shape is read from module scope; the teacher_parameters argument is currently unused.
    parameters['network_shape'] = network_shape

    return parameters
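
# If this follows the usual knowledge-distillation recipe (Hinton et al., 2015), the
# 'temperature' entry would soften the teacher's softmax before matching it with the
# student. A generic, purely illustrative helper, not this project's code:
def _softmax_with_temperature(logits, temperature=1.0):
    """Temperature-scaled softmax; T > 1 flattens the distribution, T = 1 is plain softmax."""
    z = np.asarray(logits, dtype=float) / temperature
    z -= z.max()                     # subtract the max for numerical stability
    e = np.exp(z)
    return e / e.sum()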

dataset_name = 'kin8nm'

network_shape = get_network_shape(dataset_name)
teacher_parameters = initialize_teacher_parameters(network_shape)
M = teacher_parameters['ensemble_nets']
student_parameters = initialize_student_parameters(teacher_parameters)

# TODO: Potentially run another optimization with bs=1 but fewer epochs
batch_sizes = [64, 128, 256, 512, 1024]
loss_weights = [0.1, 0.5, 0.9]
# Lists to store the parameters and their nll
nlls = list()
parameters = list()

# Optimization
num_subiterations = 5
num_parameters = 40
# Get all the 5 different datasets for the inner loop

# Results so far; the columns appear to be (learning rate, batch size, epochs[, loss weight]) => validation NLL:
# 0.05, 20, 2000        => 4.41 val_NLL_gaussians
# 0.1,  20, 20000       => 4.04
# 0.01,  1,  2000, 0.0  => 4.44
# 0.01,  1,  2000, 0.05 => 4.37
# 0.01,  1,  2000, 0.1  => 5.22
# 0.01,  1,  2000, 0.5  => 5.55
# 0.01,  1,  2000, 0.9  => 8.22
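
# The search loop itself is outside this excerpt. A hypothetical sketch of how the lists
# above might drive a random search; train_and_evaluate_student is a placeholder name,
# not a function from this project:
import random

def train_and_evaluate_student(candidate):
    """Placeholder: train a student with these hyperparameters and return its validation NLL."""
    return random.random()  # dummy value standing in for the real training and evaluation

for _ in range(num_parameters):
    candidate = {
        'learning_rate': 10 ** random.uniform(-4, -1),
        'batch_size': random.choice(batch_sizes),
        'loss_weight': random.choice(loss_weights),
    }
    # Average the validation NLL over the inner-loop repetitions / datasets.
    val_nll = sum(train_and_evaluate_student(candidate)
                  for _ in range(num_subiterations)) / num_subiterations
    parameters.append(candidate)
    nlls.append(val_nll)

best_parameters = parameters[nlls.index(min(nlls))]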


# --- Create training data for Toy example ---
# seed = 6 is pretty good
(x_train, y_train), (x_test, y_test) = load_data(seed=7,
                                                 test_split=0.1,
                                                 name='toy')
network_shape = get_network_shape('toy')

# --- Normalize training and true input values ---
x_true = np.arange(-20.0, 20.0, 0.01)
y_true = np.power(x_true, 3)
(x_train_n, y_train), (x_true_n, y_true) = normalize(x_train, y_train, x_true,
                                                     y_true)
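
# normalize is another project helper not shown here. A hypothetical sketch assuming it
# z-scores the inputs with training-set statistics and passes the targets through
# (the real helper may also transform the targets):
def _normalize_sketch(x_train, y_train, x_other, y_other):
    mean, std = x_train.mean(axis=0), x_train.std(axis=0)
    return ((x_train - mean) / std, y_train), ((x_other - mean) / std, y_other)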

# --- Train teacher ---
teacher_parameters = initialize_teacher_parameters(network_shape)
teacher = t.RegressionTeacherModel(teacher_parameters)
teacher_history = teacher.train(x_train_n, y_train, x_test, y_test)

#np.random.seed(42)
#rn.seed(12345)