Example #1
0
def run_training():
    """Train a small single-hidden-layer MLP and plot the training curves.

    Loads the dataset, builds a 784-30-10 sigmoid MLP optimized with SGD,
    prints the model summary, then trains with live plotting enabled.
    """
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()

    # Hyper-parameters for this example run.
    lr_value = 1e-1
    mini_batch_size = 50
    epoch_limit = 8

    model = MLP(
        input_dim=784,
        output_dim=10,
        hidden_dims=[30],
        activation_functions=[sigmoid],
        init_parameters_sd=1,
        optimizer=SGD(learning_rate=lr_value),
    )
    print(model)

    train_model(
        model,
        x_train,
        y_train,
        lr=lr_value,
        batch_size=mini_batch_size,
        max_epochs=epoch_limit,
        x_val=x_val,
        y_val=y_val,
        plot=True,
    )
Example #2
0
def run_training_and_evaluation():
    """Train an MLP with early stopping, save it to disk, and evaluate it.

    Builds a single-hidden-layer (100 units) sigmoid MLP optimized with SGD,
    trains with early stopping (patience 2), pickles the trained model under
    a timestamped file name, and reports test-set performance.
    """
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()

    hidden_dims = [100]
    # FIX: one activation per hidden layer, matching every other MLP
    # construction in this file (was [sigmoid, sigmoid] for a single
    # hidden layer, which is inconsistent with hidden_dims above).
    activation_functions = [sigmoid]
    init_parameters_sd = 1
    learning_rate = 2e-1
    batch_size = 50
    max_epochs = 20

    mlp_model = MLP(input_dim=784,
                    output_dim=10,
                    hidden_dims=hidden_dims,
                    activation_functions=activation_functions,
                    init_parameters_sd=init_parameters_sd,
                    optimizer=SGD(learning_rate=learning_rate))
    print(mlp_model)

    train_model(mlp_model,
                x_train,
                y_train,
                batch_size=batch_size,
                max_epochs=max_epochs,
                x_val=x_val,
                y_val=y_val,
                plot=True,
                early_stop=True,
                patience=2)

    # Persist the trained model under a name that encodes the hyper-parameters
    # and a timestamp, so repeated runs do not overwrite each other.
    file_name = f'mlp_model_{hidden_dims}_sd={init_parameters_sd}' + \
                f'_lr={learning_rate}_b={batch_size}_{datetime.now().strftime("%m-%d-%Y_%H.%M")}.pkl'
    mlp_model.save_model(file_name)
    evaluate_model(mlp_model, x_test, y_test)
Example #3
0
def get_results_for_cnn(x_train: np.ndarray, x_val: np.ndarray,
                        y_train: np.ndarray, y_val: np.ndarray,
                        kernel_size: int, simulation_i: int) -> Tuple:
    """Run one CNN training simulation and return its learning curves.

    Relies on module-level experiment settings (simulation_number, padding,
    stride, max_pooling, kernel_number, fc_act_function, learning_rate,
    batch_size, max_epochs).

    Returns:
        Tuple of (epoch count, training losses, validation losses,
        validation accuracies) for this single simulation.
    """
    stamp = datetime.now().strftime("%m-%d-%Y_%H.%M")
    print(f'\n{stamp} Model: CNN k={kernel_size}'
          f' simulation {simulation_i + 1}/{simulation_number}')

    # Flat 784-vectors become 28x28 images for the convolutional layers.
    x_train = np.array([np.reshape(sample, (28, 28)) for sample in x_train])
    x_val = np.array([np.reshape(sample, (28, 28)) for sample in x_val])

    # Spatial size of each feature map after convolution (and optional pooling).
    feature_dim = math.floor((28 - kernel_size + 2 * padding) / stride + 1)
    if max_pooling:
        feature_dim = math.floor(feature_dim / 2)

    conv_net = ConvolutionalNet(
        input_dim=(28, 28),
        kernel_number=kernel_number,
        kernel_size=kernel_size,
        fc_input_dim=kernel_number * feature_dim ** 2,
        output_dim=10,
        hidden_dims=[128],
        activation_functions=[fc_act_function],
        optimizer=Adam(learning_rate=learning_rate),
        initializer=HeInitializer(),
    )

    epochs, train_losses, val_losses, val_accuracies = train_model(
        conv_net, x_train, y_train,
        batch_size=batch_size, max_epochs=max_epochs,
        x_val=x_val, y_val=y_val, plot=False,
    )

    return epochs, train_losses, val_losses, val_accuracies
def get_results_for_initializer(initializer_name: str, x_train: np.ndarray, x_val: np.ndarray, y_train: np.ndarray,
                                y_val: np.ndarray) -> Dict:
    """Train `simulation_number` MLPs using the named weight initializer.

    Relies on module-level experiment settings (simulation_number,
    hidden_dims, act_function, optimizer, batch_size, max_epochs).

    Returns:
        Dict with per-simulation lists under 'epochs', 'train_losses',
        'val_losses', 'val_acc', plus the shared 'optimizer'.
    """
    results = {'epochs': [], 'train_losses': [],
               'val_losses': [], 'val_acc': []}

    for sim in range(simulation_number):
        print(f'\n{datetime.now().strftime("%m-%d-%Y_%H.%M")} Initializer : {initializer_name}' +
              f' simulation {sim + 1}/{simulation_number}')

        # A fresh initializer (and therefore fresh weights) per simulation.
        model = MLP(
            input_dim=784, output_dim=10, hidden_dims=hidden_dims,
            activation_functions=[act_function],
            optimizer=optimizer,
            initializer=_get_initializer_by_name(initializer_name),
        )

        epochs, train_losses, val_losses, val_accuracies = train_model(
            model, x_train, y_train,
            batch_size=batch_size, max_epochs=max_epochs,
            x_val=x_val, y_val=y_val, plot=False,
        )

        results['epochs'].append(epochs)
        results['train_losses'].append(train_losses)
        results['val_losses'].append(val_losses)
        results['val_acc'].append(val_accuracies)

    results['optimizer'] = optimizer
    return results
def analyze_activation_functions():
    """Compare sigmoid vs relu on a fixed MLP over repeated simulations.

    Trains 5 models per activation function, aggregates the learning
    curves, pickles the results to a timestamped file, and plots
    loss/accuracy comparisons.
    """
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()

    # Experiment configuration.
    simulation_number = 5
    max_epochs = 7
    batch_size = 50
    weight_sd = 1.0
    learning_rate = 1e-1

    act_functions = [sigmoid, relu]
    act_functions_names = ['sigmoid', 'relu']

    training_data_dictionary = {}

    for act_fn, act_fn_name in zip(act_functions, act_functions_names):
        run_epochs = []
        run_train_losses = []
        run_val_losses = []
        run_val_acc = []

        for sim in range(simulation_number):
            print(
                f'\nActivation function : {act_fn_name}, simulation {sim + 1}/{simulation_number}'
            )
            model = MLP(input_dim=784,
                        output_dim=10,
                        hidden_dims=[30],
                        activation_functions=[act_fn],
                        init_parameters_sd=weight_sd,
                        optimizer=SGD(learning_rate=learning_rate))

            epochs, train_losses, val_losses, val_accuracies = train_model(
                model, x_train, y_train,
                batch_size=batch_size, max_epochs=max_epochs,
                x_val=x_val, y_val=y_val, plot=False
            )

            run_epochs.append(epochs)
            run_train_losses.append(train_losses)
            run_val_losses.append(val_losses)
            run_val_acc.append(val_accuracies)

        training_data_dictionary[act_fn_name] = {
            'epochs': run_epochs,
            'train_losses': run_train_losses,
            'val_losses': run_val_losses,
            'val_acc': run_val_acc,
        }

    # Persist the aggregated curves for later offline analysis.
    file_name = f'act_functions_analysis_data_{act_functions_names}_{datetime.now().strftime("%m-%d-%Y_%H.%M")}.pkl'
    with open(file_name, 'wb') as handle:
        pkl.dump(training_data_dictionary, handle,
                 protocol=pkl.HIGHEST_PROTOCOL)

    plot_losses_results(training_data_dictionary)
    plot_accuracies_results(training_data_dictionary)
Example #6
0
def analyze_number_of_neurons():
    """Measure how hidden-layer width affects MLP training.

    For each width in [30, 100, 300, 500], trains 5 sigmoid MLPs with SGD,
    aggregates the learning curves, pickles the results to a timestamped
    file, and plots loss/accuracy comparisons.
    """
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()

    # Experiment configuration.
    simulation_number = 5
    learning_rate = 1e-1
    batch_size = 50
    max_epochs = 7

    hidden_neurons_numbers = [30, 100, 300, 500]

    training_data_dictionary = {}

    for neurons_number in hidden_neurons_numbers:
        run_epochs = []
        run_train_losses = []
        run_val_losses = []
        run_val_acc = []

        for sim in range(simulation_number):
            print(f'\nHidden neurons: {neurons_number}, simulation {sim + 1}/{simulation_number}')
            model = MLP(
                input_dim=784, output_dim=10, hidden_dims=[neurons_number],
                activation_functions=[sigmoid],
                init_parameters_sd=1,
                optimizer=SGD(learning_rate=learning_rate)
            )

            epochs, train_losses, val_losses, val_accuracies = train_model(
                model, x_train, y_train,
                batch_size=batch_size, max_epochs=max_epochs,
                x_val=x_val, y_val=y_val, plot=False
            )

            run_epochs.append(epochs)
            run_train_losses.append(train_losses)
            run_val_losses.append(val_losses)
            run_val_acc.append(val_accuracies)

        training_data_dictionary[neurons_number] = {
            'epochs': run_epochs,
            'train_losses': run_train_losses,
            'val_losses': run_val_losses,
            'val_acc': run_val_acc,
        }

    # Persist the aggregated curves for later offline analysis.
    file_name = f'neuron_numbers_analysis_data_{hidden_neurons_numbers}_{datetime.now().strftime("%m-%d-%Y_%H.%M")}.pkl'
    with open(file_name, 'wb') as handle:
        pkl.dump(training_data_dictionary, handle, protocol=pkl.HIGHEST_PROTOCOL)

    plot_losses_results(training_data_dictionary)
    plot_accuracies_results(training_data_dictionary)
Example #7
0
def get_results_for_mlp(x_train: np.ndarray, x_val: np.ndarray,
                        y_train: np.ndarray, y_val: np.ndarray,
                        simulation_i: int) -> Tuple:
    """Run one MLP baseline training simulation and return its curves.

    Relies on module-level experiment settings (simulation_number,
    mlp_hidden_dims, fc_act_function, learning_rate, batch_size, max_epochs).

    Returns:
        Tuple of (epoch count, training losses, validation losses,
        validation accuracies) for this single simulation.
    """
    stamp = datetime.now().strftime("%m-%d-%Y_%H.%M")
    print(f'\n{stamp} Model: MLP' +
          f' simulation {simulation_i + 1}/{simulation_number}')

    model = MLP(input_dim=784,
                output_dim=10,
                hidden_dims=mlp_hidden_dims,
                activation_functions=[fc_act_function],
                optimizer=Adam(learning_rate=learning_rate),
                initializer=HeInitializer())

    epochs, train_losses, val_losses, val_accuracies = train_model(
        model, x_train, y_train,
        batch_size=batch_size, max_epochs=max_epochs,
        x_val=x_val, y_val=y_val, plot=False
    )

    return epochs, train_losses, val_losses, val_accuracies
Example #8
0
def run_training():
    """Train a small CNN on 28x28 images and evaluate it on the test set.

    Reshapes the flat inputs to images, truncates the validation set to 500
    samples, trains a 4-kernel conv net with Adam, prints one fixed test
    sample's prediction before and after training, and runs a final
    test-set evaluation.
    """
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()

    # Flat 784-vectors become 28x28 images for the convolutional layers.
    x_train = np.array([np.reshape(sample, (28, 28)) for sample in x_train])
    x_val = np.array([np.reshape(sample, (28, 28)) for sample in x_val])
    x_test = np.array([np.reshape(sample, (28, 28)) for sample in x_test])

    # Reduced validation set keeps per-epoch evaluation fast.
    x_val, y_val = x_val[:500], y_val[:500]

    # Hyper-parameters for this example run.
    learning_rate = 5e-3
    batch_size = 50
    max_epochs = 7
    kernel_number = 4
    kernel_size = 5
    padding = 1
    stride = 1
    max_pooling = True

    # Spatial size of each feature map after convolution (and optional pooling).
    feature_dim = math.floor((28 - kernel_size + 2 * padding) / stride + 1)
    if max_pooling:
        feature_dim = math.floor(feature_dim / 2)

    conv_net = ConvolutionalNet(
        input_dim=(28, 28),
        kernel_number=kernel_number,
        kernel_size=kernel_size,
        fc_input_dim=kernel_number * feature_dim ** 2,
        output_dim=10,
        hidden_dims=[128],
        activation_functions=[relu],
        optimizer=Adam(learning_rate=learning_rate),
        initializer=HeInitializer(),
    )
    print(conv_net)

    # Show the prediction for one fixed test sample before training.
    sample_index = 1
    x, y = x_test[sample_index, :], y_test[sample_index, :]
    y_hat = conv_net(x)
    print(f'y_real:\n{y}')
    print('Before learning')
    print(f'\ny_hat:\n{y_hat}')

    train_model(conv_net,
                x_train,
                y_train,
                batch_size=batch_size,
                max_epochs=max_epochs,
                x_val=x_val,
                y_val=y_val,
                plot=True)

    # Same sample after training, then the full test-set evaluation.
    y_hat = conv_net(x)
    print(f'y_real:\n{y}')
    print('After learning')
    print(f'\ny_hat:\n{y_hat}')

    evaluate_model(conv_net, x_test, y_test)