# Module-level imports assumed by this listing (the original extract begins at
# the function definitions); the code targets Keras 2.x on the TensorFlow 1.x
# backend (e.g. tf.set_random_seed, optimizers constructed with lr=/decay=).
import argparse
import itertools
import json
import os
import random as rn
from timeit import default_timer as timer

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import yaml
from keras import backend as K
from keras import optimizers
from keras.callbacks import ModelCheckpoint
from keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from keras.models import Model
from keras.utils import to_categorical
from sklearn.metrics import confusion_matrix


def snn_dropout_mnist(new_dir=os.getcwd()):
    """
    Main function
    """

    os.chdir(new_dir)

    # code repository sub-package imports
    from artificial_neural_networks.code.utils.download_mnist import download_mnist
    from artificial_neural_networks.code.utils.generic_utils import none_or_int, none_or_float
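
    # A minimal sketch of these two helpers (assumed from their usage below;
    # the actual definitions live in generic_utils):
    #
    #     def none_or_int(value):
    #         return None if value == 'None' else int(value)
    #
    #     def none_or_float(value):
    #         return None if value == 'None' else float(value)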

    # SETTINGS
    parser = argparse.ArgumentParser()

    # General settings
    parser.add_argument('--verbose', type=int, default=1)
    parser.add_argument('--reproducible', type=bool, default=True)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--plot', type=bool, default=False)

    # Settings for preprocessing and hyperparameters
    parser.add_argument('--scaling_factor', type=float, default=(1 / 255))
    parser.add_argument('--translation', type=float, default=0)
    parser.add_argument('--same_size', type=bool, default=True)
    parser.add_argument('--n_layers', type=int, default=20)
    parser.add_argument('--layer_size', type=int, default=512)
    parser.add_argument('--explicit_layer_sizes',
                        nargs='*',
                        type=int,
                        default=[512, 512])
    parser.add_argument('--n_epochs', type=int, default=2)
    parser.add_argument('--batch_size', type=none_or_int, default=128)
    parser.add_argument('--optimizer', type=str, default='RMSprop')
    parser.add_argument('--learning_rate', type=float, default=1e-3)
    parser.add_argument('--epsilon', type=none_or_float, default=None)
    parser.add_argument('--dropout_rate_input', type=float, default=0.1)
    parser.add_argument('--dropout_rate_hidden', type=float, default=0.2)

    # Settings for saving the model
    parser.add_argument('--save_architecture', type=bool, default=True)
    parser.add_argument('--save_last_weights', type=bool, default=True)
    parser.add_argument('--save_last_model', type=bool, default=True)
    parser.add_argument('--save_models', type=bool, default=False)
    parser.add_argument('--save_weights_only', type=bool, default=False)
    parser.add_argument('--save_best', type=bool, default=False)

    args = parser.parse_args()

    if (args.verbose > 0):
        print(args)

    # For reproducibility
    if (args.reproducible):
        os.environ['PYTHONHASHSEED'] = '0'
        np.random.seed(args.seed)
        rn.seed(args.seed)
        tf.set_random_seed(args.seed)
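        # Note: fixed seeds alone do not guarantee determinism on a GPU; the
        # CNN example below additionally pins a TF session via
        # tf.Session / K.set_session, as the Keras FAQ of this era recommends.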

    # %%
    # Load the MNIST dataset

    mnist_path = download_mnist()
    mnist = np.load(mnist_path)
    train_x = mnist['x_train'].astype(np.float32)
    train_y = mnist['y_train'].astype(np.int32)
    test_x = mnist['x_test'].astype(np.float32)
    test_y = mnist['y_test'].astype(np.int32)
    mnist.close()

    # %%
    # PREPROCESSING STEP

    scaling_factor = args.scaling_factor
    translation = args.translation

    img_width = train_x.shape[1]
    img_height = train_x.shape[2]

    n_train = train_x.shape[0]  # number of training examples/samples
    n_test = test_x.shape[0]  # number of test examples/samples

    n_in = img_width * img_height  # number of features / dimensions
    n_out = np.unique(train_y).shape[0]  # number of classes/labels

    # Reshape training and test sets
    train_x = train_x.reshape(n_train, n_in)
    test_x = test_x.reshape(n_test, n_in)

    # Apply preprocessing
    train_x = scaling_factor * (train_x - translation)
    test_x = scaling_factor * (test_x - translation)

    one_hot = False  # training is equivalent either way; only the loss name changes below

    # Convert class vectors to binary class matrices (i.e. One hot encoding)
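    # e.g. to_categorical([1, 3], 4) -> [[0., 1., 0., 0.], [0., 0., 0., 1.]]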
    if (one_hot):
        train_y = to_categorical(train_y, n_out)
        test_y = to_categorical(test_y, n_out)

    # %%
    # Model hyperparameters

    N = []
    N.append(n_in)  # input layer
    if (args.same_size):
        n_layers = args.n_layers
        for i in range(n_layers):
            N.append(args.layer_size)  # hidden layer i
    else:
        n_layers = len(args.explicit_layer_sizes)
        for i in range(n_layers):
            N.append(args.explicit_layer_sizes[i])  # hidden layer i
    N.append(n_out)  # output layer
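    # e.g. with the defaults above: N = [784] + [512] * 20 + [10]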

    # ANN Architecture
    L = len(N) - 1

    x = Input(shape=(n_in, ))  # input layer
    h = Dropout(rate=args.dropout_rate_input)(x)
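    # Keras's Dropout uses "inverted dropout": kept activations are scaled by
    # 1 / (1 - rate) at training time, so no rescaling is needed at test time.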

    for i in range(1, L):
        h = Dense(units=N[i], activation='relu')(h)  # hidden layer i
        h = Dropout(rate=args.dropout_rate_hidden)(h)
    out = Dense(units=n_out, activation='softmax')(h)  # output layer

    model = Model(inputs=x, outputs=out)

    if (args.verbose > 0):
        model.summary()

    if (one_hot):
        loss_function = 'categorical_crossentropy'
    else:
        loss_function = 'sparse_categorical_crossentropy'

    metrics = ['accuracy']

    lr = args.learning_rate
    epsilon = args.epsilon
    optimizer_selection = {
        'Adadelta':
        optimizers.Adadelta(lr=lr, rho=0.95, epsilon=epsilon, decay=0.0),
        'Adagrad':
        optimizers.Adagrad(lr=lr, epsilon=epsilon, decay=0.0),
        'Adam':
        optimizers.Adam(lr=lr,
                        beta_1=0.9,
                        beta_2=0.999,
                        epsilon=epsilon,
                        decay=0.0,
                        amsgrad=False),
        'Adamax':
        optimizers.Adamax(lr=lr,
                          beta_1=0.9,
                          beta_2=0.999,
                          epsilon=epsilon,
                          decay=0.0),
        'Nadam':
        optimizers.Nadam(lr=lr,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=epsilon,
                         schedule_decay=0.004),
        'RMSprop':
        optimizers.RMSprop(lr=lr, rho=0.9, epsilon=epsilon, decay=0.0),
        'SGD':
        optimizers.SGD(lr=lr, momentum=0.0, decay=0.0, nesterov=False)
    }
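    # Every candidate optimizer is instantiated eagerly here; only the one
    # selected via --optimizer is compiled into the model.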

    optimizer = optimizer_selection[args.optimizer]

    model.compile(optimizer=optimizer, loss=loss_function, metrics=metrics)

    # %%
    # Save trained models for every epoch

    models_path = r'artificial_neural_networks/trained_models/'
    model_name = 'mnist_snn_dropout'
    weights_path = models_path + model_name + '_weights'
    model_path = models_path + model_name + '_model'
    file_suffix = '_{epoch:04d}_{val_acc:.4f}_{val_loss:.4f}'
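    # e.g. file_suffix.format(epoch=12, val_acc=0.9912, val_loss=0.0270)
    # yields '_0012_0.9912_0.0270' (illustrative values).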

    if (args.save_weights_only):
        file_path = weights_path
    else:
        file_path = model_path

    file_path += file_suffix

    # monitor='val_loss'
    monitor = 'val_acc'

    if (args.save_models):
        checkpoint = ModelCheckpoint(file_path + '.h5',
                                     monitor=monitor,
                                     verbose=args.verbose,
                                     save_best_only=args.save_best,
                                     mode='auto',
                                     save_weights_only=args.save_weights_only)
        callbacks = [checkpoint]
    else:
        callbacks = []

    # %%
    # TRAINING PHASE

    model_history = model.fit(x=train_x,
                              y=train_y,
                              validation_data=(test_x, test_y),
                              batch_size=args.batch_size,
                              epochs=args.n_epochs,
                              verbose=args.verbose,
                              callbacks=callbacks)

    # %%
    # TESTING PHASE

    train_y_pred = np.argmax(model.predict(train_x), axis=1)
    test_y_pred = np.argmax(model.predict(test_x), axis=1)

    train_score = model.evaluate(x=train_x, y=train_y, verbose=args.verbose)
    train_dict = {'loss': train_score[0], 'acc': train_score[1]}

    test_score = model.evaluate(x=test_x, y=test_y, verbose=args.verbose)
    test_dict = {'val_loss': test_score[0], 'val_acc': test_score[1]}

    if (args.verbose > 0):
        print('Train loss:', train_dict['loss'])
        print('Train accuracy:', train_dict['acc'])

        print('Test loss:', test_dict['val_loss'])
        print('Test accuracy:', test_dict['val_acc'])

    # %%
    # Data Visualization

    def plot_confusion_matrix(cm,
                              classes,
                              title='Confusion matrix',
                              cmap=plt.cm.Blues):

        plt.imshow(cm, interpolation='nearest', cmap=cmap)
        plt.title(title)
        plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes)
        plt.yticks(tick_marks, classes)

        fmt = 'd'
        thresh = cm.max() / 2.
        for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
            plt.text(j,
                     i,
                     format(cm[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")

        plt.ylabel('Actual label')
        plt.xlabel('Predicted label')
        plt.tight_layout()
        plt.show()

    if (args.plot):
        train_cm = confusion_matrix(train_y, train_y_pred)
        test_cm = confusion_matrix(test_y, test_y_pred)

        classes = list(range(n_out))

        plot_confusion_matrix(train_cm,
                              classes=classes,
                              title='Confusion matrix for training set')
        plot_confusion_matrix(test_cm,
                              classes=classes,
                              title='Confusion matrix for test set')

    # %%
    # Save the architecture and the lastly trained model

    architecture_path = models_path + model_name + '_architecture'

    last_suffix = file_suffix.format(epoch=args.n_epochs,
                                     val_acc=test_dict['val_acc'],
                                     val_loss=test_dict['val_loss'])

    if (args.save_architecture):
        # Save only the architecture (as a JSON file)
        json_string = model.to_json()
        json.dump(json.loads(json_string),
                  open(architecture_path + '.json', "w"))

        # Save only the architecture (as a YAML file)
        yaml_string = model.to_yaml()
        yaml.dump(yaml.load(yaml_string), open(architecture_path + '.yml',
                                               "w"))

    # Save only the weights (as an HDF5 file)
    if (args.save_last_weights):
        model.save_weights(weights_path + last_suffix + '.h5')

    # Save the whole model (as an HDF5 file)
    if (args.save_last_model):
        model.save(model_path + last_suffix + '.h5')

    return model


def cnn_dense_mnist(new_dir=os.getcwd()):
    """
    Main function
    """
    # %%
    # IMPORTS

    os.chdir(new_dir)

    # code repository sub-package imports
    from artificial_neural_networks.code.utils.download_mnist import download_mnist
    from artificial_neural_networks.code.utils.generic_utils import none_or_int, none_or_float, \
        save_classif_model
    from artificial_neural_networks.code.utils.vis_utils import plot_confusion_matrix, epoch_plot

    # %%
    # SETTINGS
    parser = argparse.ArgumentParser()

    # General settings
    parser.add_argument('--verbose', type=int, default=1)
    parser.add_argument('--reproducible', type=bool, default=True)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--time_training', type=bool, default=True)
    parser.add_argument('--plot', type=bool, default=False)

    # Settings for preprocessing and hyperparameters
    parser.add_argument('--scaling_factor', type=float, default=(1 / 255))
    parser.add_argument('--translation', type=float, default=0)
    parser.add_argument('--same_size', type=bool, default=True)
    parser.add_argument('--n_layers', type=int, default=2)
    parser.add_argument('--layer_size', type=int, default=128)
    parser.add_argument('--explicit_layer_sizes',
                        nargs='*',
                        type=int,
                        default=[512, 512])
    parser.add_argument('--n_epochs', type=int, default=12)
    parser.add_argument('--batch_size', type=none_or_int, default=None)
    parser.add_argument('--optimizer', type=str, default='Adadelta')
    parser.add_argument('--learning_rate', type=float, default=1e0)
    parser.add_argument('--epsilon', type=none_or_float, default=None)

    # Settings for saving the model
    parser.add_argument('--save_architecture', type=bool, default=True)
    parser.add_argument('--save_last_weights', type=bool, default=True)
    parser.add_argument('--save_last_model', type=bool, default=True)
    parser.add_argument('--save_models', type=bool, default=False)
    parser.add_argument('--save_weights_only', type=bool, default=False)
    parser.add_argument('--save_best', type=bool, default=True)

    args = parser.parse_args()

    if args.verbose > 0:
        print(args)

    # For reproducibility
    if args.reproducible:
        os.environ['PYTHONHASHSEED'] = '0'
        np.random.seed(args.seed)
        rn.seed(args.seed)
        tf.set_random_seed(args.seed)
        sess = tf.Session(graph=tf.get_default_graph())
        K.set_session(sess)
        # print(hash("keras"))

    # %%
    # Load the MNIST dataset

    mnist_path = download_mnist()
    mnist = np.load(mnist_path)
    train_x = mnist['x_train'].astype(np.float32)
    train_y = mnist['y_train'].astype(np.int32)
    test_x = mnist['x_test'].astype(np.float32)
    test_y = mnist['y_test'].astype(np.int32)
    mnist.close()

    # %%
    # PREPROCESSING STEP

    scaling_factor = args.scaling_factor
    translation = args.translation

    img_width = train_x.shape[1]
    img_height = train_x.shape[2]

    n_train = train_x.shape[0]  # number of training examples/samples
    n_test = test_x.shape[0]  # number of test examples/samples

    n_in = img_width * img_height  # number of features / dimensions
    n_out = np.unique(train_y).shape[0]  # number of classes/labels

    # Reshape training and test sets
    train_x = train_x.reshape(n_train, img_width, img_height, 1)
    test_x = test_x.reshape(n_test, img_width, img_height, 1)

    # Apply preprocessing
    train_x = scaling_factor * (train_x - translation)
    test_x = scaling_factor * (test_x - translation)

    one_hot = False  # training is equivalent either way; only the loss name changes below

    # Convert class vectors to binary class matrices (i.e. One hot encoding)
    if one_hot:
        train_y = to_categorical(train_y, n_out)
        test_y = to_categorical(test_y, n_out)

    # %%
    # Model hyperparameters and ANN Architecture

    N = []
    N.append(n_in)  # input layer
    if args.same_size:
        n_layers = args.n_layers
        for i in range(n_layers):
            N.append(args.layer_size)  # hidden layer i
    else:
        n_layers = len(args.explicit_layer_sizes)
        for i in range(n_layers):
            N.append(args.explicit_layer_sizes[i])  # hidden layer i
    N.append(n_out)  # output layer
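    # e.g. with the defaults above: N = [784, 128, 128, 10]; N[0] is only a
    # placeholder here, since the first Dense layer receives the Flatten output.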

    L = len(N) - 1

    x = Input(shape=(img_width, img_height, 1))  # input layer
    h = x

    h = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(h)
    h = MaxPooling2D(pool_size=(2, 2))(h)

    h = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(h)
    h = MaxPooling2D(pool_size=(2, 2))(h)

    h = Flatten()(h)
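    # Shape trace for a 28x28x1 MNIST input: conv -> 26x26x32, pool -> 13x13x32,
    # conv -> 11x11x64, pool -> 5x5x64, flatten -> 1600 features.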

    for i in range(1, L):
        h = Dense(units=N[i], activation='relu')(h)  # hidden layer i

    out = Dense(units=n_out, activation='softmax')(h)  # output layer

    model = Model(inputs=x, outputs=out)

    if args.verbose > 0:
        model.summary()

    if one_hot:
        loss_function = 'categorical_crossentropy'
    else:
        loss_function = 'sparse_categorical_crossentropy'

    metrics = ['accuracy']

    lr = args.learning_rate
    epsilon = args.epsilon
    optimizer_selection = {
        'Adadelta':
        optimizers.Adadelta(lr=lr, rho=0.95, epsilon=epsilon, decay=0.0),
        'Adagrad':
        optimizers.Adagrad(lr=lr, epsilon=epsilon, decay=0.0),
        'Adam':
        optimizers.Adam(lr=lr,
                        beta_1=0.9,
                        beta_2=0.999,
                        epsilon=epsilon,
                        decay=0.0,
                        amsgrad=False),
        'Adamax':
        optimizers.Adamax(lr=lr,
                          beta_1=0.9,
                          beta_2=0.999,
                          epsilon=epsilon,
                          decay=0.0),
        'Nadam':
        optimizers.Nadam(lr=lr,
                         beta_1=0.9,
                         beta_2=0.999,
                         epsilon=epsilon,
                         schedule_decay=0.004),
        'RMSprop':
        optimizers.RMSprop(lr=lr, rho=0.9, epsilon=epsilon, decay=0.0),
        'SGD':
        optimizers.SGD(lr=lr, momentum=0.0, decay=0.0, nesterov=False)
    }

    optimizer = optimizer_selection[args.optimizer]

    model.compile(optimizer=optimizer, loss=loss_function, metrics=metrics)

    # %%
    # Save trained models for every epoch

    models_path = r'artificial_neural_networks/trained_models/'
    model_name = 'mnist_cnn_dense'
    weights_path = models_path + model_name + '_weights'
    model_path = models_path + model_name + '_model'
    file_suffix = '_{epoch:04d}_{val_acc:.4f}_{val_loss:.4f}'

    if args.save_weights_only:
        file_path = weights_path
    else:
        file_path = model_path

    file_path += file_suffix

    # monitor = 'val_loss'
    monitor = 'val_acc'

    if args.save_models:
        checkpoint = ModelCheckpoint(file_path + '.h5',
                                     monitor=monitor,
                                     verbose=args.verbose,
                                     save_best_only=args.save_best,
                                     mode='auto',
                                     save_weights_only=args.save_weights_only)
        callbacks = [checkpoint]
    else:
        callbacks = []

    # %%
    # TRAINING PHASE

    if args.time_training:
        start = timer()

    model_history = model.fit(x=train_x,
                              y=train_y,
                              validation_data=(test_x, test_y),
                              batch_size=args.batch_size,
                              epochs=args.n_epochs,
                              verbose=args.verbose,
                              callbacks=callbacks)

    if args.time_training:
        end = timer()
        duration = end - start
        print('Total time for training (in seconds):')
        print(duration)

    # %%
    # TESTING PHASE

    train_y_pred = np.argmax(model.predict(train_x), axis=1)
    test_y_pred = np.argmax(model.predict(test_x), axis=1)

    train_score = model.evaluate(x=train_x, y=train_y, verbose=args.verbose)
    train_dict = {'loss': train_score[0], 'acc': train_score[1]}

    test_score = model.evaluate(x=test_x, y=test_y, verbose=args.verbose)
    test_dict = {'val_loss': test_score[0], 'val_acc': test_score[1]}

    if args.verbose > 0:
        print('Train loss:', train_dict['loss'])
        print('Train accuracy:', train_dict['acc'])

        print('Test loss:', test_dict['val_loss'])
        print('Test accuracy:', test_dict['val_acc'])

    # %%
    # Data Visualization

    if args.plot:

        # Confusion matrices

        classes = list(range(n_out))

        train_cm = confusion_matrix(train_y, train_y_pred)
        plot_confusion_matrix(train_cm,
                              classes=classes,
                              title='Confusion matrix for training set')

        test_cm = confusion_matrix(test_y, test_y_pred)
        plot_confusion_matrix(test_cm,
                              classes=classes,
                              title='Confusion matrix for test set')

        # Loss vs epoch

        epoch_axis = range(1, args.n_epochs + 1)

        train_loss = model_history.history['loss']
        test_loss = model_history.history['val_loss']
        epoch_plot(epoch_axis, train_loss, test_loss, 'Loss')

        # Accuracy vs epoch

        train_acc = model_history.history['acc']
        test_acc = model_history.history['val_acc']
        epoch_plot(epoch_axis, train_acc, test_acc, 'Accuracy')

    # %%
    # Save the architecture and the lastly trained model

    save_classif_model(model, models_path, model_name, weights_path,
                       model_path, file_suffix, test_dict, args)

    # %%

    return model
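

# A minimal usage sketch for either example (assumed entry point; not part of
# the original source):
#
#     if __name__ == '__main__':
#         snn_model = snn_dropout_mnist()
#         cnn_model = cnn_dense_mnist()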