예제 #1
0
def modeler(params):
    """Fit a sequence of regression networks and record their final losses.

    An initial network is built with ``get_keras_model_reg(params['data'], 0)``
    and fitted for 5 epochs. Further networks are then built and fitted for as
    long as the recorded final-epoch losses are strictly decreasing.

    :param params: dictionary that must provide ``'data'``, ``'X_train'``,
        ``'y_train'``, ``'X_test'`` and ``'y_test'``. It is updated in place:
        ``'maximizer'`` is set to ``'loss'``, ``'es'`` to the EarlyStopping
        callback, ``'models'`` to the list of objects returned by
        ``model.fit`` and ``'losses'`` to the last recorded value of the
        monitored metric for each fit.
    :return: None; every result is written back into ``params``.
    """
    params['maximizer'] = 'loss'
    metric = params['maximizer']
    models = []
    losses = []

    # Early-stopping callback shared by every fit below.
    params['es'] = EarlyStopping(
        monitor=metric,
        mode='min',
        verbose=1,
        patience=5)

    def fit_network(index):
        # Build network number `index`, fit it and return the fit history.
        network = get_keras_model_reg(params['data'], index)
        return network.fit(
            params['X_train'],
            params['y_train'],
            epochs=5,
            validation_data=(params['X_test'], params['y_test']),
            callbacks=[params['es']])

    i = 0

    # Baseline: the first network.
    history = fit_network(i)
    models.append(history)
    losses.append(history.history[metric][-1])

    # Keep fitting networks while every recorded loss is strictly lower than
    # the one before it.
    # NOTE(review): the first pass reuses index 0, i.e. the same index as the
    # baseline network -- confirm whether that repetition is intended.
    while all(prev > cur for prev, cur in zip(losses, losses[1:])):
        history = fit_network(i)
        models.append(history)
        # Record the loss of the network that was just fitted. Reading it from
        # models[i] lagged one entry behind the newest history, so the first
        # pass compared the baseline with itself and the search always stopped
        # after a single extra fit.
        losses.append(history.history[metric][-1])
        i += 1

    params['models'] = models
    params['losses'] = losses
예제 #2
0
def regression_ann(instruction,
                   callback=False,
                   ca_threshold=None,
                   text=None,
                   dataset=None,
                   drop=None,
                   preprocess=True,
                   test_size=0.2,
                   random_state=49,
                   epochs=50,
                   generate_plots=True,
                   callback_mode='min',
                   maximizer="val_loss",
                   save_model=False,
                   save_path=os.getcwd(),
                   add_layer=None):
    '''
    Body of the regression function that is called in the neural network query
    if the data is numerical. Networks are built with get_keras_model_reg and
    fitted one after another; the one whose final `maximizer` value is the
    smallest is returned.

    :param instruction: forwarded to initial_preprocessor, which also returns
        the target column.
    :param callback: when anything other than False, the EarlyStopping
        callback is passed to every fit.
    :param ca_threshold: forwarded to initial_preprocessor.
    :param text: forwarded to initial_preprocessor; None means an empty list.
    :param dataset: passed to DataReader; when None the result of get_file()
        is used instead.
    :param drop: column label(s) dropped from the data (axis=1) before
        preprocessing; None drops nothing.
    :param preprocess: forwarded to initial_preprocessor.
    :param test_size: forwarded to initial_preprocessor.
    :param random_state: forwarded to initial_preprocessor.
    :param epochs: number of epochs requested for every fit.
    :param generate_plots: when true, generate_regression_plots is called for
        the selected network and its output is returned under "plots".
    :param callback_mode: `mode` argument of the EarlyStopping callback.
    :param maximizer: key of the fit history that EarlyStopping monitors and
        that is used to compare the networks (smallest final value wins).
    :param save_model: when truthy, save(final_model, save_model, save_path)
        is called.
    :param save_path: forwarded to save. The default is evaluated once, when
        the function is defined, not on every call.
    :param add_layer: forwarded to get_keras_model_reg; None means an empty
        dict. Format: {<object> : list of indexes}.
    :return dictionary that holds all the information for the finished model.
    '''
    # Fresh containers on every call: the former `[]` / `{}` defaults were
    # single objects shared between all calls.
    if text is None:
        text = []
    if add_layer is None:
        add_layer = {}

    dataReader = DataReader(get_file() if dataset is None else dataset)
    logger("Reading in dataset")
    data = dataReader.data_generator()

    if drop is not None:
        data.drop(drop, axis=1, inplace=True)
    data, y, target, full_pipeline = initial_preprocessor(
        data,
        instruction,
        preprocess,
        ca_threshold,
        text,
        test_size=test_size,
        random_state=random_state)
    logger("->", "Target column found: {}".format(target))

    X_train = data['train']
    X_test = data['test']

    # Target scaling: fitted on the training targets only, then applied to
    # the test targets.
    target_scaler = StandardScaler()
    y_train = target_scaler.fit_transform(np.array(y['train']).reshape(-1, 1))
    y_test = target_scaler.transform(np.array(y['test']).reshape(-1, 1))

    logger("Establishing callback function")

    models = []      # fit histories, one per trained network
    losses = []      # final value of `maximizer` for each network
    model_data = []  # the trained networks, index-aligned with `models`

    es = EarlyStopping(monitor=maximizer,
                       mode=callback_mode,
                       verbose=0,
                       patience=5)
    callback_value = [es] if callback is not False else None

    def fit(network):
        # Every network is fitted with identical settings.
        return network.fit(X_train,
                           y_train,
                           epochs=epochs,
                           validation_data=(X_test, y_test),
                           callbacks=callback_value,
                           verbose=0)

    def print_row(cells, width):
        # `counter` is defined outside this function and is read at print
        # time; it sets the indentation of the row.
        print((" " * 2 * counter) + "| " +
              ("".join(cell.ljust(width) for cell in cells)) + " |")

    def summary_cells(network, hist):
        # Layer count plus the last recorded training / validation loss.
        return [
            str(len(network.layers)),
            "| " + str(hist.history['loss'][-1]),
            "| " + str(hist.history['val_loss'][-1]),
        ]

    i = 0

    # Baseline network.
    model = get_keras_model_reg(data, i, add_layer)

    logger("Training initial model")
    history = fit(model)
    models.append(history)
    model_data.append(model)

    header = ["Initial number of layers ", "| Training Loss ", "| Test Loss "]
    col_width = max(len(word) for word in header) + 2
    print_row(header, col_width)
    print_row(summary_cells(model, history), col_width)

    losses.append(history.history[maximizer][-1])

    logger("Testing number of layers")
    header = ["Current number of layers", "| Training Loss", "| Test Loss"]
    col_width = max(len(word) for word in header) + 2
    print_row(header, col_width)

    # At least two further networks are always trained; after that the search
    # continues only while the newest loss is lower than the previous one.
    while len(losses) <= 2 or losses[-1] < losses[-2]:
        model = get_keras_model_reg(data, i, add_layer)
        history = fit(model)
        model_data.append(model)
        models.append(history)

        print_row(summary_cells(model, history), col_width)
        losses.append(history.history[maximizer][-1])
        i += 1

    best = losses.index(min(losses))
    final_model = model_data[best]
    final_hist = models[best]
    print("")
    logger('->',
           "Best number of layers found: " + str(len(final_model.layers)))
    logger('->', "Training Loss: " + str(final_hist.history['loss'][-1]))
    logger('->', "Test Loss: " + str(final_hist.history['val_loss'][-1]))

    # Plots are generated from the history of the selected network so that
    # they agree with the returned model and losses (previously the history of
    # the last trained network was used).
    plots = {}
    if generate_plots:
        init_plots, plot_names = generate_regression_plots(
            final_hist, data, y)
        for idx, plot_name in enumerate(plot_names):
            plots[str(plot_name)] = init_plots[idx]

    if save_model:
        save(final_model, save_model, save_path)

    print("")
    logger("Stored model under 'regression_ANN' key")
    clearLog()

    K.clear_session()

    return {
        'id': generate_id(),
        'model': final_model,
        "target": target,
        "num_classes": 1,
        "plots": plots,
        "preprocessor": full_pipeline,
        "interpreter": target_scaler,
        'test_data': {
            'X': X_test,
            'y': y_test
        },
        'losses': {
            'training_loss': final_hist.history['loss'],
            'val_loss': final_hist.history['val_loss']
        }
    }
예제 #3
0
def regression_ann(instruction,
                   ca_threshold=None,
                   text=None,
                   dataset=None,
                   drop=None,
                   preprocess=True,
                   test_size=0.2,
                   random_state=49,
                   epochs=50,
                   generate_plots=True,
                   callback_mode='min',
                   maximizer="val_loss",
                   save_model=True,
                   save_path=os.getcwd()):
    '''
    Reads a dataset, preprocesses it and fits regression networks built with
    get_keras_model_reg one after another for as long as the final `maximizer`
    value keeps strictly decreasing; the network with the smallest value is
    returned.

    :param instruction: forwarded to initial_preprocesser, which also returns
        the target column.
    :param ca_threshold: forwarded to initial_preprocesser.
    :param text: forwarded to initial_preprocesser.
    :param dataset: passed to DataReader.
    :param drop: column label(s) dropped from the data (axis=1) before
        preprocessing; None drops nothing.
    :param preprocess: forwarded to initial_preprocesser.
    :param test_size: accepted but not used by this function.
    :param random_state: accepted but not used by this function.
    :param epochs: number of epochs requested for every fit.
    :param generate_plots: when true, generate_regression_plots is called for
        the selected network; otherwise "plots" is an empty dict.
    :param callback_mode: `mode` argument of the EarlyStopping callback.
    :param maximizer: key of the fit history that EarlyStopping monitors and
        that is used to compare the networks (smallest final value wins).
    :param save_model: when truthy, save(final_model, save_model) is called.
    :param save_path: accepted but not forwarded to save.
    :return dictionary holding the selected model, target, plots,
        preprocessing pipeline, target scaler and loss curves.
    '''
    global currLog
    logger("reading in dataset...")

    dataReader = DataReader(dataset)
    data = dataReader.data_generator()

    if drop is not None:
        data.drop(drop, axis=1, inplace=True)

    data, y, target, full_pipeline = initial_preprocesser(
        data, instruction, preprocess, ca_threshold, text)
    logger("->", "Target Column Found: {}".format(target))

    X_train = data['train']
    X_test = data['test']

    # Target scaling: fitted on the training targets only, then applied to
    # the test targets.
    target_scaler = StandardScaler()
    y_train = target_scaler.fit_transform(np.array(y['train']).reshape(-1, 1))
    y_test = target_scaler.transform(np.array(y['test']).reshape(-1, 1))

    logger("Establishing callback function...")

    models = []      # fit histories, one per trained network
    losses = []      # final value of `maximizer` for each network
    model_data = []  # the trained networks, index-aligned with `models`

    es = EarlyStopping(monitor=maximizer,
                       mode=callback_mode,
                       verbose=0,
                       patience=5)

    def fit(network):
        # Every network is fitted with the same settings, including the
        # early-stopping callback. The loop fits used to omit it, so the
        # baseline and the later networks were compared under different
        # stopping rules.
        return network.fit(X_train,
                           y_train,
                           epochs=epochs,
                           validation_data=(X_test, y_test),
                           callbacks=[es],
                           verbose=0)

    def print_row(cells, width):
        # `counter` is defined outside this function and is read at print
        # time; it sets the indentation of the row.
        print((" " * 2 * counter) + "| " +
              ("".join(cell.ljust(width) for cell in cells)) + " |")

    def summary_cells(network, hist):
        # Layer count plus the last recorded training / validation loss.
        return [
            str(len(network.layers)),
            "| " + str(hist.history['loss'][-1]),
            "| " + str(hist.history['val_loss'][-1]),
        ]

    i = 0

    # Baseline network.
    model = get_keras_model_reg(data, i)

    logger("Training initial model...")
    history = fit(model)
    models.append(history)
    model_data.append(model)

    header = ["Initial number of layers ", "| Training Loss ", "| Test Loss "]
    col_width = max(len(word) for word in header) + 2
    print_row(header, col_width)
    print_row(summary_cells(model, history), col_width)

    losses.append(history.history[maximizer][-1])

    logger("Testing number of layers...")
    print(currLog)
    header = ["Current number of layers", "| Training Loss", "| Test Loss"]
    col_width = max(len(word) for word in header) + 2
    print_row(header, col_width)

    # Keep fitting networks while every recorded loss is strictly lower than
    # the one before it.
    while all(prev > cur for prev, cur in zip(losses, losses[1:])):
        model = get_keras_model_reg(data, i)
        history = fit(model)
        model_data.append(model)
        models.append(history)

        print_row(summary_cells(model, history), col_width)
        losses.append(history.history[maximizer][-1])
        i += 1

    best = losses.index(min(losses))
    final_model = model_data[best]
    final_hist = models[best]
    print("")
    logger('->',
           "Best number of layers found: " + str(len(final_model.layers)))
    logger('->', "Training Loss: " + str(final_hist.history['loss'][-1]))
    logger('->', "Test Loss: " + str(final_hist.history['val_loss'][-1]))

    # `plots` must exist even when plot generation is disabled, otherwise the
    # return statement below raises UnboundLocalError.
    plots = {}
    if generate_plots:
        # Plot the history of the selected network so that the plots agree
        # with the returned model and losses (previously the history of the
        # last trained network was used).
        init_plots, plot_names = generate_regression_plots(
            final_hist, data, y)
        for idx, plot_name in enumerate(plot_names):
            plots[str(plot_name)] = init_plots[idx]

    if save_model:
        # NOTE(review): save_path is not forwarded here -- confirm against the
        # signature of save().
        save(final_model, save_model)

    print("")
    logger("Stored model under 'regression_ANN' key")
    return {
        'id': generate_id(),
        'model': final_model,
        "target": target,
        "plots": plots,
        "preprocesser": full_pipeline,
        "interpreter": target_scaler,
        'losses': {
            'training_loss': final_hist.history['loss'],
            'val_loss': final_hist.history['val_loss']
        }
    }