Esempio n. 1
0
# Driver script: load the data for the configured task, pick a feed-forward
# model, then evaluate that model against the red-wine quality CSV.
# The short prints ("a", "b", "c1", ...) appear to be progress markers that
# show how far the script got.
print("a")
# Assuming `np` is NumPy: suppress=True prints small floats in fixed-point
# notation instead of scientific notation.
np.set_printoptions(suppress=True)
print("b")

# `load_data` is defined elsewhere; its two results are handed to the model
# search below.
xy, y_score = load_data(config["task"])
print("c1")

# Normalization is currently disabled, so `best_score` is never defined.
#y_score, best_score = normalize(y_score)
print("c2")

# NOTE(review): the meaning of the positional `True` flag is not visible
# here - confirm against best_feedforward_model.
model, i, modelhistory = best_feedforward_model(xy, y_score, True)
print("d")

print("We take:")
print(i)

# Only the heading is printed: the value print below is commented out because
# `best_score` would come from the disabled normalization step.
print("Best score for normalization scale:")
#print(best_score)

# NOTE(review): the positional arguments are presumably path, delimiter and
# column settings - confirm against the BaseData constructor.
WineData = BaseData('data/winequality-red.csv',
                    ';',
                    11,
                    10,
                    config["nr_base_columns"],
                    rifs=True)
WineData.load()

# The return value is discarded; the wrapper is called for its side effects.
_ = evaluation_wrapper(config["task"], model, 'data/winequality-red.csv', True,
                       WineData)
Esempio n. 2
0
def evaluation_wrapper(task, model, _path, _print, eval_dataset):
    """Evaluate ``model`` on ``eval_dataset`` and write a text report.

    The candidate columns are scored through ``evaluation``; the columns of
    ``eval_dataset.add_columns`` whose score exceeds
    ``config["nfs_output_threshold"]`` are appended to the base features, and
    a ``linear_model.LinearRegression`` is fitted and scored on both the base
    and the augmented feature matrix. A summary is appended to
    ``output/<unix-seconds>.txt``.

    Args:
        task: Either ``Task.regression`` or ``Task.multivariate_time_series``.
        model: Model object forwarded unchanged to ``evaluation``.
        _path: Path of the evaluated dataset. It is written to the report and,
            for the time-series task, used to load the dataset.
        _print: Unused; kept so existing callers keep working.
        eval_dataset: Loaded dataset object providing ``base_dataset``,
            ``base_xy``, ``base_x``, ``base_y`` and ``add_columns``.

    Returns:
        The tuple ``(_path, evaluations, regression_score_base_data,
        regression_score_augmented)``.

    Raises:
        ValueError: If ``task`` is not one of the two supported tasks.
    """
    if task == Task.multivariate_time_series:
        birth_deaths = BaseData(_path,
                                ';',
                                3,
                                1,
                                base_size=config["nr_base_columns"])
        birth_deaths.load()

        # NOTE(review): `add_eval_columns` is never assigned on this path, so
        # the call below raises UnboundLocalError. The intended candidate
        # columns for the time-series task are not visible here - confirm and
        # build them before this call.
        evaluations = evaluation(birth_deaths.base_xy, add_eval_columns,
                                 eval_dataset.base_dataset[:, 11], model)

    elif task == Task.regression:
        if config["budget_join"]:
            # Start from a fixed set of dataset columns (column 11 is the
            # same column that is passed to `evaluation` below)...
            add_eval_columns = [
                eval_dataset.base_dataset[:, col] for col in (0, 1, 2, 3, 11)
            ]
            # ...then pad with standard-normal noise columns until the
            # configured column budget is reached.
            column_shape = eval_dataset.base_dataset[:, 0].shape
            n_noise = config["nr_add_columns_budget"] - len(add_eval_columns)
            for _ in np.arange(n_noise):
                add_eval_columns.append(np.random.normal(0, 1, column_shape))
        else:
            add_eval_columns = [eval_dataset.base_dataset[:, 0]]

        evaluations = evaluation(
            eval_dataset.base_xy,
            add_eval_columns,
            eval_dataset.base_dataset[:, 11],
            model,
        )

    else:
        # Fail early with a clear message instead of crashing later on the
        # undefined `evaluations`.
        raise ValueError("Unsupported task: " + str(task))

    # Baseline: linear regression on the base features only.
    baseline = linear_model.LinearRegression()
    baseline.fit(eval_dataset.base_x, eval_dataset.base_y)
    regression_score_base_data = baseline.score(eval_dataset.base_x,
                                                eval_dataset.base_y)

    # Positions whose first evaluation output exceeds the threshold.
    indices = np.where(evaluations[0] > config["nfs_output_threshold"])

    # One row per candidate column (first element of each `add_columns`
    # entry), restricted to the selected positions.
    add_columns = np.array([c[0] for c in eval_dataset.add_columns
                            ])[indices[0], :]

    # Append the selected columns to the right of the base feature matrix.
    augmented_x = np.concatenate((eval_dataset.base_x, add_columns.T), axis=1)

    augmented = linear_model.LinearRegression()
    augmented.fit(augmented_x, eval_dataset.base_y)
    regression_score_augmented = augmented.score(augmented_x,
                                                 eval_dataset.base_y)

    report_fields = [
        ("Path", _path),
        ("Base DataSet size", eval_dataset.base_x.shape),
        ("Score for Base DataSet", regression_score_base_data),
        ("Columns presented to NFS", config["nr_add_columns_budget"]),
        ("Columns chosen by NFS by threshold", len(indices[0])),
        ("Augmented DataSet size", augmented_x.shape),
        ("Score for Augmented DataSet", regression_score_augmented),
        ("NFS Time", evaluations[1]),
        ("Pearson Time", evaluations[3]),
        ("Config", config),
    ]
    output = "\n".join(label + ": " + str(value)
                       for label, value in report_fields)

    # One report file per wall-clock second; append so that runs landing in
    # the same second do not overwrite each other.
    report_path = "output/" + str(int(time.time())) + ".txt"
    with open(report_path, "a") as report_file:
        report_file.write(output)

    return _path, evaluations, regression_score_base_data, regression_score_augmented