Ejemplo n.º 1
0
from util.stats import cdf_fit
from math import log

# algorithms = {}
# Algorithms to keep in the combined table; an empty collection keeps all.
algorithms = {"DelaunayP1", "NeuralNetwork"}

# Switches selecting which parts of this script run.
BOX_PLOTS = False
MAX_ERROR_LINE = True

# NOTE(review): `Data` is not imported in the lines visible above this
# block -- confirm it is in scope before running.
if MAX_ERROR_LINE:
    test_size = 1000
    test_func_name = "oscillatory"
    # Accumulates the per-dimension tables into one combined table.
    d = None
    for dimension in (2, 20):
        file_name = f"final-{dimension}D-{test_size}-{test_func_name}-analytic.pkl"
        data = Data.load(file_name)
        # Insert a constant "Dimension" column at the position of "Train".
        train_position = data.names.index("Train")
        data.add_column([dimension] * len(data), "Dimension",
                        index=train_position)
        # Derive per-row summary statistics from the raw error lists.
        data["Abs Errors"] = ([float(abs(err)) for err in row]
                              for row in data["Errors"])
        data["Mean Abs Error"] = (sum(row) / len(row)
                                  for row in data["Abs Errors"])
        data["Min Abs Error"] = (min(row) for row in data["Abs Errors"])
        data["Max Abs Error"] = (max(row) for row in data["Abs Errors"])
        # The first table seeds the accumulator; later ones are appended.
        if d is not None:
            d += data
        else:
            d = data
    # Restrict to the selected algorithms only when a selection was given.
    if algorithms:
        d = d[d["Algorithm"] == algorithms]
    print(d)
    d.save("all-data.pkl")
    print()
Ejemplo n.º 2
0
from util.plot import Plot, multiplot
from util.stats import cdf_fit_func
import numpy as np
from util.data import Data

# d = Data.load("knn_results_[yelp-mnist].pkl")
d = Data.load("prediction_results.pkl")

# Target column order: the column at index 6 moves up to second place.
# The leading columns are taken from the ORIGINAL ordering before `names`
# is rebound; indexing the already-permuted list with [0], [6], [1] would
# pick original columns 0, 5 and 6 instead of 0, 6 and 1.
names = d.names.copy()
leading = [names[0], names[6], names[1]]
names = leading + names[2:6] + names[7:]
# NOTE(review): presumably `reorder` moves the listed columns to the front
# and keeps the rest in place -- confirm against util.data.Data.reorder.
d.reorder(leading)
d.sort()

# Cap every error at 5 (values failing `v <= 5` become 5.0), then derive
# per-row summary statistics from the capped error lists.
d["Errors"] = ([float(v) if (v <= 5) else 5.0 for v in e] for e in d["Errors"])
d["Mean Error"] = (sum(e) / len(e) for e in d["Errors"])
d["Mean Squared Error"] = (sum(v**2 for v in e) / len(e) for e in d["Errors"])
d["Error Variance"] = (float(np.var(e)) for e in d["Errors"])

# NOTE(review): `_max_display` is a private attribute; presumably it raises
# the number of rows shown when printing -- confirm.
d._max_display = 1000
print(d)
all_data = d

# Get the unique dimensions and algorithms.
dims = sorted(set(all_data["Dimension"]))
algs = sorted(set(all_data["Algorithm"]))
data_sets = sorted(set(all_data["Data"]))

# For each (data set, algorithm) pair, find the method with the lowest
# mean error.
for ds in data_sets:
    for alg in algs:
        d = all_data[all_data["Data"] == ds]
        d = d[d["Algorithm"] == alg]
        min_index = int(np.argmin(d["Mean Error"]))
        m = d[min_index, "Method"]
Ejemplo n.º 3
0
from util.approximate import Voronoi
from util.data import Data
from util.system import save, load

# Reuse previously saved results when they can be loaded; otherwise
# recompute them from the ten train/test splits.
try:
    lengths = load()
except Exception:
    # `except Exception` (not a bare `except:`) so that Ctrl-C and
    # SystemExit are not mistaken for a missing cache.
    d = Data.load("raw_data.csv")
    lengths = []
    for i in range(1, 11):
        print("i:", i)
        train = Data.load(f"{i:02d}-10_train.csv", sample=None)
        test = Data.load(f"{i:02d}-10_test.csv", sample=None)
        # Columns before "Recidivism Likelihood" are inputs; that column
        # and everything after it are outputs.
        split = train.names.index("Recidivism Likelihood")
        in_idxs = list(range(split))
        out_idxs = list(range(split, len(train.names)))
        # Get the matrices.
        # NOTE(review): assumes matrix columns line up one-to-one with
        # `train.names` -- confirm for `to_matrix()`.
        train_mat = train.to_matrix()
        test_mat = test.to_matrix()
        train_x = train_mat[:, in_idxs]
        # Query with the INPUT columns of the test set, matching the
        # columns the model is fitted on (previously sliced by `out_idxs`).
        test_x = test_mat[:, in_idxs]
        m = Voronoi()
        m.fit(train_x)
        # Record how many ids the model returns for each test point.
        lengths += [len(ids) for ids, wts in m(test_x)]
        # Save after every split so partial progress is kept.
        save(lengths)

print("len(lengths): ", len(lengths))