# ---- Example 1 ----
    # *************** Load Data ************** #
    # Load several production-step data files and a set of kernel/Laplacian
    # matrices used for semantic vs. standard feature selection.
    mm = ModelManager()
    file_list = [BASE_PATH + "foerdern.txt", BASE_PATH + "foerdern_ind.txt",
                 BASE_PATH + "testen.txt", BASE_PATH + "beladen.txt",
                 BASE_PATH + "verpacken.txt"]
    mm.load_data(file_list)
    # Kernels: semantically reduced, full, and p-value (regression) reduced.
    k_sem_reduced = mm.load_kernel_laplacian(BASE_PATH + "kernel.csv")
    k_full = mm.load_kernel_laplacian(BASE_PATH + "full_kernel.csv")
    k_reg_reduced = mm.load_kernel_laplacian(BASE_PATH + "p_value_kernel.csv")

    # Dependency graphs are loaded through the same CSV loader as the kernels.
    dependency_graph_full = mm.load_kernel_laplacian(BASE_PATH + "dependency_full.csv")
    dependency_graph_sem_reduced = mm.load_kernel_laplacian(BASE_PATH + "dependency.csv")

    # Random boolean mask selecting `num_examples_sparse` rows out of the data.
    # NOTE(review): the concatenated mask has length mm.num_examples() - 1,
    # one element short of the full data set -- confirm the off-by-one is intended.
    index_sparse = np.ones(num_examples_sparse, dtype=bool)
    index_sparse = np.concatenate((index_sparse, np.zeros(mm.num_examples() - num_examples_sparse - 1, dtype=bool)))
    np.random.shuffle(index_sparse)

    # Same construction for the larger subset of `num_examples_big` rows.
    index_big = np.ones(num_examples_big, dtype=bool)
    index_big = np.concatenate((index_big, np.zeros(mm.num_examples() - num_examples_big - 1, dtype=bool)))
    np.random.shuffle(index_big)

    # ****************** Semantic FS and Standard Regression FS ****************** #
    # Strip the ontology namespace prefix from feature names for readability.
    features = list(k_sem_reduced.columns.values)
    k_sem_reduced.columns = [f.replace("http://www.i40.com/ontology#", "") for f in features]
    print("Getting features: ", k_sem_reduced.columns.values)
    # Feature matrix restricted to the semantically reduced kernel's columns.
    X_sem = mm.get_all_features_except_response(response, index_big, k_sem_reduced)
    num_features_sem = X_sem.shape[1]
    print("Semantic reduced features: ", k_sem_reduced.columns.values)

    # Full (unreduced) feature matrix for comparison.
    X_all = mm.get_all_features_except_response(response, index_big)
# ---- Example 2 ----
__author__ = 'martin'

from learning.grakelasso import GraKeLasso, ModelManager
import numpy as np

# ---------------- Configuration ---------------- #
lambd = 0.1                   # regularization strength passed to cross_val
alpha = 1                     # GraKeLasso kernel weighting parameter
num_examples = 1000           # rows sampled into the sparse training subset
response = "TestingProduct"   # name of the response column

# *************** Load Data ************** #
mm = ModelManager()
mm.load_data(["../data/test.txt"])
kernel_lap = mm.load_kernel_laplacian("../data/laplacian.csv")
data = mm.get_data()

# Random boolean mask selecting `num_examples` rows of the data set.
# NOTE(review): the concatenated mask has length mm.num_examples() - 1,
# one element short of the full data set -- confirm the off-by-one is intended.
index_sparse = np.ones(num_examples, dtype=bool)
index_sparse = np.concatenate((index_sparse, np.zeros(mm.num_examples() - num_examples - 1, dtype=bool)))
np.random.shuffle(index_sparse)

X_sparse = mm.get_all_features_except_response(response, index_sparse)
# .loc replaces DataFrame.ix, which was deprecated in pandas 0.20 and
# removed in pandas 1.0; behavior is identical for a boolean row mask.
y_sparse = data.loc[index_sparse, response]

# Evaluate GraKeLasso with 10-fold cross-validation.
# .values replaces DataFrame.as_matrix(), removed in pandas 0.25.
klasso = GraKeLasso(kernel_lap.values, alpha)
rmse, avg_theta = klasso.cross_val(X_sparse, y_sparse, 10, 10000, lambd)
print("MSE and Coefficient Reduction ", rmse, avg_theta)