Beispiel #1
0
    dependency_graph_full = mm.load_kernel_laplacian(BASE_PATH + "dependency_full.csv")
    dependency_graph_sem_reduced = mm.load_kernel_laplacian(BASE_PATH + "dependency.csv")

    index_sparse = np.ones(num_examples_sparse, dtype=bool)
    index_sparse = np.concatenate((index_sparse, np.zeros(mm.num_examples() - num_examples_sparse - 1, dtype=bool)))
    np.random.shuffle(index_sparse)

    index_big = np.ones(num_examples_big, dtype=bool)
    index_big = np.concatenate((index_big, np.zeros(mm.num_examples() - num_examples_big - 1, dtype=bool)))
    np.random.shuffle(index_big)

    # ****************** Semantic FS and Standard Regression FS ****************** #
    features = list(k_sem_reduced.columns.values)
    k_sem_reduced.columns = [f.replace("http://www.i40.com/ontology#", "") for f in features]
    print("Getting features: ", k_sem_reduced.columns.values)
    X_sem = mm.get_all_features_except_response(response, index_big, k_sem_reduced)
    num_features_sem = X_sem.shape[1]
    print("Semantic reduced features: ", k_sem_reduced.columns.values)

    X_all = mm.get_all_features_except_response(response, index_big)
    num_features_all = X_all.shape[1]
    y_all = mm.get_data().ix[index_big, response]
    mean_y_all = np.mean(y_all)
    F, p_vals = f_regression(X_all, y_all)
    index_reg_reduced = p_vals <= p_val
    X_reg = X_all.ix[:, index_reg_reduced]
    num_features_reg = X_reg.shape[1]

    print("P-value reduced features: ", k_full.columns.values[index_reg_reduced])

    for alpha in np.arange(0.1, 2.1, 0.1):
Beispiel #2
0
__author__ = 'martin'

from learning.grakelasso import GraKeLasso, ModelManager
import numpy as np

# Experiment settings.
lambd = 0.1  # forwarded as the last argument of GraKeLasso.cross_val
alpha = 1  # forwarded as the second argument of the GraKeLasso constructor
num_examples = 1000  # number of rows flagged True in the random row mask
response = "TestingProduct"  # column used as the regression target

# *************** Load Data ************** #
mm = ModelManager()
mm.load_data(["../data/test.txt"])
kernel_lap = mm.load_kernel_laplacian("../data/laplacian.csv")
data = mm.get_data()

# Build a boolean row mask with exactly `num_examples` True entries and a
# total length of `mm.num_examples() - 1`, then shuffle it in place so the
# selected rows are random.
# NOTE(review): the "- 1" presumably compensates for num_examples() counting
# one more than the number of data rows -- confirm against ModelManager.
index_sparse = np.ones(num_examples, dtype=bool)
index_sparse = np.concatenate((index_sparse, np.zeros(mm.num_examples() - num_examples - 1, dtype=bool)))
np.random.shuffle(index_sparse)

X_sparse = mm.get_all_features_except_response(response, index_sparse)
# `.loc` replaces the removed `DataFrame.ix`; a boolean row mask combined with
# a column label selects the same cells.
y_sparse = data.loc[index_sparse, response]

# Evaluate GraKeLasso
# `.values` replaces the removed `DataFrame.as_matrix()` and yields the same
# ndarray view of the loaded Laplacian.
klasso = GraKeLasso(kernel_lap.values, alpha)
# NOTE(review): 10 and 10000 are presumably the fold count and the iteration
# limit -- confirm against GraKeLasso.cross_val.
rmse, avg_theta = klasso.cross_val(X_sparse, y_sparse, 10, 10000, lambd)
print("MSE and Coefficient Reduction ", rmse, avg_theta)