# *************** Load Data ************** #
mm = ModelManager()

# Feature files, one per production-process step.
file_list = [BASE_PATH + name for name in
             ("foerdern.txt", "foerdern_ind.txt", "testen.txt",
              "beladen.txt", "verpacken.txt")]
mm.load_data(file_list)

# Kernel Laplacians / dependency graphs for the different selection variants.
k_sem_reduced = mm.load_kernel_laplacian(BASE_PATH + "kernel.csv")
k_full = mm.load_kernel_laplacian(BASE_PATH + "full_kernel.csv")
k_reg_reduced = mm.load_kernel_laplacian(BASE_PATH + "p_value_kernel.csv")
dependency_graph_full = mm.load_kernel_laplacian(BASE_PATH + "dependency_full.csv")
dependency_graph_sem_reduced = mm.load_kernel_laplacian(BASE_PATH + "dependency.csv")


def _shuffled_mask(num_selected):
    """Return a randomly shuffled boolean mask with `num_selected` True entries.

    NOTE(review): total mask length is mm.num_examples() - 1, exactly as in
    the original code; looks like an off-by-one, but confirm what
    num_examples() counts (e.g. a header line) before removing the -1.
    """
    mask = np.concatenate((np.ones(num_selected, dtype=bool),
                           np.zeros(mm.num_examples() - num_selected - 1,
                                    dtype=bool)))
    np.random.shuffle(mask)
    return mask


index_sparse = _shuffled_mask(num_examples_sparse)
index_big = _shuffled_mask(num_examples_big)

# ****************** Semantic FS and Standard Regression FS ****************** #
# Strip the ontology namespace prefix so feature names are readable.
features = list(k_sem_reduced.columns.values)
k_sem_reduced.columns = [f.replace("http://www.i40.com/ontology#", "")
                         for f in features]
print("Getting features: ", k_sem_reduced.columns.values)

# Design matrices on the "big" random subset: semantically reduced vs. all features.
X_sem = mm.get_all_features_except_response(response, index_big, k_sem_reduced)
num_features_sem = X_sem.shape[1]
print("Semantic reduced features: ", k_sem_reduced.columns.values)
X_all = mm.get_all_features_except_response(response, index_big)
__author__ = 'martin'

from learning.grakelasso import GraKeLasso, ModelManager
import numpy as np

# Experiment configuration.
lambd = 0.1          # Lasso regularization strength
alpha = 1            # weight of the graph-kernel (Laplacian) term
num_examples = 1000  # size of the random training subset
response = "TestingProduct"

# *************** Load Data ************** #
mm = ModelManager()
mm.load_data(["../data/test.txt"])
kernel_lap = mm.load_kernel_laplacian("../data/laplacian.csv")
data = mm.get_data()

# Shuffled boolean mask selecting `num_examples` random rows.
# NOTE(review): mask length is mm.num_examples() - 1 (kept from the original);
# confirm what num_examples() counts before removing the -1.
index_sparse = np.ones(num_examples, dtype=bool)
index_sparse = np.concatenate(
    (index_sparse,
     np.zeros(mm.num_examples() - num_examples - 1, dtype=bool)))
np.random.shuffle(index_sparse)

X_sparse = mm.get_all_features_except_response(response, index_sparse)
# .loc replaces the deprecated/removed DataFrame.ix indexer; identical
# semantics for a boolean row mask plus column label.
y_sparse = data.loc[index_sparse, response]

# Evaluate GraKeLasso: 10-fold cross-validation, up to 10000 iterations.
# .values replaces the deprecated/removed DataFrame.as_matrix().
klasso = GraKeLasso(kernel_lap.values, alpha)
rmse, avg_theta = klasso.cross_val(X_sparse, y_sparse, 10, 10000, lambd)
print("MSE and Coefficient Reduction ", rmse, avg_theta)