def test_lwlr(self):
    # Run this test alone with:
    #   python -m unittest tests_regression.Tests_Regression.test_lwlr
    #
    # Cross-checks the local Locally_Weighted_Linear_Regression implementation
    # against discomll's fit_predict for several tau values: the thetas and
    # the estimations produced by both must agree within the tolerances used
    # in the assertions at the bottom of the loop.
    import locally_weighted_linear_regression as reference_lwlr
    from discomll.regression import locally_weighted_linear_regression as disco_lwlr

    x_train, y_train, x_test, _y_test = datasets.regression_data()
    train_data, test_data = datasets.regression_data_discomll()
    reference_model = reference_lwlr.Locally_Weighted_Linear_Regression()

    # The reference output is reordered by the *string* form of column 1 of
    # x_test before comparison.
    # NOTE(review): presumably this mirrors the order in which
    # result_iterator yields the discomll results - confirm.
    order = np.argsort([str(value) for value in x_test[:, 1].tolist()])

    for tau in (1, 10, 25):
        ref_thetas, ref_estimates = reference_model.fit(x_train, y_train, x_test, tau=tau)
        ref_thetas = np.array(ref_thetas)[order]
        ref_estimates = np.array(ref_estimates)[order]

        results = disco_lwlr.fit_predict(train_data, test_data, tau=tau)
        # Each result value is an (estimation, thetas) pair; the key is unused.
        pairs = [pair for _sample_id, pair in result_iterator(results)]
        disco_estimates = [estimate for estimate, _thetas in pairs]
        disco_thetas = [thetas for _estimate, thetas in pairs]

        self.assertTrue(np.allclose(ref_thetas, disco_thetas, atol=1e-8))
        self.assertTrue(np.allclose(ref_estimates, disco_estimates, atol=1e-3))
def test_lwlr(self):
    # python -m unittest tests_regression.Tests_Regression.test_lwlr
    #
    # Fits the same regression data with the local
    # Locally_Weighted_Linear_Regression class and with discomll's
    # fit_predict, for tau in 1, 10 and 25, and asserts that both produce
    # matching thetas (atol=1e-8) and matching estimations (atol=1e-3).
    import locally_weighted_linear_regression as local_module
    from discomll.regression import locally_weighted_linear_regression as distributed_module

    x_train, y_train, x_test, _unused_y_test = datasets.regression_data()
    train_data, test_data = datasets.regression_data_discomll()
    local_model = local_module.Locally_Weighted_Linear_Regression()

    # Permutation that sorts the test samples by the string representation of
    # column 1 of x_test; applied to the local results before comparing.
    # NOTE(review): assumed to match the ordering of the distributed results
    # returned by result_iterator - verify.
    keys_as_text = [str(item) for item in x_test[:, 1].tolist()]
    permutation = np.argsort(keys_as_text)

    for tau in [1, 10, 25]:
        local_fit = local_model.fit(x_train, y_train, x_test, tau=tau)
        local_thetas, local_estimation = local_fit
        local_thetas = np.array(local_thetas)[permutation]
        local_estimation = np.array(local_estimation)[permutation]

        distributed_thetas = []
        distributed_estimation = []
        job_output = distributed_module.fit_predict(train_data, test_data, tau=tau)
        for _key, (estimation, thetas) in result_iterator(job_output):
            distributed_estimation.append(estimation)
            distributed_thetas.append(thetas)

        thetas_match = np.allclose(local_thetas, distributed_thetas, atol=1e-8)
        self.assertTrue(thetas_match)
        estimation_match = np.allclose(local_estimation, distributed_estimation, atol=1e-3)
        self.assertTrue(estimation_match)
def lwlr_fit_predict(input_dict):
    """Run discomll's locally weighted linear regression on the given datasets.

    Args:
        input_dict: Mapping that must contain the keys ``"fitting_dataset"``,
            ``"training_dataset"`` and ``"tau"``; the values are forwarded
            unchanged to ``fit_predict``.

    Returns:
        A dict with the single key ``"string"`` holding whatever
        ``fit_predict`` returned (it is called with ``save_results=True``;
        presumably the location of the saved predictions - confirm against
        discomll).

    Raises:
        KeyError: If one of the required keys is missing from ``input_dict``.
    """
    from discomll.regression import locally_weighted_linear_regression as lwlr

    run_job = lwlr.fit_predict
    fitting = input_dict["fitting_dataset"]
    training = input_dict["training_dataset"]
    tau = input_dict["tau"]

    location = run_job(
        fitting_data=fitting,
        training_data=training,
        tau=tau,
        save_results=True,
    )
    return {"string": location}
def lwlr_fit_predict(input_dict):
    """Fit-and-predict wrapper around discomll locally weighted linear regression.

    Reads ``"fitting_dataset"``, ``"training_dataset"`` and ``"tau"`` from
    ``input_dict``, passes them to ``fit_predict`` with ``save_results=True``
    and wraps the returned value in a dict under the key ``"string"``.

    Args:
        input_dict: Mapping providing the three keys listed above.

    Returns:
        ``{"string": <value returned by fit_predict>}``.
    """
    from discomll.regression import locally_weighted_linear_regression as regression

    fit_predict = regression.fit_predict
    # Look the inputs up in the same order as the keyword arguments so a
    # missing key is reported for the first absent one.
    call_arguments = dict(
        fitting_data=input_dict["fitting_dataset"],
        training_data=input_dict["training_dataset"],
        tau=input_dict["tau"],
        save_results=True,
    )
    output = {"string": fit_predict(**call_arguments)}
    return output
# Example: locally weighted linear regression with discomll on chunked data.
from disco.core import result_iterator

from discomll import dataset
from discomll.regression import locally_weighted_linear_regression

# Training samples: two tags read as "chunk" data, with id_index=0,
# X_indices=[0] and y_index=1.
training_data = dataset.Data(
    data_tag=["test:regression_data1", "test:regression_data2"],
    data_type="chunk",
    id_index=0,
    X_indices=[0],
    y_index=1,
)

# Samples to predict: same column layout as the training data.
fitting_data = dataset.Data(
    data_tag=["test:regression_data_test1", "test:regression_data_test2"],
    data_type="chunk",
    id_index=0,
    X_indices=[0],
    y_index=1,
)

# Predict the fitting samples from the training samples.
results = locally_weighted_linear_regression.fit_predict(training_data, fitting_data, tau=10)

# Print every key/value pair of the results, separated by one space.
# A single parenthesised argument is used so the line is valid on both
# Python 2 and Python 3 (the former `print k, v` statement is a syntax error
# on Python 3) while printing the same text.
for k, v in result_iterator(results):
    print("%s %s" % (k, v))
# Example: locally weighted linear regression with discomll on gzipped data
# fetched over HTTP, with the results saved by the job.
from discomll import dataset
from discomll.regression import locally_weighted_linear_regression

# Training data: gzip files with "," as delimiter; column 0 is the id,
# columns 1..13 are the features and column 14 is the target.
# generate_urls=True is passed together with two URLs.
# NOTE(review): presumably the two URLs are the first and last of a generated
# range - confirm against discomll's dataset.Data.
train = dataset.Data(
    data_tag=[
        "http://ropot.ijs.si/data/fraction/train/xaaaaa.gz",
        "http://ropot.ijs.si/data/fraction/train/xaaabj.gz",
    ],
    data_type="gzip",
    generate_urls=True,
    # list(...) keeps this a concrete list on Python 3, where range() is
    # lazy; on Python 2 the value is unchanged.
    X_indices=list(range(1, 14)),
    id_index=0,
    y_index=14,
    delimiter=",",
)

# Test data: one explicitly listed gzip file with the same column layout.
test = dataset.Data(
    data_tag=[["http://ropot.ijs.si/data/fraction/test/xaaaaa.gz"]],
    data_type="gzip",
    X_indices=list(range(1, 14)),
    id_index=0,
    y_index=14,
    delimiter=",",
)

predictions = locally_weighted_linear_regression.fit_predict(
    train, test, tau=1, samples_per_job=0, save_results=True)

# Parenthesised so the line works on Python 2 and Python 3 alike; the former
# `print predictions` statement is a syntax error on Python 3.
print(predictions)
# Example script: fit/predict with discomll locally weighted linear
# regression on "chunk" data and print the results.
from disco.core import result_iterator

from discomll import dataset
from discomll.regression import locally_weighted_linear_regression

# Both datasets are declared with id_index=0, X_indices=[0] and y_index=1.
training_data = dataset.Data(
    data_tag=["test:regression_data1", "test:regression_data2"],
    data_type="chunk",
    id_index=0,
    X_indices=[0],
    y_index=1,
)
fitting_data = dataset.Data(
    data_tag=["test:regression_data_test1", "test:regression_data_test2"],
    data_type="chunk",
    id_index=0,
    X_indices=[0],
    y_index=1,
)

# Predict the fitting data from the training data with tau=10.
results = locally_weighted_linear_regression.fit_predict(training_data, fitting_data, tau=10)

# Output the results, one "key value" line per result.
# The original `print k, v` statement only parses on Python 2; formatting
# into one string and parenthesising it prints the same text on Python 2 and
# is also valid on Python 3.
for k, v in result_iterator(results):
    print("%s %s" % (k, v))
# Example script: fit/predict with discomll locally weighted linear
# regression on gzipped, comma-delimited files referenced by URL.
from discomll import dataset
from discomll.regression import locally_weighted_linear_regression

# Column layout shared by both datasets: id in column 0, features in
# columns 1..13, target in column 14.
train = dataset.Data(
    data_tag=[
        "http://ropot.ijs.si/data/fraction/train/xaaaaa.gz",
        "http://ropot.ijs.si/data/fraction/train/xaaabj.gz",
    ],
    data_type="gzip",
    # NOTE(review): with generate_urls=True the two URLs above presumably
    # delimit a range of files to generate - verify in discomll.
    generate_urls=True,
    # Materialised with list(...) so the argument is a list on Python 3 too
    # (range() already returns a list on Python 2).
    X_indices=list(range(1, 14)),
    id_index=0,
    y_index=14,
    delimiter=",",
)

test = dataset.Data(
    data_tag=[["http://ropot.ijs.si/data/fraction/test/xaaaaa.gz"]],
    data_type="gzip",
    X_indices=list(range(1, 14)),
    id_index=0,
    y_index=14,
    delimiter=",",
)

# save_results=True is passed, so what is printed below is whatever
# fit_predict returns for a saved job.
predictions = locally_weighted_linear_regression.fit_predict(
    train, test, tau=1, samples_per_job=0, save_results=True)

# `print predictions` (statement form) fails to parse on Python 3; the
# parenthesised form behaves identically on Python 2 and works on Python 3.
print(predictions)