def test_first_last_elements_X(self):
    """Check reference coefficients in the first and last rows of Xl and Xu.

    The data set is loaded from ``self.filename`` with ``load_data``. For
    both returned feature matrices, the first two coefficients and the last
    coefficient of the first and last rows are compared with hard-coded
    reference values, to 6 decimals.
    """
    # Reference values for Xl.
    Xl_first_row_first2c = 0.898881, 1.305756
    Xl_last_row_first2c = 1.393745, 0.1967
    Xl_first_row_lastc = -0.3353498396562418
    Xl_last_row_lastc = -0.025671111438439326
    # Reference values for Xu.
    Xu_first_row_first2c = 1.567927, 0.752978
    Xu_last_row_first2c = -0.256243, 0.481008
    Xu_first_row_lastc = 0.5988862660840178
    Xu_last_row_lastc = 0.3787646988802795

    Xl, yl, Xu = load_data(self.filename)

    # The compared values are passed positionally: NumPy 2.0 renamed the
    # ``x``/``y`` parameters to ``actual``/``desired``, so the old keyword
    # spelling is deprecated there.
    np.testing.assert_array_almost_equal(
        Xl[0, :2], Xl_first_row_first2c, decimal=6,
        err_msg="Wrong values in Xl (first row, first 2 columns).")
    np.testing.assert_array_almost_equal(
        Xl[-1, :2], Xl_last_row_first2c, decimal=6,
        err_msg="Wrong values in Xl (last row, first 2 columns).")
    # Last-column checks for Xl (these were left as a TODO).
    np.testing.assert_array_almost_equal(
        Xl[0, -1], Xl_first_row_lastc, decimal=6,
        err_msg="Wrong value in Xl (first row, last column).")
    np.testing.assert_array_almost_equal(
        Xl[-1, -1], Xl_last_row_lastc, decimal=6,
        err_msg="Wrong value in Xl (last row, last column).")

    np.testing.assert_array_almost_equal(
        Xu[0, :2], Xu_first_row_first2c, decimal=6,
        err_msg="Wrong values in Xu (first row, first 2 columns).")
    np.testing.assert_array_almost_equal(
        Xu[-1, :2], Xu_last_row_first2c, decimal=6,
        err_msg="Wrong values in Xu (last row, first 2 columns).")
    # Last-column checks for Xu (these were left as a TODO).
    np.testing.assert_array_almost_equal(
        Xu[0, -1], Xu_first_row_lastc, decimal=6,
        err_msg="Wrong value in Xu (first row, last column).")
    np.testing.assert_array_almost_equal(
        Xu[-1, -1], Xu_last_row_lastc, decimal=6,
        err_msg="Wrong value in Xu (last row, last column).")
def test_first_last_elements_y(self):
    """Check the first and last five values of ``yl`` against references.

    The data set is loaded from ``self.filename`` with ``load_data``; only
    the second returned value (``yl``) is inspected.
    """
    y_first5r = np.array([1985, 2005, 1998, 1973, 1957])
    y_last5r = np.array([2006, 1960, 1976, 1988, 2003])

    Xl, yl, Xu = load_data(self.filename)

    # The compared values are passed positionally: NumPy 2.0 renamed the
    # ``x``/``y`` parameters to ``actual``/``desired``, so the old keyword
    # spelling is deprecated there.
    np.testing.assert_array_equal(
        yl[:5], y_first5r,
        err_msg="Wrong values in yl(first 5 elements).")
    # The reference for the last five values was defined but never checked.
    np.testing.assert_array_equal(
        yl[-5:], y_last5r,
        err_msg="Wrong values in yl (last 5 elements).")
def learn_best_predictor_and_predict_test_data(filename):
    """Fit a predictor on labeled data and save predictions for unlabeled data.

    The labeled examples loaded from ``filename`` are passed through
    ``randomize_data``; the first 500 resulting examples are used to fit the
    model obtained from ``learn_all_with_Mp`` and the remaining ones to
    compute a mean squared error, which is printed. Predictions for the
    unlabeled examples are saved to ``io/test_prediction_results.npy``.

    Parameters
    ----------
    filename
        Path handed to ``load_data``.
    """
    n_fit = 500

    features, targets, unlabeled_features = load_data(filename)
    mixed_features, mixed_targets = randomize_data(features, targets)

    # Leading part is used for fitting, the rest is held out.
    fit_features = mixed_features[:n_fit, :]
    fit_targets = mixed_targets[:n_fit]
    heldout_features = mixed_features[n_fit:, :]
    heldout_targets = mixed_targets[n_fit:]

    model = learn_all_with_Mp(fit_features, fit_targets)
    model.fit(fit_features, fit_targets)
    fit_predictions = model.predict(fit_features)
    heldout_predictions = model.predict(heldout_features)

    # Record the MSE obtained on the fitting part and on the held-out part.
    train_errors = [mean_squared_error(fit_targets, fit_predictions)]
    valid2_errors = [mean_squared_error(heldout_targets, heldout_predictions)]
    print("la performance du reste des données étiquetées = ", valid2_errors)

    unlabeled_predictions = model.predict(unlabeled_features)
    np.save("io/test_prediction_results.npy", unlabeled_predictions)
def test_data_shape(self):
    """Check type, number of dimensions and shape of the loaded arrays.

    ``load_data(self.filename)`` must return two 2-D arrays (``Xl``, ``Xu``)
    and one 1-D array (``yl``) with the expected sizes.
    """
    # Reference values
    n_lab_ex_expected = 4578
    data_dim_expected = 90
    n_unlab_ex_expected = 2289

    Xl, yl, Xu = load_data(self.filename)
    loaded = (Xl, yl, Xu)

    # Type checks
    for arr in loaded:
        self.assertIsInstance(arr, np.ndarray)

    # Number-of-dimensions checks
    for arr, ndim_expected in zip(loaded, (2, 1, 2)):
        self.assertEqual(arr.ndim, ndim_expected)

    # Shape checks
    shapes_expected = (
        (n_lab_ex_expected, data_dim_expected),
        n_lab_ex_expected,
        (n_unlab_ex_expected, data_dim_expected),
    )
    for arr, shape_expected in zip(loaded, shapes_expected):
        np.testing.assert_array_equal(arr.shape, shape_expected)
"""
Build the histogram of the years of the songs from the training set and
export the figure to the image file plots/hist_year.png
"""
import matplotlib.pyplot as plt

from algorithms.data_utils import load_data

# Import data.
# A forward slash is used as separator: it works on every platform and avoids
# the invalid "\Y" escape sequence of the former Windows-style path.
X_labeled, y_labeled, X_unlabeled = load_data('io/YearPredictionMSD_100.npz')

# Visualization: histogram of the labels, saved as an image file.
plt.figure("hist")
plt.title("The years present in the train data")
plt.xlabel("Years")
plt.ylabel("number of music")
plt.hist(y_labeled)
plt.savefig('plots/hist_year.png')
"""
Load training data and print dimensions as well as a few coefficients in the
first and last places and at random locations.
"""
import numpy as np

from algorithms.data_utils import load_data

# A forward slash is used as separator: it works on every platform and avoids
# the invalid "\Y" escape sequence of the former Windows-style path.
YearPredictionMSD_100 = load_data("io/YearPredictionMSD_100.npz")

print("Keys of dict data are : {}".format(list(YearPredictionMSD_100)))

# General description of every loaded array.
for array in YearPredictionMSD_100:
    print("Features : {}".format(array))
    print("Type : {}".format(array.dtype))
    print("number of dimensions : {}".format(array.ndim))
    print("Shape : {}".format(array.shape))
    print("\n")

# The element at index 1 is reported as y_labeled.
print("For y_labeled :")
print("first five values : {}".format(YearPredictionMSD_100[1][:5]))
print("last five values : {}".format(YearPredictionMSD_100[1][-5:]))

# Positions of the two feature matrices in the loaded data, with their names.
X_data = [0, 2]
X_data_names = ['X_labeled', 'X_unlabeled']
for i, name in enumerate(X_data_names):
    features = YearPredictionMSD_100[X_data[i]]
    print("For feature : {}".format(name))
    print("2 first coefficients of the first line : {} ".format(features[0][:2]))
    print("2 first coefficients of the last line : {} ".format(features[-1][:2]))
    print("last coefficient of the first line : {} ".format(features[0][-1]))