def test_mean_squared_error(self):
    """Check dlpy's mean_squared_error against sklearn's reference result.

    Covers both calling conventions: column names with ``castable=`` on a
    single table, and column accessors from two tables matched on ``id1``.
    """
    try:
        from sklearn.metrics import mean_squared_error as skmse
    except ImportError:
        # Only ImportError means sklearn is absent; the original bare
        # `except:` would also have swallowed unrelated failures such as
        # KeyboardInterrupt.
        self.skipTest("sklearn is not found in the libraries")

    # Reference score computed locally by sklearn on the client-side copy.
    skmse_score1 = skmse(self.local_reg1.target, self.local_reg1.p_target)
    # Same score computed server-side by dlpy on the table.
    dlpymse_score1 = mean_squared_error('target', 'p_target',
                                        castable=self.reg_table1)
    self.assertAlmostEqual(skmse_score1, dlpymse_score1)

    # Cross-table variant: true values from table 1, predictions from
    # table 2, rows matched via the id1 key.
    skmse_score2 = skmse(self.local_reg1.target, self.local_reg2.p_target)
    dlpymse_score2 = mean_squared_error(self.reg_table1.target,
                                        self.reg_table2.p_target,
                                        id_vars='id1')
    self.assertAlmostEqual(skmse_score2, dlpymse_score2)
def test_mean_squared_error(self):
    """Compare dlpy's mean_squared_error with sklearn on a downloaded frame."""
    try:
        from sklearn.metrics import mean_squared_error as skmse
    except ImportError:
        # Catch only the import failure; a bare `except:` hides real errors.
        self.skipTest("sklearn is not found in the libraries")

    # Pull the server-side table into a local frame for the sklearn reference.
    local_reg1 = self.reg_table1.to_frame()
    skmse_score1 = skmse(local_reg1.target, local_reg1.p_target)
    # dlpy computes the same metric directly on the table.
    dlpymse_score1 = mean_squared_error(self.reg_table1, 'target', 'p_target')
    self.assertAlmostEqual(skmse_score1, dlpymse_score1)
# Candidate neighborhood sizes swept by every benchmark below.
K_VALS = [3, 5, 7, 9, 11, 13, 15]
# Wall-clock start; presumably reported later in the script — it is not
# printed anywhere in this chunk. TODO confirm.
starttime = time.time()
# Repeat each trial 10 times.
for i in range (0, 10):
    # Fresh 80/20 split every trial, so printed MSE rows average over splits.
    x_train, x_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2)
    """ Try non-optimized methods. """
    # Vanilla KNN: one CSV-style row "xx,knn, k , mse" per k.
    for k in K_VALS:
        reg = KNNRegressor(x_train, y_train, k)
        y_pred = reg.predict(x_test)
        mse_iter = skmse(y_test, y_pred)
        print("xx,knn,", k,",", mse_iter)
    # Distance-weighted KNN: same sweep, "xx,dknn" rows.
    for k in K_VALS:
        reg = DwKNNRegressor(x_train, y_train, k)
        y_pred = reg.predict(x_test)
        mse_iter = skmse(y_test, y_pred)
        print("xx,dknn,", k,",", mse_iter)
    """ PCA with KNN. """
    # Project onto 6 principal components fitted on the training split only.
    # NOTE(review): this chunk appears to continue past this view —
    # x_train_pca is assigned but not yet used here.
    pca = PCA(n_components = 6)
    pca.fit(x_train.copy())
    x_train_pca = pca.transform(x_train.copy())
# --- Subsampled KNN evaluation (fragment: weights, X_ids, reweights, A,
# x_train_p, n_samples, p and k are defined by enclosing code outside
# this view).
weights[X_ids] += reweights * A

# Results with optimized subsampling: draw int(p * n_samples) training
# rows without replacement, biased toward high-weight rows.
best_idx = np.random.choice(len(x_train_p),
                            size=int(p * n_samples),
                            replace=False,
                            p=weights / weights.sum())
X_train_p = x_train_p[best_idx]
y_train_p = y_train[best_idx]
clf = neighbors.KNeighborsRegressor(k)
# The last column of x_train_p is dropped before fitting — presumably an
# appended id/weight column that x_test does not carry; TODO confirm
# against the enclosing code.
clf.fit(X_train_p[:, :-1], y_train_p)
y_pred = clf.predict(x_test)
print("p-sampling,", k, ",", p, ",", skmse(y_pred, y_test))

# Uniform subsampling baseline: same sample size, unweighted draw.
best_idx = np.random.choice(len(x_train_p),
                            size=int(p * n_samples),
                            replace=False)
X_train_p = x_train_p[best_idx]
y_train_p = y_train[best_idx]
clf = neighbors.KNeighborsRegressor(k)
clf.fit(X_train_p[:, :-1], y_train_p)
y_pred = clf.predict(x_test)
# Fixed: removed the stray extra "," argument so this row has the same
# CSV shape (label, k, p, mse) as the "p-sampling" row above.
print("u-sampling,", k, ",", p, ",", skmse(y_pred, y_test))
def ga_run(x_train, y_train, x_test, y_test, x_verif, y_verif, k):
    """Tune per-feature weights with a genetic algorithm, then report KNN MSE.

    A population of feature-weight vectors is evolved for a fixed number of
    generations; a candidate's fitness is the neighborhood standard deviation
    of the verification points in the weighted feature space (lower is
    better, per the GA's selection). The best weights are then used to train
    plain and distance-weighted KNN on train+verif data, and the test MSE of
    each is printed as a CSV-style row.

    Args:
        x_train, y_train: training features / targets.
        x_test, y_test: held-out test features / targets for the final MSE.
        x_verif, y_verif: verification set driving the fitness metric.
        k: neighborhood size for the KNN regressors.
    """
    N_init_pop = 50
    N_crossover = 50
    N_selection = 20

    _, n_feats = np.shape(x_train)
    weight_ga = GeneticAlgorithm(n_feats, N_init_pop, mu=0.1)

    def population_fitness(population):
        # Fitness of each candidate weight vector: std-dev of the
        # verification points' neighborhoods under the scaled features.
        # Sized from the population itself so a GA that returns a
        # different count than the nominal constant cannot leave an
        # uninitialized tail in the fitness array.
        metrics = np.empty(len(population))
        for i, w in enumerate(population):
            reg = KNNRegressor(np.multiply(x_train, w), y_train, k)
            nbrs = reg.find_all_neighbors(np.multiply(x_verif, w))
            metrics[i] = reg.find_neighborhood_std(nbrs)
        return metrics

    # Score and thin out the initial population.
    weight_ga.set_fitness(population_fitness(weight_ga.get_population()))
    weight_ga.selection(N_selection)

    # The original improvement-threshold convergence test was disabled in
    # favor of a fixed 20 generations; keep that behavior.
    best_weights = None
    for _ in range(20):
        weight_ga.crossover(N_crossover)
        weight_ga.set_fitness(population_fitness(weight_ga.get_population()))
        best_weights, _best_metric = weight_ga.best_sol()
        weight_ga.selection(N_selection)

    # Retrain on the combined training + verification data using the best
    # weight vector found by the GA.
    x_train = np.concatenate((x_train, x_verif), axis=0)
    y_train = np.concatenate([y_train, y_verif])

    # Print test MSE for plain and distance-weighted KNN.
    for label, regressor_cls in (("knn", KNNRegressor),
                                 ("dknn", DwKNNRegressor)):
        reg = regressor_cls(np.multiply(x_train, best_weights), y_train, k)
        y_pred = reg.predict(np.multiply(x_test, best_weights))
        mse_iter = skmse(y_test, y_pred)
        print("ga," + label + ",", k, ",", mse_iter)
def lbest_pso_run(x_train, y_train, x_test, y_test, x_verif, y_verif, k):
    """Tune per-feature weights with local-best PSO, then report KNN MSE.

    Particle positions are feature-weight vectors; a particle's fitness is
    the neighborhood standard deviation of the verification points in the
    weighted feature space. After a fixed 50 optimization steps, the global
    best weights are used to train plain and distance-weighted KNN on the
    train+verif data, and the test MSE of each is printed as a CSV-style row.

    Args:
        x_train, y_train: training features / targets.
        x_test, y_test: held-out test features / targets for the final MSE.
        x_verif, y_verif: verification set driving the fitness metric.
        k: neighborhood size for the KNN regressors.
    """
    N_init_pop = 50
    _, n_feats = np.shape(x_train)
    weight_pso = LBestPSO(n_feats, N_init_pop)

    def swarm_fitness(candidates):
        # Fitness of each candidate weight vector: std-dev of the
        # verification points' neighborhoods under the scaled features.
        # (Replaces three verbatim copies of this loop; also sizes the
        # array from its own input instead of iterating len(pbest) while
        # indexing pos, as the original "Set pos metrics" loop did.)
        metrics = np.empty(len(candidates))
        for i, w in enumerate(candidates):
            reg = KNNRegressor(np.multiply(x_train, w), y_train, k)
            nbrs = reg.find_all_neighbors(np.multiply(x_verif, w))
            metrics[i] = reg.find_neighborhood_std(nbrs)
        return metrics

    # Seed personal-best and current-position fitness, then the global best.
    weight_pso.set_pbest_fitness(swarm_fitness(weight_pso.get_pbest()))
    pos_metrics = swarm_fitness(weight_pso.get_positions())
    weight_pso.set_p_fitness(pos_metrics)
    weight_pso.set_init_best(pos_metrics)

    # Fixed number of PSO steps (no convergence test in the original either).
    for _ in range(50):
        weight_pso.optimize()
        metrics = swarm_fitness(weight_pso.get_positions())
        weight_pso.set_p_fitness(metrics)
        weight_pso.set_best(metrics)

    # Only the weights are needed below; the original also fetched
    # get_gbest_fit() but never used it.
    best_weights = weight_pso.get_gbest()

    # Retrain on the combined training + verification data.
    x_train = np.concatenate((x_train, x_verif), axis=0)
    y_train = np.concatenate([y_train, y_verif])

    # Print test MSE for plain and distance-weighted KNN.
    for label, regressor_cls in (("knn", KNNRegressor),
                                 ("dknn", DwKNNRegressor)):
        reg = regressor_cls(np.multiply(x_train, best_weights), y_train, k)
        y_pred = reg.predict(np.multiply(x_test, best_weights))
        mse_iter = skmse(y_test, y_pred)
        print("lbest-pso," + label + ",", k, ",", mse_iter)