Ejemplo n.º 1
0
    def test_mean_squared_error(self):
        """Compare ``mean_squared_error`` against scikit-learn's implementation.

        Two call styles of the function under test are exercised:

        1. Column names plus the ``castable`` keyword (``self.reg_table1``).
        2. Column objects taken from two different tables
           (``self.reg_table1.target`` and ``self.reg_table2.p_target``)
           together with ``id_vars='id1'`` -- presumably the key used to align
           the two tables; confirm against the metric's documentation.

        The reference values are computed by scikit-learn from the
        ``self.local_reg1`` / ``self.local_reg2`` fixtures.  The test is
        skipped when scikit-learn cannot be imported.
        """
        try:
            from sklearn.metrics import mean_squared_error as skmse
        except ImportError:
            # Only an unimportable sklearn should skip the test; a bare
            # ``except`` would also swallow KeyboardInterrupt and SystemExit.
            self.skipTest("sklearn is not found in the libraries")

        # Case 1: both columns live in the same table, passed by name.
        skmse_score1 = skmse(self.local_reg1.target, self.local_reg1.p_target)
        dlpymse_score1 = mean_squared_error('target', 'p_target',
                                            castable=self.reg_table1)

        self.assertAlmostEqual(skmse_score1, dlpymse_score1)

        # Case 2: truth and prediction come from two different tables.
        skmse_score2 = skmse(self.local_reg1.target, self.local_reg2.p_target)
        dlpymse_score2 = mean_squared_error(self.reg_table1.target,
                                            self.reg_table2.p_target,
                                            id_vars='id1')

        self.assertAlmostEqual(skmse_score2, dlpymse_score2)
Ejemplo n.º 2
0
    def test_mean_squared_error(self):
        """Compare ``mean_squared_error`` against scikit-learn's implementation.

        The reference value is computed by scikit-learn on the frame returned
        by ``self.reg_table1.to_frame()``; the function under test receives
        the table itself followed by the ``'target'`` and ``'p_target'``
        column names.  The test is skipped when scikit-learn cannot be
        imported.
        """
        try:
            from sklearn.metrics import mean_squared_error as skmse
        except ImportError:
            # Only an unimportable sklearn should skip the test; a bare
            # ``except`` would also swallow KeyboardInterrupt and SystemExit.
            self.skipTest("sklearn is not found in the libraries")

        # Reference score from the locally materialised copy of the table.
        local_reg1 = self.reg_table1.to_frame()
        skmse_score1 = skmse(local_reg1.target, local_reg1.p_target)

        # Score from the function under test, computed on the table directly.
        dlpymse_score1 = mean_squared_error(self.reg_table1, 'target', 'p_target')

        self.assertAlmostEqual(skmse_score1, dlpymse_score1)
Ejemplo n.º 3
0
# Neighbour counts (k) swept for every regressor below.
K_VALS = [3, 5, 7, 9, 11, 13, 15]

# Wall-clock start of the experiment (its use is not visible in this chunk).
starttime = time.time()
# Repeat each trial 10 times.
for i in range (0, 10):
    # New split on every repetition; test_size=0.2 holds out 20% of the data.
    # NOTE(review): train_test_split is presumably scikit-learn's -- confirm
    # against the file's imports.
    x_train, x_test, y_train, y_test = train_test_split(X, y,\
                                                        test_size=0.2)

    """
    Try non-optimized methods.
    """
    # Vanilla KNN.
    # One line is printed per k: the tag "xx,knn,", then k, then the MSE of
    # the test-set predictions as computed by skmse.
    for k in K_VALS:
        reg = KNNRegressor(x_train, y_train, k)
        y_pred = reg.predict(x_test)
        mse_iter = skmse(y_test, y_pred)
        print("xx,knn,", k,",", mse_iter)
    
    # Distance-weighted KNN.
    # Same sweep and output layout as above, tagged "xx,dknn,".
    for k in K_VALS:
        reg = DwKNNRegressor(x_train, y_train, k)
        y_pred = reg.predict(x_test) 
        mse_iter = skmse(y_test, y_pred)
        print("xx,dknn,", k,",", mse_iter)

    """
    PCA with KNN.
    """
    # Fit a 6-component PCA on a copy of the training features, then project
    # a (separate) copy of the training features with the fitted model.
    pca = PCA(n_components = 6)
    pca.fit(x_train.copy())
    x_train_pca = pca.transform(x_train.copy())
                weights[X_ids] += reweights * A

            # Results with optimized subsampling
            best_idx = np.random.choice(len(x_train_p),\
                                        size = int(p * n_samples),\
                                        replace = False,\
                                        p = weights/weights.sum())

            X_train_p = x_train_p[best_idx]
            y_train_p = y_train[best_idx]

            clf = neighbors.KNeighborsRegressor(k)
            clf.fit(X_train_p[:, :-1], y_train_p)
            y_pred = clf.predict(x_test)
            print("p-sampling,", k, ",", p, ",", skmse(y_pred, y_test))

            # Uniform subsampling.

            best_idx = np.random.choice(len(x_train_p),\
                                        size = int(p * n_samples),\
                                        replace = False)

            X_train_p = x_train_p[best_idx]
            y_train_p = y_train[best_idx]

            clf = neighbors.KNeighborsRegressor(k)
            clf.fit(X_train_p[:, :-1], y_train_p)
            y_pred = clf.predict(x_test)
            print("u-sampling,", ",", k, ",", p, ",", skmse(y_pred, y_test))
Ejemplo n.º 5
0
def _ga_population_fitness(weight_pop, x_train, y_train, x_verif, k):
    """Return one neighbourhood-std fitness value per weight vector.

    For every weight vector in ``weight_pop`` the training and verification
    features are scaled element-wise by that vector, a ``KNNRegressor`` is
    built on the scaled training data, and the value returned by its
    ``find_neighborhood_std`` for the verification set's neighbours is stored.

    Args:
        weight_pop: Sequence of weight vectors (one per individual).
        x_train: Training features.
        y_train: Training targets.
        x_verif: Verification features.
        k: Number of neighbours handed to ``KNNRegressor``.

    Returns:
        A float array with ``len(weight_pop)`` entries, in population order.
    """
    # Sized from the population itself so the buffer can never be too short
    # or carry uninitialised trailing entries.
    fitness = np.empty(len(weight_pop))
    for idx, weights in enumerate(weight_pop):
        scaled_x_train = np.multiply(x_train, weights)
        scaled_x_verif = np.multiply(x_verif, weights)

        reg = KNNRegressor(scaled_x_train, y_train, k)
        verif_neighbors = reg.find_all_neighbors(scaled_x_verif)
        fitness[idx] = reg.find_neighborhood_std(verif_neighbors)
    return fitness


def ga_run(x_train, y_train, x_test, y_test, x_verif, y_verif, k):
    """Search feature weights with a genetic algorithm and report test MSE.

    A ``GeneticAlgorithm`` population of weight vectors is scored on the
    verification set (see ``_ga_population_fitness``), then evolved for a
    fixed number of crossover/selection generations.  The best weights of the
    last generation are used to scale the features, the training and
    verification sets are merged, and the test-set MSE of ``KNNRegressor`` and
    ``DwKNNRegressor`` is printed as ``ga,knn,`` / ``ga,dknn,`` lines.

    Args:
        x_train: Training features; its second dimension sets the number of
            weights.
        y_train: Training targets.
        x_test: Test features.
        y_test: Test targets.
        x_verif: Verification features used for the GA fitness.
        y_verif: Verification targets (only used once the sets are merged).
        k: Number of neighbours for the regressors.

    Returns:
        None.  Results are printed.
    """
    N_init_pop = 50
    N_crossover = 50
    N_selection = 20
    N_generations = 20

    _, nFeats = np.shape(x_train)
    weight_ga = GeneticAlgorithm(nFeats, N_init_pop, mu=0.1)

    # Score the initial population and keep the selected individuals.
    weight_ga.set_fitness(
        _ga_population_fitness(weight_ga.get_population(),
                               x_train, y_train, x_verif, k))
    weight_ga.selection(N_selection)

    # Fixed generation budget (no convergence-based early stop).
    for _generation in range(N_generations):
        weight_ga.crossover(N_crossover)

        # Score the offspring population.
        weight_ga.set_fitness(
            _ga_population_fitness(weight_ga.get_population(),
                                   x_train, y_train, x_verif, k))

        # The best solution must be read before selection changes the
        # population; only the weights are needed afterwards.
        best_weights, _best_metric = weight_ga.best_sol()
        weight_ga.selection(N_selection)

    # Concatenate training and verification sets for the final evaluation.
    x_train = np.concatenate((x_train, x_verif), axis=0)
    y_train = np.concatenate([y_train, y_verif])

    # Print the results of plain KNN, then of distance-weighted KNN.
    for label, regressor_cls in (("ga,knn,", KNNRegressor),
                                 ("ga,dknn,", DwKNNRegressor)):
        reg = regressor_cls(np.multiply(x_train, best_weights), y_train, k)
        y_pred = reg.predict(np.multiply(x_test, best_weights))
        mse_iter = skmse(y_test, y_pred)
        print(label, k, ",", mse_iter)
Ejemplo n.º 6
0
def _pso_population_fitness(weight_pop, x_train, y_train, x_verif, k):
    """Return one neighbourhood-std fitness value per weight vector.

    For every weight vector in ``weight_pop`` the training and verification
    features are scaled element-wise by that vector, a ``KNNRegressor`` is
    built on the scaled training data, and the value returned by its
    ``find_neighborhood_std`` for the verification set's neighbours is stored.

    Args:
        weight_pop: Sequence of weight vectors (particle positions or
            personal bests).
        x_train: Training features.
        y_train: Training targets.
        x_verif: Verification features.
        k: Number of neighbours handed to ``KNNRegressor``.

    Returns:
        A float array with ``len(weight_pop)`` entries, in input order.
    """
    # Sized from the collection being evaluated so buffer and loop always
    # agree on the number of particles.
    fitness = np.empty(len(weight_pop))
    for idx, weights in enumerate(weight_pop):
        scaled_x_train = np.multiply(x_train, weights)
        scaled_x_verif = np.multiply(x_verif, weights)

        reg = KNNRegressor(scaled_x_train, y_train, k)
        verif_neighbors = reg.find_all_neighbors(scaled_x_verif)
        fitness[idx] = reg.find_neighborhood_std(verif_neighbors)
    return fitness


def lbest_pso_run(x_train, y_train, x_test, y_test, x_verif, y_verif, k):
    """Search feature weights with ``LBestPSO`` and report test MSE.

    The swarm's personal bests and current positions are scored on the
    verification set (see ``_pso_population_fitness``), the swarm is then
    optimised for a fixed number of iterations, and the resulting best
    weights are used to scale the features.  Training and verification sets
    are merged and the test-set MSE of ``KNNRegressor`` and
    ``DwKNNRegressor`` is printed as ``lbest-pso,knn,`` / ``lbest-pso,dknn,``
    lines.

    Args:
        x_train: Training features; its second dimension sets the number of
            weights.
        y_train: Training targets.
        x_test: Test features.
        y_test: Test targets.
        x_verif: Verification features used for the swarm fitness.
        y_verif: Verification targets (only used once the sets are merged).
        k: Number of neighbours for the regressors.

    Returns:
        None.  Results are printed.
    """
    N_init_pop = 50
    N_iterations = 50

    _, nFeats = np.shape(x_train)
    weight_pso = LBestPSO(nFeats, N_init_pop)
    pos = weight_pso.get_positions()
    pbest = weight_pso.get_pbest()

    # Score the personal bests.
    weight_pso.set_pbest_fitness(
        _pso_population_fitness(pbest, x_train, y_train, x_verif, k))

    # Score the current positions; the same values seed the initial best.
    pos_metric_array = _pso_population_fitness(pos, x_train, y_train,
                                               x_verif, k)
    weight_pso.set_p_fitness(pos_metric_array)
    weight_pso.set_init_best(pos_metric_array)

    for _iteration in range(N_iterations):
        weight_pso.optimize()

        # Re-score the moved particles and update the swarm's bests.
        metric_array = _pso_population_fitness(weight_pso.get_positions(),
                                               x_train, y_train, x_verif, k)
        weight_pso.set_p_fitness(metric_array)
        weight_pso.set_best(metric_array)

        # NOTE(review): the returned fitness is never used; the call is kept
        # in case the getter has side effects -- confirm and remove.
        weight_pso.get_gbest_fit()

    best_weights = weight_pso.get_gbest()

    # Concatenate training and verification sets for the final evaluation.
    x_train = np.concatenate((x_train, x_verif), axis=0)
    y_train = np.concatenate([y_train, y_verif])

    # Print the results of plain KNN, then of distance-weighted KNN.
    for label, regressor_cls in (("lbest-pso,knn,", KNNRegressor),
                                 ("lbest-pso,dknn,", DwKNNRegressor)):
        reg = regressor_cls(np.multiply(x_train, best_weights), y_train, k)
        y_pred = reg.predict(np.multiply(x_test, best_weights))
        mse_iter = skmse(y_test, y_pred)
        print(label, k, ",", mse_iter)