Example #1
def getBestK(X_train, y_train, X_val, y_val, nns=[30], print_train=True, print_val=True):
    acc_train = np.zeros((1, len(nns)))
    acc_val = np.zeros((1, len(nns)))
    for j in range(len(nns)):
        print(j)
        sys.stdout.flush()
        knn = KNNClassifier(nns[j])
        knn.train(X_train, y_train)
        acc_train[0, j] = np.mean(knn.predict(X_train) == y_train)
        print(acc_train[0, j])
        sys.stdout.flush()
        y_pred = knn.predict(X_val)
        acc_val[0, j] = np.mean(y_pred == y_val)
        print(acc_val[0, j])
        sys.stdout.flush()
        print "Confusion matrix:"
        print confusion_matrix(y_pred, y_val)

    if print_train:
        print(acc_train)
    if print_val:
        print(acc_val)

    best_val = np.max(acc_val)
    best_row, best_k_idx = np.where(acc_val == np.amax(acc_val))
    # Note: knn is the model from the last loop iteration, not necessarily the best k.
    return (best_row[0], best_k_idx[0]), knn
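None of these examples include the KNNClassifier implementation itself; Example #1 only assumes a constructor taking k plus train() and predict() methods. A minimal NumPy sketch of that interface (Euclidean distance, majority vote) could look like the following — the class and method names come from the snippets, everything else is an assumption:

import numpy as np
from collections import Counter

class KNNClassifier:
    def __init__(self, k):
        self.k = k

    def train(self, X, y):
        # k-NN is a lazy learner: training just memorizes the data.
        self.X = np.asarray(X, dtype=float)
        self.y = np.asarray(y)

    def predict(self, X):
        X = np.atleast_2d(np.asarray(X, dtype=float))
        preds = []
        for x in X:
            # Euclidean distance from the query to every training point.
            dists = np.linalg.norm(self.X - x, axis=1)
            # Majority vote among the k nearest labels.
            nearest = self.y[np.argsort(dists)[:self.k]]
            preds.append(Counter(nearest).most_common(1)[0][0])
        return np.array(preds)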
Example #2
def knn():
    X, y = make_blobs(centers=4, n_samples=500, n_features=2,
                      shuffle=True)
    model = KNNClassifier(K=4)
    model.fit(X, y)
    res = model.predict(X)
    print(np.mean(res == y))
Example #3
def main():
    curr_dir = os.path.dirname(__file__)
    csv_file = os.path.join(curr_dir, 'data/play.csv')

    test = pd.Series({
        'Tempo': 'Chuva',
        'Temperatura': 'Quente',
        'Humidade': 'Normal',
        'Vento': 'Forte'
    })

    df = pd.read_csv(csv_file, index_col='Dia')
    X, y = df.loc[:, df.columns != 'Jogar'], df['Jogar']

    clf = KNNClassifier(k=1)
    clf.fit(X, y)

    print(f'RESULT k = {clf.k} ::',
          'Jogar' if clf.predict(test) else 'Não Jogar')
    clf.k = 3
    print(f'RESULT k = {clf.k} ::',
          'Jogar' if clf.predict(test) else 'Não Jogar')
    print()
    print('DISTANCES')
    print(clf._gen_distances(test))
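Example #3 runs k-NN over purely categorical attributes, where Euclidean distance is undefined; a common choice there is the overlap (Hamming) distance, i.e. the number of attributes on which two rows disagree. A minimal sketch, assuming pandas Series rows as in the snippet (the helper name is hypothetical):

import pandas as pd

def overlap_distance(a: pd.Series, b: pd.Series) -> int:
    # Count the attributes on which the two rows disagree.
    return int((a != b).sum())

# e.g. distance between the query and the first training row:
# overlap_distance(test, X.iloc[0])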
Example #4
def main():
    df = pd.read_csv('./diabetes.csv')
    normalized_data = normalize(df, 'Outcome')
    knn_classifier = KNNClassifier()
    for k in [5, 10]:
        results = {}
        for nn in [3, 5, 7]:
            knn_classifier.nn = nn
            knn_classifier.cross_validate(
                normalized_data,
                k,  # k folds
                1  # r
            )
            results[str(nn)] = [
                knn_classifier.accuracy, knn_classifier.f1_score
            ]
        plot_results(results)

    for nn in [3, 5, 7]:
        knn_classifier.nn = nn
        knn_classifier.cross_validate(
            normalized_data,
            10,  # k folds
            10  # r
        )
        print('\nGlobal accuracy: %.3f (%.3f)' %
              (knn_classifier.accuracy, knn_classifier.accuracy_std))
        print('Global F1 score: %.3f (%.3f)\n' %
              (knn_classifier.f1_score, knn_classifier.f1_score_std))
        results[str(nn)] = [knn_classifier.accuracy, knn_classifier.f1_score]
    plot_results(results)
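cross_validate(data, k, r) is not shown; judging by the "k folds" and "r" comments and the reported standard deviations, it presumably runs r repetitions of k-fold cross-validation with reshuffling. A minimal sketch of that splitting logic under this assumption (the function name is hypothetical):

import numpy as np

def repeated_kfold_indices(n_samples, k, r, seed=0):
    # Yield (train_idx, test_idx) pairs for r repetitions of k-fold CV,
    # reshuffling the sample order before each repetition.
    rng = np.random.default_rng(seed)
    for _ in range(r):
        idx = rng.permutation(n_samples)
        folds = np.array_split(idx, k)
        for i in range(k):
            test_idx = folds[i]
            train_idx = np.concatenate([folds[j] for j in range(k) if j != i])
            yield train_idx, test_idx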
Example #5
    def test_knn(self):
        knn_model = KNNClassifier(all_monomials_with_maximum_degrees([1, 1, 1]), 1)

        knn_model.train(np.array([[1], [10], [2], [30]]), np.array([1, 0, 1, 0]))
        self.assertEqual(
            knn_model._trained,
            True,
            "Initially False. Change this property to True after self.train() is called",
        )

        self.assertIn(
            type(knn_model.predict(np.array([1]))),
            (np.float64, float),
            "Return type of predict() is np.float64 or float",
        )

        self.assertIn(
            type(
                knn_model.evaluate(
                    np.array([[1], [10], [2], [20]]), np.array([1, 0, 1, 0])
                )
            ),
            (float, np.float64, int, np.int64),
            "Return type of evaluate() is numeric (float, np.float64, int, or np.int64)",
        )
        self.assertIn(
            knn_model.evaluate(
                np.array([[1], [10], [2], [30]]), np.array([1, 0, 1, 0])
            ),
            (1.0, 1),
            "evaluate() for same data returns 1.0",
        )
Example #6
def test():
    train_data, train_labels = mnist.load_mnist(mode='train', path='data/')
    # test_data, test_labels = mnist.load_mnist(mode='test', path='data/')
    errors = np.array(
        knn.tune_hyperparams(train_data[:1000], train_labels[:1000]))
    X = errors[:, 0]
    Y = errors[:, 1]
    Z = errors[:, 2]

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X, Y, Z, c='r', marker='o')

    ax.set_xlabel('X Label')
    ax.set_ylabel('Y Label')
    ax.set_zlabel('Z Label')

    plt.show()
Example #7
def run(k,
        mode='knn',
        distance='euclidean',
        keep_punc=False,
        keep_stopwords=False):
    from knn import KNNClassifier
    from ncc import NearestCentroidClassifier
    if mode == 'knn':
        clf = KNNClassifier(k,
                            distance=distance,
                            keep_stopwords=keep_stopwords,
                            keep_punc=keep_punc)
        clf.score()
    elif mode == 'ncc':
        clf = NearestCentroidClassifier(k,
                                        distance=distance,
                                        keep_stopwords=keep_stopwords,
                                        keep_punc=keep_punc)
        clf.score()
Example #8
def training(q, DTrain, RTrain, **args):
    '''
    @input topic document q ∈ Q, training collection DTrain, relevance judgments RTrain,
    and optional keyword arguments controlling the classification process
    @behavior learns a classification model to predict the relevance of documents to
    topic q using DTrain and RTrain, applying proper preprocessing, classifier
    selection, and hyperparameterization during training
    @output q-conditional classification model
    '''
    classifier_type = args.get('classifier_type')
    if classifier_type == 'logistic':
        hyper_parameters = args.get('hyper_parameters')
        classifier = LogisticClassifier(hyper_parameters=hyper_parameters)
    elif classifier_type == 'XGBOOST':
        classifier = XGBOOSTClassifier()
    elif classifier_type == 'MLP':
        classifier = MLPerceptronClassifier()
    elif classifier_type == 'KNN':
        classifier = KNNClassifier()
    else:
        raise ValueError(f'Unknown classifier_type: {classifier_type!r}')
    classifier.train(q, DTrain, RTrain)
    return classifier
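A hypothetical call matching the docstring's contract (q, DTrain, RTrain come from the surrounding retrieval pipeline; only the keyword name is taken from the snippet):

model = training(q, DTrain, RTrain, classifier_type='KNN')
# model is the q-conditional classifier, already trained on DTrain and RTrain.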
Example #9
print(_, nFeats)

# Values of parameter k to iterate over.
K_VALS = [3, 5, 7, 9, 11, 13, 15]

starttime = time.time()
# Repeat each trial 10 times.
for i in range(0, 10):
    x_train, x_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2)
    """
    Try non-optimized methods.
    """
    # Vanilla KNN.
    for k in K_VALS:
        reg = KNNClassifier(x_train, y_train, k)
        y_pred = reg.predict(x_test)
        acc_iter = accuracy(y_test, y_pred)
        print("xx,knn,", k, ",", acc_iter)

    # Distance-weighted KNN.
    for k in K_VALS:
        reg = DwKNNClassifier(x_train, y_train, k)
        y_pred = reg.predict(x_test)
        acc_iter = accuracy(y_test, y_pred)
        print("xx,dknn,", k, ",", acc_iter)
    """
    PCA with KNN.
    """
    pca = PCA(n_components=4)
    pca.fit(x_train.copy())
Example #10
# Perform dimensionality reduction #
X_train_reduced = svd.fit_transform(X_train_tfidf)

# Keep results #
prec, rec, f1, accu = ([] for i in range(4))

# Use 10-fold and find metrics #
for train, test in kf.split(X_train_reduced, X_train_le):
    X_train = X_train_reduced[train]
    y_train = X_train_le[train]

    X_test = X_train_reduced[test]
    y_test = X_train_le[test]

    clf_KNN = KNNClassifier(100)

    # Train model #
    clf_KNN.fit(X_train, y_train)

    # Predict categories #
    y_pred = clf_KNN.predict(X_test)

    # Save scores #
    prec.append(precision_score(y_test, y_pred, average='macro'))
    rec.append(recall_score(y_test, y_pred, average='macro'))
    f1.append(f1_score(y_test, y_pred, average='macro'))
    accu.append(accuracy_score(y_test, y_pred))

# Record results to csv #
Evaluation_metric_df = pd.read_csv('EvaluationMetric_10fold.csv', sep="\t")
Example #11
    #if (nFeats > 15) or (_ > 4000):
    #    continue
    print("Number of samples: ",_, "Number of features: ", nFeats)
    #print("X :")
    #print(X)
    #print("y :")
    #print(y)
    print("Splitting training and test sets:")

    #Test without scaling
    print("Testing without scaling")

    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    x_train, x_verif, y_train, y_verif = train_test_split(x_train, y_train, test_size=0.33)

    clf = KNNClassifier(x_train, y_train, 5)
    y_pred = clf.predict(x_test)
    print("Accuracy = ", accuracy_score(y_test, y_pred))

    #Run PSO to find best weights
    N_init_pop = 30

    _, nFeats = np.shape(x_train)
    weight_pso = GBestPSO(nFeats, N_init_pop)
    pos = weight_pso.get_positions()
    pbest = weight_pso.get_pbest()
    pbest_metric_array = np.empty(N_init_pop)
    pos_metric_array = np.empty(N_init_pop)

    #Set pbest metrics
    for i in range(len(pbest)):
Example #12
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from knn import KNNClassifier
from module_selection import train_test_split

iris = datasets.load_iris()
#print(iris.keys())

X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y)

my_knn_clf = KNNClassifier(k=3)
my_knn_clf.fit(X_train, y_train)
y_predict = my_knn_clf.predict(X_test)

#accuracy
accuracy = sum(y_predict == y_test) / len(y_test)
print(accuracy)
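The accuracy line relies on y_predict == y_test being a NumPy boolean array; an equivalent and slightly more robust form averages it directly:

accuracy = np.mean(y_predict == y_test)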

# using sklearn KNeighborsClassifier and model_selection
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
Example #13
def lbest_pso_run_ent(x_train, y_train, x_test, y_test, x_verif, y_verif, k):
    #Run PSO to find best weights
    N_init_pop = 50

    _, nFeats = np.shape(x_train)
    weight_pso = LBestPSO(nFeats, N_init_pop)
    pos = weight_pso.get_positions()
    pbest = weight_pso.get_pbest()
    pbest_metric_array = np.empty(N_init_pop)
    pos_metric_array = np.empty(N_init_pop)

    #Set pbest metrics
    for i in range(len(pbest)):
        #Scale input data
        scaled_x_train = np.multiply(x_train, pbest[i])
        #Scale verification data
        scaled_x_verif = np.multiply(x_verif, pbest[i])

        #Method 1
        clf = KNNClassifier(scaled_x_train, y_train, k)
        neighbors = clf.find_all_neighbors(scaled_x_verif)
        nbh_ent = clf.find_neighborhood_entropy(neighbors)
        pbest_metric_array[i] = nbh_ent
    
    weight_pso.set_pbest_fitness(pbest_metric_array)

    #Set pos metrics
    for i in range(len(pbest)):
        #Scale input data
        scaled_x_train = np.multiply(x_train, pos[i])
        #Scale verification data
        scaled_x_verif = np.multiply(x_verif, pos[i])

        #Method 1
        clf = KNNClassifier(scaled_x_train, y_train, k)
        neighbors = clf.find_all_neighbors(scaled_x_verif)
        nbh_ent = clf.find_neighborhood_entropy(neighbors)
        pos_metric_array[i] = nbh_ent

    weight_pso.set_p_fitness(pos_metric_array)

    #Set initial gbest.
    weight_pso.set_init_best(pos_metric_array)

    count = 0
    while (count < 50):
        count += 1
        weight_pso.optimize()

        #get_population
        weight_pop = weight_pso.get_positions()
        metric_array = np.empty(N_init_pop)
    
        #evaluate and set fitness
        for i in range(len(weight_pop)):
            #Scale input data
            scaled_x_train = np.multiply(x_train, weight_pop[i])
            #Scale verification data
            scaled_x_verif = np.multiply(x_verif, weight_pop[i])
        
            #Method 1
            clf = KNNClassifier(scaled_x_train, y_train, k)
            neighbors = clf.find_all_neighbors(scaled_x_verif)
            nbh_ent = clf.find_neighborhood_entropy(neighbors)
            metric_array[i] = nbh_ent

        weight_pso.set_p_fitness(metric_array)
        weight_pso.set_best(metric_array)

        #get_best_sol
        best_metric = weight_pso.get_gbest_fit()

    best_weights = weight_pso.get_gbest()
   
    # Concatenate training and verification sets.
    x_train = np.concatenate((x_train, x_verif), axis=0)
    y_train = np.concatenate([y_train, y_verif])

    # Print the results of KNN.
    clf = KNNClassifier(np.multiply(x_train, best_weights), y_train, k)
    y_pred = clf.predict(np.multiply(x_test, best_weights))
    acc_iter = accuracy(y_test, y_pred)
    print("lbest-pso-ent,knn,", k, ",", acc_iter)

    # Print the results of distance-weighted KNN.
    clf = DwKNNClassifier(np.multiply(x_train, best_weights), y_train, k)
    y_pred = clf.predict(np.multiply(x_test, best_weights))
    acc_iter = accuracy(y_test, y_pred)
    print("lbest-pso-ent,dknn,", k, ",", acc_iter)
Example #14
    points.append([float(inp[0]), float(inp[1]), int(inp[2])])

random.shuffle(points)
n_points = len(points)
points = chunkify(points)

acc1, acc2 = [], []

for i in range(NUMBER_OF_FOLDS):
    train, test = [], []
    for j in range(NUMBER_OF_FOLDS):
        if i != j:
            train.extend(points[j])
        else:
            test = points[j]
    knn = KNNClassifier()
    svm = SVM()
    train2 = copy.deepcopy(train)
    knn.fit(train)
    svm.fit_transform(train2)
    k_right, s_right = 0, 0
    for point in test:
        k_pred = knn.predict([point[0], point[1]])
        s_pred = svm.predict([point[0], point[1]])
        s_pred = (s_pred + 1) / 2  # map the SVM output from {-1, 1} to {0, 1}
        if k_pred == point[2]:
            k_right += 1
        if s_pred == point[2]:
            s_right += 1
    acc1.append(k_right / len(test))
    acc2.append(s_right / len(test))
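chunkify() is not defined in Example #14; given the NUMBER_OF_FOLDS loop that follows it, it presumably splits the shuffled point list into that many roughly equal folds. A minimal sketch under that assumption:

NUMBER_OF_FOLDS = 5  # assumed; the original defines it elsewhere

def chunkify(points):
    # Split the list into NUMBER_OF_FOLDS nearly equal chunks.
    size, rem = divmod(len(points), NUMBER_OF_FOLDS)
    chunks, start = [], 0
    for i in range(NUMBER_OF_FOLDS):
        end = start + size + (1 if i < rem else 0)
        chunks.append(points[start:end])
        start = end
    return chunks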
Example #15
train_imgs = utils.read_folder(TRAIN_DIR, 0, ntrain, flatten=False)
print ("\nDone!")
sys.stdout.flush()
X = train_imgs
X = X.reshape((ntrain, -1))
#X = np.insert(X, 0, 1.0, axis = 1)

y = utils.read_labels('trainLabels.csv', 0, ntrain)



X_train, X_val, y_train, y_val = cross_validation.train_test_split(X, y, test_size=0.1)
nns = [1]
#utils.getBestK(X_train, y_train, X_val, y_val, nns)

knn = KNNClassifier(nns[0])
knn.train(X_train, y_train)
print "X_val shape: ", X_val.shape
print "y_val shape: ", y_val.shape
pred = knn.predict(X_val)
print "Accuracy: ", np.mean(pred == y_val)
#uncomment this to visualize knn prediction - 10 examples from each class
"""
examples = np.zeros((10,10,32,32,3))
for i in range(10):
	examples[i] = ((X_val[pred==i])[0:10]).reshape(10,32,32,3)
num_classes = len(classes)
nexamples = 10
for y, cls in enumerate(classes):
	idxs = np.arange(nexamples)
	for i, idx in enumerate(idxs):
Example #16
                    else:
                        matrix = evaluator.compute().cpu()
                        plotconfmat(dataset_name.split(","), matrix,
                                    rootname + ".png")
                        torch.save(matrix, rootname + ".pt")
                        # T.to_pil_image(matrix).save(f"matrix_{classifier_name}_dataset{i:02}.png")

                import json
                with open(rootname + ".json", "w+") as f:
                    json.dump(outputs, f)

        solver.cpu()


classifiers = {
    "knn5": KNNClassifier(k=5),
    "knn10": KNNClassifier(k=10),

    # "relpnet_1": M.RelationNetClassifier_Protonet1(simnet_channels=[128, 64, 32]),

    # "relation_1": M.RelationNetClassifier(in_channels=416, feature_channels=[10], simnet_channels=[32, 16, 4]),
    # "reg_relation_1": M.RelationNetClassifier(in_channels=416, feature_channels=[10], simnet_channels=[32, 16, 4]),

    # "protonet_1": M.ProtonetClassifier(in_channels=416, mid_channels=[], out_channels=32),
    # "protonet_2": M.ProtonetClassifier(in_channels=416, mid_channels=[64], out_channels=32),
    # "protonet_3": M.ProtonetClassifier(in_channels=416, mid_channels=[128, 64], out_channels=32),
    # "protonet_4": M.ProtonetClassifier(in_channels=416, mid_channels=[256, 128, 64], out_channels=32),
    # "protonet_bottleneck_end": M.ProtonetClassifier(in_channels=416, mid_channels=[256, 128, 64], out_channels=10),
    # "protonet_bottleneck_mid": M.ProtonetClassifier(in_channels=416, mid_channels=[128, 32, 128], out_channels=32),

    # "simnet_simple": M.SimnetClassifier(in_channels=416, channels=[10]),
Example #17
def ga_run_std(x_train, y_train, x_test, y_test, x_verif, y_verif, k):
    # Run GA to find best weights.
    N_init_pop = 50
    N_crossover = 50
    N_selection = 20
    improv_thresh = 1e-3

    _, nFeats = np.shape(x_train)
    weight_ga = GeneticAlgorithm(nFeats, N_init_pop, mu=0.1)
    weight_pop = weight_ga.get_population()
    metric_array = np.empty(N_init_pop)

    # Create the initial population.
    for i in range(len(weight_pop)):
        # Scale input data
        scaled_x_train = np.multiply(x_train, weight_pop[i])
        # Scale verification data
        scaled_x_verif = np.multiply(x_verif, weight_pop[i])

        # Classifier.
        clf = KNNClassifier(scaled_x_train, y_train, k)
        neighbors = clf.find_all_neighbors(scaled_x_verif)
        nbh_ent = clf.find_neighborhood_std(neighbors)
        metric_array[i] = nbh_ent

    # Update fitness in GA object.
    weight_ga.set_fitness(metric_array)
    weight_ga.selection(N_selection)
    new_best_metric = 2.5

    # while (best_metric - new_best_metric) > improv_thresh:
    count = 0
    while (count < 20):
        count += 1
        best_metric = new_best_metric

        # Crossover.
        weight_ga.crossover(N_crossover)

        # Get new population.
        weight_pop = weight_ga.get_population()
        metric_array = np.empty(N_crossover)

        # Evaluate and set fitness.
        for i in range(len(weight_pop)):
            # Scale input data
            scaled_x_train = np.multiply(x_train, weight_pop[i])
            # Scale verification data
            scaled_x_verif = np.multiply(x_verif, weight_pop[i])

            # Classifier.
            clf = KNNClassifier(scaled_x_train, y_train, k)
            neighbors = clf.find_all_neighbors(scaled_x_verif)
            nbh_ent = clf.find_neighborhood_std(neighbors)
            metric_array[i] = nbh_ent

        # Update fitness in GA object
        weight_ga.set_fitness(metric_array)
        # get_best_sol
        best_weights, new_best_metric = weight_ga.best_sol()
        #print("Metric of this iteration are: ", new_best_metric)
        weight_ga.selection(N_selection)

    # print("Best weights = ", best_weights, "\tBest metric = ", new_best_metric)

    # Test with scaling after GA

    # Concatenate training and verification sets.
    x_train = np.concatenate((x_train, x_verif), axis=0)
    y_train = np.concatenate([y_train, y_verif])

    # Print the results of KNN.
    clf = KNNClassifier(np.multiply(x_train, best_weights), y_train, k)
    y_pred = clf.predict(np.multiply(x_test, best_weights))
    acc = accuracy(y_test, y_pred)
    print("ga-std,knn,", k, ",", acc)

    # Print the results of distance-weighted KNN.
    clf = DwKNNClassifier(np.multiply(x_train, best_weights), y_train, k)
    y_pred = clf.predict(np.multiply(x_test, best_weights))
    acc = accuracy(y_test, y_pred)
    print("ga-std,dknn,", k, ",", acc)