Code example #1
# assumed setup for this snippet (the listing omits the imports and the CSV
# load; glass.csv is the Glass Identification dataset)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.metrics import confusion_matrix, classification_report

glass = pd.read_csv("glass.csv")

glass.shape  # (214, 10)
glass.info()
glass['Type'].unique()  # 7 types
glass.head()

# segregate dependent and independent variables
y = glass['Type'].values
x = glass.drop(['Type'], axis=1).values
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

# building models

knn = KNC(n_neighbors=8)
# fit train data

knn.fit(x_train, y_train)  # model built
y_train_pred = knn.predict(x_train)  # predict labels for the training data
train_acc = np.mean(y_train_pred == y_train)  # fraction of correct predictions
train_acc  # 73.68%
knn.score(x_train, y_train)  # same metric via the built-in scorer: 73.68%

# check prediction accuracy of train data and classification error
print(confusion_matrix(y_train, y_train_pred))
print(classification_report(y_train, y_train_pred))  # accuracy = 74%

# evaluate the test data
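# --- hedged completion (the listing truncates here): the matching
# evaluation on the held-out 20% split created by train_test_split above
y_test_pred = knn.predict(x_test)
test_acc = np.mean(y_test_pred == y_test)
test_acc
print(confusion_matrix(y_test, y_test_pred))
print(classification_report(y_test, y_test_pred))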
Code example #2
# Train-Test split (assumed imports below; X and y are defined earlier in
# the original script, which the listing omits)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.metrics import confusion_matrix, classification_report

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=1)

scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print(X_train)
print(X_test)

# model1 with 5 nearest neighbours (an arbitrary first choice)
knn = KNC(n_neighbors=5)
m1 = knn.fit(X_train, y_train)
pred = m1.predict(X_test)
print(pred)

print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))

# Calculating the accuracy of model1
print(m1.score(X_test, y_test))  #>> accuracy = 0.6744186046511628

error = []

# Calculating error for K values from 1 to 39
for i in range(1, 40):
    knn = KNC(n_neighbors=i)
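    # --- hedged completion (the listing truncates inside this loop): fit,
    # predict on the test split, and record the misclassification rate
    knn.fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    error.append(np.mean(pred_i != y_test))

# elbow plot: choose the k where the error curve bottoms out
plt.plot(range(1, 40), error, marker='o')
plt.xlabel('K')
plt.ylabel('Mean error')
plt.show()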
Code example #3
"""
@author: rahul
"""

import pandas as pd
import numpy as np

zoo = pd.read_csv("E:\\Data Science\\Data Sheet\\zoo.csv")

from sklearn.model_selection import train_test_split

train, test = train_test_split(zoo, test_size=0.2)

from sklearn.neighbors import KNeighborsClassifier as KNC

# for 3 nearest neighbours
neigh = KNC(n_neighbors=3)
# Fitting with training data: in zoo.csv, column 0 is the animal name,
# columns 1-16 are the features, and column 17 is the type label
neigh.fit(train.iloc[:, 1:17], train.iloc[:, 17])
# train accuracy
train_acc = np.mean(neigh.predict(train.iloc[:, 1:17]) == train.iloc[:, 17])
# test accuracy
test_acc = np.mean(neigh.predict(test.iloc[:, 1:17]) == test.iloc[:, 17])

# 3 to 49 nearest neighbours, storing the accuracy values
acc = []
for i in range(3, 50, 1):
    neigh = KNC(n_neighbors=i)
    neigh.fit(train.iloc[:, 1:17], train.iloc[:, 17])
    train_acc = np.mean(neigh.predict(train.iloc[:, 1:17]) == train.iloc[:, 17])
    test_acc = np.mean(neigh.predict(test.iloc[:, 1:17]) == test.iloc[:, 17])
    acc.append([train_acc, test_acc])
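# --- hedged addition (not in the original listing): plot the stored
# accuracies to pick k; the same plotting idiom appears in code example #27
import matplotlib.pyplot as plt

plt.plot(np.arange(3, 50), [i[0] for i in acc], "bo-")  # train accuracy
plt.plot(np.arange(3, 50), [i[1] for i in acc], "ro-")  # test accuracy
plt.legend(["train", "test"])
plt.show()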
Code example #4
File: Zoo.py Project: Vimal666/KNN
# Truncated summary-statistics output pasted from the console (apparently
# Zoo.describe(); the column headers were elided by the listing):
# mean     0.425743    0.198020    0.584158  ...    0.128713    0.435644    2.831683
# std      0.496921    0.400495    0.495325  ...    0.336552    0.498314    2.102709
# min      0.000000    0.000000    0.000000  ...    0.000000    0.000000    1.000000
# 25%      0.000000    0.000000    0.000000  ...    0.000000    0.000000    1.000000
# 50%      0.000000    0.000000    1.000000  ...    0.000000    0.000000    2.000000
# 75%      1.000000    0.000000    1.000000  ...    0.000000    1.000000    4.000000
# max      1.000000    1.000000    1.000000  ...    1.000000    1.000000    7.000000

# assumed imports (not shown in this excerpt)
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier as KNC

# getting the train/test split
train, test = train_test_split(Zoo, test_size=0.3)  # 70% train data, 30% test data
# looping over the K values I want to use and printing train/test accuracy
x = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33]
for k in x:
    if k % 2 != 0:  # keep odd k only (every value above is already odd)
        neigh = KNC(n_neighbors=k)
        neigh.fit(train.iloc[:, 1:17], train.iloc[:, 17])
        print("train_accuracy" + str(k), " : " + str(np.mean(neigh.predict(train.iloc[:, 1:17]) == train.iloc[:, 17])))
        print("test_accuracy" + str(k), " : " + str(np.mean(neigh.predict(test.iloc[:, 1:17]) == test.iloc[:, 17])))

###### below are the accuracy values obtained:
#train_accuracy1  : 1.0
#test_accuracy1  : 1.0
#train_accuracy3  : 0.9714285714285714
#test_accuracy3  : 0.9354838709677419
#train_accuracy5  : 0.9428571428571428
#test_accuracy5  : 0.9032258064516129
#train_accuracy7  : 0.9428571428571428
#test_accuracy7  : 0.9032258064516129
#train_accuracy9  : 0.8571428571428571
#test_accuracy9  : 0.8709677419354839
Code example #5
File: classifier.py Project: dtsat/ML-Number-Reader
    def __learn_and_validate(self):
        """
        Here comes the magic. Use ML algorithms to learn, validate, and test.
        """

        print('Using extractor:', self.extractor, '\n')

        ###################################################################
        print("Learning using SVC...")
        classifier = SVC(C=1.0,
                         kernel='sigmoid',
                         gamma='auto',
                         probability=False,
                         verbose=True,
                         max_iter=60000,
                         decision_function_shape='ovr')
        classifier.fit(self.learn[0], self.learn[1])

        print("Validating using SVC...")
        self.validateResults[self.__which]['SVC'] = classifier.score(
            self.validate[0], self.validate[1])

        print("Testing using SVC...")
        predicted = classifier.predict(self.test[0])
        expected = self.test[1]
        matrix = confusion_matrix(expected, predicted)
        self.writeConfusionMatrixCsv('SVC', matrix)
        print('\n', matrix, '\n')

        if self.__crossValidate:
            self.testResults[self.__which]['SVC'].append(
                classifier.score(self.test[0], self.test[1]))
        else:
            self.testResults[self.__which]['SVC'] = classifier.score(
                self.test[0], self.test[1])

        ####################################################################

        print("Learning using MLP...")
        classifier = MLP(solver='lbfgs', alpha=1e-5, random_state=1)
        classifier.fit(self.learn[0], self.learn[1])

        print("Validating using MLP...")
        self.validateResults[self.__which]['MLP'] = classifier.score(
            self.validate[0], self.validate[1])

        print("Testing using MLP...")
        predicted = classifier.predict(self.test[0])
        expected = self.test[1]
        matrix = confusion_matrix(expected, predicted)
        self.writeConfusionMatrixCsv('MLP', matrix)
        print('\n', matrix, '\n')

        if self.__crossValidate:
            self.testResults[self.__which]['MLP'].append(
                classifier.score(self.test[0], self.test[1]))
        else:
            self.testResults[self.__which]['MLP'] = classifier.score(
                self.test[0], self.test[1])

        ########################################################################

        print("Learning using DTC...")
        classifier = DTC(random_state=0)
        classifier.fit(self.learn[0], self.learn[1])

        print("Validating using DTC...")
        self.validateResults[self.__which]['DTC'] = classifier.score(
            self.validate[0], self.validate[1])

        print("Testing using DTC...")
        predicted = classifier.predict(self.test[0])
        expected = self.test[1]
        matrix = confusion_matrix(expected, predicted)
        self.writeConfusionMatrixCsv('DTC', matrix)
        print('\n', matrix, '\n')

        if self.__crossValidate:
            self.testResults[self.__which]['DTC'].append(
                classifier.score(self.test[0], self.test[1]))
        else:
            self.testResults[self.__which]['DTC'] = classifier.score(
                self.test[0], self.test[1])

        ##########################################################################

        print("Learning using KNC...")
        classifier = KNC(n_neighbors=self.__range - 1)
        classifier.fit(self.learn[0], self.learn[1])

        print("Validating using KNC...")
        self.validateResults[self.__which]['KNC'] = classifier.score(
            self.validate[0], self.validate[1])

        print("Testing using KNC...")
        predicted = classifier.predict(self.test[0])
        expected = self.test[1]
        matrix = confusion_matrix(expected, predicted)
        self.writeConfusionMatrixCsv('KNC', matrix)
        print('\n', matrix, '\n')

        if self.__crossValidate:
            self.testResults[self.__which]['KNC'].append(
                classifier.score(self.test[0], self.test[1]))
        else:
            self.testResults[self.__which]['KNC'] = classifier.score(
                self.test[0], self.test[1])
Code example #6
def clust(nparray, labels):
    # fraction of points whose nearest neighbour (other than the point
    # itself) carries the same label
    neighs = KNC(n_neighbors=2)
    neighs.fit(nparray, labels)
    _, pairs = neighs.kneighbors(nparray)  # each row: [self index, nearest-neighbour index]
    acc = sum([labels[a] == labels[b] for a, b in pairs]) / len(labels)
    return acc
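A hedged usage sketch for clust with hypothetical toy data (assumes KNC is imported as elsewhere in these examples, and that labels supports integer indexing, e.g. a NumPy array):

import numpy as np
from sklearn.neighbors import KNeighborsClassifier as KNC

points = np.array([[0.0, 0.0], [0.1, 0.0], [5.0, 5.0], [5.1, 5.0]])
labels = np.array([0, 0, 1, 1])
print(clust(points, labels))  # 1.0: each point's nearest neighbour shares its label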
Code example #7
import pandas
from sklearn.model_selection import KFold, cross_val_score
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.preprocessing import scale

data = pandas.read_csv(filepath_or_buffer='../wine.data', header=None)

attributes = data.iloc[:, 1:]  # .ix has been removed from pandas; use .iloc
classes = data[0]
kfold = KFold(n_splits=5, random_state=42, shuffle=True)

attributes = scale(attributes)
best_score = 0  # renamed from max to avoid shadowing the built-in
k_neighbor = None
for k in range(1, 51):
    knc = KNC(n_neighbors=k)
    m = cross_val_score(knc, attributes, classes, cv=kfold).mean()
    print('{} neighbors: {}'.format(k, m))
    if m > best_score:
        best_score = m
        k_neighbor = k

print('Maximum: {}\nNeighbors: {}'.format(best_score, k_neighbor))
Code example #8
File: KNN (1).py Project: m-sahana/KNN
# In[20]:

import seaborn as sns

# assumed setup omitted by the listing: numpy and the glass DataFrame
import numpy as np
import pandas as pd

glass = pd.read_csv("glass.csv")

sns.boxplot(x=glass['Type'])

# In[9]:

from sklearn.model_selection import train_test_split

train, test = train_test_split(glass, test_size=0.2)
from sklearn.neighbors import KNeighborsClassifier as KNC

# In[10]:

neigh = KNC(n_neighbors=7)
# columns 1-8 are used as features here (column 0, RI, is left out); column 9 is Type
neigh.fit(train.iloc[:, 1:9], train.iloc[:, 9])
train_acc_1 = np.mean(neigh.predict(train.iloc[:, 1:9]) == train.iloc[:, 9])
train_acc_1

# In[11]:

test_acc_1 = np.mean(neigh.predict(test.iloc[:, 1:9]) == test.iloc[:, 9])
test_acc_1

# In[12]:

neigh = KNC(n_neighbors=7)
neigh.fit(train.iloc[:, 1:9], train.iloc[:, 9])
train_acc = np.mean(neigh.predict(train.iloc[:, 1:9]) == train.iloc[:, 9])
train_acc
Code example #9
File: KNN.py Project: Yzp109062/programming
# assumed setup omitted by the listing: the plotting and classifier imports;
# get_random_point and get_classification are defined earlier in KNN.py
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier as KNC


def get_sample(dimensions):
	return [get_random_point(dimensions) for i in range(1000)]


dimensions = 2
sample = get_sample(dimensions)

xs = [i[0] for i in sample]
ys = [i[1] for i in sample]
classes = [get_classification(i) for i in sample]
colors = ["r" if i == 0 else "b" for i in classes]
plt.scatter(xs, ys, color=colors)
plt.show()

clf1 = KNC(10, weights="distance")
clf2 = KNC(10, weights="uniform")

clfs = [clf1, clf2]
for clf in clfs:
	clf.fit(sample, classes)

new_sample = get_sample(dimensions)
new_classes = [get_classification(i) for i in new_sample]
for clf in clfs:
	print(clf.score(new_sample, new_classes))

a1 = []
a2 = []
for k in range(1, 50):
	clf1 = KNC(k, weights="distance")
Code example #10
        the raw data with which the model will be trained
    training_classes: list
        the class to which each data sample corresponds
    """
    known_classes = []
    new_data = []
    for i, training_class in enumerate(training_data):
        for sample in training_class:  # 'sample' avoids shadowing training_data
            known_classes.append(training_classes[i])
            transformed_data = transform_data(sample)
            new_data.append(transformed_data)
    knc_model.fit(new_data, known_classes)


if __name__ == "__main__":
    FILENAME = os.path.dirname(__file__)
    os.chdir(FILENAME)
    model = KNC(n_neighbors=1)
    data = load_all_training_data("training sounds")
    classes, data = split_data(data)
    train_model(model, data, classes)

    _, sound = load_training_file("training sounds/click.wav")
    transformed_sound = transform_data(sound)
    prediction = predict(model, transformed_sound)
    print(f"This sound is a \"{prediction}\"")

    # NOTE: Save the trained model to a file for easy reuse without training
    # (this is for when a model uses more input data)
    # save_model(model, "models/trained.knn-model")
Code example #11
                 #"GNB",
                 "QDA"]
model_types = [LR,
               RFC,
               #ABC,
               MLPC,
               KNC,
               SVC,
               #DTC,
               #GNB,
               QDA]
models = [LR(),
          RFC(n_estimators=30),
          #ABC(),
          MLPC(),
          KNC(),
          SVC(probability=True),
          #DTC(),
          #GNB(),
          QDA()]


### experiment bright students math finance
N = 15000

minority_percent = 0.3
MIN = int(minority_percent * N)
MAJ = int((1 - minority_percent) * N)
# print(MIN, MAJ)
# p_S_brightmath = 0.9
# p_T_brightmath = 0.1
Code example #12
def __init__(self):
    self.clf = KNC()
Code example #13
File: Wine.py Project: ConsVin/DataAnalysis
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import scale
##
data = np.loadtxt('wine.data.txt', delimiter=",")
X = data[:, 1:14]
y = data[:, 0]

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# First, without scaling
quality = list()
for k in range(1, 50):
    knn = KNC(n_neighbors=k)  # create classifier
    # cross_val_score clones and fits the classifier on each fold itself,
    # so no separate knn.fit() call is needed here
    arr = cross_val_score(estimator=knn, X=X, y=y, scoring='accuracy', cv=kf)
    quality.append(arr.mean())

plt.plot(quality)
plt.xlabel('N of neighbors')
plt.ylabel('Quality')
plt.title('Cross validation quality')
maxValue = max(quality)
maxIndex = quality.index(maxValue)
optK = maxIndex + 1
print("Maximum value is %2.2f at %d" % (maxValue, optK))

# Write answers
Code example #14
                      batch_size=32, nb_epoch=50, verbose=0)
            #score = model.evaluate(X_test, Y_test, verbose=0)
            res_pred = model.predict_classes(X_test)
        else:
            if method_clsf == 'SVC':
                #print 'Method: SVC'
                clf = svm.SVC(kernel='rbf', C=10., gamma=0.1)

                # ["linear", "poly", "rbf", "sigmoid", "precomputed"]
                #print dfs[0], len(dfs), len(X_test)
                #for i in range(len(X_test)):
                #    print np.argmax(dfs[i]), res_pred[i]
            elif method_clsf == 'Logit':
                clf = LR(C=10.)
            elif method_clsf == 'kNN':
                clf = KNC()
            elif method_clsf == 'boosting':
                clf = XGBC()
            elif method_clsf == 'GNB':
                clf = GNB()
            else:
                clf = None

            clf.fit(X_train, y_train)
            res_pred = clf.predict(X_test)

            #dfs = clf.decision_function(X_test)

        res_by_seg = mf.get_corr_ratio(res_pred=res_pred, y_test=y_test, type='by_seg')
        res_by_categ = mf.get_corr_ratio(res_pred=res_pred, y_test=y_test, type='by_categ')
        one_res = (float(format(res_by_seg, '.3f')), float(format(res_by_categ, '.3f')))
Code example #15
File: Glass solution.py Project: AadityaRathod97/KNN
import pandas as pd
import numpy as np

glass = pd.read_csv("glass.csv")

# Training and Test data using
from sklearn.model_selection import train_test_split
train_glass, test_glass = train_test_split(
    glass, test_size=0.2)  # 0.2 => 20 percent of entire data

# KNN using sklearn
# Importing Knn algorithm from sklearn.neighbors
from sklearn.neighbors import KNeighborsClassifier as KNC

# for 3 nearest neighbours
neigh_glass = KNC(n_neighbors=3)
# fit on the training split only: fitting on the whole of glass would leak
# the held-out test rows into the model
neigh_glass.fit(train_glass.iloc[:, 0:9], train_glass.iloc[:, 9])

train_glass_acc = np.mean(
    neigh_glass.predict(train_glass.iloc[:, 0:9]) == train_glass.iloc[:, 9])

test_glass_acc = np.mean(
    neigh_glass.predict(test_glass.iloc[:, 0:9]) == test_glass.iloc[:, 9])

glass_pred = []

for i in range(3, 50, 2):
    neigh_glass = KNC(n_neighbors=i)
    neigh_glass.fit(train_glass.iloc[:, 0:9], train_glass.iloc[:, 9])
    train_glass_acc = np.mean(
        neigh_glass.predict(train_glass.iloc[:, 0:9]) == train_glass.iloc[:,
Code example #16
# assumed imports for this excerpt (the original file is not shown in full)
import matplotlib.pyplot as plt
from sklearn import datasets as DS
from sklearn.model_selection import train_test_split as TTS
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.naive_bayes import GaussianNB

iris = DS.load_iris()
iris_data = iris.data
# iris_data = preprocessing.scale(iris_data)
iris_label = iris.target

train_data, test_data, train_label, test_label = TTS(iris_data,
                                                     iris_label,
                                                     test_size=0.3,
                                                     random_state=3)

# KNN accuracy for k = 1..100
KNN_Accuracy = []
for k in range(1, 101):
    knn = KNC(n_neighbors=k)
    knn.fit(train_data, train_label)
    KNN_Accuracy.append(knn.score(test_data, test_label))  # test-set accuracy

plt.plot(KNN_Accuracy)
plt.title('Accuracy influenced by different neighbors in KNN')
plt.xlabel('neighbors (1-100)')
plt.ylabel('accuracy')
plt.show()

NB = GaussianNB()
NB.fit(train_data, train_label)

fig = plt.figure(figsize=(12, 15))
fig.tight_layout()
Code example #17
import data_extractor as data_ext
import cross_validation as cross_val
import numpy as np

data = data_ext.read_data("../data/book_data.xlsx")

author_data = data["author"]
genre_data = data["genre"]
features = np.column_stack((author_data, genre_data))
rating_data = data["user_rating"]
labels = np.array(rating_data)

# Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier as DTC

print "Decision Tree Classifier accuracy: \t", cross_val.leave_k_out_cross_validation(
    1, DTC(), features, labels)

# KNeighbors Classifier
from sklearn.neighbors import KNeighborsClassifier as KNC

print "KNeighbors Classifier accuracy: \t", cross_val.leave_k_out_cross_validation(
    1, KNC(), features, labels)

# SVM
from sklearn.svm import SVC

print "SVM accuracy: \t", cross_val.leave_k_out_cross_validation(
    1, SVC(gamma='auto'), features, labels)
Code example #18
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(bumpy_slow, grade_slow, color="r", label="slow")  # bumpiness on x, grade on y, matching the axis labels below
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
# plt.show()
################################################################################

# your code here!  name your classifier object clf if you want the
# visualization code (prettyPicture) to show you the decision boundary
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.ensemble import AdaBoostClassifier as ABC
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.tree import DecisionTreeClassifier as DTC

clf_list = [RFC(), ABC(), KNC()]
acc_list = []
# RandomForestClassifier
# n_estimators ~10
# criterion = 'entropy'/ 'gini'
# max_features ~0.4
# Max acc: 0.94

# KNeighborsClassifier
# n_neighbors: 8
# weights: uniform
# algorithm: any
# Max acc: 0.944

# AdaBoostClassifier
# base_estimator:
Code example #19
def asd(X, X2, Y, Y2):
    # 1-NN accuracy: for each point in X2, check whether the label of its
    # nearest neighbour in X matches the corresponding true label in Y2
    neighs = KNC(n_neighbors=1)
    neighs.fit(X, Y)
    _, pairs = neighs.kneighbors(X2)  # shape (len(X2), 1): indices into X
    return sum(Y[a[0]] == b for a, b in zip(pairs, Y2)) / len(Y2)
Code example #20
    "vae_batch_size": 1000
}
vptsne.fit(mnist.train._images, **fit_params)
ptsne.fit(mnist.train._images, **fit_params)
vptsne.save_weights("models/mnist_vptsne.ckpt", "models/mnist_vae.ckpt")
ptsne.save_weights("models/mnist_ptsne.ckpt")

#vptsne.load_weights("models/mnist_vptsne.ckpt", "models/mnist_vae.ckpt")
#ptsne.load_weights("models/mnist_ptsne.ckpt")

pca = PCA(n_components=2).fit(mnist.train._images)

estimators = [vptsne, ptsne, vae, pca]
transformed_train = [
    estimator.transform(mnist.train._images) for estimator in estimators
]
transformed_test = [
    estimator.transform(mnist.test._images) for estimator in estimators
]

print("Trustworthiness for test set (vptsne, ptsne, vae, pca):", [
    trustworthiness(mnist.test._images, transformed, n_neighbors=12)
    for transformed in transformed_test
])

print("1-NN score for test set (vptsne, ptsne, vae, pca)", [
    KNC(n_neighbors=1).fit(train, mnist.train._labels).score(
        test, mnist.test._labels)
    for train, test in zip(transformed_train, transformed_test)
])
Code example #21
    print(i)
    c = S1[i]

    #INSTANCES
    L2 = []
    S2 = []
    for n in rl(num[0]):
        if array[n][c] == 0:
            S2.append(n)
        else:
            L2.append(n)

    XTrain, XImpute = split(X, L2, axis=0)  ##TrainX,TestX
    YTrain, YImpute = split(Y0, L2, axis=0)  ##TrainY,Prediction

    Knn = KNC(n_neighbors=k0)
    Knn.fit(XTrain, YTrain)
    y = Knn.predict(XImpute)
    print(y)
    for j in rl(y):
        YImpute[j] = y[j]
    Yi.append(weave(YTrain, YImpute, L2, S2))
    X = weave(XTrain, XImpute, L2, S2)

Y_ = []
for i in range(3):
    Y_.append(list(np.dot(Yi[i], 1 / max(Yi[i]))))
Y_ = np.transpose(Y_)
Y = np.transpose(Yi)

Data = pd.DataFrame(np.transpose(
Code example #22
                                  'min_samples_split': [2, 3, 4]},
                                 use_kbest=True,
                                 use_scaler=False)

# Nearest Neighbors
clf, features_list = build_model(all_features, KNC(),
                                 {'n_neighbors': [2, 3, 4, 5],
                                  'weights': ['uniform', 'distance'],
                                  'leaf_size': [2, 3, 4, 5, 6],
                                  'p': [1, 2, 3]},
                                 use_kbest=True,
                                 use_scaler=True)
'''

# The best estimator found
estimator = KNC(n_neighbors=3, weights='uniform', leaf_size=2, p=3)

clf, features_list = build_model(all_features,
                                 estimator, {},
                                 use_kbest=True,
                                 k=[14],
                                 use_scaler=True)

# ----------------------------------------------------------
#   Assess New Features
# ----------------------------------------------------------

original_features = [
    f for f in features_list
    if f not in ['grand_total', 'to_poi_ratio', 'from_poi_ratio']
]
Code example #23
import pandas as pd
import seaborn as sns
import numpy as np

glass = pd.read_csv(
    "C:/Users/USER/Desktop/KNN-TECHNIQUE/knn-assignment/glass.csv")
from sklearn.model_selection import train_test_split
train, test = train_test_split(glass, test_size=0.2)

from sklearn.neighbors import KNeighborsClassifier as KNC
neigh = KNC(n_neighbors=2)
### model
neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])
## train accuracy
train_acc = np.mean(neigh.predict(train.iloc[:, 0:9]) == train.iloc[:, 9])

# test accuracy
test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])
## finding the best k value and checking accuracy
acc = []

for i in range(3, 50, 2):
    neigh = KNC(n_neighbors=i)
    neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])
    train_acc = np.mean(neigh.predict(train.iloc[:, 0:9]) == train.iloc[:, 9])
    test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])
    acc.append([train_acc, test_acc])

##plots

import matplotlib.pyplot as plt  # library to do visualizations
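# --- hedged completion: the listing cuts off after the matplotlib import;
# the train/test accuracy plot that typically follows (same idiom as code
# example #27)
plt.plot(np.arange(3, 50, 2), [i[0] for i in acc], "bo-")  # train accuracy
plt.plot(np.arange(3, 50, 2), [i[1] for i in acc], "ro-")  # test accuracy
plt.legend(["train", "test"])
plt.show()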
Code example #24
File: glass.py Project: hardikvora200/Python
glass.head(30)  # to get the top n rows use glass.head(n)
glass.tail(10)
##############################################################
# Training and Test data using
from sklearn.model_selection import train_test_split
# KNN using sklearn
# Importing Knn algorithm from sklearn.neighbors
def norm_func(i):
    x = (i - i.mean()) / (i.std())
    return (x)
# Normalized data frame (considering the numerical part of data)
# note: df_norm is prepared here, but the split below uses the raw data
df_norm = norm_func(glass.iloc[:, 0:9])
train, test = train_test_split(glass, test_size=0.2)  # 0.2 => 20 percent of entire data
from sklearn.neighbors import KNeighborsClassifier as KNC
# for 4 nearest neighbours
neigh = KNC(n_neighbors=4)
# Fitting with training data: columns 0-8 are the features; column 9 (Type)
# is the label and must not be included among the features
neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])
# train accuracy
train_acc = np.mean(neigh.predict(train.iloc[:, 0:9]) == train.iloc[:, 9])
# test accuracy
test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])
# for 5 nearest neighbours
neigh = KNC(n_neighbors=5)
# fitting with training data
neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])
# train accuracy
train_acc = np.mean(neigh.predict(train.iloc[:, 0:9]) == train.iloc[:, 9])
# test accuracy
test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])
# creating empty list variable
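# --- hedged completion: the listing truncates after the comment above;
# the k-sweep that typically follows (same pattern as code examples #3 and #23)
acc = []
for i in range(3, 50, 2):
    neigh = KNC(n_neighbors=i)
    neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])
    train_acc = np.mean(neigh.predict(train.iloc[:, 0:9]) == train.iloc[:, 9])
    test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])
    acc.append([train_acc, test_acc])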
Code example #25
File: assign1.py Project: Jyotsna1292/POC
import pandas as pd
import numpy as np

glass = pd.read_csv("C:/Users/USER/Downloads/glass.csv")

# Training and Test data using
from sklearn.model_selection import train_test_split
train, test = train_test_split(
    glass, test_size=0.2)  # 0.2 => 20 percent of entire data

# KNN using sklearn
# Importing Knn algorithm from sklearn.neighbors
from sklearn.neighbors import KNeighborsClassifier as KNC

# for 3 nearest neighbours
neigh = KNC(n_neighbors=3)

# Fitting with training data
neigh.fit(train.iloc[:, 0:9], train.iloc[:, 9])

# train accuracy
train_acc = np.mean(neigh.predict(train.iloc[:, 0:9]) == train.iloc[:, 9])  # accuracy 82.45%

# test accuracy
test_acc = np.mean(neigh.predict(test.iloc[:, 0:9]) == test.iloc[:, 9])  # 65.11%

# for 5 nearest neighbours
neigh = KNC(n_neighbors=5)
Code example #26
# coding: utf-8

# In[14]:


import numpy as np
from sklearn import datasets
from sklearn.neighbors import KNeighborsClassifier as KNC
iris = datasets.load_iris()
x = iris.data
y = iris.target
np.unique(y)
np.random.seed(123)
indices = np.random.permutation(len(x))
iris_x_train = x[indices[:-10]]
iris_y_train = y[indices[:-10]]
iris_x_test = x[indices[-10:]]
iris_y_test = y[indices[-10:]]
model = KNC()
model.fit(iris_x_train, iris_y_train)
# fitted-model repr echoed by the notebook:
# KNC(algorithm='auto', leaf_size=30, metric='minkowski', metric_params=None,
#     n_jobs=1, n_neighbors=5, p=2, weights='uniform')
out = model.predict(iris_x_test)
print("predicted:", out)
print("True     :", iris_y_test)

Code example #27
clf.best_estimator_
clf.best_score_

# Predicting test set results
Y_pred = clf.predict(X_test)
confusion_matrix(Y_test,Y_pred)
accuracy_score(Y_test, Y_pred)

# Another method without Hyperparameter tunning

# running KNN algorithm for 3 to 50 nearest neighbours(odd numbers) and 
# storing the accuracy values 
accuracy = []
from sklearn.neighbors import KNeighborsClassifier as KNC
for i in range(3,50,2):
    neigh = KNC(n_neighbors=i)
    neigh.fit(X_train,Y_train)
    train_acc = np.mean(neigh.predict(X_train)==Y_train)
    test_acc = np.mean(neigh.predict(X_test)==Y_test)
    accuracy.append([train_acc,test_acc])

import matplotlib.pyplot as plt # library to do visualizations 

# train accuracy plot 
plt.plot(np.arange(3,50,2),[i[0] for i in accuracy],"bo-")
# test accuracy plot
plt.plot(np.arange(3,50,2),[i[1] for i in accuracy],"ro-")
plt.legend(["train","test"])

# In both methods n_neighbors = 3 is the best; from the second method k = 23 also looks good, but accuracy decreases.
# Build the KNN classifier with k=3
Code example #28
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting classifier to the Training set
from sklearn.neighbors import KNeighborsClassifier as KNC
classifier = KNC(n_neighbors=5)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1,
Code example #29
# generate train test datasets
print('\n generating train test datasets...')
X_train, X_validation, Y_train, Y_validation = m_s.train_test_split(
    X, Y, test_size=validation_size, random_state=seed)

# define 'scoring' parameter as 'accuracy'
scoring = 'accuracy'

# define array to hold candidate models
models = []

# instantiate candidate models and add to array
print('\n instantiating candidate models...')
models.append(('LR', LR()))
models.append(('LDA', LDA()))
models.append(('KNC', KNC()))
models.append(('DTC', DTC()))
models.append(('GNB', GNB()))

# run test harness
results = []
names = []
print('\n running test harness...')
for name, model in models:
    # 'kfold' sets up the k-fold cross validation (shuffle=True is required
    # for random_state to have an effect in recent sklearn)
    kfold = m_s.KFold(n_splits=10, shuffle=True, random_state=seed)
    # 'cv_results' applies cross validation process to each model using the
    # training data i.e. features matrix X_train and results vector Y_train
    cv_results = m_s.cross_val_score(model,
                                     X_train,
                                     Y_train,
Code example #30
    if isinstance(estimator, VAE):  # Already fitted
        transformed = estimator.transform(levine_data)
    else:
        transformed = estimator.fit_transform(levine_data)
    print(estimator.__class__.__name__, "fit_transform completed in",
          curr_millis() - start, "(ms)")
    return transformed


transformed_all = [fit_transform_fn(estimator) for estimator in estimators]

print("Trustworthiness (vptsne, ptsne, vae, pca, umap, tsne)", [
    trustworthiness(levine_data[subset_b_indices],
                    transformed[subset_b_indices],
                    n_neighbors=12) for transformed in transformed_all
])

print("1-NN score for test set (vptsne, ptsne, vae, pca, umap, tsne)", [
    KNC(n_neighbors=1).fit(
        transformed[subset_a_indices], levine_labels[subset_a_indices]).score(
            transformed[subset_b_indices], levine_labels[subset_b_indices])
    for transformed in transformed_all
])

for i, transformed in enumerate(transformed_all):
    plt.clf()
    for label in np.unique(levine_labels):
        tmp = transformed[levine_labels == label]
        plt.scatter(tmp[:, 0], tmp[:, 1], s=0.2, c=color_palette[label])
    plt.show()