Example 1
def exercise03(neighbors,split):
    '''
        Data set: Iris
        Split the Iris dataset into train and test sets, with the split ratio between the two set by
        the function parameter split.
        Fit KNN on the training data with the number of neighbors equal to the function parameter neighbors.
        Generate and return an accuracy score using the test data that was split out.

    '''
    random_state = 21

    

    # ------ Place code below here \/ \/ \/ ------

    #splits train and test data
    X_train, X_test, y_train, y_test = tts(iris_df,iris_target,test_size = split,random_state = random_state, stratify = iris_target)
    
    #initializes knn with number of neighbors specified from input
    knn = KNN(n_neighbors = neighbors)
    
    #fits data from training data
    knn.fit(X_train,y_train)
    
    #generates score on test data
    knn_score = knn.score(X_test,y_test)
    

    # ------ Place code above here /\ /\ /\ ------


    return knn_score
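These exercise snippets lean on names defined elsewhere in the course scaffold: iris_df, iris_target, and the aliases tts and KNN. A minimal sketch of the presumed setup, assuming the data comes straight from scikit-learn's Iris loader:

# Presumed scaffolding for the exercise snippets; these names are assumptions
# inferred from usage, not shown in the original course code.
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split as tts
from sklearn.neighbors import KNeighborsClassifier as KNN

iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_target = iris.target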
Example 2
def exercise02(new_observations):
    '''
        Data set: Iris
        Fit a kNN model with neighbors=5 on the Iris dataset and predict the category of the observations
        passed in the argument new_observations. Return the target names of each prediction (not their
        encoded values, i.e. return 'setosa' instead of 0).
    '''

    # ------ Place code below here \/ \/ \/ ------
    # Create arrays for the features and the response variable
    X = iris.data
    y = iris.target
    
    # Create a KNN classifier with 5 neighbors
    knn = KNN(n_neighbors = 5)
    
    # Fit the classifier to the data
    knn.fit(X, y)
    
    # Predict the labels for new_observations once, then map them to target names
    classes = {0: 'setosa', 1: 'versicolor', 2: 'virginica'}
    predictions = knn.predict(new_observations)
    iris_predictions = [classes[p] for p in predictions]
        
    # ------ Place code above here /\ /\ /\ ------
 
    return iris_predictions
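A more compact alternative, assuming iris was loaded via sklearn.datasets.load_iris so that iris.target_names is available, maps the encoded predictions to names in one step:

# Equivalent, vectorized lookup of the target names (assumes iris.target_names exists)
iris_predictions = list(iris.target_names[knn.predict(new_observations)])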
Example 3
def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))
    # generate predictions
    predictions = []
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]
    clf = BaggingClassifier(
        KNN(n_neighbors=10,
            weights='uniform',
            algorithm='auto',
            leaf_size=10,
            p=1,
            metric='minkowski',
            metric_params=None,
            n_jobs=1))
    clf.fit(X, y)
    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]
    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 4
def handwritingClassTest(
    file1dir, file2dir
):  # Handwritten digit recognition. file1dir holds the training data, file2dir
    # the test data; file names look like 1_12, i.e. the 12th image of class 1.
    hwlabels = []  # training set labels
    trainingFileList = listdir(file1dir)  # all file names in the folder
    m = len(trainingFileList)  # number of training files
    trainingMat = np.zeros((m, 1024))  # one 1024-vector per training file
    for i in range(m):
        fileNameStr = trainingFileList[i]  # file name
        classNumber = int(fileNameStr.split('_')[0])  # class is the part before '_'
        hwlabels.append(classNumber)  # label vector
        trainingMat[i, :] = img2vector(file1dir + '/%s' % (fileNameStr))
    neigh = KNN(n_neighbors=3, algorithm='brute')  # scikit-learn's KNN classifier
    neigh.fit(trainingMat, hwlabels)  # fit the model on trainingMat and its labels
    testFileList = listdir(file2dir)
    errorCount = 0.0  # misclassification counter
    mTest = len(testFileList)  # number of test files
    for i in range(mTest):
        fileNameStr = testFileList[i]
        classNumber = int(fileNameStr.split('_')[0])
        vectorUnderTest = img2vector(file2dir + '/%s' % (fileNameStr))  # %s filled by the file name
        classifierResult = neigh.predict(vectorUnderTest)[0]  # predicted label
        print("Prediction: %d\tGround truth: %d" % (classifierResult, classNumber))
        if classifierResult != classNumber:
            errorCount += 1.0
    print("Misclassified %d samples in total\nError rate: %f%%" % (errorCount, errorCount * 100 / mTest))
Example 5
    def pred(self, x_support, x_query):
        way, shot = int(x_support.size(0)), int(x_support.size(1))

        knn = KNN(n_neighbors=1, )

        x_support = x_support.view(-1, 3, 32, 32)
        x_support = self.vgg16(x_support)
        x_support = x_support.view(x_support.size(0), -1)
        x_support = self.fc_1(x_support)
        # average the shot embeddings per class to get one prototype per class
        x_support = tor.mean(x_support.view(way, shot, -1),
                             dim=1).cpu().detach().numpy()
        y_support = np.arange(way)  # one label per class prototype

        knn.fit(x_support, y_support)

        pred_list = []

        for query in x_query.view(-1, 3, 32, 32):
            query_feature = self.vgg16(query.view(1, 3, 32, 32))
            query_feature = self.fc_1(
                query_feature.view(query_feature.size(0), -1))
            query_feature = query_feature.cpu().detach().numpy()
            pred = knn.predict(query_feature)
            pred_list.append(int(pred[0]))

        return np.array(pred_list)
Example 6
def trainingarray():
    trainMat = np.array([[1, 2, 3], [2, 3, 5], [55, 33, 66], [55, 44, 66]])
    label = np.array([0, 0, 1, 1])
    neigh = KNN(n_neighbors=3, algorithm='auto', weights='distance', n_jobs=1)
    neigh.fit(trainMat, label)
    testmat = np.array([[2, 3, 4], [55, 33, 66]])
    print(neigh.predict(testmat))
Example 7
def k_nearest_neighbors(M, m, D, d, feature_mean, diag, accuracy):

    #k-near neighbor
    training_start = t.time()
    knn = KNN()
    knn.fit(M, D)
    training_end = t.time()
    print("\nKNN\nTraining time: {0:.0000001} sec".format(training_end -
                                                          training_start))

    testing_start = t.time()
    p = knn.predict(m)
    testing_end = t.time()
    print("Testing/Predict time: {0:.0000001} sec".format(testing_end -
                                                          testing_start))

    validation = cross_val_score(knn, feature_mean, diag, cv=5)
    accuracy.append(accuracy_score(p, d))

    print("Accuracy: {0:.01%}".format(accuracy_score(p, d)))
    print("Cross validation result: {0:.01%} (+/- {1:.01%})".format(
        num.mean(validation),
        num.std(validation) * 2))
    print(classification_report(d, p))
Example 8
def get_new_model(self):
    if self.model_type.split("-")[-1] == "Regressor":
        if self.model_type == "Linear-Regressor":
            from sklearn.linear_model import LinearRegression
            self.model = LinearRegression(**self.model_args)
        elif self.model_type == "Support-Vector-Regressor":
            from sklearn.svm import SVR
            self.model = SVR(**self.model_args)
        elif self.model_type == "Decision-Tree-Regressor":
            from sklearn.tree import DecisionTreeRegressor as DTR
            self.model = DTR(**self.model_args)
        elif self.model_type == "Random-Forest-Regressor":
            from sklearn.ensemble import RandomForestRegressor as RFR
            self.model = RFR(**self.model_args)
    else:
        if self.model_type == "Logistic-Regression-Classifier":
            from sklearn.linear_model import LogisticRegression
            self.model = LogisticRegression(**self.model_args)
        elif self.model_type == "KNN-Classifier":
            from sklearn.neighbors import KNeighborsClassifier as KNN
            self.model = KNN(**self.model_args)
        elif self.model_type == "Support-Vector-Classifier":
            from sklearn.svm import SVC
            self.model = SVC(**self.model_args)
        elif self.model_type == "Naive-Bayes-Classifier":
            from sklearn.naive_bayes import GaussianNB as GNB
            self.model = GNB(**self.model_args)
        elif self.model_type == "Decision-Tree-Classifier":
            from sklearn.tree import DecisionTreeClassifier as DTC
            self.model = DTC(**self.model_args)
        elif self.model_type == "Random-Forest-Classifier":
            from sklearn.ensemble import RandomForestClassifier as RFC
            self.model = RFC(**self.model_args)
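A minimal usage sketch for the dispatch above, assuming a hypothetical wrapper class carrying the model_type and model_args attributes (the real class is not shown in the snippet):

# Hypothetical wrapper to exercise the dispatch above; the real class is not shown.
class ModelWrapper:
    get_new_model = get_new_model  # reuse the function defined above as a method

    def __init__(self, model_type, model_args=None):
        self.model_type = model_type
        self.model_args = model_args or {}

wrapper = ModelWrapper("KNN-Classifier", {"n_neighbors": 3})
wrapper.get_new_model()
print(wrapper.model)  # KNeighborsClassifier(n_neighbors=3)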
Example 9
def exercise03(neighbors,split):
    '''
        Data set: Iris
        Split the Iris dataset into train and test sets, with the split ratio between the two set by
        the function parameter split.
        Fit KNN on the training data with the number of neighbors equal to the function parameter neighbors.
        Generate and return an accuracy score using the test data that was split out.

    '''

    
    # ------ Place code below here \/ \/ \/ ------

    ir = pd.DataFrame(iris.data)
    ir.columns = iris.feature_names 
    
    X = np.array(ir)
    y = np.array(iris.target)

    X_train, X_test, y_train, y_test = tts(X, y, test_size=split, random_state = 21)
    
    knn = KNN(n_neighbors=neighbors)
    knn.fit(X_train, y_train)
    knn_score = knn.score(X_test, y_test)

    # ------ Place code above here /\ /\ /\ ------

    return knn_score
Example 10
def tuning_evaluation(X_train, y_train):
    '''
    Grid search to tune hyperparameters and evaluate accuracy. Returns the best classifier.
    :param X_train: numpy array [n_samples, n_features]
    :param y_train: numpy array [n_samples]
    :return: best classifier.
    '''

    pipe_KNN = Pipeline([('scl', StandardScaler()),
                         ('pca', PCA(n_components=2)),
                         ('clf', KNN(p=2, metric='minkowski'))])
    param_range = [1, 2, 4, 6, 8, 10, 20, 30, 50, 100]
    gs = GridSearchCV(estimator=pipe_KNN,
                      param_grid=[{
                          'clf__n_neighbors': param_range
                      }],
                      scoring='accuracy',
                      cv=10,
                      n_jobs=1)
    gs = gs.fit(X_train, y_train)
    print('The grid search best score is ', gs.best_score_)
    print('The best parameters according to the grid search algorithm are ',
          gs.best_params_)

    return gs.best_estimator_
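A usage sketch under the assumption that the snippet's missing imports are the usual scikit-learn ones (including the KNN alias used throughout this page):

# Assumed imports for the snippet above
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=21)
best_clf = tuning_evaluation(X_train, y_train)
print('Test accuracy:', best_clf.score(X_test, y_test))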
Example 11
    def __init__(self,
                 k,
                 weights=[0.4 / 3] * 3 + [0.3 / 27] * 27 + [0.3 / 108] * 108):
        self.weights = weights
        self.isFit = False

        # print(currentdir + '/data/filenames.txt')

        with open(currentdir + '/data/filenames.txt') as f:
            self.filenames = [_.strip('\n') for _ in f.readlines()]

        if 'hists.npy' not in os.listdir(currentdir + '/data/'):
            self.get_hists()

        self.hists = np.load(currentdir + '/data/hists.npy', allow_pickle=True)

        # print(self.hists)

        self.nn = KNN(k,
                      metric=ImageHistogram.compare_hists,
                      metric_params={
                          'n_features': 138,
                          'method': cv2.HISTCMP_BHATTACHARYYA,
                          'feature_weights': self.weights
                      })
Example 12
def plot_boundary(bound_points, bound_labels, real_points, real_labels, points,
                  args, name):

    if args.nn:
        knn = KNN()

        print(f'Starting KNN - {name}')
        knn.fit(bound_points, (bound_labels > 0.5))
        background = knn.predict(points)
        plt.scatter(points[:, 0], points[:, 1], c=background, alpha=0.2)

    else:
        plt.scatter(bound_points[:, 0],
                    bound_points[:, 1],
                    c=bound_labels,
                    s=300,
                    alpha=0.2)

    plt.scatter(real_points[:, 0],
                real_points[:, 1],
                c=real_labels,
                linewidths=1,
                edgecolors='black')
    plt.axis('off')
    plt.savefig(f'{args.save}_{name}.png', bbox_inches='tight', pad_inches=0)
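The points argument is presumably a dense grid covering the plot area so that the KNN predictions can paint the background; a sketch of how such a grid might be built (the extents and resolution are assumptions):

import numpy as np

# Hypothetical grid for the background predictions; extents and resolution are assumptions.
xx, yy = np.meshgrid(np.linspace(-5, 5, 200), np.linspace(-5, 5, 200))
points = np.column_stack([xx.ravel(), yy.ravel()])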
Example 13
def exercise03(neighbors, split):
    '''
        Data set: Iris
        Split the Iris dataset into train and test sets, with the split ratio between the two set by
        the function parameter split.
        Fit KNN on the training data with the number of neighbors equal to the function parameter neighbors.
        Generate and return an accuracy score using the test data that was split out.
    '''
    #random_state = 21

    # ------ Place code below here \/ \/ \/ ------

    X = iris.data
    y = iris.target
    X_train, X_test, y_train, y_test = tts(X,
                                           y,
                                           test_size=split,
                                           random_state=21,
                                           stratify=iris.target)

    #Fitting K-NN classifier to the training set
    classifier = KNN(n_neighbors=neighbors)
    classifier.fit(X_train, y_train)

    #test score - ratio of # of predictions found correct
    knn_score = classifier.score(X_test, y_test)

    # ------ Place code above here /\ /\ /\ ------

    return knn_score
Example 14
def exercise02(new_observations):
    '''
        Data set: Iris
        Fit a kNN model with neighbors=5 on the Iris dataset and predict the category of the observations
        passed in the argument new_observations. Return the target names of each prediction (not their
        encoded values, i.e. return 'setosa' instead of 0).
    '''

    # ------ Place code below here \/ \/ \/ ------

    X = iris.data
    y = iris.target
    #split train and test sets
    X_train, X_test, y_train, y_test = tts(X, y, random_state=0)

    #Fitting K-NN classifier to the training set
    classifier = KNN(n_neighbors=5)
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(new_observations)

    iris_predictions = iris['target_names'][y_pred]
    # ------ Place code above here /\ /\ /\ ------

    return iris_predictions
Example 15
def knn_classification_nca(X_train, Y_train, X_test, state=20):
    """A function that applies grid and random search to tune model and also gives a prediction, it also uses the
    NCA transformation of the data which seems to improve performance"""

    # Creating a score and parameters to search from
    scoring = {"f1": "f1_weighted"}

    grid_param2_nca = {
        "knn__n_neighbors": range(1, 11),
        "knn__p": range(1, 6),
        "knn__metric": ["minkowski", "canberra", "hamming"]
    }

    with_nca = namedtuple("with_nca",
                          ["fitted_grid", "y_grid", "grid_train_Y"])

    # Model setting
    nca = NCA(random_state=state)
    pipe = Pipeline(steps=[("nca", nca), ("knn", KNN(n_jobs=-1))])
    knn_grid = GridSearchCV(pipe,
                            grid_param2_nca,
                            scoring=scoring,
                            refit="f1",
                            cv=5)

    # Model training with nca
    fitted_grid = knn_grid.fit(X_train, Y_train)
    # Model predictions with nca
    y_grid = fitted_grid.best_estimator_.predict(X_test)
    # training data prediction with nca
    grid_train_Y = fitted_grid.best_estimator_.predict(X_train)
    nca_model_list = with_nca(*[fitted_grid, y_grid, grid_train_Y])

    return nca_model_list
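The NCA name here presumably aliases scikit-learn's NeighborhoodComponentsAnalysis; a sketch of the imports this function appears to assume:

# Assumed imports for the snippet above
from collections import namedtuple
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.neighbors import NeighborhoodComponentsAnalysis as NCA
from sklearn.pipeline import Pipeline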
Example 16
def handwritingClassify():
    """Handwritten digit recognition.
    :return:
    """
    hwLabels = []  # training set labels
    trainingFileList = listdir('trainingDigits')
    number_of_files = len(trainingFileList)
    trainingMat = np.zeros((number_of_files, 1024))
    for i in range(number_of_files):
        fileNameStr = trainingFileList[i]
        classNumber = int(fileNameStr.split('_')[0])
        hwLabels.append(classNumber)
        # classNumber=int(fileNameStr[0])
        trainingMat[i, :] = img2vector('trainingDigits/%s' % (fileNameStr))
    neigh = KNN(n_neighbors=3, algorithm='auto')
    neigh.fit(trainingMat, hwLabels)
    testFileList = listdir('testDigits')
    errorCount = 0.0
    number_of_files_test = len(testFileList)
    for i in range(number_of_files_test):
        fileNameStr = testFileList[i]
        classNumber = int(fileNameStr.split('_')[0])
        vector_of_test = img2vector('testDigits/%s' % (fileNameStr))
        vector_of_test = np.asarray(vector_of_test)
        vector_of_test = np.expand_dims(vector_of_test, axis=0)
        classfier_result = neigh.predict(vector_of_test)[0]
        print("Prediction: %d\tGround truth: %d" % (classfier_result, classNumber))
        if classfier_result != classNumber:
            errorCount += 1.0
    print("总共错了%d个数据\n错误率为%f%%" % (errorCount, errorCount / number_of_files_test * 100))
Example 17
def knn_classification(X_train, Y_train, X_test):
    """A function that applies grid and random search to tune model and also gives a prediction, it also uses the
    NCA transformation of the data which seems to improve performance"""

    # Creating a score and parameters to search from
    scoring = {"f1": "f1_weighted"}
    grid_param2 = {
        "n_neighbors": range(1, 11),
        "p": range(1, 6),
        "metric": ["minkowski", "canberra", "hamming"]
    }

    no_nca = namedtuple("no_nca", ["fitted_grid", "y_grid", "grid_train_Y"])

    # Model setting
    knn_grid = GridSearchCV(KNN(n_jobs=-1),
                            grid_param2,
                            scoring=scoring,
                            refit="f1",
                            cv=5)

    # Model training
    fitted_grid = knn_grid.fit(X_train, Y_train)
    # Model predictions
    y_grid = fitted_grid.best_estimator_.predict(X_test)
    # training data prediction
    grid_train_Y = fitted_grid.best_estimator_.predict(X_train)

    no_nca_model_list = no_nca(*[fitted_grid, y_grid, grid_train_Y])

    return no_nca_model_list
Example 18
def handWriteTest():
    # Read the training data folder
    trainMat, trainLables = importData('trainingDigits')
    # Build the kNN classifier. The full signature is:
    # neigh = KNN(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=1)
    # n_neighbors: the k of k-NN, default 5
    # weights: 'uniform' (equal weights, the default) or 'distance' (weight by inverse distance)
    # algorithm: nearest-neighbor search algorithm, default 'auto'; also 'ball_tree', 'kd_tree', 'brute'
    # leaf_size: default 30, leaf size of the constructed kd-tree or ball tree
    # metric: distance metric, default 'minkowski' (Euclidean when p=2)
    # p: power parameter of the Minkowski metric
    # metric_params: extra keyword arguments for the metric; the default None is fine
    # n_jobs: number of parallel jobs for the neighbor search, default 1; -1 uses all CPU cores
    knnNeigh = KNN(n_neighbors=3, algorithm='kd_tree')
    # Fit the classifier on the training data and labels
    knnNeigh.fit(trainMat, trainLables)
    # Load the test data
    testFileList = listdir('testDigits')
    m = len(testFileList)
    errorCount = 0
    for i in range(m):
        # File name
        fileName = testFileList[i]
        # Label encoded in the file name
        className = int(fileName.split("_")[0])
        # Convert the 32x32 image into a 1x1024 vector
        testnMat = img2Vector('testDigits/%s' % fileName)
        result = knnNeigh.predict(testnMat)[0]
        print('Prediction: %s, ground truth: %s' % (result, className))
        if result != className:
            errorCount += 1
            print("----- wrong prediction -----")
    print("Error rate: %f%%" % (errorCount / float(m) * 100))
Example 19
    def __init__(self, Np=100):
        self.ctr = 1
        self.laser_tf_br = tf.TransformBroadcaster()
        self.laser_frame = rospy.get_param('~laser_frame')
        self.pub_particlecloud = rospy.Publisher('/particlecloud',
                                                 PoseArray,
                                                 queue_size=60)
        self.pub_estimated_pos = rospy.Publisher('/MCL_estimated_pose',
                                                 PoseWithCovarianceStamped,
                                                 queue_size=60)
        self.pub_particlecloud2fusion = rospy.Publisher(
            '/particlecloud2fuse_out', PoseArray, queue_size=60)
        self.scan = MapClientLaserScanSubscriber()
        self.last_time = rospy.Time.now().to_sec()
        self.Np = Np
        self.init()
        self.i_TH = 0.0
        self.nbrs = KNN(n_neighbors=1,
                        algorithm='ball_tree').fit(self.scan.obs())
        self.M_idxs = (np.linspace(0,
                                   len(self.scan.z.ranges) - 1,
                                   20)).astype(np.int32)
        rospy.Subscriber('/odom', Odometry, self.get_odom)
        rospy.Subscriber('/initialpose', PoseWithCovarianceStamped,
                         self.init_pose)
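fit is called with X only, so KNN in this snippet presumably aliases sklearn.neighbors.NearestNeighbors (the unsupervised index) rather than the classifier. A sketch of the typical lookup against such an index, with stand-in data:

import numpy as np
from sklearn.neighbors import NearestNeighbors as KNN

# Hypothetical lookup: distance from each query point to its nearest map point
obs = np.random.rand(100, 2)   # stand-in for self.scan.obs()
nbrs = KNN(n_neighbors=1, algorithm='ball_tree').fit(obs)
dist, idx = nbrs.kneighbors(np.random.rand(5, 2))
print(dist.ravel())            # nearest-neighbor distance per query point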
Example 20
    def predict(self, testX, testY, classifier=KNN(n_neighbors=3, n_jobs=-1), filename="individual"):
        # ".txt" is appended here, so pass filename without an extension
        f = open(filename + ".txt", mode="w")
        for fl in self.hof[0]:
            f.write(fl["pool"]+"\n")
            for v in fl["filter"]:
                for i in range(len(v)-1):
                    f.write(str(v[i])+",")
                f.write(str(v[-1])+"\n")
            f.write("\n")
        f.close()
        print("Evaluating")
        trainingFeat=np.array([self.get_feature_values(self.hof[0],i) for i in self.trainingX])
        testFeatures=np.array([self.get_feature_values(self.hof[0],i) for i in np.array(testX)])
        fitted = classifier.fit(trainingFeat, self.trainingY)
        pre = fitted.predict(testFeatures)
        fit = 0
        for p, tv in zip(pre, testY):
            if p == tv:
                fit += 1
        fit /= len(testY)
        tst=open("test_features.csv",mode="w")
        trn = open("train_features.csv", mode="w")
        for x,l in zip(trainingFeat,self.trainingY):
            for f in x:
                trn.write(str(round(f,4))+",")
            trn.write(str(l)+"\n")
        for x,l in zip(testFeatures,testY):
            for f in x:
                tst.write(str(round(f,4))+",")
            tst.write(str(l)+"\n")
        tst.close()
        trn.close()
        return fit
Example 21
def handwritingClassTest():
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = np.zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        classNumber = int(fileNameStr.split('_')[0])
        hwLabels.append(classNumber)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % (fileNameStr))

    neigh = KNN(n_neighbors=3, algorithm='auto')
    neigh.fit(trainingMat, hwLabels)
    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for i in range(mTest):
        fileNameStr = testFileList[i]
        classNumber = int(fileNameStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % (fileNameStr))

        classifierResult = neigh.predict(vectorUnderTest)[0]
        print('Prediction: %d\tGround truth: %d' % (classifierResult, classNumber))
        if classifierResult != classNumber:
            errorCount += 1.0
    print("Misclassified %d samples in total\nError rate: %f%%" % (errorCount, errorCount / mTest * 100))
Example 22
    def __init__(self, trainingX, trainingY, classifier=KNN(n_neighbors=3, n_jobs=-1),
                 populationSize=100, mutationRate=0.3, crossoverRate=0.7,
                 kfolds=-1, ngens=100, numberOfFilters=20):
        creator.create("FitnessMin", base.Fitness, weights=(1.0,))
        creator.create("Individual", list, fitness=creator.FitnessMin,
                       __eq__=lambda self, other: np.array_equal(self[0], other[0]))
        self.mutationRate=mutationRate
        self.crossoverRate=crossoverRate
        self.ngens=ngens
        self.numberOfFilters=numberOfFilters
        self.trainingX=np.array(trainingX)
        self.trainingY=np.array(trainingY)
        toolbox = base.Toolbox()

        toolbox.register("individual",generate, icls=creator.Individual,numberOfFilters=numberOfFilters)
        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
        TX,TY,FX,FY=self.splitData(trainingX,trainingY)
        toolbox.register("evaluate", self.fitness, trainingX=TX, trainingY=TY,fitnessX=FX,fitnessY=FY,classifier=classifier)

        toolbox.register("mate", self.cross)
        toolbox.register("mutate", self.mutation)

        stats_fit = tools.Statistics(lambda ind: ind.fitness.values)

        mstats = tools.MultiStatistics(fitness=stats_fit)
        mstats.register("std", np.std)
        mstats.register("min", np.min)
        mstats.register("max", np.max)
        mstats.register("av", np.average)

        toolbox.register("select", selection, tournsize=3,icls=creator.Individual,numberOfFilters=numberOfFilters)
        self.hof = tools.HallOfFame(1)
        self.pop = toolbox.population(populationSize)
        self.mstats=mstats
        self.toolbox=toolbox
Example 23
def impact_of_k_on_knn():
    dataset_data = get_dataset_data(1)
    k_list = []
    r_sq_list = []
    for k in range(1, 346):
        predicted_knn, r_knn = fill(KNN(n_neighbors=k), dataset_data)
        k_list.append(k)
        r_sq_list.append(r_knn)

    print('first k values:')
    print(k_list[:5])
    print(r_sq_list[:5])

    max_r = max(r_sq_list)
    max_k = r_sq_list.index(max_r) + 1
    print('max R^2: %.3f, k: %d' % (max_r, max_k))

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    ax1.plot(k_list, r_sq_list)
    ax1.set_xlabel('k')
    ax1.set_ylabel('$R^2$')
    ax1.set_title('impact of k on the performance of KNN, k: [1, 345]')

    ax2_k_start = 0
    ax2_k_limit = 79
    ax2.plot(k_list[ax2_k_start:ax2_k_limit],
             r_sq_list[ax2_k_start:ax2_k_limit])
    ax2.set_xlabel('k')
    ax2.set_title('k: [%d, %d]' % (ax2_k_start + 1, ax2_k_limit + 1))

    plt.show()
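Neither fill nor get_dataset_data is shown; given the R^2 metric, KNN here presumably aliases KNeighborsRegressor. A stand-in sketch of fill under the assumption that dataset_data is an (X, y) pair:

import numpy as np
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_predict

# Hypothetical stand-in for the fill() helper used above.
# Assumptions: dataset_data unpacks to (X, y) and the estimator is a regressor.
def fill(estimator, dataset_data):
    X, y = dataset_data
    predicted = cross_val_predict(estimator, X, y, cv=5)
    return predicted, r2_score(y, predicted)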
Example 24
def getKNN(data, target, N):
    Y = data[target]
    X = data.drop(target, axis=1)
    model = KNN(n_neighbors=N)
    model.fit(X, Y)
    #cv = cross_val_score(model,X,Y,cv=N_FOLDS)
    return model
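A usage sketch with a toy DataFrame; the column names and values are hypothetical:

import pandas as pd

# Hypothetical data to exercise getKNN
df = pd.DataFrame({'x1': [0, 1, 10, 11], 'x2': [0, 1, 10, 11], 'label': [0, 0, 1, 1]})
model = getKNN(df, 'label', N=3)
print(model.predict([[0.5, 0.5]]))  # expected: [0]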
Example 25
def exercise03(neighbors,split):
    '''
        Data set: Iris
        Split the Iris dataset into train and test sets, with the split ratio between the two set by
        the function parameter split.
        Fit KNN on the training data with the number of neighbors equal to the function parameter neighbors.
        Generate and return an accuracy score using the test data that was split out.
    '''
    random_state = 21 

    # ------ Place code below here \/ \/ \/ ------
    # Create arrays for the features and the response variable
    X = iris.data
    y = iris.target

    # Split into training and test set    
    X_train, X_test, y_train, y_test = tts(X, y, test_size = split, random_state = random_state, stratify = y)
    
    # Create a KNN classifier with n neighbors
    knn = KNN(n_neighbors = neighbors)
    
    # Fit the classifier to the training data
    knn.fit(X_train, y_train)
    
    # Return the accuracy
    knn_score = knn.score(X_test, y_test)

    # ------ Place code above here /\ /\ /\ ------

    return knn_score
Example 26
def draw_knn(dataset):
    data_x, data_y = dataset.data, dataset.target
    info = np.zeros([12, 5])
    for _ in range(100):
        data = list()
        for i in range(1, 25, 2):
            data.append(
                (i, *pipeline(KNC(n_neighbors=i), data_x, data_y, label='my'),
                 *pipeline(KNN(n_neighbors=i, algorithm='brute'),
                           data_x,
                           data_y,
                           label='sk')))
        np.add(info, np.array(data), out=info)

    np.multiply(info, 0.01, out=info)

    plt.figure()
    plt.plot(info[:, 0], info[:, 1], label='my')
    plt.plot(info[:, 0], info[:, 3], label='sklearn')
    plt.xlabel('k'), plt.ylabel('accuracy')
    plt.legend(loc='best')
    plt.show()

    plt.figure()
    plt.plot(info[:, 0], info[:, 2], label='my')
    plt.plot(info[:, 0], info[:, 4], label='sklearn')
    plt.xlabel('k'), plt.ylabel('time (sec)')
    plt.legend(loc='best')
    plt.show()

    mean = info.mean(axis=0)
    print(f'avg acc  my: {mean[1]}, sk: {mean[3]}')
    print(f'avg time my: {mean[2]}, sk: {mean[4]}')
    return
Example 27
def handwritingClassTest():
    # training set labels
    hwlabel = []
    # file names in the training directory
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    # initialize the training matrix
    trainingMat = np.zeros((m, 1024))

    for i in range(m):
        fileNameStr = trainingFileList[i]
        # ground-truth label from the file name
        classNumber = int(fileNameStr.split('_')[0])
        hwlabel.append(classNumber)
        # store each file's 1x1024 vector as a row of trainingMat
        trainingMat[i, :] = img2vector('trainingDigits/%s' % (fileNameStr))
    # build the KNN classifier
    neigh = KNN(n_neighbors=3, algorithm='auto')
    # fit the model
    neigh.fit(trainingMat, hwlabel)
    # file names in the test directory
    testFileList = listdir('testDigits')
    # misclassification counter
    errorCount = 0.0
    mTest = len(testFileList)
    for i in range(mTest):
        testNameStr = testFileList[i]
        classNumber = int(testNameStr.split('_')[0])
        vectorTest = img2vector('testDigits/%s' % (testNameStr))
        classiFileResult = neigh.predict(vectorTest)[0]
        print("Prediction: %d\tGround truth: %d" % (classiFileResult, classNumber))
        if classiFileResult != classNumber:
            errorCount += 1.0
    print("Misclassified %d samples in total\nError rate: %f%%" % (errorCount, errorCount / mTest * 100))
Example 28
def hand_writing_test():
    hw_labels = []
    training_file_list = listdir('trainingDigits')
    m = len(training_file_list)
    training_data_mat = np.zeros((m, 1024))
    for index, training_file_name in enumerate(training_file_list):
        class_label = int(training_file_name.split('_')[0])
        hw_labels.append(class_label)
        training_data_mat[index, :] = creat_img_vector('trainingDigits/%s' %
                                                       (training_file_name))
    neigh = KNN(n_neighbors=3, algorithm='auto')
    neigh.fit(training_data_mat, hw_labels)
    test_file_list = listdir('testDigits')
    n = len(test_file_list)
    error_count = 0
    for test_file_name in test_file_list:
        class_label = int(test_file_name.split('_')[0])
        testing_vector = creat_img_vector('testDigits/%s' % (test_file_name))
        classifyer_result = neigh.predict(testing_vector)[0]
        if classifyer_result != class_label:
            error_count += 1
    print('testing error %d data, error rate: %.2f%%' %
          (error_count, error_count / n * 100))
Example 29
def myPredict():
    csv_file = 'WinePredictor.csv'
    data = pd.read_csv(csv_file)
    print(data.head())
    features = data.drop("Class", axis=1)
    print(features.head())
    target = data["Class"]
    print(target.head())

    data_train, data_test, target_train, target_test = train_test_split(
        features, target, test_size=0.5)

    classifier = KNN()
    classifier.fit(data_train, target_train)
    predictions = classifier.predict(data_test)
    accuracy = accuracy_score(target_test, predictions)
    print('Accuracy using KNN: ', accuracy * 100, '%')

    classifier2 = tree.DecisionTreeClassifier()
    classifier2.fit(data_train, target_train)
    predictions2 = classifier2.predict(data_test)
    accuracy2 = accuracy_score(target_test, predictions2)
    print('Accuracy using DT: ', accuracy2 * 100, '%')
Example 30
def train_model(value2, value3, value4):
    if len(value2) < 1:
        return "Please select at least 1 column"
    else:
        X_train, X_test, y_train, y_test = train_test_split(df[value2], df['species'], test_size=value3)

        if value4 == "KNN":
            clf = KNN()
        elif value4 == "SVC":
            clf = SVC(gamma='auto', probability=True)

        clf.fit(X_train, y_train)
        pred = clf.predict(X_test)
        acc = np.round(accuracy_score(pred, y_test), 3)

        # compute the class probabilities once and reuse them for all three axes
        proba = clf.predict_proba(X_test)
        fig5 = px.scatter_3d(x=proba[:, 0], y=proba[:, 1], z=proba[:, 2], color=clf.predict(X_test))

        return html.Div([
            html.Br(),
            html.H6('Model accuracy %s' % str(acc)),
            html.Br(),
            dcc.Graph(id='g5', figure=fig5)
        ])