コード例 #1
0
def main():
    """Run the MLKR projection once and evaluate KNN on it for k = 2..16.

    Loads the pre-split data (suffix ``'_LDA'``, scaled), projects both
    splits with ``runMLKR`` and hands the projections to ``KNN.runKNN``
    using the euclidean metric and the label ``'_MLKR_euclidean'``.
    """
    neighbour_counts = list(range(2, 17))

    X_train, X_test, y_train, y_test = loadDataDivided(
        ifSubDir=False, ifScale=True, suffix='_LDA')
    train_projected, test_projected = runMLKR(X_train, X_test, y_train, y_test)
    KNN.runKNN(train_projected, test_projected, y_train, y_test,
               neighbour_counts, metric='euclidean', metric_params=None,
               label='_MLKR_euclidean')
コード例 #2
0
def TestShapeAccuracy(train_images, train_labels, test_images, test_labels,
                      neigh, percentageTrain):
    """Score a KNN built from the leading part of the training set.

    Only the first ``percentageTrain`` percent of the training samples
    (counted via ``train_labels.shape[0]``, truncated to an int) are used to
    build the classifier. Returns whatever ``Get_shape_accuracy`` computes
    from the predictions for ``test_images`` and ``test_labels``.
    """
    sample_count = train_labels.shape[0]
    cutoff = int(sample_count * percentageTrain / 100)
    classifier = KNN(train_images[:cutoff], train_labels[:cutoff])
    predictions = classifier.predict(test_images, neigh)
    return Get_shape_accuracy(predictions, test_labels)
def mix_up():
    """Combine Naive Bayes, SVM and KNN predictions in a neural net and report the results.

    Author's note: a hybrid model that mixes ML algorithms with a neural net
    accumulates the errors of the individual algorithms; this increases the
    error in the neural net so much that the model is not flexible enough to
    decide the market trend or find patterns in the data. "model2" removes
    that redundant error.

    The function makes two passes over the module-level sequence ``x`` and
    finally writes the per-item report to ``./report/mix_result.csv``.
    """
    # NOTE(review): this first assignment is never read before `ind` is reset below.
    ind = 0
    # Pass 1: feed the three model outputs to mix.new_net with its default
    # arguments (presumably this creates/trains the network - confirm in mix).
    for i in bar(xrange(len(x))):
        b_pred, b_y = bayes.naive_bayes_model(x[i], net=True)
        s_pred, s_y = SVM.svm_model(x[i], net=True)
        k_pred, k_y = KNN.knn_algo_model(x[i], net=True)
        print b_pred, b_y
        mix.new_net(s_pred, b_pred, k_pred, s_y, x[i])
    # Row counter for the report filled in pass 2.
    ind = 0
    report = pd.DataFrame(index=range(0),
                          columns=[
                              'Stock Name', 'accuracy', 'profit count',
                              'loss count', 'total no of rise',
                              'total number of loss'
                          ])
    # Pass 2: same models with actual=True; new_net is called with
    # create=False and its five returned metrics become one report row.
    for i in bar(xrange(len(x))):
        b_pred, b_y = bayes.naive_bayes_model(x[i], net=True, actual=True)
        s_pred, s_y = SVM.svm_model(x[i], net=True, actual=True)
        k_pred, k_y = KNN.knn_algo_model(x[i], net=True, actual=True)
        p_count, total_count_p, l_count, total_count_l, accuracy = mix.new_net(
            s_pred, b_pred, k_pred, s_y, x[i], create=False)
        # Value order matches the column order declared above.
        report.loc[ind] = [
            x[i], accuracy, p_count, l_count, total_count_p, total_count_l
        ]
        ind = ind + 1
    print "Mean accuracy----------", report['accuracy'].mean()
    report.to_csv("./report/mix_result.csv")
コード例 #4
0
    def __init__(self, symb, predlen, cat='RL', kwargs=None):
        """Store the configuration and build the learner selected by ``cat``.

        Args:
            symb: stored unchanged on ``self.symb``.
            predlen: stored unchanged on ``self.predlen``.
            cat: learner category. ``'RF'``, ``'KNN'``, ``'SVM'`` and ``'NN'``
                are handled here; for any other value (including the default
                ``'RL'``) this code does not set ``self.learner``.
            kwargs: optional dict of keyword arguments forwarded to the
                learner constructor; ``None`` means the constructor is
                called without arguments.
        """
        self.symb = symb
        self.predlen = predlen
        self.kwargs = kwargs
        self.cat = cat

        # Identity test rather than ``!= None``. Expanding an empty dict is
        # the same as calling the constructor with no arguments, so a single
        # call per category covers both of the former branches.
        learner_args = {} if kwargs is None else kwargs

        if cat == 'RF':
            self.learner = RF.RandomForest(**learner_args)
        elif cat == 'KNN':
            self.learner = KNN.KNN(**learner_args)
        elif cat == 'SVM':
            self.learner = SVM.SVM(**learner_args)
        elif cat == 'NN':
            self.learner = NN.NN(**learner_args)
コード例 #5
0
def crossvalidation(userManager, artistManager, folders):
	"""Split the users into ``folders`` random folds and print the KNN hit rate per fold.

	userManager -- mapping of user id to user data; it is indexed and
	               reassigned by user id below
	artistManager -- passed through to ``KNN.training`` / ``KNN.testing``
	folders -- number of folds

	Nothing is returned; progress and results are printed.
	"""
	userIDs = userManager.keys()
	# One (initially empty) list of user ids per fold.
	userFolders = {}
	for i in range(folders):
		userFolders[i] = []
	# Every user is assigned to a uniformly random fold, so fold sizes vary.
	for userID in userIDs:
		i = random.randrange(folders)
		userFolders[i].append(userID)
	for f in range(folders):
		# presumably splitTrainSet alters the entries of the test users inside
		# userManager (they are written back at the end of this iteration) - TODO confirm
		testUserSet, testUserIDList, testUserMostFavourite = splitTrainSet(userManager, 1.0/folders, userFolders[f])
		knn = KNN(6)
		knn.training(userManager, artistManager)
		rightNum = 0
		totalNum = len(testUserIDList)
		for i in range(len(testUserIDList)):
			# Trailing comma: the progress counter stays on the same output line.
			print i, totalNum,
			favOfOne = knn.testing(testUserSet[testUserIDList[i]], userManager, artistManager)
			print testUserIDList[i], testUserMostFavourite[testUserIDList[i]].keys()[0], favOfOne
			# A hit is a prediction equal to the first key of the user's "most favourite" mapping.
			if favOfOne == testUserMostFavourite[testUserIDList[i]].keys()[0]:
				rightNum += 1
		print "Folder", f, ":"
		print "Total:", totalNum
		# NOTE(review): raises ZeroDivisionError when a fold yields no test users.
		print float(rightNum)/len(testUserIDList)
		# Write the test users' data back into userManager before the next fold.
		for i in range(len(testUserIDList)):
			userManager[testUserIDList[i]] = testUserSet[testUserIDList[i]]
コード例 #6
0
def handwritingClassTest():
    # 训练
    hwLabels = []
    trainingFileList = listdir('trainingDigits/')
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        hwLabels.append(classNumStr)
        trainingMat[i,:] = KNN.img2vector('trainingDigits/%s' % fileNameStr)
    
    print "Training done."

    # 测试
    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for i in range(mTest):
        fileNameStr = testFileList[i]
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = KNN.img2vector('testDigits/%s' % fileNameStr)
        classifierResult = KNN.classify0(vectorUnderTest, trainingMat, hwLabels, 3)

        if (classifierResult != classNumStr):
            print "[%s] the classifier came back with: %d, the real answer is: %d" % (fileStr, classifierResult, classNumStr)
            errorCount += 1.0

    print "\nthe total number of errors is: %d" % errorCount
    print "\nthe total error rate is: %f" % (errorCount/float(mTest))
コード例 #7
0
def main():
    """Evaluate KNN on LMNN projections learned with several LMNN neighbour counts.

    For each LMNN setting the data is re-projected with ``runLMNN`` and
    ``KNN.runKNN`` is run for k = 2..16 with the euclidean metric; the LMNN
    setting is appended to the result label.
    """
    k_range = list(range(2, 17))
    # Same values as k_range except 7, which the original list leaves out.
    LMNN_k_range = [2, 3, 4, 5, 6] + list(range(8, 17))

    X_train, X_test, y_train, y_test = loadDataDivided(
        ifSubDir=False, ifScale=True, suffix='_LDA')
    for lmnn_k in LMNN_k_range:
        X_train_proj, X_test_proj = runLMNN(X_train, X_test, y_train, y_test, lmnn_k)
        run_label = '_LMNN_euclidean_k=' + str(lmnn_k)
        KNN.runKNN(X_train_proj, X_test_proj, y_train, y_test, k_range,
                   metric='euclidean', metric_params=None, label=run_label)
コード例 #8
0
 def test_fit(self):
     """Compare ``KNN.predict`` with the stored ``'get_class'`` result of every case."""
     cases = self.test_cases
     for ix, training_pair in enumerate(cases['input']):
         train_imgs, train_labels = training_pair
         model = KNN(train_imgs, train_labels)
         query = cases['test_input'][ix][0]
         neighbours = cases['rnd_K'][ix]
         predicted = model.predict(query, neighbours)
         np.testing.assert_array_equal(predicted, cases['get_class'][ix])
コード例 #9
0
 def test_get_k_neighbours(self):
     """Compare ``knn.neighbors`` with the stored ``'get_k_neig'`` result of every case."""
     cases = self.test_cases
     for ix, training_pair in enumerate(cases['input']):
         train_imgs, train_labels = training_pair
         model = KNN(train_imgs, train_labels)
         query = cases['test_input'][ix][0]
         neighbours = cases['rnd_K'][ix]
         # The call's result is stored on the instance; that attribute is checked.
         model.get_k_neighbours(query, neighbours)
         np.testing.assert_array_equal(model.neighbors, cases['get_k_neig'][ix])
コード例 #10
0
ファイル: MrRabbits.py プロジェクト: chiho828/Classifier
def _parse_number(text, convert):
    """Return ``convert(text)``, or ``None`` when ``text`` is not a valid number."""
    try:
        return convert(text)
    except ValueError:
        return None


def run():
    """Main loop: get and process user input until the user types "bye".

    Each round asks for a classifier ('knn', 'naive bayes' or 'bagging'), a
    dataset ('FI' or 'BC') and the training split in percent, builds the
    train/test sets with ``createDataset`` and runs the chosen classifier.

    Any unusable answer (unknown classifier or dataset, non-numeric number)
    ends the round with the "invalid input" message instead of crashing or
    calling ``createDataset`` with an empty file name.
    """
    # ``\\_`` keeps the rabbit's ear intact without relying on the invalid
    # escape sequence ``\_``, which newer Python versions warn about.
    print(
        '''Hi there! My name is Mr. Rabbits!                        (\\_/)           
Welcome to Mr. Rabbits' Machine Learning Adventure!      (^.^)
Today we will be exploring the difference between       c(> <)
Naive Bayes classification and k-nearest neighbors.
There are two datasets to choose from: Fisher's Iris flower data set or ________.'''
    )
    dataset_files = {'FI': 'iris.csv', 'BC': 'wdbc_clean.csv'}
    classifiers = ('knn', 'naive bayes', 'bagging')

    while True:
        info = input(
            '''Please let me know which classifier you would like to explore:
(type 'knn' or 'naive bayes' or 'bagging' or 'bye' to exit).\n''')
        if info == 'bye':
            print('Goodbye! Bring me a carrot next time! :3"')
            return
        print(
            "Which dataset will you be exploring today? Fisher's iris flower dataset or Wisconsin breast cancer diagnostics?"
        )
        dataset = input("Type 'FI' or 'BC'\n")

        split = _parse_number(
            input(
                "What % of the dataset should be split into the training set? (type a value from 0 to 100)\n"
            ), float)

        filename = dataset_files.get(dataset)
        invalid = (info not in classifiers or filename is None
                   or split is None)

        if not invalid:
            trainSet = []
            testSet = []
            createDataset(filename, trainSet, testSet, split / 100)

            if info == 'knn':
                k = _parse_number(
                    input("What value should k be? (# of nearest neighbors)\n"),
                    int)
                if k is None:
                    invalid = True
                else:
                    KNN.run(trainSet, testSet, k)
            elif info == 'naive bayes':
                NaiveBayes.run(trainingSet=trainSet, testSet=testSet)
            else:  # 'bagging'
                k = _parse_number(
                    input("What value should k be? (# of nearest neighbors)\n"),
                    int)
                bagSize = _parse_number(
                    input("How big should the bags be?\n"), int)
                bagNum = _parse_number(
                    input("How many bags should I use?\n"), int)
                if None in (k, bagSize, bagNum):
                    invalid = True
                else:
                    bagging(k, trainSet, testSet, bagSize, bagNum)

        if invalid:
            print(
                "Oops! There was some invalid input somewhere along the way.")
            print("Let's start from the top again.\n")
        else:
            print("Wow! That was fun. Let's do it again.\n")
コード例 #11
0
ファイル: DataTest.py プロジェクト: MushroomCJ/KNN
def main():
    """Classify every test row with KNN (k = 100) and print evaluation metrics.

    Training data comes from ``KNN.Load_Train_Data()``. Test samples are read
    from ``./low_Dim_Data_test.csv`` and their expected labels (compared
    against 0.0 / 1.0) from ``./_names_test.csv``. Prints the two label
    counts, then accuracy ("准确率"), precision ("精确率") and recall ("召回率").
    A ratio whose denominator is zero is printed as ``nan`` instead of
    raising ``ZeroDivisionError``.
    """

    def ratio(numerator, denominator):
        # An empty test set, or a run without any positives, must not crash
        # the report.
        return numerator / denominator if denominator else float("nan")

    dataSet, labels = KNN.Load_Train_Data()
    print("Newgroup;", dataSet)
    print(dataSet.shape)
    print("Newlabels;", labels)
    print(labels.shape)

    # ``with`` closes both files as soon as they have been parsed.
    with open("./low_Dim_Data_test.csv") as data_file:
        dataIn = np.loadtxt(data_file, delimiter=",", skiprows=0)
    with open("./_names_test.csv") as label_file:
        val = np.loadtxt(label_file, delimiter=",", skiprows=0)
    k = 100

    total = len(dataIn)  # formerly ``all``, which shadowed the builtin
    wrong = 0            # predictions that differ from the expected label
    wrong1 = 0           # expected 1, predicted 0
    wrong0 = 0           # expected 0, predicted 1
    original_1 = 0       # samples whose expected label is 1
    for i, sample in enumerate(dataIn):
        dataOut = KNN.classify(sample, dataSet, labels, k)
        if dataOut != val[i]:
            wrong = wrong + 1
        if val[i] == 1.0:
            original_1 = original_1 + 1
        if (val[i] == 1.0) & (dataOut == 0.0):
            wrong1 = wrong1 + 1
        if (dataOut == 1.0) & (val[i] == 0.0):
            wrong0 = wrong0 + 1

    true_positives = original_1 - wrong1
    print(np.sum(val == 0.0))
    print(np.sum(val == 1.0))
    print("准确率:", ratio(total - wrong, total))  # accuracy
    print("精确率:", ratio(true_positives, true_positives + wrong0))  # precision
    print("召回率:", ratio(true_positives, original_1))  # recall
コード例 #12
0
def main():
    """Build the samples from the file named in ``sys.argv[1]`` and run 3-NN.

    ``trainSet``, ``testSet`` and ``bootstrap`` are module-level names that
    are only read here (no ``global`` statement is needed for that); they are
    presumably filled by ``generateTrainTestSample`` and ``bootstrapping``.
    Element 1 of ``bootstrap`` is used as the training sample.
    """
    sample_path = sys.argv[1]

    generateTrainTestSample(sample_path)
    bootstrapping(trainSet)
    KNN.main(bootstrap[1], testSet, 3)
コード例 #13
0
def test_simple():
    data_set, labels = KNN.create_data_set()

    test1 = array([1.2, 1.0])
    test2 = array([0.1, 0.3])
    k = 3
    output_label1 = KNN.knn_classify(test1, data_set, labels, k)
    output_label2 = KNN.knn_classify(test2, data_set, labels, k)
    print test1, output_label1
    print test2, output_label2
コード例 #14
0
def test_simple():
    data_set, labels = KNN.create_data_set()

    test1 = array([1.2, 1.0])
    test2 = array([0.1, 0.3])
    k = 3
    output_label1 = KNN.knn_classify(test1, data_set, labels, k)
    output_label2 = KNN.knn_classify(test2, data_set, labels, k)
    print test1, output_label1
    print test2, output_label2
コード例 #15
0
ファイル: my1stplot.py プロジェクト: foxli180/Self_Learning
def plotwithlable():
    """Draw three stacked scatter plots of the normalised dating data, coloured by label.

    The data is read from ``datingTestSet2.txt`` and normalised with
    ``KNN.autoNorm``. Samples labelled 1 are red, labelled 2 green and all
    others blue. The subplots show column 0 vs 1, column 0 vs 2 and column 1
    vs 2; axis labels are set through ``plt`` after the second and third
    subplot are created.
    """
    datingDataMat, datingLables = KNN.file2matrix('datingTestSet2.txt')
    normDataMat, ranges, minVals = KNN.autoNorm(datingDataMat)

    # groups[n][c] collects normalised column c of the samples in group n.
    groups = {1: ([], [], []), 2: ([], [], []), 3: ([], [], [])}
    for index, value in enumerate(datingLables):
        if value == 1:
            bucket = groups[1]
        elif value == 2:
            bucket = groups[2]
        else:
            bucket = groups[3]
        for column in range(3):
            bucket[column].append(normDataMat[index, column])

    marker_styles = ((1, 20, 'red'), (2, 30, 'green'), (3, 50, 'blue'))

    def draw(axes, x_column, y_column):
        # One scatter call per group, then the shared legend in the upper left.
        handles = []
        for group, size, colour in marker_styles:
            handles.append(axes.scatter(groups[group][x_column],
                                        groups[group][y_column],
                                        s=size, c=colour))
        axes.legend(handles,
                    ["Did Not Like", "Liked in Small Doses", "Liked in Large Doses"],
                    loc=2)

    fig = plt.figure()
    # 311 / 312 / 313: three rows, one column.
    draw(fig.add_subplot(311), 0, 1)

    draw(fig.add_subplot(312), 0, 2)
    plt.xlabel("Frequent Flyier Miles Earned Per Year")
    plt.ylabel("Liters of Ice Cream Consumed Per Week")

    draw(fig.add_subplot(313), 1, 2)
    plt.xlabel("Percentage of Body Covered By Tatoos")
    plt.ylabel("Liters of Ice Cream Consumed Per Week")
    plt.show()
コード例 #16
0
ファイル: interface.py プロジェクト: Rhaegar2012/RoboDoc-
 def match_query(self):
     """Match the image at ``self.filename`` against the training set and show the result.

     The prediction text goes to ``self.model_prediction_value``; the first 20
     drawn matches are written to ``../output.jpg``, reloaded, resized to
     250x250 and placed on ``self.match_canvas``.
     """
     output_path = '../output.jpg'

     # Second argument 0 is passed straight to cv.imread.
     query = cv.imread(self.filename, 0)
     match = KNN.finds_best_match(KNN.creates_query_instance(query, ''),
                                  self.training_set)
     self.model_prediction_value.configure(text=match.prediction)

     rendered = cv.drawMatches(query, match.keypoints, match.best_match.image,
                               match.best_match.keypoints, match.k_matches[:20],
                               None, flags=2)
     cv.imwrite(output_path, rendered)

     thumbnail = Image.open(output_path).resize((250, 250), Image.ANTIALIAS)
     # The PhotoImage is stored on the instance before being handed to the canvas.
     self.image_match = ImageTk.PhotoImage(thumbnail)
     self.match_canvas.create_image(20, 20, anchor=NW, image=self.image_match)
コード例 #17
0
ファイル: test.py プロジェクト: bzhou830/ML-python
def classifyPerson():
    print "输入相关信息"
    resultList = ['一点不喜欢','有点希望','可能性很大']
    percentTats = float(raw_input("玩游戏时间数目?"))
    ffMiles = float(raw_input("旅游公路数?"))
    ice = float(raw_input("冰淇淋消耗量?"))
    datingDataMat,datingLabels = KNN.file2matrix('datingTestSet2.txt')
    normMat,ranges,minVals = KNN.autoNorm(datingDataMat)
    inArr = np.array([ffMiles,percentTats,ice])
    classfierRt = KNN.classify0((inArr-minVals)/ranges,normMat,datingLabels,3)
    print resultList[classfierRt - 1]
    PrintFigure(normMat, datingLabels)
コード例 #18
0
def datingClassTest():
    hoRatio = 0.10
    datingdataMat, datingLabels = KNN.file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = KNN.autoNorm(datingdataMat)
    m = normMat.shape[0]
    numTestVecs = int(m*hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = KNN.classify0(normMat[i,:], normMat[numTestVecs:m,:], datingLabels[numTestVecs:m], 3)
        print "the classifier came back with: %d, the real answer is: %d" % (classifierResult, datingLabels[i])
        if (classifierResult != datingLabels[i]):
            errorCount += 1.0
    print "tht total error rate is: %f" % (errorCount/float(numTestVecs))
コード例 #19
0
ファイル: app.py プロジェクト: Yaliang/last.fm.server
def testUser(testUserID):
    """Return an HTML snippet comparing a user's real and predicted favourite artist.

    Unknown user ids produce an explanatory message instead. Line breaks in
    the answer are emitted as ``</br>``. The user's entry in
    ``TrainUserManager`` is written back from the split result before
    returning.
    """
    if not UserManager.has_key(testUserID):
        return "don't has user with userID = "+str(testUserID)

    testUserSet, testUserIDList = splitTrainSetWithoutRemoving(TrainUserManager, 0, [testUserID])
    model = KNN(40)
    model.training(TrainUserManager, ArtistManager)
    favOfOne, allArtist, allTag = model.testing(testUserSet[testUserID], UserManager, ArtistManager, True)
    realfavOfOne = UserManager[testUserID].getMostFav().keys()[0]

    parts = [
        "The most listen artist:",
        str(ArtistManager[realfavOfOne]),
        "The artist we predict:" + "\n" + str(ArtistManager[favOfOne]),
    ]
    # Join first, then replace: newlines inside the artist texts are converted too.
    ret = "\n".join(parts).replace("\n","</br>")

    # Restore the entry of TrainUserManager that was modified for the test.
    TrainUserManager[testUserID]=testUserSet[testUserID]

    return ret
コード例 #20
0
ファイル: interface.py プロジェクト: Rhaegar2012/RoboDoc-
 def train_model(self):
     """Regenerate train/test sets until the measured precision reaches 0.83.

     The accepted sets are stored on the instance, and precision and recall
     are shown as whole-number percentages in the two value widgets. There is
     no cap on the number of attempts.
     """
     ABT = DataPrep.populates_ABT()
     print(ABT, len(ABT.ABT))
     required_precision = 0.83
     while True:
         training_set, test_set = DataPrep.generates_training_test_sets(ABT.ABT)
         prediction_set = KNN.test_model(test_set, training_set)
         precision, recall = KNN.compute_metrics(prediction_set)
         print('run precision: ', precision, 'run recall: ', recall)
         # Same exit condition as "while precision < required_precision".
         if not precision < required_precision:
             break
     self.training_set = training_set
     self.test_set = test_set
     self.precision = "{}%".format(int(precision*100))
     self.recall = "{}%".format(int(recall*100))
     self.precision_value.configure(text=self.precision)
     self.recall_value.configure(text=self.recall)
コード例 #21
0
def topNRecs():
    """Run KNN for the values typed into the two entry fields and report the outcome.

    ``entry`` supplies the CSV file name and ``entry2`` a short string whose
    character 0 is the number of neighbours and whose character 2 is the
    selected point. The neighbours are written to ``output.txt``, the CSV
    rows (as dicts) to ``dict.txt``, and a status label is added to the
    canvas.

    When ``driver()`` returns ``None`` the warning label is shown and
    ``output.txt`` is left empty; previously the function raised
    ``TypeError`` before the warning could be displayed.
    """
    # Get entries
    csv_file_name = entry.get()
    string = entry2.get()

    # User selections.
    # NOTE(review): only single-digit values can be entered this way.
    numNeighbors = int(string[0])
    selectedPoint = int(string[2])

    # Call the KNN class and get the neighbours
    knnClient = KNN(csv_file_name, selectedPoint, numNeighbors)
    list_of_Neighbors = knnClient.driver()

    # Catch bad input from a bad csv
    if list_of_Neighbors is None:
        out = "Warning: No Neighbors. Edit the CSV or try again."
        list_of_Neighbors = []
    else:
        out = "Results are in the txt file"

    # Dump the results to a file
    with open('output.txt', 'w') as f:
        for neighbor in list_of_Neighbors:
            f.write("%s\n" % str(neighbor))

    # Transform the csv into a list of dicts; the file is closed once read.
    with open(csv_file_name) as source:
        dict_list = list(csv.DictReader(source))

    # Store the csv in a txt file with appropriate headers for better analysis
    with open('dict.txt', 'w') as d:
        for dictonary in dict_list:
            d.write("%s\n" % str(dictonary))

    # Output window
    title = tk.Label(root, text=out, font=('helvetica', 10))
    canvas.create_window(220, 80, window=title)

    # Second output window suggesting the user try another file
    ans = tk.Label(root,
                   text="Try another txt file below!",
                   font=('helvetica', 15))
    canvas.create_window(250, 300, window=ans)
def calculateTestError():
    """Print the KNN test error followed by the accuracy, computed as 100 minus that error."""
    error_value = KNN.getTestError(resultKNN, KNN.actualLabel)
    print("****Test Error*****")
    print(error_value)
    print("*******Accuracy********")
    print(100 - error_value)
コード例 #23
0
    def test_input_knn(self) -> None:
        """Run the diabetes KNN check on the form values and display the report.

        Fields ``l1``..``l5`` are converted to floats and passed to
        ``KNN.check_input`` under the keys "B".."F"; ``l0`` holds the patient
        name. A result of 0 selects the "no diabetes" message, anything else
        the "diabetes" message.
        """
        inputs = (self.l1, self.l2, self.l3, self.l4, self.l5)
        my_dict = {key: float(widget.text()) for key, widget in zip("BCDEF", inputs)}
        knn_output = KNN.check_input(my_dict)

        self.setFixedSize(850, 342)
        self.report_subhead.setText("Reports")
        self.model_details.setText(
            "K-Nearest Neighbours classifier used.\nAccuracy of model: 81.16%\nWe have used PIMA Indians diabetes dataset."
        )

        summary_template = (
            "Patient's name: {}\n"
            "Plasma glucose concentration: {} \n"
            "Diastolic blood pressure: {}\n"
            "Triceps skin fold thickness: {}\n"
            "Serum insulin: {}\n"
            "Body mass index: {}"
        )
        self.details.setText(
            summary_template.format(self.l0.text(), self.l1.text(),
                                    self.l2.text(), self.l3.text(),
                                    self.l4.text(), self.l5.text()))

        verdict = (
            "Diagnosis suggests that patient does not suffers from diabetes."
            if knn_output == 0 else
            "Our diagnosis suggests patient does suffer from diabetes.\nPlease get checked soon."
        )
        self.results.setText(verdict)
        self.results.setFont(QFont("Arial", 14, weight=QFont.Bold))
コード例 #24
0
ファイル: handwriting.py プロジェクト: Shitong91/python
def handwritingClassTest():
    """Classify every file in ``testDigits`` with 3-NN and print the error statistics.

    The true class of a sample is the integer in front of the first ``_`` of
    its file name; training samples are read from ``trainingDigits``.
    """
    # File names in the training folder, as a list.
    trainingFileList = listdir('trainingDigits')
    trainingCount = len(trainingFileList)
    trainingMat = np.zeros((trainingCount, 1024))
    hwLabels = []
    for row, trainName in enumerate(trainingFileList):
        stem = trainName.split('.')[0]
        hwLabels.append(int(stem.split('_')[0]))
        trainingMat[row, :] = img2vector('trainingDigits/%s' % trainName)

    testFileList = listdir('testDigits')
    mTest = len(testFileList)
    errorCount = 0.0
    for testName in testFileList:
        expected = int(testName.split('.')[0].split('_')[0])
        candidate = img2vector('testDigits/%s' % testName)
        predicted = KNN.classify0(candidate, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d" %
              (predicted, expected))
        if predicted != expected:
            errorCount += 1.0
    print("the total number of errors is: %d" % errorCount)
    print("the total error rate is: %f" % (errorCount / float(mTest)))
コード例 #25
0
ファイル: Recognition.py プロジェクト: M-a-i/opencv_sudoku
def handwriteClassfiy(testfile, trainfile, k):
    """Classify the sample ``x.txt`` found in ``testfile`` using the samples in ``trainfile``.

    Every file in ``trainfile`` becomes one 1024-element row of the training
    matrix; its label is the part of the file name in front of the first
    ``_`` (kept as a string).

    Keyword argument:
    testfile -- directory that contains the test sample ``x.txt``
    trainfile -- directory that contains the training samples
    k -- number of neighbours passed to ``KNN.classify``

    Returns the result of ``KNN.classify`` for the test sample.
    """
    trainFileList = os.listdir(trainfile)
    trainDataSet = np.zeros((len(trainFileList), 1024))
    labels = []
    for row, sampleName in enumerate(trainFileList):
        stem = sampleName.split('.')[0]
        labels.append(stem.split('_')[0])
        trainDataSet[row, :] = img2vector(trainfile + '/' + sampleName)

    testdigit = img2vector(testfile + '/' + 'x.txt')
    return KNN.classify(testdigit, trainDataSet, labels, k)
コード例 #26
0
def cross_validation_nn(k,folds_array):
    """Return the 10-fold cross-validation accuracy of ``KNN.knearest``.

    Each of the first ten entries of ``folds_array`` serves once as the test
    fold while the other nine are concatenated into the training data. The
    last element of every test sample is taken as its expected value.
    The result is the fraction of correct predictions over all folds.
    """
    hits = 0
    misses = 0

    for held_out in range(10):
        # Split the ten folds into the held-out test fold and the training rest.
        test_data = []
        training_data = []
        for fold_index in range(10):
            fold = folds_array[fold_index]
            if fold_index == held_out:
                test_data = fold
            else:
                training_data = training_data + fold

        # Score every sample of the held-out fold.
        for sample in test_data:
            prediction = KNN.knearest(k, training_data, sample, True)
            expected = sample[len(sample) - 1]
            if prediction == expected:
                hits += 1
            else:
                misses += 1

    return float(hits)/float(hits + misses)
コード例 #27
0
ファイル: LLE.py プロジェクト: philipz1/ML
def lle(data, k = 10, target_dim = 2):
	"""Compute a locally-linear-embedding style projection of ``data``.

	data -- 2-D array; ``data.shape[1]`` is used as the point dimension
	k -- number of neighbours per point
	target_dim -- number of eigenvector columns returned

	Returns ``eigvecs[:, 1:target_dim + 1]``: the eigenvectors of
	(I - W)^T (I - W) ordered by ascending eigenvalue, skipping the first.
	"""
	p = data.shape[1]
	# presumably maps every point (as key) to its k nearest neighbours - confirm in KNN.knn
	graph = KNN.knn(data, k)
 	
	n = len(graph.keys())
	# NOTE(review): weights_vec is filled below but not read again in this
	# function; locals_ is allocated and never used.
	weights_vec = np.zeros((n,k))
	weights_dict = {}
	locals_ = np.zeros((n,k))
	for i, key in enumerate(list(graph.keys())):
		local = construct_knn_vector(key, graph, k)
		# Shift the neighbourhood so that the point itself becomes the origin.
		local_centered = local - np.repeat(np.array(key).reshape([1, p]), k, axis = 0)
		gram = do_gram(local_centered, k)

		# Weights are inv(gram) applied to a vector of ones, scaled to sum to 1.
		w_num = np.dot(np.linalg.inv(gram), np.ones(gram.shape[0]).T)
		w = w_num / w_num.sum()
		weights_vec[i] = w

		# Per point: neighbour (as tuple) -> its reconstruction weight.
		temp_dict = {}
		for q in range(len(local)):
			temp_dict[tuple(local[q])] = w[q]
		weights_dict[tuple(key)] = temp_dict

	weights = reconstruct(data, weights_dict)

	# M = (I - W)^T (I - W)
	M = np.dot((np.identity(n) - weights).T, (np.identity(n) - weights))
	eigvals, eigvecs = np.linalg.eigh(M)

	# The [::1] slice is a no-op, so the order is ascending by eigenvalue.
	index = np.argsort(eigvals)[::1]
	eigvals = eigvals[index]
	eigvecs = eigvecs[:,index]

	return eigvecs[:,1:target_dim + 1]
コード例 #28
0
def handwritingClassTest():
    hwLabel = []
    trainingFileList = listdir('trainingDigits')  # 获取目录内容 ,type(list)
    m = len(trainingFileList)

    trainingMat = zeros((m, 1024))
    for i in range(m):
        filenameStr = trainingFileList[i]
        filename = filenameStr.split('.')[0]
        classNum = int(filename.split('_')[0])
        hwLabel.append(classNum)  # 从文件名中解析分类数字
        trainingMat[i, :] = img2vector('trainingDigits/%s' % filenameStr)

    testFileList = listdir('testDigits')
    m_test = len(testFileList)
    error_count = 0.0
    for j in range(m_test):
        test_filenameStr = testFileList[j]
        test_filename = test_filenameStr.split('.')[0]
        test_ClassNum = int(test_filename.split('_')[0])  # 通过文件名获取实际的数字编号
        classfierResult = KNN.classify(
            img2vector('testDigits/%s' % test_filenameStr), trainingMat,
            hwLabel, 4)  # 通过KNN算法得到的编号

        print 'the classfier came back with: %d, the realnum came back with %d' % (
            classfierResult, test_ClassNum)
        if classfierResult != test_ClassNum:
            error_count += 1
    print 'the total error num: %d' % error_count
    print 'the total error rate is :%f' % (error_count / m_test)
コード例 #29
0
def hand_writing_class_test():
    """
    Build the training samples, classify every test sample with k = 3 and
    print the fraction of correct predictions.

    Labels are the part of each file name in front of the first ``_`` and
    are compared as strings.
    :return: None
    """
    # File names look like ['5_135.txt', '4_36.txt', '8_102.txt', '8_116.txt', ....]
    train_dir = './data/trainingDigits/'
    train_files = os.listdir(train_dir)
    # One row of 1024 values per training sample
    training_mat = np.zeros((len(train_files), 1024))
    # Labels of the training samples, in row order
    hw_labels = []
    for row, train_name in enumerate(train_files):
        hw_labels.append(train_name.split('_')[0])
        training_mat[row, :] = img_2_vector(train_dir + train_name)

    # Read the test samples and classify them
    test_dir = './data/testDigits/'
    test_files = os.listdir(test_dir)
    test_total = len(test_files)
    correct = 0
    for test_name in test_files:
        expected = test_name.split('_')[0]
        candidate = img_2_vector(test_dir + test_name)
        predicted = KNN.classify(candidate, training_mat, hw_labels, k=3)
        print("the classifier came back with: %s, the real answer is: %s" % (predicted, expected))
        if predicted == expected:
            correct += 1.0
    # Accuracy noted by the original author: 0.988372
    print('正确率:%f' % (correct / test_total))
コード例 #30
0
ファイル: classifier.py プロジェクト: cycoe/jwgl_spider
 def recognizer(self, imgPath):
     """Classify every vector extracted from ``imgPath`` and return the labels joined into one string."""
     recognised = [
         KNN.classify0(vector, self.trainingMat, self.labels, self.k)
         for vector in img2Vector(imgPath)
     ]
     return ''.join(recognised)
コード例 #31
0
def get_training_error(train, k):
    """Count the training rows whose prediction differs from their first element.

    The classifier is built from ``train`` and then asked to predict each of
    those same rows with ``k`` neighbours.
    """
    model = knn.KNN(train)
    return sum(1 for row in train if model.predict(row, k) != row[0])
コード例 #32
0
def get_test_error(train, test, k):
    """Count the test rows whose prediction differs from their first element.

    The classifier is built from ``train`` and predicts each row of ``test``
    with ``k`` neighbours.
    """
    model = knn.KNN(train)
    return sum(1 for row in test if model.predict(row, k) != row[0])
コード例 #33
0
def datingClassTest():
    '''
    Hold-out test of the KNN classifier on the dating-site data.

    The first ``hoRatio`` share of the normalised rows is classified against
    the remaining rows using 3 neighbours. Every prediction is printed; the
    error rate and error count are printed once, after all test rows have
    been classified.

    :return: number of misclassified test rows
    '''
    # Share of the data used for testing (training share = 1 - hoRatio)
    hoRatio = 0.1
    # Load the data from file
    datingDataMat, datingLabels = fileParse.file2matrix('./datingTestSet.txt')
    # Normalise the data
    normMat, ranges, miuVals = fileParse.autoNorm(datingDataMat)
    # m is the number of rows, i.e. the first dimension of the matrix
    m = normMat.shape[0]
    # Number of test rows; rows numTestVecs..m are the training rows
    numTestVecs = int(m * hoRatio)

    print('numTestVecs =', numTestVecs)

    errorCount = 0.0

    for i in range(numTestVecs):
        # Classify one test row
        classifierResult = KNN.classify0(normMat[i, :],
                                         normMat[numTestVecs:m, :],
                                         datingLabels[numTestVecs:m], 3)
        print('||' * 40)
        print('The clssifier came back with:%d, the real answer is:%d' %
              (classifierResult, datingLabels[i]))

        if classifierResult != datingLabels[i]:
            errorCount += 1.0

    # Summary: reported once the count is final instead of after every row.
    if numTestVecs:
        print('the total error rate is:%f' % (errorCount / float(numTestVecs)))
    print('errorCount is:', errorCount)
    print()
    return errorCount
コード例 #34
0
def handwritingClassTest():
    hwLabels = []  #s手写数字的标签
    trainingFileList = os.listdir('trainingDigits')  #文件夹中的文件名 获取目录的内容
    m = len(trainingFileList)  #统计一共有多少个训练
    trainingMat = zeros((m, 1024))
    #从文件名解析分类数字 开始
    for i in range(m):
        fileNameStr = trainingFileList[i]
        fileStr = fileNameStr.split('.')[0]  #用'.'号分隔然后取第一个元素
        classNumStr = int(fileStr.split('_')[0])  #用'_'分隔然后取第一个元素
        hwLabels.append(classNumStr)  #将数字标签存入数组
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)
    testFileList = os.listdir('testDigits')  #获得测试数据
    errorCount = 0.0
    mTest = len(testFileList)  #一共有多少测试数据
    for i in range(mTest):
        fileNameStr = testFileList[i]
        fileStr = fileNameStr.split('.')[0]
        classNumStr = int(fileStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResualt = KNN.classify0(vectorUnderTest, trainingMat,
                                          hwLabels, 30)
        print "the classsifier came back with :%d , the real answer is %d" % (
            classifierResualt, classNumStr)
        if (classifierResualt != classNumStr): errorCount += 1.0
    print "\nthe totle number of error is : %d" % errorCount
    print "\nthe totle error rate is : %d" % (errorCount / float(mTest))
コード例 #35
0
def camplist():
    """Show the camps that ``KNN.Knn`` selects for the submitted user id.

    GET requests render the input form (``camplist.html``). POST requests
    read ``userid`` from the form, look up that user's latitude/longitude,
    ask ``KNN.Knn`` for camp ids and render ``list.html`` with the matching
    ``Camp`` rows as ``data1``.

    A user id that is not an integer, or that matches no row, renders
    ``list.html`` with an empty ``data1`` instead of failing.
    """
    if request.method != 'POST':
        return render_template('camplist.html')

    u_userid = request.form['userid']
    try:
        # The id was formerly spliced into the SQL text as a numeric literal;
        # converting it keeps that comparison while rejecting anything else.
        user_id = int(u_userid)
    except ValueError:
        return render_template('list.html', data1=[])

    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        # Bound parameter: form input never becomes part of the SQL text.
        cur.execute("select Latitude,Longitude from User where Id=?",
                    (user_id,))
        rows = cur.fetchall()
        if not rows:
            return render_template('list.html', data1=[])
        # As before, the last returned row wins if there are several.
        lat = float(rows[-1][0])
        lng = float(rows[-1][1])
        print(lat)
        print(lng)

        campid = KNN.Knn(lat, lng)
        print("In app.py")
        print(campid)
        # sqlite3 cannot bind NumPy scalars; ``item()`` turns them into
        # plain Python values and other types are passed through unchanged.
        camp_ids = [c.item() if hasattr(c, 'item') else c for c in campid]
        # One placeholder per id; also valid for a single id, where
        # ``str(tuple(ids))`` produced the invalid SQL ``(x,)``.
        placeholders = ",".join("?" * len(camp_ids))
        query = "select * from Camp where Id in (" + placeholders + ")"
        print(query, camp_ids)
        cur.execute(query, camp_ids)
        data1 = cur.fetchall()
        print(data1)
    finally:
        conn.close()
    return render_template('list.html', data1=data1)
コード例 #36
0
def handwritingClassTest():
    """Classify every file in ``testDigits`` against ``trainingDigits`` and print the error rate.

    The label of a file is the integer before the first ``_`` of its name
    (with the extension removed).  Each file is turned into one row by
    ``img2vector``; rows are classified by ``KNN.classify0`` with k=3 and the
    prediction is read from ``result[0][0]``.
    """
    train_names = listdir("trainingDigits")
    train_count = len(train_names)
    trainingMat = np.zeros((train_count, 1024))
    hwLabels = []
    for row, name in enumerate(train_names):
        hwLabels.append(int(name.split('.')[0].split('_')[0]))
        trainingMat[row, :] = img2vector("trainingDigits/" + name)

    test_names = listdir("testDigits")
    mTest = len(test_names)
    errorCount = 0.0
    for name in test_names:
        expected = int(name.split('.')[0].split('_')[0])
        sample = img2vector("testDigits/" + name)
        outcome = KNN.classify0(sample, trainingMat, hwLabels, 3)
        predicted = outcome[0][0]
        print("the classifier came back with: "+str(predicted)+",the real answer is: "+str(expected))
        if predicted != expected:
            errorCount += 1.0

    print("\nthe total number od errors is: "+ str(errorCount))
    print("\nthe total error rate is: ",(errorCount / float(mTest)))
コード例 #37
0
def datingClassTest():
    """Hold-out test of ``KNN.classify0`` on the dating data set.

    The first 10% of the normalised rows are classified (k=3) against the
    remaining rows; every prediction and the final error percentage are
    printed.

    NOTE(review): the trailing ``.decode('utf-8').encode('gb2312')`` is applied
    to the formatted string only when ``print`` is the Python 2 statement;
    under Python 3 ``print()`` returns None and the chained call would fail.
    """
    # Path of the data file; change it to your own location
    filename = "C:/Users/Administrator/Desktop/blog/github/AILearners/data/2.KNN/datingTestSet2.txt"
    # Store the returned feature matrix and label vector in datingDataMat and datingLabels
    datingDataMat, datingLabels = file2matrix(filename)
    # Use ten percent of all the data for testing
    hoRatio = 0.10
    # Normalise the data; returns the normalised matrix, the ranges and the minimum values
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # Number of rows in normMat
    m = normMat.shape[0]
    # Number of test samples (ten percent of the rows)
    numTestVecs = int(m * hoRatio)
    # Count of misclassified samples
    errorCount = 0.0

    for i in range(numTestVecs):
        # The first numTestVecs rows are the test set; the remaining
        # m-numTestVecs rows are the training set
        classifierResult = KNN.classify0(normMat[i, :],
                                         normMat[numTestVecs:m, :],
                                         datingLabels[numTestVecs:m], 3)
        print("分类结果:%d\t真实类别:%d" %
              (classifierResult,
               datingLabels[i])).decode('utf-8').encode('gb2312')
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    print("错误率:%f%%" % (errorCount / float(numTestVecs) *
                        100)).decode('utf-8').encode('gb2312')
コード例 #38
0
def datingClassTest():
    """
    Desc:
        Hold-out test of the KNN classifier on the dating-site data.
    parameters:
        none
    return:
        nothing; the error rate and the error count are printed
    """
    # Share of the rows used as test samples; the rest serve as reference samples
    hoRatio = 0.1
    # Load the data from file
    datingDataMat, datingLabels = get_data(filename)
    # Normalise the features
    normDataSet, ranges, minVals = autoNorm(datingDataMat)
    normDataSet = np.array(normDataSet)
    print('$'*100, normDataSet, len(normDataSet))
    # Labels are taken from the first column of the label frame
    datingLabels = datingLabels.iloc[:,0].tolist()
    # Number of rows in the data
    dataSize = normDataSet.shape[0]
    # Number of rows that are tested
    numTestVecs = int(dataSize * hoRatio)
    print(numTestVecs)
    print('NumTestVecs:', numTestVecs)
    print(normDataSet[numTestVecs:])

    errorCount = 0
    for row in range(numTestVecs):
        # Classify one held-out row against everything after the test rows
        predicted = KNN.classify(normDataSet[row],
                                 normDataSet[numTestVecs: ],
                                 datingLabels[numTestVecs : dataSize],
                                 3)
        expected = datingLabels[row]
        print("the classifier came back with: %d, the real answer is: %d" % (predicted, expected))
        if predicted != expected:
            errorCount += 1.0

    print("the total error rate is: %f" % (errorCount / float(numTestVecs)))
    print(errorCount)
コード例 #39
0
def main():
    """Tune each entry of ``Review.weights`` by exhaustive search.

    For every weight index, each integer in ``weight_range`` is written into
    ``Review.weights[i]``, 200 KNN guesses are made, and the number of guesses
    with zero error is recorded together with the candidate value.  The entry
    is then set to whatever ``get_max_weight`` picks from those records.  The
    final weights are printed.
    """
    path = r'C:\Users\mdhal\Desktop\Fall 2018\Machine Learning\Project\Compressed\reviews_Books_5.json.gz'
    weight_range = (0, 150)
    queries = TestBase.get_query_list(path,
                                      5 * (weight_range[1] - weight_range[0]))
    # max_to_grab is not used again inside this function
    max_to_grab = TestBase.find_count(queries)
    for i in range(len(Review.weights)):
        # (exact-guess count, candidate weight) pairs for this weight index
        num_correct = []
        for j in range(weight_range[0], weight_range[1]):
            # num_off[d] counts guesses that were off by d
            num_off = [0] * 5
            # running total of the errors; accumulated but never read
            off = 0
            Review.weights[i] = j
            current_star = 0
            for k in range(200):
                # NOTE(review): the query is indexed by j, not k, so all 200
                # iterations cycle over the same five queries -- confirm intended
                knn_val = KNN.guess_review(queries[current_star][j])
                # NOTE(review): current_star is advanced before the error is
                # computed, so `current_star + 1` below refers to the next
                # bucket rather than the one just queried -- confirm intended
                current_star = (current_star + 1) % 5
                curr_off = abs(current_star + 1 - knn_val)  # actual - estimate
                num_off[curr_off] += 1
                off += curr_off
            print("i:{} j:{}".format(i, j))
            num_correct.append((num_off[0], j))
        Review.weights[i] = get_max_weight(num_correct)
        print(Review.weights[i])
    for i in range(len(Review.weights)):
        print("Weight {} = {}".format(i, Review.weights[i]))
def main():
    """Fetch eBay items and print KNN estimates for a few sample queries.

    Items are fetched with ``get_items`` for the keyword/category below and
    passed to ``KNN.get_KNN`` and ``KNN.get_weightedKNN`` with k=3.  Each
    result is printed on its own line.
    """
    appid = '[Your eBay Product AppID]'  # Change this to your eBay product AppID
    search_keyword = 'wine'
    categoryId = '38182'  # red wine

    items = get_items(appid, search_keyword, categoryId)

    # Single-argument print(...) prints the same text on Python 2 and 3,
    # unlike the print statement, which is a SyntaxError on Python 3.

    # using un-weighted KNN
    print('using un-weighted KNN:')
    for query in ((1, 1000), (2, 2000)):
        print(KNN.get_KNN(items, query, k=3))
    print('*********************')

    # using weighted KNN
    print('weighted KNN using Gaussian function:')
    for query in ((1, 1000), (2, 1000), (2, 2000)):
        print(KNN.get_weightedKNN(items, query, k=3))
コード例 #41
0
def test_non_norm():
    """Print the first rows of the un-normalised dating data and scatter-plot it.

    Up to 30 (features, label) pairs are printed, then columns 0 and 1 of the
    feature matrix are plotted with ``15.0 * label`` used for both the third
    and fourth positional arguments of ``scatter``.
    """
    dating_mat, dating_label = KNN.file_to_matrix('datingTestSet2.txt')
    # Slicing instead of range(30) avoids an IndexError on short data sets.
    for features, label in zip(dating_mat[:30], dating_label[:30]):
        # "%s %s" reproduces the output of the Python 2 `print a, b`.
        print("%s %s" % (features, label))
    fig = plt.figure()
    ax = fig.add_subplot(111)
    scaled_labels = 15.0 * array(dating_label)
    ax.scatter(dating_mat[:, 0], dating_mat[:, 1],
               scaled_labels, scaled_labels)
    plt.show()
コード例 #42
0
ファイル: app.py プロジェクト: Yaliang/last.fm.server
def buildMockUser():
    """Build a temporary user from the posted artist list and return KNN recommendations as JSON.

    Reads the ``artists`` form field (a JSON list of single-entry mappings),
    inserts every artist known to ``ArtistManager`` into a mock ``User``,
    trains a ``KNN(40)`` and returns a JSON string with:

    * ``artistID`` -- the first value returned by ``knn.testing``
    * ``warning.missingArtist`` -- ids not found in ``ArtistManager`` (only if any)
    * ``artists`` -- up to 10 entries taken from the end of ``allArtist``
    * ``tags`` -- up to 10 entries taken from the end of ``allTag``

    NOTE(review): ``.keys()[0]`` / ``.values()[0]`` / ``has_key`` are Python 2
    dict idioms.
    """
    artists = request.form['artists']
    artistlist = json.loads(artists)
    testUser = User(-100)
    missingArtist = []
    for artistRecord in artistlist:
        # Each record is read as {artistID: weight}; only the first pair is used
        artistID = int(artistRecord.keys()[0])
        artistWeight = artistRecord.values()[0]
        # A zero weight is replaced by a tiny positive value
        if artistWeight == 0:
            artistWeight = 0.0000001
        if ArtistManager.has_key(artistID):
            testUser.insertArt(artistID, artistWeight)
        else:
            missingArtist.append(artistID)
    knn = KNN(40)
    knn.training(UserManager, ArtistManager)
    favOfOne, allArtist, allTag = knn.testing(testUser, UserManager, ArtistManager, True)
    ret = {'artistID': favOfOne}
    if len(missingArtist) > 0:
        ret['warning'] = {'missingArtist':missingArtist}

    ret['artists'] = []
    allArtistLen = len(allArtist)-1
    # Weight of the last entry is used to scale every match value
    # (presumably allArtist is sorted ascending by weight -- verify in KNN.testing)
    # NOTE(review): raises IndexError if allArtist is empty
    maxArtistMatchWeight = allArtist[-1][1]
    # Walk backwards from the last entry, at most 10 entries
    for i in range(allArtistLen, max(-1, allArtistLen-10), -1):
        artistID = allArtist[i][0]
        matchWeight = allArtist[i][1] / maxArtistMatchWeight
        artistName = ArtistManager[artistID].Name
        topTag = ArtistManager[artistID].getTopTag()
        # -1 from getTopTag is treated as "no tag"
        if topTag == -1:
            topTagName = ""
        else:
            topTagName = TagManager[topTag]
        ret['artists'].append({'id':artistID, 'name':artistName, 'match':matchWeight, 'tag':topTag, 'tagName':topTagName})

    ret['tags'] = []
    allTagLen = len(allTag)-1
    # Same backwards walk over the tags, at most 10 entries
    for i in range(allTagLen, max(-1, allTagLen-10), -1):
        tagID = allTag[i][0]
        tagWeight = allTag[i][1]
        tagName = TagManager[tagID]
        ret['tags'].append({'id':tagID, 'name':tagName, 'match':tagWeight})
    # dataObj = {'artists-num':len(artistlist)}
    return json.dumps(ret)
コード例 #43
0
def date_class_test():
    """Hold-out test of ``KNN.knn_classify`` on ``datingTestSet2.txt``.

    The first ``ratio`` share of the normalised rows is classified (k=3)
    against the remaining rows.  Every prediction and the final error rate
    are printed.
    """
    ratio = 0.04    # ratio of the test examples
    # data_set: 1000*3,  data_labels: 1000*1
    data_set, data_labels = KNN.file_to_matrix('datingTestSet2.txt')

    # normalize the data_set.   Note: data_labels does not need normalizing
    norm_set, ranges, min_val = KNN.normalize(data_set)

    all_rows = norm_set.shape[0]   # number of all rows
    test_rows = int(ratio * all_rows)  # number of test rows
    error_num = 0
    for i in range(test_rows):
        # predicted label for test row i
        label_res = KNN.knn_classify(norm_set[i, :],
                                     norm_set[test_rows: all_rows, :],
                                     data_labels[test_rows: all_rows, :],
                                     3)
        # Single-argument print(...) prints the same on Python 2 and 3.
        print('Classifier predict: %d, real result is: %d' % (label_res, data_labels[i]))
        if label_res != data_labels[i]:
            error_num += 1
    print('total error rate is: %f ' % (error_num * 1.0 / float(test_rows)))
コード例 #44
0
def gameRecommendations(u_name):
    """Return the top 5 game recommendations for a Steam user.

    The user's owned games are encoded as a bit string over the game ids
    loaded from ``./data/id_header.csv`` and compared against the data set in
    ``./data/games_by_username_all.csv`` through the ``KNN`` helpers.

    Returns None when the API key is not 32 characters long or when
    ``getUserGames`` yields nothing for the user.
    """
    # Get API key
    all_api_keys1 = get_keys("./num1.txt")
    all_api_keys2 = get_keys("./num2.txt")
    api_key = str(all_api_keys1[0]) + str(all_api_keys2[0])

    if len(api_key) != 32:
        print("Uh-oh, don't forget to enter your API key!")
        return

    games_response_json = getUserGames(u_name, api_key)
    # Bail out before the (large) app-list download when there is nothing to do.
    if not games_response_json:
        return

    all_games = loadGameIDs("./data/id_header.csv")

    # Requests session that retries failed requests; closed once the
    # app list has been fetched.
    with reqGet.Session() as session:
        session.mount("http://", reqGet.adapters.HTTPAdapter(max_retries=10))
        app_list_text = session.get(
            url="http://api.steampowered.com/ISteamApps/GetAppList/v2").text

    # Game names and IDs from Steam, keyed by appid for easy lookup
    game_list = json.loads(app_list_text)['applist']['apps']
    game_dict = {game['appid']: game for game in game_list}

    # Position of each known game id; setdefault keeps the first occurrence,
    # matching what list.index() returned, without a linear scan per game.
    position_of = {}
    for position, appid in enumerate(all_games):
        position_of.setdefault(appid, position)

    user_game_array = ["0"] * len(all_games)
    for game in games_response_json:
        position = position_of.get(game['appid'])
        if position is not None:
            user_game_array[position] = "1"

    all_games = [game_dict[x]['name'] for x in all_games]

    game_bit_string = int(''.join(user_game_array), 2)
    dataset = KNN.loadDataset("./data/games_by_username_all.csv")
    closest = KNN.findClosest(dataset, game_bit_string, 100)
    return KNN.getTopGames(KNN.getVotes(all_games, closest, game_bit_string), 5)
コード例 #45
0
ファイル: KNNstudy.py プロジェクト: beyondliyang/KNN
def handwriteDigitTest():
    """Classify every test row with ``KNN.classify0`` (k=5) and save the predictions.

    Each prediction is printed next to ``testLabel[0, i]`` and the list of
    predictions is handed to ``saveResult``.
    """
    trainData, trainLabel = loadTrainData()
    testData = loadTestData()
    m, _ = shape(testData)
    testLabel = loadTestResult()
    resultList = []
    k = 5
    # The transposed labels do not change between rows; compute them once.
    trainLabelT = trainLabel.transpose()
    # predict every testData row's label
    # (range/print(...) work on both Python 2 and 3; xrange and the print
    # statement do not exist on Python 3)
    for i in range(m):
        classifyClassResult = KNN.classify0(testData[i], trainData, trainLabelT, k)
        resultList.append(classifyClassResult)
        print("the classifier calcute is: %d, the real answer is : %d" % (classifyClassResult, testLabel[0,i]))
    saveResult(resultList)
def probabilitygraph(data, vec, upperbound, k=5, weightf = KNN.gaussian_weight, sigma = 5.0):
    """Plot a smoothed curve of ``prob_guess`` over ``[0, upperbound)`` in steps of 0.1.

    For every step ``v`` the probability of the window ``(v, v + 0.1)`` is
    obtained from ``prob_guess``.  Each plotted point is the sum of all
    probabilities weighted by ``KNN.gaussian_weight`` of their distance (in
    steps of 0.1) with the given ``sigma``.
    """
    t = arange(0.0, upperbound, 0.1)

    # raw probability for each window
    probabilities = [prob_guess(data, vec, lo, lo + 0.1, k, weightf) for lo in t]

    # smooth: every point becomes a Gaussian-weighted sum over all windows
    points = []
    for centre in range(len(probabilities)):
        smoothed = 0.0
        for other, prob in enumerate(probabilities):
            smoothed += KNN.gaussian_weight(abs(centre - other) * 0.1, sigma) * prob
        points.append(smoothed)

    plot(t, array(points))
    show()
コード例 #47
0
ファイル: Isomap.py プロジェクト: philipz1/ML
def isomap(data, k = 10, target_dim = 2, load = False, save = False):
	"""Return the leading ``target_dim`` eigenvector columns of the Isomap Gram matrix.

	When ``load`` is not False the distance matrix is read with ``np.load(load)``.
	Otherwise it is computed from the k-nearest-neighbour graph of ``data`` and,
	if ``save`` is not False, written with ``np.save(save, dists)``.
	Eigenvectors of ``do_gram_tilda(dists)`` are ordered by descending eigenvalue.
	"""
	if load != False:
		dists = np.load(load)
	else:
		neighbours = KNN.knn(data, k)
		dists = shortest_dist_weight(construct_A(data, neighbours))
		if save != False:
			np.save(save, dists)

	eigvals, eigvecs = np.linalg.eigh(do_gram_tilda(dists))

	# largest eigenvalue first
	order = np.argsort(eigvals)[::-1]
	eigvecs = eigvecs[:, order]

	return eigvecs[:, :target_dim]
コード例 #48
0
def test(data, k):
    """Shuffle ``data`` in place, train on the first 800 items and return the accuracy on the rest.

    Points and labels are taken from columns 0 and 1 of ``data`` via
    ``column``.  The classifier is built by ``KNN.makeKNNClassifier`` with
    ``KNN.euclideanDistance``.
    """
    random.shuffle(data)
    pts = column(data, 0)
    labels = column(data, 1)

    split = 800
    held_out_pts = pts[split:]
    held_out_labels = labels[split:]

    classify = KNN.makeKNNClassifier(pts[:split], labels[:split], k, KNN.euclideanDistance)

    hits = sum(1 for point, label in zip(held_out_pts, held_out_labels)
               if classify(point) == label)

    return float(hits) / len(held_out_labels)
コード例 #49
0
ファイル: LaplacianEigenmap.py プロジェクト: philipz1/ML
def le(data, k = 10, target_dim = 2):
	"""Laplacian-eigenmap embedding of ``data`` via scikit-learn.

	Builds the k-nearest-neighbour graph with ``KNN.knn``, turns it into a
	matrix with ``construct_mesh`` and returns
	``sklearn.manifold.spectral_embedding`` of that matrix with
	``target_dim`` components.
	"""
	from sklearn import manifold

	graph = KNN.knn(data, k)
	A = construct_mesh(data, graph)
	# Honour target_dim (it used to be hard-coded to 2) and pass it by keyword:
	# recent scikit-learn releases accept n_components as keyword-only.
	# The hand-rolled eigen-decomposition that followed the return statement
	# was unreachable and has been removed.
	return manifold.spectral_embedding(A, n_components=target_dim)

# print(le(npdata))
def prob_guess(data, new_item, low, high, k=5, weightf = KNN.gaussian_weight):
    """Weighted share of the nearest neighbours whose price lies strictly between ``low`` and ``high``.

    The (at most) ``k`` closest entries from ``KNN.get_sorted_distances`` are
    used.  Each entry is read as ``(distance, index, ...)`` and contributes
    ``weightf(distance)``.  Returns 0 when the total weight is zero, which
    includes the case of no neighbours at all.
    """
    # get sorted distance list, keep the k closest
    nearest = KNN.get_sorted_distances(data, new_item)[:k]

    rweight = 0.0
    total_weight = 0.0

    # Iterate the slice itself: indexing with range(k) raised IndexError
    # whenever fewer than k neighbours were available.
    for entry in nearest:
        weight = weightf(entry[0])
        price = data[entry[1]]['price']

        if low < price < high:
            rweight += weight
        total_weight += weight
    if total_weight == 0:
        return 0
    return rweight/total_weight
コード例 #51
0
ファイル: LaplacianEigenmap.py プロジェクト: philipz1/ML
def le(data, k = 10, target_dim = 2):
	"""Laplacian-eigenmap embedding computed by hand.

	Builds ``A`` from the k-nearest-neighbour graph, forms the degree matrix
	``D`` from the row sums of ``A``, sandwiches ``D - A`` between
	``D ** (-1/2)`` (infinite entries zeroed) and solves ``eig(L, D)``.
	Eigenvectors are ordered by ``np.argsort`` of the eigenvalues and columns
	``1 .. target_dim`` are returned (column 0 is skipped).
	"""
	A = construct_mesh(data, KNN.knn(data, k))

	D = np.diag(A.sum(1))
	# entries that became infinite under the power are zeroed
	scale = D ** (-1/2)
	scale[np.isinf(scale)] = 0
	L = np.dot(np.dot(scale, D-A), scale)

	eigvals, eigvecs = eig(L, D)

	order = np.argsort(eigvals)
	eigvecs = eigvecs[:, order]

	first = 1
	return eigvecs[:, first: first + target_dim]
コード例 #52
0
ファイル: main.py プロジェクト: Wummer/ML1
# Finish the figure of the preceding section (theta is defined above this fragment)
plt.title('With $\\theta$ = %1.1f'%theta)
plt.axis('equal')
#plt.show()

"""
------------------------------------ I.4.x ---------------------------------------

"""

""" See the module for the explanation of each function. """

# Open the Iris training and test files; they are not closed in this fragment
train = open('IrisTrain2014.dt', 'r')
test = open('IrisTest2014.dt', 'r')

#Calling read and split
train_set = KNN.read_data(train)
test_set = KNN.read_data(test)
# Test set transformed with the help of the training set (see KNN.transformtest)
transformed_test = KNN.transformtest(train_set, test_set)

print "*" * 45
print "Mean and variance"
print "*" * 45

print " Train set:"
# presumably KNN.meanfree reports mean/variance and returns the centred data -- confirm in the module
zeromean_train = KNN.meanfree(train_set)
print "-" * 45

print " Normalized rain set:"
# meanfree applied a second time, to the already processed training set
getting_mean_for_normalized_train = KNN.meanfree(zeromean_train)
print "-" * 45
コード例 #53
0
ファイル: __init__.py プロジェクト: zerozzl/MLStudy
'''
# Generate Image
trainDatas, trainLabels = Pretreatment.loadTrainData('/home/hadoop/workdatas/kaggle/DigitRecognizer/train_sort.csv');
trainLabels = trainLabels[0];
Pretreatment.generateImage('/home/hadoop/workdatas/kaggle/DigitRecognizer/imgs/', trainDatas, trainLabels);
'''


'''
# KNN Test
import numpy
group = numpy.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
labels = ['A', 'A', 'B', 'B']

tar = [1.0, 1.2];

result = KNN.classify(tar, group, labels, 3);

print result;
'''

# KNN classify: label the Kaggle Digit Recognizer test set and write the result file.
# (Trailing semicolons removed; print(...) with one argument prints the same on
# Python 2 and 3, whereas the print statement is a SyntaxError on Python 3.)
trainDatas, trainLabels = Pretreatment.loadTrainData('/home/hadoop/workdatas/kaggle/DigitRecognizer/train.csv')
# only the first row of the returned labels is used
trainLabels = trainLabels[0]
testDatas = Pretreatment.loadTestData('/home/hadoop/workdatas/kaggle/DigitRecognizer/test.csv')
result = KNN.process(testDatas, trainDatas, trainLabels)
Pretreatment.generateResultFile('/home/hadoop/workdatas/kaggle/DigitRecognizer/result_knn_10.csv', result)

print('success')
コード例 #54
0
ファイル: test.py プロジェクト: bzhou830/ML-python
        elif datingLabels[i] == 3:
             ax.scatter(datingDataMat[i][0],datingDataMat[i][1],datingDataMat[i][2], c='g',marker='*')
        elif datingLabels[i] == 4:
             ax.scatter(datingDataMat[i][0],datingDataMat[i][1],datingDataMat[i][2], marker='1')
       
    #ax.scatter(datingDataMat[:,0],datingDataMat[:,1],datingDataMat[:,2],
    #          5.0 * np.array(datingLabels), 5.0 * np.array(datingLabels))
    
    plt.show()


def classifyPerson():
    """Interactively classify one person against the dating data set.

    Three numbers are read from the console, scaled with the ranges and
    minimum values returned by ``KNN.autoNorm`` and classified by
    ``KNN.classify0`` with k=3.  The matching entry of ``resultList`` is
    printed and the normalised data is plotted with ``PrintFigure``.

    NOTE(review): ``raw_input`` and the print statement are Python 2-only.
    """
    print "输入相关信息"
    # Text shown for class labels 1, 2 and 3 (indexed with label - 1 below)
    resultList = ['一点不喜欢','有点希望','可能性很大']
    percentTats = float(raw_input("玩游戏时间数目?"))
    ffMiles = float(raw_input("旅游公路数?"))
    ice = float(raw_input("冰淇淋消耗量?"))
    datingDataMat,datingLabels = KNN.file2matrix('datingTestSet2.txt')
    normMat,ranges,minVals = KNN.autoNorm(datingDataMat)
    # Feature order used for the query: miles, game time, ice cream
    # (assumed to match the column order of the data file -- TODO confirm)
    inArr = np.array([ffMiles,percentTats,ice])
    # Scale the query the same way as the data before classifying
    classfierRt = KNN.classify0((inArr-minVals)/ranges,normMat,datingLabels,3)
    print resultList[classfierRt - 1]
    PrintFigure(normMat, datingLabels)

#classifyPerson()
# Module-level demo: load the dating data set, print its labels and plot the
# features as loaded (no autoNorm is applied here).
datingDataMat,datingLabels = KNN.file2matrix('datingTestSet2.txt')
print datingLabels
PrintFigure(datingDataMat,datingLabels)

    
コード例 #55
0
ファイル: skEntropy.py プロジェクト: Kamlapati/skEntropy
def main(argv):
    """ Start of the program

    Parses ``argv`` (``-h``, ``-f FILE`` / ``--ifile=FILE``, ``-d`` / ``--dump``),
    prints the entropy of FILE, then classifies it with ``KNN.ibk`` from the
    average per-section entropy before and after ``encrypFile``.  With the
    dump flag ``dump_info(FILE)`` is printed as well.

    Exits through ``sys.exit()`` after printing the usage on malformed
    arguments or ``-h``.
    """

    if len(argv)==0:      ## check of the arguments
        print("\n Improper command format")
        usage()
        sys.exit()

    filename=''
    flow = False
    dump = False
    try:
        opts,args=getopt.getopt(argv,"hf:d",["ifile=", "dump"])
    except getopt.GetoptError:
        print("\n Improper command format")
        usage()
        sys.exit()

    ## to read the arguments
    for opt,arg in opts:
        if opt=="-h":
            usage()
            sys.exit()
        elif opt in ("-f","--ifile"):
            filename=arg
            flow = True
        # getopt reports short options with their leading dash; comparing
        # against "d" sent every -d into the error branch below.
        elif opt in ("-d","--dump"):
            dump=True
        else:
            print("\n Improper command format")
            usage()
            sys.exit()

    if not flow:
        print("\n Improper command format")
        usage()
        return

    if not os.path.isfile(filename):
        print("File doesn't exit or path is improper")
        return

    # Read the whole file and release the handle straight away.
    with open(filename,'rb') as openfile:
        readfile = openfile.read()
    print("Entropy of file is ", H(readfile))

    pe=pefile.PE(filename)
    unknownEntropy = []
    unknownPackedEntropy = []
    for section in pe.sections:
        # NOTE(review): the virtual address/size are used as offsets into the
        # raw file bytes -- confirm this is intended rather than the raw-data
        # pointer/size of the section.
        init=section.VirtualAddress
        last=section.VirtualAddress+section.Misc_VirtualSize
        sectionData= readfile[init:last]
        unknownEntropy.append(HsetReduction(sectionData))

        # Entropy of the same section after encrypFile() on its hex text
        hex_bytes = binascii.hexlify(sectionData)
        cleartext = hex_bytes.decode("utf-8")
        cipherText= encrypFile(cleartext)
        unknownPackedEntropy.append(HsetReduction(cipherText))

    if unknownEntropy:
        TotalunknownEntropy= 0
        TotalunknownPackedEntropy = 0
        for plain, packed in zip(unknownEntropy, unknownPackedEntropy):
            TotalunknownEntropy = TotalunknownEntropy+ plain
            TotalunknownPackedEntropy = TotalunknownPackedEntropy+ packed
        firstEntropy = TotalunknownEntropy/len(unknownEntropy)
        secondEntropy =TotalunknownPackedEntropy/len(unknownPackedEntropy)
        #print("First Entropy = ",firstEntropy)
        #print("Entropy after packing = ",secondEntropy)
        TestingList = [firstEntropy,secondEntropy,(secondEntropy-firstEntropy)]
        predictedResult = KNN.ibk(TestingList)
        print(filename," is ",predictedResult)
        #KNN.ibktest()
    else:
        # Without sections the averages would divide by zero.
        print(filename," has no sections to classify")

    if dump:
        print(dump_info(filename))
コード例 #56
0
train = NaiveBayes.train_nb(data)

#Read example data and test every example; the file is closed when done
with open(examples, 'r') as f:
    for line in f:
        # one comma-separated row of floats per line
        row = [float(value) for value in line.split(',')]

        #Apply the algorithm: anything other than 'NB' is read as the k for KNN
        # (single-argument print(...) prints the same on Python 2 and 3)
        if algorithm != 'NB':
            print(KNN.knearest(int(algorithm),data,row))
        else:
            print(NaiveBayes.naive_bayes(row,train))


##############################
#                            #
# Cross validation called    #
# when need it               #
#                            #
##############################
#folds = CrossValidation.fold_divide(data)
#print CrossValidation.cross_validation_nn(1,folds)
#print CrossValidation.cross_validation_nb(folds)

コード例 #57
0
	# 	# data[4]: user_taggedartists.dat 
	# 	# tag = [userID	artistID	tagID	day	month	year]
	# 	if UserManager.has_key(int(tag[0])):
	# 		UserManager[int(tag[0])].insertTag(int(tag[1]),int(tag[2]))



	#train with UserManager, test with TestUserManager
	# counter = 0
	# for userID,user in TestUserManager.iteritems():
	# 	if len(user.ArtistList) == 0:

	# 		counter += 1
	# 	# print userID, len(user.ArtistList)
	# print counter, len(TestUserManager)
	knn = KNN(30)
	knn.training(UserManager, ArtistManager)

	theSameNum = 0
	for userID in TestUserManager:
		favOfOne, neighbors = knn.testingTimeBased(TestUserManager[userID],UserManager, ArtistManager)
		favTruth = TestUserManager[userID].getMostFav().keys()[0]
		if favOfOne == favTruth:
			theSameNum += 1
		print userID, theSameNum, favOfOne

	print 1.0*theSameNum/len(TestUserManager)

	# print favOfOne

コード例 #58
0
ファイル: knntest.py プロジェクト: RayleighChen/Improve
# -*- coding: utf-8 -*-
'''
Created on 2014年9月30日

@author: Rayleigh
'''
import KNN as kNN
from numpy import * 

dataSet, labels = kNN.createDataSet()

# Number of neighbours; now actually passed to the classifier instead of a
# repeated literal 3.
k = 3

# Classify each sample point and report the result.  The format string
# reproduces the output of the former Python 2 multi-argument print
# (including the double space before the label) and also runs on Python 3.
for testX in (array([0.2, 0.9]), array([0.1, 0.3])):
    outputLabel = kNN.kNNClassify(testX, dataSet, labels, k)
    print("Your input is: %s and classified to class:  %s" % (testX, outputLabel))