예제 #1
0
def build_dataset(dataset=0, k=3):
    """Fit a k-nearest-neighbours classifier and print its hold-out accuracy.

    Args:
        dataset: ``0`` or ``"0"`` loads ``datasets.load_iris()``; ``1`` or
            ``"1"`` loads ``datasets.load_digits()``.  Any other value is
            used unchanged and must expose ``.data`` and ``.target``.
        k: Number of neighbours; coerced with ``int()`` so a string such as
            ``"3"`` is accepted.

    Returns:
        None.  The type of the feature matrix, the training rows and the
        accuracy (as a percentage with two decimals) are printed.
    """
    # The selector may be the int default or a string (the original only
    # matched strings, so the default ``0`` never loaded anything and the
    # function failed on ``dataset.data``).  Accept both spellings.
    if dataset in (0, "0"):
        dataset = datasets.load_iris()
    elif dataset in (1, "1"):
        dataset = datasets.load_digits()

    # Single-argument print calls produce the same output on Python 2 and 3.
    print(type(dataset.data))

    # Hold out 30% of the samples for testing (test_size=.3).
    data_train, data_test, targets_train, test_target = train_test_split(
        dataset.data, dataset.target, test_size=.3)

    # Two spaces after the colon reproduce what ``print "Data: ", x`` emitted.
    print("Data:  {}".format(data_train))

    # Train on the training split and predict the held-out samples.
    classifier = KNeighborsClassifier(int(k))
    model = classifier.fit(data_train, targets_train)
    targets_predicted = model.predict(data_test)

    # Count the predictions that agree with the true labels.
    count = sum(
        1
        for predicted, actual in zip(targets_predicted, test_target)
        if predicted == actual
    )

    correctness = float(count) / len(data_test) * 100

    print("Accuracy: {:.2f}".format(correctness))
예제 #2
0
파일: main.py 프로젝트: fggj228/IT-master
def main(station, k_neighbors, stations, transfers, beverage_data):
    """Print which beverage the classifier predicts for ``station``.

    A ``KNeighborsClassifier`` is built from ``stations``, ``transfers`` and
    ``beverage_data``, fitted with ``station`` and ``k_neighbors``, and the
    ``'tea'`` and ``'coffee'`` entries of its prediction are compared.

    Returns:
        Always ``0``.
    """
    model = KNeighborsClassifier(stations, transfers, beverage_data)
    model.fit(station, k_neighbors)
    prediction = model.predict()

    tea = prediction['tea']
    coffee = prediction['coffee']

    # Pick the message first, then emit it once.
    if tea > coffee:
        message = f'На станции {station} пьют чай'
    elif coffee > tea:
        message = f'На станции {station} пьют кофе'
    else:
        message = f'На станции {station} пьют и чай и кофе'
    print(message)

    return 0
예제 #3
0
파일: main.py 프로젝트: jessyjinshu/knn
import sklearn.utils as utils
import sklearn.datasets as datasets
from knn import KNeighborsClassifier

# Load the iris data set; keep the class names next to features and labels.
iris = datasets.load_iris()
ynames = iris.target_names

# Shuffle features and labels together with a fixed random_state.
X, y = utils.shuffle(iris.data, iris.target, random_state=1)

# The first `train_set_size` rows form the train set, the rest the test set.
train_set_size = 100
X_train, X_test = X[:train_set_size], X[train_set_size:]
y_train, y_test = y[:train_set_size], y[train_set_size:]

# Fit the project's KNN implementation with k neighbours.
k = 5
knn = KNeighborsClassifier(k=k)
knn.fit(X_train, y_train)

# Predict the held-out rows and report the score computed by the classifier.
y_pred_test = knn.predict(X_test)
print("Accuracy of KNN test set:", knn.score(y_pred_test, y_test))
예제 #4
0
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import fetcher
import processor
import normalizer
from knn import KNeighborsClassifier

# Step 1: fetch the raw vehicle records.
print('Fetching data...')
raw_data = fetcher.fetchVehicles()

# Step 2: process the records and normalize the "x" part.
print('Processing data...')
processed_data = processor.process(raw_data)
normalized_data_x = normalizer.normalize(processed_data["x"])

# Step 3: split features and the "y" part, holding out half for testing.
train_x, test_x, train_y, test_y = train_test_split(
    normalized_data_x,
    processed_data["y"],
    test_size=0.5,
)

# Step 4: fit the classifier (constructed with argument 1) and predict.
classifier = KNeighborsClassifier(1)
classifier.fit(train_x, train_y)
predictions = classifier.predict(test_x)

print(f'Done! Accuracy: {accuracy_score(test_y, predictions)}')
예제 #5
0
# Collects the value returned by cross_validate for each dataset, in order
loocvs = []

# This array is meant to hold the LOOCV accuracies for the various datasets.
# NOTE(review): nothing in this excerpt appends to it - confirm it is filled
# by code outside this view.
accuracies = []

dsets = ['iris_m10', 'iris_m20', 'iris_m30', 'iris_m50']

for df in dsets:

    print('Working on dataset = ' + df)

    # Create a new instance of the parser
    iris = IrisParser(df)

    # Create a new instance of the KNN model (n_neighbors=3, not 1)
    knn = KNeighborsClassifier(n_neighbors=3)

    # Get features and labels (shuffling disabled)
    features, labels = iris.parse(shuffle=False)

    # ************************************************* Train error
    print('Computing training LOOCV error')

    # Create LOOCV folds; len(features) is passed as the third argument,
    # presumably the fold count (one fold per sample) - TODO confirm
    x_folds, y_folds = iris.k_fold(features, labels, len(features))

    # Compute LOOCV error
    loocv_e = cross_validate(x_folds, y_folds, knn)

    # Add this error to a list, for later plotting
    # NOTE(review): the accuracy is not recorded in this excerpt
    loocvs.append(loocv_e)
예제 #6
0
            outputData.append(data[20])
    return inputData, outputData


if __name__ == '__main__':
    # Load all samples and their expected outputs from the data file.
    inputToLearn, outputToLearn = readData("file.txt")

    inputTest = []
    outputTest = []

    # Move 10 randomly chosen samples from the learning set to the test set.
    for _ in range(10):
        index = randint(0, len(inputToLearn) - 1)

        # pop(index) removes exactly the chosen position.  list.remove(value)
        # deletes the first element that compares equal, which for repeated
        # labels (or duplicate feature rows) is usually a different position
        # and silently misaligns the inputs with their outputs.
        inputTest.append(inputToLearn.pop(index))
        outputTest.append(outputToLearn.pop(index))

    classifier = KNeighborsClassifier()

    classifier.fit(inputToLearn, outputToLearn)

    # Count the held-out samples whose prediction matches the expected output.
    accuracy = 0

    for sample, expected in zip(inputTest, outputTest):
        if classifier.predict(sample) == expected:
            accuracy += 1

    print("The accuracy is: ", float(accuracy) / len(inputTest))
예제 #7
0
        samples[i].append([lat, lon])

# Collect the journey-pattern id stored under every key, in key order.
# Iterating keys() directly replaces ``keys()[i]``: that form requires keys()
# to return an indexable list (a Python 3 dict returns a view and raises
# TypeError) and rebuilds the key list on every iteration.
labels = [journeyPatternIds[key] for key in journeyPatternIds.keys()]

# One list of [lat, lon] points per test trip.  Each stored point unpacks as
# (t, lon, lat); the first field is dropped and the other two are swapped.
test = [
    [[lat, lon] for t, lon, lat in testSet[key]]
    for key in testSet.keys()
]

# Create the csv with the results of the KNN on the requested file
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(samples, labels)
result = neigh.predict(test, haversine)

dic = {"Test_Trip_ID": range(len(test)), "Predicted_JourneyPatternID": result}
out_df = pd.DataFrame(dic,
                      columns=['Test_Trip_ID', 'Predicted_JourneyPatternID'])
out_df.to_csv("testSet_JourneyPatternIDs.csv", sep='\t', index=False)

# Do a 10 fold for our KNN and save results in file
kf = KFold(n_splits=10)

# NOTE(review): this handle is not closed in the visible excerpt; it is
# presumably written to and closed by the code that follows - confirm.
f = open('10fold_2p_out.txt', 'w')

sum_acc = 0
count = 1
예제 #8
0
        samples[i].append([lat, lon])

# Collect the journey-pattern id stored under every key, in key order.
# Iterating keys() directly replaces ``keys()[i]``: that form requires keys()
# to return an indexable list (a Python 3 dict returns a view and raises
# TypeError) and rebuilds the key list on every iteration.
labels = [journeyPatternIds[key] for key in journeyPatternIds.keys()]

# One list of [lat, lon] points per test trip.  Each stored point unpacks as
# (t, lon, lat); the first field is dropped and the other two are swapped.
test = [
    [[lat, lon] for t, lon, lat in testSet[key]]
    for key in testSet.keys()
]

# Create a KNN classifier and fit with train set
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(samples, labels)

# Get K=5 neighbors for each item in test set
result = neigh.kneighbors(test, haversine, 5)

# Display results and print info
for i, answer in enumerate(result):
    gmap = gmplot.GoogleMapPlotter(53.383015, -6.237581, 12)

    longitudes = list()
    latitudes = list()
    for lat, lon in test[i]:
        longitudes.append(lon)
        latitudes.append(lat)