def runA1andA2(K, dynamic_datasets_path):
    """Run the A1 nearest-neighbour search, then the A2 LCSS task.

    Args:
        K: Neighbour count, forwarded unchanged to both tasks.
        dynamic_datasets_path: Dataset location, forwarded to
            ``readDatasets.read_dataset`` and to ``task2A2.runLCSS``.
    """
    # The three boolean flags are positional; their meaning is defined by
    # readDatasets.read_dataset, which is not visible from this function.
    loaded = readDatasets.read_dataset(True, True, False, dynamic_datasets_path)
    train_rows, test_rows = loaded[0], loaded[1]

    # A1 settings: maximum warping window percentage of 0.11, pattern
    # plotting enabled, and no list of all neighbours requested.
    warping_window_pct = 0.11
    plot_patterns = True
    list_all_neighbors = False
    task2A1.findKnearestNeighbors(
        K,
        warping_window_pct,
        plot_patterns,
        list_all_neighbors,
        train_rows,
        test_rows,
    )

    # runLCSS is not handed any datasets; it gets them on its own from the path.
    task2A2.runLCSS(K, dynamic_datasets_path)
def runClassification(K, dynamic_datasets_path):
    """Classify the test set with KNN, save the predictions, then cross-validate.

    Args:
        K: Neighbour count, forwarded to ``findKnearestNeighbors`` and to
            ``crossValidation``.
        dynamic_datasets_path: Dataset location, forwarded to
            ``readDatasets.read_dataset``.
    """
    print('Start KNN Classification..')

    # The boolean flags are positional; their meaning is defined by
    # readDatasets.read_dataset, which is not visible from this function.
    loaded = readDatasets.read_dataset(True, False, True, dynamic_datasets_path)
    train_rows = loaded[0]
    test_rows = loaded[1]

    # For testSet_a2 a bigger window is needed to get the right patternIDs.
    warping_window_pct = 0.33

    # KNN for test_a2: plotting is off (only the neighbours are wanted, not
    # the html-maps) and neighbour lists are requested for all tests.
    neighbor_lists = findKnearestNeighbors(
        K,
        warping_window_pct,
        False,
        True,
        train_rows,
        test_rows,
    )
    write_predictions_to_csv(getVotes(neighbor_lists))

    # Cross-validation over the training data with the same K and window.
    crossValidation(train_rows, K, warping_window_pct, num_folds=10)
def crossValidation(trainSet, K, maxWarpingWindowPercentage, num_folds):
    """Estimate KNN accuracy on ``trainSet`` with k-fold cross-validation.

    The rows are split into ``num_folds`` consecutive folds of equal size;
    trailing rows that do not fill a whole fold are left out. Each fold is
    used once as the test set while all the other folds together form the
    training set.

    Args:
        trainSet: Labelled data supporting ``len()``, row slicing and a
            ``'journeyPatternId'`` column. Assumed to be a pandas DataFrame
            (TODO confirm against ``readDatasets.read_dataset``).
        K: Neighbour count, forwarded to ``findKnearestNeighbors``.
        maxWarpingWindowPercentage: Forwarded to ``findKnearestNeighbors``.
        num_folds: Number of folds to split ``trainSet`` into.

    Returns:
        The mean accuracy (float) over all folds that produced at least one
        prediction, or ``0.0`` when no fold could be evaluated.
    """
    # Local import so this function does not rely on a module-level alias.
    import pandas as pd

    fold_size = len(trainSet) // num_folds
    usable_rows = fold_size * num_folds
    accuracies = []

    makeListsOfNeighborsForAllTests = True
    plotPatterns = False  # We just want the KNN, not the html-maps.

    for fold in range(num_folds):
        start = fold * fold_size
        stop = start + fold_size

        testing_this_round = trainSet[start:stop]
        # Train on every fold except the held-out one. The rows must be
        # concatenated: `+` on tabular data would add values element-wise.
        training_this_round = pd.concat(
            [trainSet[:start], trainSet[stop:usable_rows]]
        )

        neighborsTestsLists = findKnearestNeighbors(
            K,
            maxWarpingWindowPercentage,
            plotPatterns,
            makeListsOfNeighborsForAllTests,
            training_this_round,
            testing_this_round,
        )
        testData = getVotes(neighborsTestsLists)
        print(testData)

        # Compare each prediction with the label of the matching row of the
        # held-out fold. Assumes testData is a sequence ordered like the test
        # rows whose entries hold the predicted id at index 1 (as the original
        # `testData[i][1]` access implies) -- TODO confirm against getVotes.
        correct = 0
        compared = 0
        actual_labels = testing_this_round['journeyPatternId']
        for prediction, actual in zip(testData, actual_labels):
            predicted = prediction[1]
            print("Predicted: ", predicted, ' - ', "Actual: ", actual)
            # Value equality: identity (`is`) is unreliable for ids.
            if predicted == actual:
                correct += 1
            compared += 1

        print('Correct predictions: ', correct)
        if compared == 0:
            # Nothing to score in this fold; skip it rather than divide by 0.
            continue

        accuracy = float(correct) / compared
        print('Accuracy: ', accuracy)
        accuracies.append(accuracy)

    if not accuracies:
        return 0.0
    return sum(accuracies) / len(accuracies)