Beispiel #1
0
def runA1andA2(K, dynamic_datasets_path):
    """Run the A1 nearest-neighbour search, then the A2 LCSS task.

    Args:
        K: Number of neighbours; forwarded unchanged to both tasks.
        dynamic_datasets_path: Location of the datasets; forwarded to the
            dataset reader and to the LCSS task.
    """
    # Settings for the A1 search.
    warping_window = 0.11
    draw_patterns = True
    collect_all_neighbors = False

    loaded = readDatasets.read_dataset(True, True, False,
                                       dynamic_datasets_path)
    # Index instead of unpacking: the reader may return more than two sets.
    train_rows, test_rows = loaded[0], loaded[1]

    task2A1.findKnearestNeighbors(K, warping_window, draw_patterns,
                                  collect_all_neighbors, train_rows,
                                  test_rows)

    # The LCSS task loads its own datasets from the given path.
    task2A2.runLCSS(K, dynamic_datasets_path)
def runClassification(K, dynamic_datasets_path):
    """Classify the test set with KNN, save the result, then cross-validate.

    Args:
        K: Number of neighbours used for the vote.
        dynamic_datasets_path: Location of the datasets, forwarded to the
            dataset reader.
    """
    print('Start KNN Classification..')

    loaded = readDatasets.read_dataset(True, False, True,
                                       dynamic_datasets_path)
    train_rows = loaded[0]
    test_rows = loaded[1]

    # For testSet_a2 a bigger window is needed to get the right patternIDs.
    warping_window = 0.33
    draw_patterns = False  # Only the neighbours are wanted, not the html-maps.
    neighbors_for_every_test = True

    # KNN over test_a2, followed by the vote and the CSV export.
    neighbor_lists = findKnearestNeighbors(K, warping_window, draw_patterns,
                                           neighbors_for_every_test,
                                           train_rows, test_rows)
    predictions = getVotes(neighbor_lists)
    write_predictions_to_csv(predictions)

    # Finally, evaluate on the training data with 10-fold cross-validation.
    crossValidation(train_rows, K, warping_window, num_folds=10)
def crossValidation(trainSet, K, maxWarpingWindowPercentage, num_folds):
    """Estimate KNN accuracy on ``trainSet`` with k-fold cross-validation.

    The rows are split into ``num_folds`` contiguous folds of equal size;
    trailing rows that do not fill a whole fold are ignored. Each fold is
    used once as the test set while all remaining folds form the training
    set. Predictions are compared with the 'journeyPatternId' values of the
    held-out fold.

    Args:
        trainSet: Labelled data supporting positional row slicing and a
            'journeyPatternId' column. NOTE(review): assumed to be a pandas
            DataFrame, as implied by the slicing/column access used here.
        K: Number of neighbours, forwarded to ``findKnearestNeighbors``.
        maxWarpingWindowPercentage: Forwarded to ``findKnearestNeighbors``.
        num_folds: Number of folds; must be at least 1.

    Returns:
        float: Mean accuracy over all folds (a fold with nothing to compare
        counts as 0.0).

    Raises:
        ValueError: If ``num_folds`` is smaller than 1.
    """
    # Local import: only needed here, to join the training folds row-wise.
    import pandas as pd

    if num_folds < 1:
        raise ValueError('num_folds must be at least 1')

    subset_size = len(trainSet) // num_folds
    usable_rows = subset_size * num_folds  # rows beyond this fill no fold

    makeListsOfNeighborsForAllTests = True
    plotPatterns = False  # We just want the KNN, not the html-maps.

    accuracies = []
    for fold in range(num_folds):
        test_start = fold * subset_size
        test_end = test_start + subset_size

        testing_this_round = trainSet[test_start:test_end]

        # Train on every fold except the held-out one. Empty pieces are
        # skipped so that concat is never handed empty frames.
        pieces = [
            part
            for part in (trainSet[:test_start], trainSet[test_end:usable_rows])
            if len(part)
        ]
        training_this_round = pd.concat(pieces) if pieces else trainSet[:0]

        neighborsTestsLists = findKnearestNeighbors(
            K, maxWarpingWindowPercentage, plotPatterns,
            makeListsOfNeighborsForAllTests, training_this_round,
            testing_this_round)

        testData = getVotes(neighborsTestsLists)

        print(testData)

        # Score against the labels of the held-out fold; element [1] of each
        # vote entry is the predicted pattern id.
        correct = 0
        compared = 0
        actual_ids = testing_this_round['journeyPatternId']
        for prediction, actual in zip(testData, actual_ids):
            print("Predicted: ", prediction[1], ' - ', "Actual: ", actual)
            # Compare by value: identity ('is') is unreliable for strings
            # and numbers.
            if prediction[1] == actual:
                correct += 1
            compared += 1

        print('Correct predictions: ', correct)
        accuracy = float(correct) / compared if compared else 0.0
        print('Accuacy: ', accuracy)
        accuracies.append(accuracy)

    mean_accuracy = sum(accuracies) / len(accuracies)
    print('Mean accuracy: ', mean_accuracy)

    return mean_accuracy