def runClassification(K, dynamic_datasets_path):
    """Run the KNN classification and then the cross-validation step.

    The first two entries returned by ``readDatasets.read_dataset`` are used
    as train set and test set. The neighbour lists produced by
    ``findKnearestNeighbors`` are turned into votes by ``getVotes`` and handed
    to ``write_predictions_to_csv``. Finally ``crossValidation`` is called on
    the train set with 10 folds.

    :param K: forwarded to ``findKnearestNeighbors`` and ``crossValidation``.
    :param dynamic_datasets_path: forwarded to ``readDatasets.read_dataset``.
    :return: None.
    """
    print('Start KNN Classification..')

    loaded = readDatasets.read_dataset(True, False, True,
                                       dynamic_datasets_path)
    training, testing = loaded[0], loaded[1]

    # For testSet_a2 a bigger window is needed to get the right patternIDs.
    warpingWindow = 0.33
    # Only the KNN result is wanted here, not the html-maps.
    drawMaps = False
    # Ask for the neighbour lists of all tests, they feed the voting below.
    collectAllNeighbors = True

    # Run KNN for test_a2, vote, and store the predictions.
    votedTestData = getVotes(
        findKnearestNeighbors(K, warpingWindow, drawMaps, collectAllNeighbors,
                              training, testing))
    write_predictions_to_csv(votedTestData)

    # Run cross-validation.
    crossValidation(training, K, warpingWindow, num_folds=10)
Esempio n. 2
0
def data_visualization(K, dynamic_datasets_path):
    """Plot up to ``K`` randomly chosen train trajectories, one per pattern ID.

    The train set comes from ``readDatasets.read_dataset`` and is split into
    parallel lists by ``TrainData.getListsOfTrainData``. Random train indices
    are drawn until one of the following happens:

    * ``K`` trajectories with pairwise distinct pattern IDs were plotted,
    * every distinct pattern ID of the train set was plotted already,
    * more than ``maxSecondsToWait`` seconds have passed.

    Each selected trajectory is written with ``GmPlot.gmPlot`` to
    ``train<index>_Pattern_<patternID>.html`` inside
    ``../../Resources/maps/task1`` (the directory is created when missing).

    :param K: number of distinct journey patterns to plot.
    :param dynamic_datasets_path: forwarded to ``readDatasets.read_dataset``.
    :return: None.
    """
    print("Going to visualize bus-trajectories..")
    dataSets = readDatasets.read_dataset(True, False, False,
                                         dynamic_datasets_path)
    trainSet = dataSets[0]

    journeyPatternIDs, trainTrajs, trainListSize = TrainData.getListsOfTrainData(
        trainSet)

    storeMapsDir = os.path.join('..', '..', 'Resources', 'maps', 'task1')
    if not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)

    # Upper bound of the patterns that can ever be found. Without it a K
    # larger than this bound keeps the loop spinning until the timeout, and an
    # empty train set makes random.randint(0, -1) raise ValueError.
    if trainListSize > 0:
        numOfDistinctPatterns = len(set(journeyPatternIDs))
    else:
        numOfDistinctPatterns = 0

    selectedPatternIDs = set()  # Set instead of list: O(1) membership test.
    numOfSelectedPatterns = 0

    start_time = time.time()
    maxSecondsToWait = 120

    while True:

        if numOfSelectedPatterns == K:
            print('Finished plotting ' + str(K) +
                  ' distinct random patterns.')
            break
        elif numOfSelectedPatterns >= numOfDistinctPatterns:
            print('The train set contains only ' +
                  str(numOfSelectedPatterns) +
                  ' distinct patterns; all of them were plotted.')
            break
        elif (time.time() - start_time) > maxSecondsToWait:
            print('The program could not find ' + str(K) +
                  ' distinct random patterns in the specified time: ' +
                  str(maxSecondsToWait))
            break

        randomTrain = random.randint(0, trainListSize - 1)
        curPatternID = journeyPatternIDs[randomTrain]
        if curPatternID in selectedPatternIDs:
            continue  # Pattern was plotted before, draw another train.

        selectedPatternIDs.add(curPatternID)
        # plot the new pattern
        print('Going to plot a new random train..')
        longitudes, latitudes = GetCoordinates.getCoordinates(
            trainTrajs[randomTrain])
        # str() keeps the concatenation valid for non-string pattern IDs.
        fileName = ("train" + str(randomTrain) + "_Pattern_" +
                    str(curPatternID) + ".html")
        GmPlot.gmPlot(latitudes, longitudes,
                      os.path.join(storeMapsDir, fileName))
        numOfSelectedPatterns += 1
Esempio n. 3
0
def runA1andA2(K, dynamic_datasets_path):
    """Run task 2-A1 (``findKnearestNeighbors``) followed by task 2-A2 (``runLCSS``).

    :param K: forwarded to both tasks.
    :param dynamic_datasets_path: forwarded to ``readDatasets.read_dataset``
        and to ``task2A2.runLCSS``.
    :return: None.
    """
    loaded = readDatasets.read_dataset(True, True, False,
                                       dynamic_datasets_path)
    training, testing = loaded[0], loaded[1]

    # Settings of the A1 run: patterns are plotted, no list of all neighbours.
    warpingWindow = 0.11
    drawMaps = True
    collectAllNeighbors = False

    task2A1.findKnearestNeighbors(K, warpingWindow, drawMaps,
                                  collectAllNeighbors, training, testing)

    # runLCSS receives no data sets, only the path: it loads them on its own.
    task2A2.runLCSS(K, dynamic_datasets_path)
Esempio n. 4
0
def data_visualization(K):
    """Plot up to ``K`` randomly chosen train trajectories, one per pattern ID.

    The train set comes from ``readDatasets.read_dataset`` and is split into
    parallel lists by ``TrainData.getListsOfTrainData``. Random train indices
    are drawn until one of the following happens:

    * ``K`` trajectories with pairwise distinct pattern IDs were plotted,
    * every distinct pattern ID of the train set was plotted already,
    * more than ``maxSecondsToWait`` seconds have passed.

    Each selected trajectory is written with ``GmPlot.gmPlot`` to
    ``train<index>_Pattern_<patternID>.html`` inside
    ``../../Resources/maps/task1`` (the directory is created when missing).

    Every message is printed through a single-argument ``print(...)`` call,
    which is valid on Python 2 as well as on Python 3.

    :param K: number of distinct journey patterns to plot.
    :return: None.
    """
    dataSets = readDatasets.read_dataset(True, False, False)
    trainSet = dataSets[0]

    journeyPatternIDs, trainTrajs, trainListSize = TrainData.getListsOfTrainData(
        trainSet)

    storeMapsDir = "../../Resources/maps/task1"
    if not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)

    # Upper bound of the patterns that can ever be found. Without it a K
    # larger than this bound keeps the loop spinning until the timeout, and an
    # empty train set makes random.randint(0, -1) raise ValueError.
    if trainListSize > 0:
        numOfDistinctPatterns = len(set(journeyPatternIDs))
    else:
        numOfDistinctPatterns = 0

    selectedPatternIDs = set()  # Set instead of list: O(1) membership test.
    numOfSelectedPatterns = 0

    start_time = time.time()
    maxSecondsToWait = 120

    while True:

        if numOfSelectedPatterns == K:
            print('Finished plotting ' + str(K) +
                  ' distinct random patterns.')
            break
        elif numOfSelectedPatterns >= numOfDistinctPatterns:
            print('The train set contains only ' +
                  str(numOfSelectedPatterns) +
                  ' distinct patterns; all of them were plotted.')
            break
        elif (time.time() - start_time) > maxSecondsToWait:
            print('The program could not find ' + str(K) +
                  ' distinct random patterns in the specified time: ' +
                  str(maxSecondsToWait))
            break

        randomTrain = random.randint(0, trainListSize - 1)
        curPatternID = journeyPatternIDs[randomTrain]
        if curPatternID in selectedPatternIDs:
            continue  # Pattern was plotted before, draw another train.

        selectedPatternIDs.add(curPatternID)
        # plot the new pattern
        print('Going to plot a new random train..')
        longitudes, latitudes = GetCoordinates.getCoordinates(
            trainTrajs[randomTrain])
        # str() keeps the concatenation valid for non-string pattern IDs.
        fileName = ("train" + str(randomTrain) + "_Pattern_" +
                    str(curPatternID) + ".html")
        GmPlot.gmPlot(latitudes, longitudes,
                      os.path.join(storeMapsDir, fileName))
        numOfSelectedPatterns += 1
Esempio n. 5
0
def runLCSStest():
    """Run ``lcs`` on the first two loaded data sets and print each result.

    The data sets are read with ``readDatasets.read_dataset(True, False, True)``
    and ``lcs`` is called with ``True`` as third argument. Its return value is
    iterated and every element is printed; nothing is plotted or returned.

    :return: None.
    """
    loaded = readDatasets.read_dataset(True, False, True)
    firstList, secondList = loaded[0], loaded[1]

    # Print everything lcs hands back, one element per line.
    for commonSubSequence in lcs(firstList, secondList, True):
        print(commonSubSequence)
def runLCSS(K, dynamic_datasets_path):
    """Match every test trajectory against the train set with ``lcs`` and plot the best matches.

    For each entry of ``testSetA2['Trajectory']`` the function calls
    ``lcs(trainTrajectory, testTrajectory)`` for all train trajectories and
    offers every non-empty result to a ``KMaxs(K)`` collector. It then plots
    the test trajectory itself, followed by the (at most five) collected
    matches with the most matching points. All html maps are written to
    ``../../Resources/maps/task2A2``, which is created when missing.

    :param K: passed to ``KMaxs``; presumably the number of longest common
        sub-sequences kept per test (confirm against ``KMaxs``).
    :param dynamic_datasets_path: forwarded to ``readDatasets.read_dataset``.
    :return: None.
    """
    print('\nLCSS start..')

    dataSets = readDatasets.read_dataset(True, False, True,
                                         dynamic_datasets_path)

    trainSet = dataSets[0]
    testSetA2 = dataSets[1]

    journeyPatternIDs, trainTrajs, trainListSize = TrainData.getListsOfTrainData(
        trainSet)

    storeMapsDir = os.path.join('..', '..', 'Resources', 'maps', 'task2A2')
    if not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)

    kMaxs = KMaxs(K)
    maxPlottedMatches = 5  # Only the top 5 matches of a test are plotted.

    start_time = time.time()

    for testNum, trajectoryTest in enumerate(testSetA2['Trajectory'],
                                             start=1):
        # Timestamp per test, so the reported duration of a test does not
        # include the plotting work done for the previous one.
        testStartTime = time.time()

        print('\nChecking for ' + str(K) +
              ' longest common sub-sequences of test ' + str(testNum))

        # IDs and Trajectories are of the same size.
        for i in range(trainListSize):
            LongestCS = lcs(trainTrajs[i], trajectoryTest)
            if not LongestCS:
                continue  # Nothing in common with this train trajectory.

            # Entry layout: [train index, sub-sequence, matching points].
            kMaxs.checkMinLengthAndInsert([i, LongestCS, len(LongestCS)])

        curElapsedTime = time.time() - testStartTime

        print('\nTest: ' + str(testNum) + ') finished in ' +
              time.strftime("%H:%M:%S", time.gmtime(curElapsedTime)))

        # Plot test
        full_longitudes, full_latitudes = GetCoordinates.getCoordinates(
            trajectoryTest)
        fileName = ("lcss" + str(testNum) + "-test-Time(sec)_" +
                    str(curElapsedTime) + ".html")
        GmPlot.gmPlot(full_latitudes,
                      full_longitudes,
                      os.path.join(storeMapsDir, fileName),
                      zoom=13)

        # Order the collected entries by matching points, best first.
        sorted_subSequences = sorted(kMaxs.getArrayList(),
                                     reverse=True,
                                     key=lambda tup: tup[2])
        # Reset arrayList before going to the next test.
        kMaxs.resetArrayList()

        for entry in sorted_subSequences[:maxPlottedMatches]:
            trainIndex = entry[0]
            subSequence = entry[1]
            matchingPoints = entry[2]
            patternID = str(journeyPatternIDs[trainIndex])

            # Console message only, so it carries no ".html" suffix.
            print("Train " + str(trainIndex) + ") PatternID: " + patternID +
                  ", MatchingPoints: " + str(matchingPoints))

            # Whole train trajectory plus the common part drawn on top of it.
            full_longitudes, full_latitudes = GetCoordinates.getCoordinates(
                trainTrajs[trainIndex])
            sub_longitudes, sub_latitudes = GetCoordinates.getCoordinates(
                subSequence)

            fileName = ("lcss" + str(testNum) + "-train" + str(trainIndex) +
                        "_PatternID_" + patternID + "-MatchingPoints_" +
                        str(matchingPoints) + ".html")
            GmPlot.gmPlotOfColours(full_latitudes, full_longitudes,
                                   sub_latitudes, sub_longitudes,
                                   os.path.join(storeMapsDir, fileName))

    # The value is formatted as HH:MM:SS, so no unit label is appended.
    print("\nElapsed time of KNNwithLCSS for 'test_set_a2': ",
          time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))
Esempio n. 7
0
                    storeMapsDir + "/dtw" + testNum.__str__() + "-train" +
                    sorted_nearestNeighbors[i][0].__str__() + "-PatternID_" +
                    sorted_nearestNeighbors[i][1].__str__() + "-DTW_" +
                    sorted_nearestNeighbors[i][2].__str__() + ".html",
                    zoom=13)

        if makeListOfAllNeighbors:
            # Make a list with all the neighbours for all the tests
            sorted_nearestNeighbors_forAllTests.append(
                sorted_nearestNeighbors_forTest)

    print("\nElapsed time of KNNwithDTW for 'test_set_a1': ",
          time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    return sorted_nearestNeighbors_forAllTests


if __name__ == '__main__':
    # Stand-alone run of findKnearestNeighbors with fixed settings.
    K = 5
    maxWarpingWindowPercentage = 0.11
    plotPatterns = True
    makeListOfAllNeighbors = False

    # The first two entries of the loaded data sets are the train set and
    # the A1 test set.
    dataSets = readDatasets.read_dataset(True, True, False)
    trainSet, testSetA1 = dataSets[0], dataSets[1]

    findKnearestNeighbors(K, maxWarpingWindowPercentage, plotPatterns,
                          makeListOfAllNeighbors, trainSet, testSetA1)
                              os.path.join(storeMapsDir, fileName),
                              zoom=13)

        if makeListOfAllNeighbors:
            # Make a list with all the neighbours for all the tests
            sorted_nearestNeighbors_forAllTests.append(
                sorted_nearestNeighbors_forTest)

    print("\nElapsed time of KNNwithDTW for 'test_set_a1': ",
          time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    return sorted_nearestNeighbors_forAllTests


if __name__ == '__main__':
    # Stand-alone run of findKnearestNeighbors with fixed settings.
    # Imported here because only this entry point needs it.
    import sys

    dynamic_datasets_path = os.path.join('..', '..')
    dataSets = readDatasets.read_dataset(True, True, False,
                                         dynamic_datasets_path)

    # The first two entries of the loaded data sets are the train set and
    # the A1 test set.
    trainSet = dataSets[0]
    testSetA1 = dataSets[1]

    K = 5
    plotPatterns = True
    makeListOfAllNeighbors = False
    maxWarpingWindowPercentage = 0.11
    findKnearestNeighbors(K, maxWarpingWindowPercentage, plotPatterns,
                          makeListOfAllNeighbors, trainSet, testSetA1)

    # sys.exit() instead of the site-provided exit(): that helper is meant
    # for the interactive shell and is missing when site is not loaded.
    sys.exit()