Exemple #1
0
def data_visualization(K, dynamic_datasets_path):
    """Plot K randomly chosen training trajectories with distinct pattern IDs.

    The datasets are read through ``readDatasets.read_dataset`` and the first
    returned set is used as the training set. A random training index is drawn
    repeatedly; whenever the drawn entry carries a journey-pattern ID that has
    not been plotted yet, its coordinates are rendered with ``GmPlot.gmPlot``
    into an HTML file under ``../../Resources/maps/task1`` (the directory is
    created when missing).

    The search stops as soon as one of these holds:

    * K distinct patterns were plotted,
    * every distinct pattern of the training set was plotted (so K can never
      be reached),
    * ``maxSecondsToWait`` seconds elapsed.

    Args:
        K: number of distinct journey patterns to plot.
        dynamic_datasets_path: forwarded unchanged to
            ``readDatasets.read_dataset``.
    """
    print("Going to visualize bus-trajectories..")
    dataSets = readDatasets.read_dataset(True, False, False,
                                         dynamic_datasets_path)
    trainSet = dataSets[0]

    journeyPatternIDs, trainTrajs, trainListSize = TrainData.getListsOfTrainData(
        trainSet)

    storeMapsDir = os.path.join('..', '..', 'Resources', 'maps', 'task1')
    if not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)

    # Number of distinct patterns reachable by the random draw below. Knowing
    # it lets the loop stop right away instead of spinning until the timeout
    # when K is larger than what the training set can offer (this also covers
    # an empty training set, where random.randint(0, -1) would raise).
    distinctPatternsCount = len(
        set(journeyPatternIDs[i] for i in range(trainListSize)))

    # A set gives O(1) "already plotted?" checks.
    selectedPatternIDs = set()

    start_time = time.time()
    maxSecondsToWait = 120

    while True:

        if len(selectedPatternIDs) == K:
            print('Finished plotting ' + str(K) +
                  ' distinct random patterns.')
            break
        elif len(selectedPatternIDs) >= distinctPatternsCount:
            print('The training set contains only ' +
                  str(distinctPatternsCount) +
                  ' distinct patterns, so ' + str(K) +
                  ' distinct random patterns cannot be plotted.')
            break
        elif (time.time() - start_time) > maxSecondsToWait:
            print('The program could not find ' + str(K) +
                  ' distinct random patterns in the specified time: ' +
                  str(maxSecondsToWait))
            break

        randomTrain = random.randint(0, trainListSize - 1)
        curPatternID = journeyPatternIDs[randomTrain]
        if curPatternID in selectedPatternIDs:
            continue  # Pattern already plotted, draw again.

        selectedPatternIDs.add(curPatternID)
        # plot the new pattern
        print('Going to plot a new random train..')
        longitudes, latitudes = GetCoordinates.getCoordinates(
            trainTrajs[randomTrain])
        # str() keeps the file name valid even when the ID is not a string.
        fileName = ("train" + str(randomTrain) + "_Pattern_" +
                    str(curPatternID) + ".html")
        GmPlot.gmPlot(latitudes, longitudes,
                      os.path.join(storeMapsDir, fileName))
Exemple #2
0
def data_visualization(K):
    """Plot K randomly chosen training trajectories with distinct pattern IDs.

    The datasets are read through ``readDatasets.read_dataset`` and the first
    returned set is used as the training set. A random training index is drawn
    repeatedly; whenever the drawn entry carries a journey-pattern ID that has
    not been plotted yet, its coordinates are rendered with ``GmPlot.gmPlot``
    into an HTML file under ``../../Resources/maps/task1`` (the directory is
    created when missing).

    The search stops as soon as one of these holds:

    * K distinct patterns were plotted,
    * every distinct pattern of the training set was plotted (so K can never
      be reached),
    * ``maxSecondsToWait`` seconds elapsed.

    Every ``print`` call takes a single argument, so the output is the same
    whether ``print`` is the Python 3 function or the Python 2 statement.

    Args:
        K: number of distinct journey patterns to plot.
    """
    dataSets = readDatasets.read_dataset(True, False, False)
    trainSet = dataSets[0]

    journeyPatternIDs, trainTrajs, trainListSize = TrainData.getListsOfTrainData(
        trainSet)

    storeMapsDir = os.path.join('..', '..', 'Resources', 'maps', 'task1')
    if not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)

    # Number of distinct patterns reachable by the random draw below. Knowing
    # it lets the loop stop right away instead of spinning until the timeout
    # when K is larger than what the training set can offer (this also covers
    # an empty training set, where random.randint(0, -1) would raise).
    distinctPatternsCount = len(
        set(journeyPatternIDs[i] for i in range(trainListSize)))

    # A set gives O(1) "already plotted?" checks.
    selectedPatternIDs = set()

    start_time = time.time()
    maxSecondsToWait = 120

    while True:

        if len(selectedPatternIDs) == K:
            print('Finished plotting ' + str(K) +
                  ' distinct random patterns.')
            break
        elif len(selectedPatternIDs) >= distinctPatternsCount:
            print('The training set contains only ' +
                  str(distinctPatternsCount) +
                  ' distinct patterns, so ' + str(K) +
                  ' distinct random patterns cannot be plotted.')
            break
        elif (time.time() - start_time) > maxSecondsToWait:
            print('The program could not find ' + str(K) +
                  ' distinct random patterns in the specified time: ' +
                  str(maxSecondsToWait))
            break

        randomTrain = random.randint(0, trainListSize - 1)
        curPatternID = journeyPatternIDs[randomTrain]
        if curPatternID in selectedPatternIDs:
            continue  # Pattern already plotted, draw again.

        selectedPatternIDs.add(curPatternID)
        # plot the new pattern
        print('Going to plot a new random train..')
        longitudes, latitudes = GetCoordinates.getCoordinates(
            trainTrajs[randomTrain])
        # str() keeps the file name valid even when the ID is not a string.
        fileName = ("train" + str(randomTrain) + "_Pattern_" +
                    str(curPatternID) + ".html")
        GmPlot.gmPlot(latitudes, longitudes,
                      os.path.join(storeMapsDir, fileName))
def runLCSS(K, dynamic_datasets_path, maxPlots=5):
    """Find and plot the best LCSS matches of every test trajectory.

    The datasets are read through ``readDatasets.read_dataset``; the first
    returned set is used as the training set and the second as the test set.
    For every trajectory in the test set's ``'Trajectory'`` column, ``lcs`` is
    evaluated against each training trajectory and the non-empty results are
    offered to a ``KMaxs(K)`` collector as ``[trainIndex, subSequence,
    length]``. The collected entries are then sorted by length (longest
    first) and plotted.

    Output files are written under ``../../Resources/maps/task2A2`` (created
    when missing): one map for the test trajectory itself and one two-colour
    map per plotted match, showing the whole training trajectory together
    with the common sub-sequence.

    Args:
        K: capacity handed to ``KMaxs``; also used in the progress messages.
        dynamic_datasets_path: forwarded unchanged to
            ``readDatasets.read_dataset``.
        maxPlots: upper bound on how many of the sorted matches are plotted
            per test trajectory. Defaults to 5, the limit that used to be
            hard-coded. ``None`` plots every collected match.
    """
    print('\nLCSS start..')

    dataSets = readDatasets.read_dataset(True, False, True,
                                         dynamic_datasets_path)

    trainSet = dataSets[0]
    testSetA2 = dataSets[1]

    journeyPatternIDs, trainTrajs, trainListSize = TrainData.getListsOfTrainData(
        trainSet)

    storeMapsDir = os.path.join('..', '..', 'Resources', 'maps', 'task2A2')
    if not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)

    kMaxs = KMaxs(K)

    start_time = time.time()

    for testNum, trajectoryTest in enumerate(testSetA2['Trajectory'], 1):

        # Taken at the top of the iteration so the reported time covers only
        # this test's LCSS search, not the plotting done for the previous one.
        testStartTime = time.time()

        print('\nChecking for ' + str(K) +
              ' longest common sub-sequences of test ' + str(testNum))

        # IDs and Trajectories are of the same size.
        for i in range(trainListSize):
            LongestCS = lcs(trainTrajs[i], trajectoryTest)

            if not LongestCS:
                continue  # Nothing in common with this training trajectory.

            kMaxs.checkMinLengthAndInsert([i, LongestCS, len(LongestCS)])

        curElapsedTime = time.time() - testStartTime

        print('\nTest: ' + str(testNum) + ') finished in ' +
              time.strftime("%H:%M:%S", time.gmtime(curElapsedTime)))

        # Plot test
        full_longitudes, full_latitudes = GetCoordinates.getCoordinates(
            trajectoryTest)
        fileName = ("lcss" + str(testNum) + "-test-Time(sec)_" +
                    str(curElapsedTime) + ".html")
        GmPlot.gmPlot(full_latitudes,
                      full_longitudes,
                      os.path.join(storeMapsDir, fileName),
                      zoom=13)

        # Longest matches first.
        sorted_subSequences = sorted(kMaxs.getArrayList(),
                                     reverse=True,
                                     key=lambda tup: tup[2])
        # Reset arrayList before going to the next test trajectory.
        kMaxs.resetArrayList()

        for match in sorted_subSequences[:maxPlots]:
            trainIndex = match[0]
            subSequence = match[1]
            matchingPoints = match[2]
            patternID = journeyPatternIDs[trainIndex]

            print("Train " + str(trainIndex) + ") PatternID: " +
                  str(patternID) + ", MatchingPoints: " +
                  str(matchingPoints))

            # Whole training trajectory ...
            full_longitudes, full_latitudes = GetCoordinates.getCoordinates(
                trainTrajs[trainIndex])
            # ... and the part of it shared with the test trajectory.
            sub_longitudes, sub_latitudes = GetCoordinates.getCoordinates(
                subSequence)

            fileName = ("lcss" + str(testNum) + "-train" + str(trainIndex) +
                        "_PatternID_" + str(patternID) +
                        "-MatchingPoints_" + str(matchingPoints) + ".html")
            GmPlot.gmPlotOfColours(full_latitudes, full_longitudes,
                                   sub_latitudes, sub_longitudes,
                                   os.path.join(storeMapsDir, fileName))

    # The value is formatted as HH:MM:SS, so no unit suffix is printed.
    print("\nElapsed time of KNNwithLCSS for 'test_set_a2': ",
          time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))
Exemple #4
0
def findKnearestNeighbors(K, maxWarpingWindowPercentage, plotPatterns,
                          makeListOfAllNeighbors, trainSet, testSet):
    """Find the nearest training trajectories of every test trajectory by DTW.

    For every trajectory in ``testSet['Trajectory']`` the DTW cost against
    each training trajectory is computed with ``Dtw._dtw_distance``. Infinite
    costs are counted and skipped; every other result is offered to a
    ``KMins(K)`` collector as ``[trainIndex, patternID, cost]``. The
    collected entries are then sorted by ascending cost and printed.

    Args:
        K: capacity handed to ``KMins``; also used in the progress messages.
        maxWarpingWindowPercentage: forwarded to ``Dtw`` as
            ``max_warping_window_percentage``.
        plotPatterns: when truthy, the test trajectory and each of its
            collected neighbours are plotted with ``GmPlot.gmPlot`` into
            ``../../Resources/maps/task2A1`` (created when missing).
        makeListOfAllNeighbors: when truthy, the pattern IDs of the sorted
            neighbours are gathered per test trajectory and returned.
        trainSet: training data, passed to
            ``TrainData.getListsOfTrainData``.
        testSet: test data; only its ``'Trajectory'`` column is read here.

    Returns:
        A list with one entry per test trajectory, each entry being the list
        of neighbour pattern IDs ordered by ascending DTW cost. The list is
        empty when ``makeListOfAllNeighbors`` is falsy.
    """
    print('KNN-with-DTW starts..')

    journeyPatternIDs, trainTrajs, trainListSize = TrainData.getListsOfTrainData(
        trainSet)

    if plotPatterns:
        storeMapsDir = os.path.join('..', '..', 'Resources', 'maps', 'task2A1')
        if not os.path.isdir(storeMapsDir):
            os.makedirs(storeMapsDir)

    dtw = Dtw(max_warping_window_percentage=maxWarpingWindowPercentage)
    kMins = KMins(K)

    sorted_nearestNeighbors_forAllTests = []

    start_time = time.time()

    for testNum, trajectoryTest in enumerate(testSet['Trajectory'], 1):

        # Taken at the top of the iteration so the reported time covers only
        # this test's DTW search, not the plotting done for the previous one.
        testStartTime = time.time()

        inf_costs_count = 0

        print('\nChecking for ' + str(K) + ' nearest-neighbors of test ' +
              str(testNum) + '..')

        # Start from +inf rather than a fixed number so the running minimum
        # is correct no matter how large the finite costs are.
        min_cost = float('inf')
        minJourneyPatternId = ''
        min_i = 0

        # IDs and Trajectories are of the same size.
        for i in range(trainListSize):

            curPatternID = journeyPatternIDs[i]
            cost = dtw._dtw_distance(trajectoryTest, trainTrajs[i])

            if math.isinf(cost):
                inf_costs_count += 1
                continue

            kMins.checkMaxCostAndInsert([i, curPatternID, cost])

            if cost < min_cost:
                min_cost = cost
                min_i = i
                minJourneyPatternId = curPatternID
                print(str(testNum) + '-' + str(min_i) +
                      ') Found new minCost: ' + str(min_cost) +
                      ' from journeyPatternID: ' + str(minJourneyPatternId))

        curElapsedTime = time.time() - testStartTime

        sorted_nearestNeighbors = sorted(kMins.getArrayList(),
                                         key=lambda tup: tup[2])
        # Reset arrayList before going to the next test trajectory.
        kMins.resetArrayList()

        print('\nTest: ' + str(testNum) + ') finished in ' +
              time.strftime("%H:%M:%S", time.gmtime(curElapsedTime)) +
              '\nMax warping window percentage: ' +
              str(maxWarpingWindowPercentage) +
              '\n\'Inf\' costs found in this test: ' + str(inf_costs_count) +
              '\nMin_journeyPatternId: ' + str(minJourneyPatternId) +
              ' Min_i: ' + str(min_i) + ' Min_cost: ' + str(min_cost) +
              '\nSorted mins: ')

        if plotPatterns:
            # Plot test
            longitudes, latitudes = GetCoordinates.getCoordinates(
                trajectoryTest)
            fileName = ("dtw" + str(testNum) + "-test-Time(sec)_" +
                        str(curElapsedTime) + ".html")
            GmPlot.gmPlot(latitudes,
                          longitudes,
                          os.path.join(storeMapsDir, fileName),
                          zoom=13)

        sorted_nearestNeighbors_forTest = []
        # Report (and optionally plot) the neighbours, cheapest first.
        for neighbor in sorted_nearestNeighbors:
            trainIndex = neighbor[0]
            patternID = neighbor[1]
            dtwCost = neighbor[2]

            print("i: ", trainIndex, ", PatternID: ", patternID,
                  ", DTW-cost: ", dtwCost)

            # Make a list with all the neighbours
            if makeListOfAllNeighbors:
                sorted_nearestNeighbors_forTest.append(patternID)
            if plotPatterns:
                longitudes, latitudes = GetCoordinates.getCoordinates(
                    trainTrajs[trainIndex])
                fileName = ("dtw" + str(testNum) + "-train" +
                            str(trainIndex) + "-PatternID_" +
                            str(patternID) + "-DTW_" + str(dtwCost) +
                            ".html")
                GmPlot.gmPlot(latitudes,
                              longitudes,
                              os.path.join(storeMapsDir, fileName),
                              zoom=13)

        if makeListOfAllNeighbors:
            # Make a list with all the neighbours for all the tests
            sorted_nearestNeighbors_forAllTests.append(
                sorted_nearestNeighbors_forTest)

    print("\nElapsed time of KNNwithDTW for 'test_set_a1': ",
          time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    return sorted_nearestNeighbors_forAllTests