def data_visualization(K, dynamic_datasets_path):
    """Plot up to K randomly chosen train trajectories with distinct pattern IDs.

    The train set is loaded through ``readDatasets.read_dataset`` and split
    into parallel lists by ``TrainData.getListsOfTrainData``. Train entries
    are then drawn uniformly at random; each time a not-yet-seen journey
    pattern ID comes up, its trajectory is written as an HTML map under
    ``../../Resources/maps/task1``.

    The search stops when K patterns were plotted, when every distinct
    pattern of the train set has already been plotted, or after 120 seconds.

    :param K: number of distinct journey patterns to plot.
    :param dynamic_datasets_path: forwarded as the last argument of
        ``readDatasets.read_dataset``.
    """
    print("Going to visualize bus-trajectories..")
    dataSets = readDatasets.read_dataset(True, False, False,
                                         dynamic_datasets_path)
    trainSet = dataSets[0]
    journeyPatternIDs, trainTrajs, trainListSize = \
        TrainData.getListsOfTrainData(trainSet)

    storeMapsDir = os.path.join('..', '..', 'Resources', 'maps', 'task1')
    if not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)

    # Upper bound on how many distinct patterns can ever be found. Without it
    # the loop below keeps drawing (and burning CPU) until the timeout
    # whenever K exceeds this number, and crashes on an empty train list.
    # Only integer indexing is used, exactly as the sampling loop does.
    distinctPatternCount = len(
        {journeyPatternIDs[i] for i in range(trainListSize)})

    selectedPatternIDs = set()  # set: O(1) "already plotted?" checks
    start_time = time.time()
    maxSecondsToWait = 120

    while True:
        if len(selectedPatternIDs) == K:
            print('Finished plotting ' + str(K) +
                  ' distinct random patterns.')
            break
        if len(selectedPatternIDs) >= distinctPatternCount:
            print('The train set contains only ' + str(distinctPatternCount) +
                  ' distinct patterns; all of them were plotted (requested: ' +
                  str(K) + ').')
            break
        if (time.time() - start_time) > maxSecondsToWait:
            print('The program could not find ' + str(K) +
                  ' distinct random patterns in the specified time: ' +
                  str(maxSecondsToWait))
            break

        randomTrain = random.randint(0, trainListSize - 1)
        curPatternID = journeyPatternIDs[randomTrain]
        if curPatternID in selectedPatternIDs:
            continue  # pattern already plotted, draw again
        selectedPatternIDs.add(curPatternID)

        # Plot the newly found pattern.
        print('Going to plot a new random train..')
        longitudes, latitudes = GetCoordinates.getCoordinates(
            trainTrajs[randomTrain])
        # str() so that a non-string pattern ID cannot break the file name.
        fileName = ("train" + str(randomTrain) + "_Pattern_" +
                    str(curPatternID) + ".html")
        GmPlot.gmPlot(latitudes, longitudes,
                      os.path.join(storeMapsDir, fileName))
def data_visualization(K):
    """Plot up to K randomly chosen train trajectories with distinct pattern IDs.

    The train set is loaded through ``readDatasets.read_dataset`` and split
    into parallel lists by ``TrainData.getListsOfTrainData``. Train entries
    are drawn uniformly at random; each time a not-yet-seen journey pattern
    ID comes up, its trajectory is written as an HTML map under
    ``../../Resources/maps/task1``.

    The search stops when K patterns were plotted, when every distinct
    pattern of the train set has already been plotted, or after 120 seconds.

    :param K: number of distinct journey patterns to plot.
    """
    # NOTE(review): this file also contains an earlier
    # data_visualization(K, dynamic_datasets_path); being defined later, this
    # one-argument version replaces it at import time - confirm which one is
    # intended to survive.
    dataSets = readDatasets.read_dataset(True, False, False)
    trainSet = dataSets[0]
    journeyPatternIDs, trainTrajs, trainListSize = \
        TrainData.getListsOfTrainData(trainSet)

    storeMapsDir = "../../Resources/maps/task1"
    if not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)

    # Upper bound on how many distinct patterns can ever be found. Without it
    # the loop below keeps drawing (and burning CPU) until the timeout
    # whenever K exceeds this number, and crashes on an empty train list.
    distinctPatternCount = len(
        {journeyPatternIDs[i] for i in range(trainListSize)})

    selectedPatternIDs = set()  # set: O(1) "already plotted?" checks
    start_time = time.time()
    maxSecondsToWait = 120

    # Every print below is a call with ONE string argument, so the output is
    # the same under Python 2 and the code also parses under Python 3.
    while True:
        if len(selectedPatternIDs) == K:
            print('Finished plotting ' + str(K) +
                  ' distinct random patterns.')
            break
        if len(selectedPatternIDs) >= distinctPatternCount:
            print('The train set contains only ' + str(distinctPatternCount) +
                  ' distinct patterns; all of them were plotted (requested: ' +
                  str(K) + ').')
            break
        if (time.time() - start_time) > maxSecondsToWait:
            print('The program could not find ' + str(K) +
                  ' distinct random patterns in the specified time: ' +
                  str(maxSecondsToWait))
            break

        randomTrain = random.randint(0, trainListSize - 1)
        curPatternID = journeyPatternIDs[randomTrain]
        if curPatternID in selectedPatternIDs:
            continue  # pattern already plotted, draw again
        selectedPatternIDs.add(curPatternID)

        # Plot the newly found pattern.
        print('Going to plot a new random train..')
        longitudes, latitudes = GetCoordinates.getCoordinates(
            trainTrajs[randomTrain])
        # str() so that a non-string pattern ID cannot break the file name.
        fileName = ("train" + str(randomTrain) + "_Pattern_" +
                    str(curPatternID) + ".html")
        GmPlot.gmPlot(latitudes, longitudes,
                      os.path.join(storeMapsDir, fileName))
def runLCSS(K, dynamic_datasets_path, maxPlottedMatches=5):
    """Match every test trajectory against the train set by longest common
    sub-sequence and plot the best matches.

    For each trajectory in the ``'Trajectory'`` column of the second data set
    returned by ``readDatasets.read_dataset``, ``lcs`` is computed against
    every train trajectory. Non-empty results are offered to a ``KMaxs(K)``
    collector as ``[trainIndex, commonSubSequence, length]``. The test
    trajectory and the best matches (longest first) are written as HTML maps
    under ``../../Resources/maps/task2A2``; each match map shows the full
    train trajectory together with the common sub-sequence.

    :param K: capacity handed to ``KMaxs``.
    :param dynamic_datasets_path: forwarded as the last argument of
        ``readDatasets.read_dataset``.
    :param maxPlottedMatches: how many of the collected matches are reported
        and plotted per test trajectory (default 5, the previously hard-coded
        limit); ``None`` plots all collected matches.
    """
    print('\nLCSS start..')
    dataSets = readDatasets.read_dataset(True, False, True,
                                         dynamic_datasets_path)
    trainSet = dataSets[0]
    testSetA2 = dataSets[1]
    journeyPatternIDs, trainTrajs, trainListSize = \
        TrainData.getListsOfTrainData(trainSet)

    storeMapsDir = os.path.join('..', '..', 'Resources', 'maps', 'task2A2')
    if not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)

    kMaxs = KMaxs(K)
    start_time = time.time()
    lastTime = start_time  # For in-the-middle elapsed-time.

    for testNum, trajectoryTest in enumerate(testSetA2['Trajectory'],
                                             start=1):
        print('\nChecking for ' + str(K) +
              ' longest common sub-sequences of test ' + str(testNum))

        # IDs and trajectories are of the same size.
        for i in range(trainListSize):
            LongestCS = lcs(trainTrajs[i], trajectoryTest)
            if not LongestCS:
                continue  # nothing in common with this train trajectory
            kMaxs.checkMinLengthAndInsert([i, LongestCS, len(LongestCS)])

        curTime = time.time()
        curElapsedTime = curTime - lastTime
        lastTime = curTime

        print('\nTest: ' + str(testNum) + ') finished in ' +
              time.strftime("%H:%M:%S", time.gmtime(curElapsedTime)))

        # Plot the test trajectory itself.
        full_longitudes, full_latitudes = GetCoordinates.getCoordinates(
            trajectoryTest)
        fileName = ("lcss" + str(testNum) + "-test-Time(sec)_" +
                    str(curElapsedTime) + ".html")
        GmPlot.gmPlot(full_latitudes, full_longitudes,
                      os.path.join(storeMapsDir, fileName), zoom=13)

        # Longest matches first; then reset the collector so the next test
        # trajectory starts from an empty list.
        sorted_subSequences = sorted(kMaxs.getArrayList(), reverse=True,
                                     key=lambda tup: tup[2])
        kMaxs.resetArrayList()

        for match in sorted_subSequences[:maxPlottedMatches]:
            trainIndex, subSequence, matchingPoints = \
                match[0], match[1], match[2]
            patternID = str(journeyPatternIDs[trainIndex])

            # Console message only - no ".html" suffix here (it used to be
            # copied over from the file name below).
            print("Train " + str(trainIndex) + ") PatternID: " + patternID +
                  ", MatchingPoints: " + str(matchingPoints))

            full_longitudes, full_latitudes = GetCoordinates.getCoordinates(
                trainTrajs[trainIndex])
            sub_longitudes, sub_latitudes = GetCoordinates.getCoordinates(
                subSequence)

            fileName = ("lcss" + str(testNum) + "-train" + str(trainIndex) +
                        "_PatternID_" + patternID +
                        "-MatchingPoints_" + str(matchingPoints) + ".html")
            GmPlot.gmPlotOfColours(full_latitudes, full_longitudes,
                                   sub_latitudes, sub_longitudes,
                                   os.path.join(storeMapsDir, fileName))

    # The value is formatted as HH:MM:SS, so no "mins" unit is appended.
    print("\nElapsed time of KNNwithLCSS for 'test_set_a2': ",
          time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))
def findKnearestNeighbors(K, maxWarpingWindowPercentage, plotPatterns,
                          makeListOfAllNeighbors, trainSet, testSet):
    """Find the nearest train trajectories of every test trajectory using DTW.

    For each trajectory in ``testSet['Trajectory']`` the DTW distance to every
    train trajectory is computed. Infinite costs are counted and skipped; all
    other results are offered to a ``KMins(K)`` collector as
    ``[trainIndex, patternID, cost]``. The collected neighbours are printed
    in ascending cost order.

    :param K: capacity handed to ``KMins``.
    :param maxWarpingWindowPercentage: passed to ``Dtw`` as
        ``max_warping_window_percentage``.
    :param plotPatterns: when true, the test trajectory and each collected
        neighbour are written as HTML maps under
        ``../../Resources/maps/task2A1``.
    :param makeListOfAllNeighbors: when true, the neighbours' pattern IDs are
        gathered per test trajectory and returned.
    :param trainSet: train data accepted by ``TrainData.getListsOfTrainData``.
    :param testSet: test data; only its ``'Trajectory'`` column is read.
    :return: one list of pattern IDs (ascending DTW cost) per test trajectory
        when ``makeListOfAllNeighbors`` is true, otherwise an empty list.
    """
    print('KNN-with-DTW starts..')
    journeyPatternIDs, trainTrajs, trainListSize = \
        TrainData.getListsOfTrainData(trainSet)

    # Only used (and only created on disk) when plotting is requested.
    storeMapsDir = os.path.join('..', '..', 'Resources', 'maps', 'task2A1')
    if plotPatterns and not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)

    dtw = Dtw(max_warping_window_percentage=maxWarpingWindowPercentage)
    kMins = KMins(K)
    sorted_nearestNeighbors_forAllTests = []

    start_time = time.time()
    lastTime = start_time  # For in-the-middle elapsed-time.

    for testNum, trajectoryTest in enumerate(testSet['Trajectory'], start=1):
        inf_costs_count = 0
        print('\nChecking for ' + str(K) + ' nearest-neighbors of test ' +
              str(testNum) + '..')

        # Start from +inf rather than a fixed 1000: with a finite sentinel the
        # true minimum is never reported when every finite cost is >= 1000.
        min_cost = float('inf')
        minJourneyPatternId = ''
        min_i = 0

        # IDs and trajectories are of the same size.
        for i in range(trainListSize):
            curPatternID = journeyPatternIDs[i]
            cost = dtw._dtw_distance(trajectoryTest, trainTrajs[i])
            if math.isinf(cost):
                inf_costs_count += 1
                continue

            kMins.checkMaxCostAndInsert([i, curPatternID, cost])

            if cost < min_cost:
                min_cost = cost
                min_i = i
                minJourneyPatternId = curPatternID
                print(str(testNum) + '-' + str(min_i) +
                      ') Found new minCost: ' + str(min_cost) +
                      ' from journeyPatternID: ' + str(minJourneyPatternId))

        curTime = time.time()
        curElapsedTime = curTime - lastTime
        lastTime = curTime

        # Cheapest neighbours first; then reset the collector so the next
        # test trajectory starts from an empty list.
        sorted_nearestNeighbors = sorted(kMins.getArrayList(),
                                         key=lambda tup: tup[2])
        kMins.resetArrayList()

        print('\nTest: ' + str(testNum) + ') finished in ' +
              time.strftime("%H:%M:%S", time.gmtime(curElapsedTime)) +
              '\nMax warping window percentage: ' +
              str(maxWarpingWindowPercentage) +
              '\n\'Inf\' costs found in this test: ' + str(inf_costs_count) +
              '\nMin_journeyPatternId: ' + str(minJourneyPatternId) +
              ' Min_i: ' + str(min_i) + ' Min_cost: ' + str(min_cost) +
              '\nSorted mins: ')

        if plotPatterns:
            # Plot the test trajectory itself.
            longitudes, latitudes = GetCoordinates.getCoordinates(
                trajectoryTest)
            fileName = ("dtw" + str(testNum) + "-test-Time(sec)_" +
                        str(curElapsedTime) + ".html")
            GmPlot.gmPlot(latitudes, longitudes,
                          os.path.join(storeMapsDir, fileName), zoom=13)

        sorted_nearestNeighbors_forTest = []
        for neighbor in sorted_nearestNeighbors:
            trainIndex, patternID, cost = \
                neighbor[0], neighbor[1], neighbor[2]
            print("i: ", trainIndex, ", PatternID: ", patternID,
                  ", DTW-cost: ", cost)

            if makeListOfAllNeighbors:
                sorted_nearestNeighbors_forTest.append(patternID)

            if plotPatterns:
                longitudes, latitudes = GetCoordinates.getCoordinates(
                    trainTrajs[trainIndex])
                fileName = ("dtw" + str(testNum) + "-train" +
                            str(trainIndex) + "-PatternID_" + str(patternID) +
                            "-DTW_" + str(cost) + ".html")
                GmPlot.gmPlot(latitudes, longitudes,
                              os.path.join(storeMapsDir, fileName), zoom=13)

        if makeListOfAllNeighbors:
            # One entry per test trajectory.
            sorted_nearestNeighbors_forAllTests.append(
                sorted_nearestNeighbors_forTest)

    print("\nElapsed time of KNNwithDTW for 'test_set_a1': ",
          time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))

    return sorted_nearestNeighbors_forAllTests