def runClassification(K, dynamic_datasets_path):
    """Run the KNN/DTW classification pipeline on test_set_a2 and write the
    predictions to CSV, then run 10-fold cross-validation on the training set.

    :param K: number of nearest neighbours to use.
    :param dynamic_datasets_path: base path forwarded to the dataset reader.
    """
    print('Start KNN Classification..')
    data = readDatasets.read_dataset(True, False, True, dynamic_datasets_path)
    train_set = data[0]
    test_set = data[1]
    collect_all_neighbors = True   # keep one neighbour list per test row
    draw_maps = False              # We just want the KNN, not the html-maps.
    # For testSet_a2, we need a bigger window to get the right patternIDs.
    warping_window_pct = 0.33

    # Run KNN for test_a2, vote on the neighbour lists, persist predictions.
    per_test_neighbors = findKnearestNeighbors(K, warping_window_pct, draw_maps,
                                               collect_all_neighbors,
                                               train_set, test_set)
    write_predictions_to_csv(getVotes(per_test_neighbors))

    # Run cross-validation.
    crossValidation(train_set, K, warping_window_pct, num_folds=10)
def data_visualization(K, dynamic_datasets_path):
    """Plot up to K training trajectories with distinct journey-pattern IDs.

    Random training trajectories are sampled until K distinct patterns have
    been plotted (one gmplot HTML map each, under Resources/maps/task1) or
    until the time budget runs out.

    :param K: number of distinct journey patterns to plot.
    :param dynamic_datasets_path: base path forwarded to the dataset reader.
    """
    print("Going to visualize bus-trajectories..")
    dataSets = readDatasets.read_dataset(True, False, False,
                                         dynamic_datasets_path)
    trainSet = dataSets[0]
    journeyPatternIDs, trainTrajs, trainListSize = TrainData.getListsOfTrainData(
        trainSet)

    storeMapsDir = os.path.join('..', '..', 'Resources', 'maps', 'task1')
    os.makedirs(storeMapsDir, exist_ok=True)  # idempotent; replaces isdir check

    selectedPatternIDs = set()  # set: O(1) membership vs. original list scan
    numOfSelectedPatterns = 0
    start_time = time.time()
    maxSecondsToWait = 120  # give up after two minutes of unlucky sampling

    while True:
        if numOfSelectedPatterns == K:
            print(f'Finished plotting {K} distinct random patterns.')
            break
        elif (time.time() - start_time) > maxSecondsToWait:
            print(f'The program could not find {K} distinct random patterns '
                  f'in the specified time: {maxSecondsToWait}')
            break
        randomTrain = random.randint(0, trainListSize - 1)
        curPatternID = journeyPatternIDs[randomTrain]
        if curPatternID not in selectedPatternIDs:
            selectedPatternIDs.add(curPatternID)
            # Plot the newly discovered pattern.
            print('Going to plot a new random train..')
            longitudes, latitudes = GetCoordinates.getCoordinates(
                trainTrajs[randomTrain])
            fileName = f"train{randomTrain}_Pattern_{curPatternID}.html"
            GmPlot.gmPlot(latitudes, longitudes,
                          os.path.join(storeMapsDir, fileName))
            numOfSelectedPatterns += 1
def runA1andA2(K, dynamic_datasets_path):
    """Run task 2A1 (KNN with DTW, plotting neighbour maps) and then
    task 2A2 (KNN with LCSS)."""
    data = readDatasets.read_dataset(True, True, False, dynamic_datasets_path)
    train_set, test_set = data[0], data[1]

    warping_window_pct = 0.11   # max DTW warping window as a fraction of length
    draw_maps = True            # 2A1 also renders the html maps
    keep_all_neighbors = False

    task2A1.findKnearestNeighbors(K, warping_window_pct, draw_maps,
                                  keep_all_neighbors, train_set, test_set)

    # runLCSS loads its own datasets; only the base path is forwarded.
    task2A2.runLCSS(K, dynamic_datasets_path)
def data_visualization(K):
    """Plot up to K randomly chosen training trajectories with distinct
    journey-pattern IDs (one gmplot HTML map each, under
    ../../Resources/maps/task1), or stop after the time budget runs out.

    NOTE(review): legacy variant of data_visualization(K, dynamic_datasets_path)
    defined earlier in this file; being defined later, this one shadows it at
    import time. Its Python 2 `print` statements were syntax errors under the
    Python 3 used by the rest of the file and have been converted to print()
    calls with identical output.

    :param K: number of distinct journey patterns to plot.
    """
    dataSets = readDatasets.read_dataset(True, False, False)
    trainSet = dataSets[0]
    journeyPatternIDs, trainTrajs, trainListSize = TrainData.getListsOfTrainData(
        trainSet)

    storeMapsDir = "../../Resources/maps/task1"
    if not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)

    selectedPatternIDs = []
    numOfSelectedPatterns = 0
    start_time = time.time()
    maxSecondsToWait = 120  # give up after two minutes of unlucky sampling

    while True:
        if numOfSelectedPatterns == K:
            print('Finished plotting ' + str(K) + ' distinct random patterns.')
            break
        elif (time.time() - start_time) > maxSecondsToWait:
            print('The program could not find ' + str(K) +
                  ' distinct random patterns in the specified time: ' +
                  str(maxSecondsToWait))
            break
        randomTrain = random.randint(0, trainListSize - 1)
        curPatternID = journeyPatternIDs[randomTrain]
        if curPatternID not in selectedPatternIDs:
            selectedPatternIDs.append(curPatternID)
            # Plot the newly discovered pattern.
            print('Going to plot a new random train..')
            # Local typo fixed: was "longtitutes".
            longitudes, latitudes = GetCoordinates.getCoordinates(
                trainTrajs[randomTrain])
            GmPlot.gmPlot(latitudes, longitudes,
                          storeMapsDir + "/train" + str(randomTrain) +
                          "_Pattern_" + curPatternID + ".html")
            numOfSelectedPatterns += 1
def runLCSStest():
    """Smoke-test LCSS on the train/test datasets and print every common
    sub-sequence that lcs() returns."""
    data = readDatasets.read_dataset(True, False, True)
    # Third argument True: lcs() is asked for all most-common sub-sequences.
    common_subsequences = lcs(data[0], data[1], True)
    for subsequence in common_subsequences:
        print(subsequence)
def runLCSS(K, dynamic_datasets_path):
    """For every trajectory in test_set_a2, find the training trajectories with
    the longest common sub-sequences (LCSS), plot the test trajectory and its
    top-5 matches as gmplot HTML maps under Resources/maps/task2A2, and print
    per-test timing and match info.

    :param K: number of best LCSS matches kept per test trajectory (via KMaxs).
    :param dynamic_datasets_path: base path forwarded to the dataset reader.
    """
    print('\nLCSS start..')
    dataSets = readDatasets.read_dataset(True, False, True,
                                         dynamic_datasets_path)
    trainSet = dataSets[0]
    testSetA2 = dataSets[1]
    journeyPatternIDs, trainTrajs, trainListSize = TrainData.getListsOfTrainData(
        trainSet)
    storeMapsDir = os.path.join('..', '..', 'Resources', 'maps', 'task2A2')
    if not os.path.isdir(storeMapsDir):
        os.makedirs(storeMapsDir)
    # Keeps the K entries with the longest common sub-sequences seen so far.
    kMaxs = KMaxs(K)
    start_time = time.time()
    lastTime = start_time  # For in-the-middle elapsed-time.
    testNum = 0
    for trajectoryTest in testSetA2['Trajectory']:
        testNum += 1
        nearestNeighbors = []     # NOTE(review): unused in this function
        sorted_subSeqsSizes = []  # NOTE(review): unused in this function
        print('\nChecking for ' + K.__str__() +
              ' longest common sub-sequences of test ' + testNum.__str__())
        iterations = 0  # NOTE(review): unused leftover debug counter
        sorted_subSequences = []
        for i in range(
                0, trainListSize):  # IDs and Trajectories are of the same size.
            trajectoryTrain = trainTrajs[i]
            LongestCS = lcs(trajectoryTrain, trajectoryTest)
            if not LongestCS:
                continue  # no common sub-sequence with this train trajectory
            # Entry layout: [train index, common sub-sequence, its length].
            kMaxs.checkMinLengthAndInsert([i, LongestCS, len(LongestCS)])
        curTime = time.time()
        curElapsedTime = curTime - lastTime
        lastTime = curTime
        print('\nTest: ' + testNum.__str__() + ') finished in ' +
              time.strftime("%H:%M:%S", time.gmtime(curElapsedTime)))
        # Plot the full test trajectory on its own map.
        full_longitudes, full_latitudes = GetCoordinates.getCoordinates(
            trajectoryTest)
        fileName = "lcss" + testNum.__str__(
        ) + "-test-Time(sec)_" + curElapsedTime.__str__() + ".html"
        GmPlot.gmPlot(full_latitudes, full_longitudes,
                      os.path.join(storeMapsDir, fileName), zoom=13)
        # Sort the kept matches by sub-sequence length (tup[2]), longest first.
        sorted_subSequences = sorted(kMaxs.getArrayList(),
                                     reverse=True,
                                     key=lambda tup: tup[2])
        kMaxs.resetArrayList(
        )  # Reset arrayList before going to the next testSet.
        # Plot the top-5 matches: full train trajectory plus the matched
        # sub-sequence, drawn in different colours on the same map.
        for i in range(0, len(sorted_subSequences)):
            if i == 5:
                break
            print("Train " + sorted_subSequences[i][0].__str__() + ") PatternID: " \
                  + journeyPatternIDs[sorted_subSequences[i][0]].__str__() \
                  + ", MatchingPoints: " + sorted_subSequences[i][2].__str__() + ".html")
            curSubSeqTrajectory = trainTrajs[sorted_subSequences[i][0]]
            full_longitudes, full_latitudes = GetCoordinates.getCoordinates(
                curSubSeqTrajectory)
            # Variable reused: now holds the matched sub-sequence itself.
            curSubSeqTrajectory = sorted_subSequences[i][1]
            sub_longitudes, sub_latitudes = GetCoordinates.getCoordinates(
                curSubSeqTrajectory)
            fileName = "lcss" + testNum.__str__() + "-train" \
                       + sorted_subSequences[i][0].__str__() + "_PatternID_" \
                       + journeyPatternIDs[sorted_subSequences[i][0]].__str__() \
                       + "-MatchingPoints_" + sorted_subSequences[i][2].__str__() + ".html"
            GmPlot.gmPlotOfColours(full_latitudes, full_longitudes,
                                   sub_latitudes, sub_longitudes,
                                   os.path.join(storeMapsDir, fileName))
    print("\nElapsed time of KNNwithLCSS for 'test_set_a2': ",
          time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)),
          'mins')
# NOTE(review): orphaned fragment — the enclosing findKnearestNeighbors(...) def
# and the start of the GmPlot call below are not in this chunk, so the nesting
# depth here is reconstructed; TODO confirm against the complete file.
                  storeMapsDir + "/dtw" + testNum.__str__() + "-train" +
                  sorted_nearestNeighbors[i][0].__str__() + "-PatternID_" +
                  sorted_nearestNeighbors[i][1].__str__() + "-DTW_" +
                  sorted_nearestNeighbors[i][2].__str__() + ".html",
                  zoom=13)
        if makeListOfAllNeighbors:
            # Make a list with all the neighbours for all the tests
            sorted_nearestNeighbors_forAllTests.append(
                sorted_nearestNeighbors_forTest)
    print("\nElapsed time of KNNwithDTW for 'test_set_a1': ",
          time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))
    return sorted_nearestNeighbors_forAllTests


if __name__ == '__main__':
    # Legacy entry point: loads the datasets from the default location
    # (no dynamic_datasets_path — compare the variant further down the file).
    dataSets = readDatasets.read_dataset(True, True, False)
    trainSet = dataSets[0]
    testSetA1 = dataSets[1]
    K = 5
    plotPatterns = True
    makeListOfAllNeighbors = False
    maxWarpingWindowPercentage = 0.11
    findKnearestNeighbors(K, maxWarpingWindowPercentage, plotPatterns,
                          makeListOfAllNeighbors, trainSet, testSetA1)
# NOTE(review): orphaned fragment — the enclosing findKnearestNeighbors(...) def
# and the start of the GmPlot call below are not in this chunk, so the nesting
# depth here is reconstructed; TODO confirm against the complete file.
                  os.path.join(storeMapsDir, fileName), zoom=13)
        if makeListOfAllNeighbors:
            # Make a list with all the neighbours for all the tests
            sorted_nearestNeighbors_forAllTests.append(
                sorted_nearestNeighbors_forTest)
    print("\nElapsed time of KNNwithDTW for 'test_set_a1': ",
          time.strftime("%H:%M:%S", time.gmtime(time.time() - start_time)))
    return sorted_nearestNeighbors_forAllTests


if __name__ == '__main__':
    # Entry point: resolve the dataset base path relative to this script,
    # then run KNN/DTW over test_set_a1.
    dynamic_datasets_path = os.path.join('..', '..')
    dataSets = readDatasets.read_dataset(True, True, False,
                                         dynamic_datasets_path)
    trainSet = dataSets[0]
    testSetA1 = dataSets[1]
    K = 5
    plotPatterns = True
    makeListOfAllNeighbors = False
    maxWarpingWindowPercentage = 0.11
    findKnearestNeighbors(K, maxWarpingWindowPercentage, plotPatterns,
                          makeListOfAllNeighbors, trainSet, testSetA1)
    exit()  # NOTE(review): redundant at end of script; sys.exit() is conventional