def main(): vessel_type = raw_input( "Please input the vessel type to search for raw .csv files named with IMO:" ) n = raw_input( "Please specify how many .csv files you wish to copy over(Press Enter for All):" ) capacity = sys.maxint if (n != ''): capacity = int(n) utils.queryPath("{vessel_type}/input".format(vessel_type=vessel_type)) dict_IMO = searchInputsForVesselType(vessel_type) print "number of IMOs with the specified type of {vessel_type} found:".format( vessel_type=vessel_type), len(dict_IMO) data_source = 'dataSource/dynamic/' count = 0 for file_name in os.listdir(data_source): if (file_name.find(".csv") != -1): if (file_name[:file_name.find(".csv")] in dict_IMO): """copy over to the input folder under vessel_type folder""" copyfile('{data_source}/{file_name}'.format(data_source = data_source, file_name = file_name), \ "{vessel_type}/input/{file_name}".format(vessel_type = vessel_type, file_name = file_name)) count += 1 if (count >= capacity): break
def clusterTrajectories(trajectories, fname, path, metric_func=trajectoryDissimilarityL2, user_distance_matrix=None, criterion='distance'): """ trajectories: the trajectories need to be in XY coordinates """ plot_path = utils.queryPath(path + "/plots") if (user_distance_matrix is None): distance_matrix = getTrajectoryDistanceMatrix(trajectories, metric_func) writeToCSV.saveData(distance_matrix, path + "/" + fname) # save the distance_matrix else: distance_matrix = user_distance_matrix assert len(distance_matrix) == len( trajectories ), "distance_matrix (n, n) and trajectories(n) should have same number of samples" print "distance_matrix:\n", distance_matrix v = DIST.squareform(distance_matrix) cluster_result = HAC.linkage(v, method="average") dg = HAC.dendrogram(cluster_result) plt.xlabel("cluster_dengrogram_{fname}".format(fname=fname)) plt.savefig("{path}/cluster_dengrogram_{fname}.png".format(fname=fname, path=plot_path)) plt.clf() if (criterion == 'distance'): if (metric_func == trajectoryDissimilarityL2): this_cluster_label = HAC.fcluster( Z=cluster_result, t=1 * 1000, criterion='distance') # distance for l2 measure elif (metric_func == trajectoryDissimilarityCenterMass): this_cluster_label = HAC.fcluster( Z=cluster_result, t=1.5, criterion='distance') # distance for center of mass measure elif (criterion == 'inconsistent'): this_cluster_label = HAC.fcluster(Z=cluster_result, t=0.8, criterion='inconsistent') print "this_cluster_label:", this_cluster_label, "number of clusters:", len( set(this_cluster_label)) """Plot the representative trajectories""" plotRepresentativeTrajectory(this_cluster_label, trajectories, \ fname = "cluster_centroids_{n}_classes".format(n = len(set(this_cluster_label))), \ path = plot_path, show = False) return this_cluster_label, [this_cluster_label], []
def main(): vessel_type = raw_input("Please input the vessel type to search for raw .csv files named with IMO:") n = raw_input("Please specify how many .csv files you wish to copy over(Press Enter for All):") capacity = sys.maxint if (n != ''): capacity = int(n) utils.queryPath("{vessel_type}/input".format(vessel_type = vessel_type)) dict_IMO = searchInputsForVesselType(vessel_type) print "number of IMOs with the specified type of {vessel_type} found:".format(vessel_type = vessel_type), len(dict_IMO) data_source = 'dataSource/dynamic/' count = 0 for file_name in os.listdir(data_source): if (file_name.find(".csv") != -1): if (file_name[:file_name.find(".csv")] in dict_IMO): """copy over to the input folder under vessel_type folder""" copyfile('{data_source}/{file_name}'.format(data_source = data_source, file_name = file_name), \ "{vessel_type}/input/{file_name}".format(vessel_type = vessel_type, file_name = file_name)) count += 1 if (count >= capacity): break
def checkStatus():
    """Stop the configured coin if it is running, otherwise set it up.

    The coin name is the lower-cased ``COIN`` environment variable. When
    ``checkIfRunning`` is true for it, ``getUsedPort`` and ``stop`` are
    called. Otherwise the ``PORT``, ``BLOCKCHAIN_PATH``, ``SSL_PORT`` and
    ``STAGE`` environment variables are filled in (the first three from the
    ``utils`` query helpers), ``utils.askSSL`` is called and ``setup`` runs.
    """
    coin = os.environ["COIN"].lower()

    if checkIfRunning(coin):
        getUsedPort(coin)
        stop(coin)
        return

    # not running: collect the settings, then start
    os.environ["PORT"] = utils.queryPort("Port to start: ")
    os.environ["BLOCKCHAIN_PATH"] = utils.queryPath(coin)
    os.environ["SSL_PORT"] = utils.queryPort("Port to start (SSL): ")
    utils.askSSL()
    os.environ["STAGE"] = "PRO"
    setup(coin)
def clusterTrajectories( trajectories, fname, path, metric_func=trajectoryDissimilarityL2, user_distance_matrix=None, criterion="distance" ): """ trajectories: the trajectories need to be in XY coordinates """ plot_path = utils.queryPath(path + "/plots") if user_distance_matrix is None: distance_matrix = getTrajectoryDistanceMatrix(trajectories, metric_func) writeToCSV.saveData(distance_matrix, path + "/" + fname) # save the distance_matrix else: distance_matrix = user_distance_matrix assert len(distance_matrix) == len( trajectories ), "distance_matrix (n, n) and trajectories(n) should have same number of samples" print "distance_matrix:\n", distance_matrix v = DIST.squareform(distance_matrix) cluster_result = HAC.linkage(v, method="average") dg = HAC.dendrogram(cluster_result) plt.xlabel("cluster_dengrogram_{fname}".format(fname=fname)) plt.savefig("{path}/cluster_dengrogram_{fname}.png".format(fname=fname, path=plot_path)) plt.clf() if criterion == "distance": if metric_func == trajectoryDissimilarityL2: this_cluster_label = HAC.fcluster( Z=cluster_result, t=1 * 1000, criterion="distance" ) # distance for l2 measure elif metric_func == trajectoryDissimilarityCenterMass: this_cluster_label = HAC.fcluster( Z=cluster_result, t=1.5, criterion="distance" ) # distance for center of mass measure elif criterion == "inconsistent": this_cluster_label = HAC.fcluster(Z=cluster_result, t=0.8, criterion="inconsistent") print "this_cluster_label:", this_cluster_label, "number of clusters:", len(set(this_cluster_label)) """Plot the representative trajectories""" plotRepresentativeTrajectory( this_cluster_label, trajectories, fname="cluster_centroids_{n}_classes".format(n=len(set(this_cluster_label))), path=plot_path, show=False, ) return this_cluster_label, [this_cluster_label], []
def main(): # assume that the input data are sorted utils.GEOSCALE = 600000.0 record_mmsi = True # filename = "3916119.csv" # filename = "1000019.csv" # filename = "9261126.csv" # ship type is 40, High Speed Craft # fileNames = ["8514019.csv", "9116943.csv", "9267118.csv", "9443140.csv", "9383986.csv", "9343340.csv", "9417464.csv", "9664225.csv", "9538440.csv", "9327138.csv"] root_folder = raw_input("Input name for root_folder:") out_sample_test = raw_input("Out Sample Test?(y/n)") == "y" input_path = "{folder}/input/".format( folder=root_folder + ("/out_sample_test" if (out_sample_test) else "")) fileNames = [] for input_filename in os.listdir(input_path): if (input_filename.find(".csv") != -1): fileNames.append(input_filename) if (out_sample_test): foldername = "{root_folder}/out_sample_test/cleanedData".format( root_folder=root_folder) else: foldername = "{root_folder}/cleanedData".format( root_folder=root_folder) utils.queryPath(foldername) aggregateData = None for index in range(0, len(fileNames)): filename = fileNames[index] data = [] countSpeedGreaterThan10 = 0 maxSpeed = 0 with open( '{input_path}/{filename}'.format(input_path=input_path, filename=filename), 'rU') as csvfile: reader = csv.DictReader(csvfile, dialect=csv.excel_tab, delimiter=',') # skip the first iteration # iterrows = iter(reader) # next(iterrows) for row in reader: if (int(row["message_type"]) == 1 or int(row["message_type"]) == 2 or int(row["message_type"]) == 3): # if the Lat Lon info is not available, skip if (float(row["latitude"]) / utils.GEOSCALE == 91 or float(row["longitude"]) / utils.GEOSCALE == 181): continue time_str = row['timeStamp'] timestamp = time.strptime( time_str, "%Y-%m-%dT%H:%M:%SZ") # parse the time dt_seconds = datetime.datetime( *timestamp[:6]).strftime("%s") # print x, y #Speed over ground speedOverGround = float(row["speed_over_ground"]) / 10.0 if (speedOverGround > 1): countSpeedGreaterThan10 += 1 if (speedOverGround > maxSpeed and speedOverGround != 102.3): 
#1023 indicates speed not available maxSpeed = speedOverGround trajectory_point = [ \ int(row["navigation_status"]), \ float(row["rate_of_turn"]), \ speedOverGround,float(row["latitude"])/utils.GEOSCALE, \ float(row["longitude"])/utils.GEOSCALE, \ float(row["course_over_ground"]), \ float(row["true_heading"]), \ int(dt_seconds)] if (record_mmsi): trajectory_point.append(long(row["mmsi"])) data.append(trajectory_point) data = np.asarray(data) print "before cleaning data.shape:", data.shape # Clean out data that has trasient movement that does not make sense: for example, shift 1 km in 1 minute or soemthing print "Count of Time instances that speed is > 10 knot:", countSpeedGreaterThan10 print "maxSpeed:", maxSpeed i = 1 while (i < data.shape[0]): print "checking:", i if (isErrorData(data[i - 1], data[i], maxSpeed)): data = np.delete(data, i, 0) else: i += 1 print "after cleaning error data.shape:", data.shape if (aggregateData is None): aggregateData = data else: aggregateData = np.concatenate((aggregateData, data), axis=0) # writeToCSV.saveArray(data, "{foldername}/{f}".format(foldername = foldername, f = filename[0:filename.find(".")])) writeToCSV.writeDataToCSVWithMMSI( data, path=foldername, file_name=filename[0:filename.find(".")]) print "aggregateData.shape:", aggregateData.shape # writeToCSV.saveArray(aggregateData, "{foldername}/{f}".format(foldername = foldername, f = "aggregateData")) # writeToCSV.writeDataToCSV(aggregateData, foldername, "aggregateData") writeToCSV.writeDataToCSVWithMMSI(aggregateData, foldername, "aggregateData_with_mmsi") # xy_coordinate = [item[3:5] for item in data] # xy_coordinate = np.asarray(xy_coordinate) # print xy_coordinate.shape c # plt.scatter([item[3] for item in data], [item[4] for item in data]) # plt.savefig("vessel_points.png") # plt.show() return
def main():
    """Build the origin/destination trajectories of a root folder and cluster them.

    Steps, in order:
      1. prompt for ``root_folder`` and list the .csv names under
         ``{root_folder}/input/``;
      2. optionally (prompted) compute the vessel min-distance matrix from the
         aggregated cleaned data and write the related files;
      3. for every input file, extract its endpoints, then extract, interpolate
         and collect the trajectory between each pair of consecutive endpoints;
      4. save and plot all collected trajectories and call ``executeClustering``.
    """
    root_folder = raw_input("Input the root_folder name:")
    """ Firstly, extract all .csv input file names from {root_folder}/input/*.csv """
    # NOTE(review): the test below matches ".csv" anywhere in the file name,
    # not only as the extension.
    filenames = []
    for input_filename in os.listdir("{root_folder}/input/".format(root_folder = root_folder)):
        if (input_filename.find(".csv") != -1):
            filenames.append(input_filename)

    """ Get min distance between vessels """
    need_compute_mindistance = raw_input("Need to compute min_distance_matrix for vessel interaction? (y/n) :") == 'y'
    if (need_compute_mindistance):
        """sort the aggregateData with MMSI based on TS"""
        data_with_mmsi = writeToCSV.readDataFromCSVWithMMSI(path = root_folder + "/cleanedData", filename = "aggregateData_with_mmsi.csv")
        data_with_mmsi_sorted = compute_mindistance.sortDataBasedOnTS(data_with_mmsi)
        writeToCSV.writeDataToCSVWithMMSI(data_with_mmsi_sorted, root_folder + "/cleanedData", "aggregateData_with_mmsi_sorted")

        """Apply the computing of min distance using a timed window"""
        # the sorted data is read back from the file that was just written
        data_with_mmsi_sorted = writeToCSV.readDataFromCSVWithMMSI(path = root_folder + "/cleanedData", filename = "aggregateData_with_mmsi_sorted.csv")
        mmsi_set = compute_mindistance.getSetOfMMSI(data_with_mmsi_sorted)
        print mmsi_set
        print list(mmsi_set)

        start_time = time.time()
        mmsi_list_dict, min_distance_matrix, vessel_distance_speed_dict = \
            compute_mindistance.computeVesselMinDistanceMatrix(data_with_mmsi_sorted, TIME_WINDOW = 1800)

        writeToCSV.saveData([{ \
            'mmsi_list_dict': mmsi_list_dict, \
            'min_distance_matrix': min_distance_matrix, \
            'vessel_distance_speed_dict': vessel_distance_speed_dict }], filename = root_folder + "/cleanedData" + "/min_distance_matrix_with_mmsi_time_window_1800_sec")
        print "time spent:", time.time() - start_time

        """From already computed"""
        # Disabled alternative: load mmsi_list_dict, min_distance_matrix and
        # vessel_distance_speed_dict from the saved
        # min_distance_matrix_with_mmsi_time_window_1800_sec.npz instead of
        # recomputing them.

        """write min distance records for Agent Based Simulator"""
        # NOTE(review): root_folder+"LearningResult" has no "/" separator, so
        # the folder is a sibling of root_folder -- confirm this is intended.
        writeToCSV.writeVesselSpeedToDistance(\
            path = utils.queryPath(root_folder+"LearningResult"),\
            file_name = "vessel_speed_to_distance", \
            vessel_distance_speed_dict = vessel_distance_speed_dict)
        writeToCSV.writeVesselMinDistanceMatrix(\
            path = utils.queryPath(root_folder+"LearningResult"), \
            file_name = "vessel_min_distance_matrix", \
            mmsi_list_dict = mmsi_list_dict, \
            min_distance_matrix = min_distance_matrix)
        writeToCSV.writeMMSIs(\
            path = utils.queryPath(root_folder+"LearningResult"), \
            file_name = "mmsi_list", \
            mmsi_list = [key for key, index in mmsi_list_dict.iteritems()])

    """ Test Clustering """
    # Disabled test path: load previously saved all_OD_trajectories*.npz,
    # convert them to XY and call executeClustering directly.

    """ plot out the value space of the features, speed, accelerations, etc, for the aggregateData """
    # Disabled: plotter.plotFeatureSpace on each loaded trajectory.

    """ Read the cleaned .csv input files form {root_folder}/cleanedData/ Extract endpoints """
    endpoints = None
    all_OD_trajectories = []

    utils.queryPath("{root_folder}/endpoints".format(root_folder = root_folder))
    utils.queryPath("{root_folder}/trajectories".format(root_folder = root_folder))

    for i in range(0, len(filenames)):
        this_vessel_trajectory_points = writeToCSV.readDataFromCSV(root_folder + "/cleanedData", filenames[i])
        # Extract end points, along with MMSI
        this_vessel_endpoints = np.asarray(extractEndPoints(writeToCSV.readDataFromCSVWithMMSI(root_folder + "/cleanedData", filenames[i])))
        # Save end points, along with MMSI
        writeToCSV.writeDataToCSVWithMMSI( \
            this_vessel_endpoints, \
            root_folder + "/endpoints", \
            "{filename}_endpoints".format(filename = filenames[i][:filenames[i].find(".")]))
        print "this_vessel_endpoints.shape:", this_vessel_endpoints.shape

        # Append to the total end points
        if(endpoints is None):
            endpoints = this_vessel_endpoints
        else:
            endpoints = np.concatenate((endpoints, this_vessel_endpoints), axis=0)

        # every pair of consecutive endpoints (s, s + 1) is one origin/destination pair
        for s in range (0, len(this_vessel_endpoints) - 1):
            originLatitude = this_vessel_endpoints[s][utils.dataDict["latitude"]]
            originLongtitude = this_vessel_endpoints[s][utils.dataDict["longitude"]]
            origin_ts = this_vessel_endpoints[s][utils.dataDict["ts"]]

            endLatitude = this_vessel_endpoints[s + 1][utils.dataDict["latitude"]]
            endLongtitude = this_vessel_endpoints[s + 1][utils.dataDict["longitude"]]
            end_ts = this_vessel_endpoints[s + 1][utils.dataDict["ts"]]

            """Extracting trajectory between a pair of OD"""
            print "\n\nextracting endpoints between ", s, " and ", s + 1
            OD_trajectories, OD_trajectories_lat_lon = extractTrajectoriesUntilOD(\
                this_vessel_trajectory_points, \
                origin_ts, \
                originLatitude, \
                originLongtitude, \
                end_ts, \
                endLatitude, \
                endLongtitude, \
                show = False, save = True, clean = False, \
                fname = filenames[i][:filenames[i].find(".")] + "_trajectory_between_endpoint{s}_and{e}".format(s = s, e = s + 1))
            # there will be one trajectory between each OD
            assert (len(OD_trajectories) > 0), "OD_trajectories extracted must have length > 0"
            print "number of trajectory points extracted : ", len(OD_trajectories[0])

            if(len(OD_trajectories[0]) > 2): # more than just the origin and destination endpoints along the trajectory
                writeToCSV.writeDataToCSV( \
                    data = OD_trajectories_lat_lon[0],
                    path = root_folder + "/trajectories", \
                    file_name = "{filename}_trajectory_endpoint_{s}_to_{e}".format(filename = filenames[i][:filenames[i].find(".")], \
                        s = s, \
                        e = s + 1))

                """ Interpolation based on pure geographical trajectory, ignore temporal information """
                interpolated_OD_trajectories = interpolator.geographicalTrajetoryInterpolation(OD_trajectories)
                plotter.plotListOfTrajectories( \
                    interpolated_OD_trajectories, \
                    show = False, \
                    clean = True, \
                    save = True, \
                    fname = filenames[i][:filenames[i].find(".")] + "_interpolated_algo_3_between_endpoint{s}_and{e}".format(\
                        s = s, \
                        e = s + 1))

                """ Interpolation of 1D data: speed, rate_of_turn, etc; interpolated_OD_trajectories / OD_trajectories are both in X, Y coordinates """
                if(len(interpolated_OD_trajectories) > 0):
                    interpolated_OD_trajectories[0] = interpolator.interpolate1DFeatures( \
                        interpolated_OD_trajectories[0], \
                        OD_trajectories[0])

                # change X, Y coordinate to Lat, Lon
                interpolated_OD_trajectories_lat_lon = convertListOfTrajectoriesToLatLon( \
                    originLatitude, originLongtitude, interpolated_OD_trajectories)
                if(len(interpolated_OD_trajectories_lat_lon) > 0):
                    # since there should be only one trajectory between each pair of OD
                    all_OD_trajectories.append(interpolated_OD_trajectories_lat_lon[0])
            else:
                print "no trajectories extracted between endpoints ", s , " and ", s + 1
            plt.clf()

    assert (not endpoints is None), "Error!: No endpoints extracted from the historial data of vessels" + "_".join(filenames)
    print "Final endpoints.shape:", endpoints.shape
    print "number of interpolated all_OD_trajectories:", len(all_OD_trajectories)

    """ save the augmented trajectories between endpoints as npz data file and the plot """
    # remove error trajectories that are too far from Singapore
    all_OD_trajectories = utils.removeErrorTrajectoryFromList(all_OD_trajectories)
    writeToCSV.saveData(all_OD_trajectories, root_folder + "/all_OD_trajectories_with_1D_data")
    # convert Lat, Lon to XY for displaying
    all_OD_trajectories_XY = convertListOfTrajectoriesToXY(utils.CENTER_LAT_SG, utils.CENTER_LON_SG, all_OD_trajectories)
    plotter.plotListOfTrajectories(all_OD_trajectories_XY, show = False, clean = True, save = True, \
        fname = "{root_folder}_all_OD_trajectories".format(root_folder = root_folder))

    """ Execute Clustering """
    executeClustering(root_folder = root_folder, \
        all_OD_trajectories_XY = all_OD_trajectories_XY, \
        reference_lat = utils.CENTER_LAT_SG, \
        reference_lon = utils.CENTER_LON_SG, \
        filenames = filenames)
def executeClustering(root_folder, all_OD_trajectories_XY, reference_lat, reference_lon, filenames):
    """Cluster the OD trajectories and write centroids and endpoint mappings.

    root_folder: folder whose "/endpoints" files are read and under which the
        results are written
    all_OD_trajectories_XY: trajectories passed to
        clustering_worker.clusterTrajectories (XY coordinates, going by the name)
    reference_lat, reference_lon: reference point used to convert XY
        trajectories back to lat/lon
    filenames: input .csv names; "<name>_endpoints.csv" is read for each

    The cluster centroids become the "protocol trajectories"; their sizes and
    the endpoint-to-centroid mapping are saved for the Agent Based Simulator.
    """
    fname = "{root_folder}_dissimilarity_l2_cophenetic_distance".format(root_folder = root_folder)

    # NOTE(review): the output path below is hard-coded to "tankers/..."
    # instead of being derived from root_folder -- confirm this is intended.
    opt_cluster_label , cluster_labels, CH_indexes = clustering_worker.clusterTrajectories( \
        trajectories = all_OD_trajectories_XY, \
        fname = fname, \
        path = utils.queryPath("tankers/cluster_result/{folder}".format(folder = fname)), \
        metric_func = clustering_worker.trajectoryDissimilarityL2, \
        # Disabled alternatives: metric_func = clustering_worker.trajectoryDissimilarityCenterMass,
        # or user_distance_matrix = writeToCSV.loadData(<previously saved .npz>)
        criterion = 'distance')
    print "opt_cluster_label:", opt_cluster_label
    print "opt_num_cluster:", len(set(opt_cluster_label))

    """Construct the endpoints to representative trajectory mapping"""
    endpoints = None
    for filename in filenames:
        this_vessel_endpoints = writeToCSV.readDataFromCSVWithMMSI( \
            root_folder + "/endpoints", \
            "{filename}_endpoints.csv".format(filename = filename[:filename.find(".")]))
        # Append to the total end points
        if(endpoints is None):
            endpoints = this_vessel_endpoints
        else:
            endpoints = np.concatenate((endpoints, this_vessel_endpoints), axis=0)

    cluster_centroids = clustering_worker.getClusterCentroids(opt_cluster_label, all_OD_trajectories_XY)
    cluster_centroids_lat_lon = {} # [cluster_label : centroid] dictionary
    for cluster_label, centroid in cluster_centroids.iteritems():
        # deep copy so that the conversion cannot touch the XY centroid
        cluster_centroids_lat_lon[cluster_label] = convertListOfTrajectoriesToLatLon(reference_lat, reference_lon, \
            [copy.deepcopy(centroid)])[0]

    # flatten
    cluster_centroids_lat_lon_flattened = [point for cluster_label, centroid in cluster_centroids_lat_lon.iteritems() \
        for point in centroid]
    writeToCSV.writeDataToCSV(np.asarray(cluster_centroids_lat_lon_flattened), root_folder + "/cleanedData", \
        "centroids_" + fname)

    """array of centroids written to .npz"""
    writeToCSV.saveData([centroid for cluster_label, centroid in cluster_centroids_lat_lon.iteritems()], \
        root_folder + "/cleanedData/centroids_arr")

    """DEBUGGING,using unrefined data"""
    # Disabled debugging code: dump the trajectories starting near a
    # hard-coded point, the centroids of their clusters and all trajectories
    # of those clusters under root_folder + "/cleanedData/DEBUGGING".
    """END DEBUGGING"""

    endpoints_cluster_dict = endPointsToRepresentativeTrajectoryMapping(\
        endpoints, \
        all_OD_trajectories_XY , \
        opt_cluster_label, \
        reference_lat, \
        reference_lon)

    empty_endpoints = []
    augmented_index_to_extra_label_mapping = {} # mapping from normal index to appended index in all_protocol_trajectories
    cluster_label_to_cluster_size = {} # 'cluster size' of the appended augmented trajectory in all_protocol_trajectories

    all_protocol_trajectories = [] # indexed by cluster label (offset by 1, cluster 1 -> all_protocol_trajectories[0])
    for label in range(np.min(opt_cluster_label), np.max(opt_cluster_label) + 1):
        assert (label in cluster_centroids_lat_lon), "{label} is supposed to be in the cluster_centroids_lat_lon dict".format(label = label)
        all_protocol_trajectories.append(cluster_centroids_lat_lon[label])
        cluster_label_to_cluster_size[label - 1] = len(np.where(opt_cluster_label == label)[0])
    assert(np.sum([size for label, size in cluster_label_to_cluster_size.iteritems()]) == len(opt_cluster_label)), "sum of individual label size should == total count"

    """ assign augmented trajectories to empty endpoints: True/False """
    # the flag is hard-coded to False, so the whole branch below is skipped
    assign_augmented_to_empty_enpoints_flag = False
    DEBUG_APPEND_INDEXS = []
    if (assign_augmented_to_empty_enpoints_flag):
        for endpoint_str, endpoint_tuple_list in endpoints_cluster_dict.iteritems():
            endpoint_starting_clusters = [item.cluster for item in endpoint_tuple_list] # get the list of cluster_labels of centroids to a certain endpoint
            if (len(endpoint_starting_clusters) == 0):
                """If no centroid assigned, then assign the original augmented trajectory"""
                this_empty_endpoint = lookForEndPoints(endpoints, endpoint_str) # endpoints is in lat, lon
                if (this_empty_endpoint is None):
                    raise ValueError("Error! should always be able to map back endpoints, but {p} is not found".format(p = endpoint_str))
                empty_endpoints.append(this_empty_endpoint)

                point_to_examine_XY = utils.LatLonToXY(reference_lat,reference_lon, \
                    this_empty_endpoint[utils.dataDict["latitude"]], this_empty_endpoint[utils.dataDict["longitude"]])
                augmented_trajectories_from_point_to_examine_index = []
                augmented_trajectories_from_point_to_examine = []
                # collect every trajectory whose first point lies within
                # utils.NEIGHBOURHOOD_ENDPOINT of this endpoint
                for i in range(0, len(all_OD_trajectories_XY)):
                    trajectory = all_OD_trajectories_XY[i]
                    if (np.linalg.norm([ \
                        point_to_examine_XY[0] - trajectory[0][utils.data_dict_x_y_coordinate["x"]], \
                        point_to_examine_XY[1] - trajectory[0][utils.data_dict_x_y_coordinate["y"]]], 2) < utils.NEIGHBOURHOOD_ENDPOINT):
                        augmented_trajectories_from_point_to_examine_index.append(i)
                        augmented_trajectories_from_point_to_examine.append(trajectory)

                print "all indexes (w.r.t all_OD_trajectories_XY) for this_empty_endpoint:", augmented_trajectories_from_point_to_examine_index
                DEBUG_APPEND_INDEXS.append(augmented_trajectories_from_point_to_examine_index)

                """Append augmented_trajectories_from_point_to_examine to end of array of centroids and give extra label"""
                for augmented_index in augmented_trajectories_from_point_to_examine_index:
                    if (not augmented_index in augmented_index_to_extra_label_mapping):
                        # if this normal trajectory is not appended yet, append it and mark it in the augmented_index_to_extra_label_mapping
                        augmented_index_to_extra_label_mapping[augmented_index] = len(all_protocol_trajectories)
                        cluster_label_to_cluster_size[augmented_index_to_extra_label_mapping[augmented_index]] = 1
                        all_protocol_trajectories.append(\
                            convertListOfTrajectoriesToLatLon(reference_lat, reference_lon, \
                            [copy.deepcopy(all_OD_trajectories_XY[augmented_index])])[0])
                    else:
                        cluster_label_to_cluster_size[augmented_index_to_extra_label_mapping[augmented_index]] += 1
                    endpoints_cluster_dict[endpoint_str].append(utils.ClusterCentroidTuple(\
                        cluster = augmented_index_to_extra_label_mapping[augmented_index], \
                        centroid = all_protocol_trajectories[augmented_index_to_extra_label_mapping[augmented_index]]))

    """Asserting and Saving of info for Agent Based Simulator"""
    # every protocol trajectory beyond the cluster centroids must be one of
    # the appended augmented trajectories
    assert (len(set([index for index_list in DEBUG_APPEND_INDEXS for index in index_list])) == \
        len(all_protocol_trajectories) - len(set(opt_cluster_label))), \
        "size of appended augmented trajectories should == len(DEBUG_APPEND_INDEXS)"

    for index in range(0, len(all_protocol_trajectories)):
        assert(index in cluster_label_to_cluster_size), "all_protocol_trajectories's index mapping to cluster should be complete"

    for label, size in cluster_label_to_cluster_size.iteritems():
        print "label, size:", label, size

    print "number of endpoints that do not have clusters assigned to:", len(empty_endpoints)
    print "total number of endpoints:", len(endpoints)
    writeToCSV.writeDataToCSVWithMMSI(np.asarray(endpoints), root_folder + "/endpoints", "all_endpoints_with_MMSI")
    writeToCSV.writeDataToCSV(np.asarray(empty_endpoints), root_folder + "/cleanedData", \
        "non_starting_endpoints_{root_folder}_dissimilarity_l2_cophenetic_distance_cleaned".format(root_folder = root_folder))
    writeToCSV.saveData([endpoints_cluster_dict], \
        filename = root_folder + "/cleanedData" + "/endpoints_cluster_dict" + fname)

    """write all the all_protocol_trajectories for DEBUGGING purpose"""
    for i in range(0, len(all_protocol_trajectories)):
        protocol_trajectory = all_protocol_trajectories[i]
        writeToCSV.writeDataToCSV(\
            np.asarray(protocol_trajectory), \
            utils.queryPath(root_folder + "/cleanedData/DEBUGGING/ALL_PROTOCOLS_PATTERN_ONLY"), \
            "all_protocol_{i}".format(i = i))

    """Save related csv files for Agent Based Simulator"""
    # NOTE(review): root_folder+"LearningResult" has no "/" separator, so the
    # folder is a sibling of root_folder -- confirm this is intended.
    writeToCSV.writeAllProtocolTrajectories(\
        path = utils.queryPath(root_folder+"LearningResult"), \
        file_name = "protocol_trajectories_with_cluster_size", \
        all_protocol_trajectories = all_protocol_trajectories, \
        cluster_label_to_cluster_size = cluster_label_to_cluster_size)

    writeToCSV.writeEndPointsToProtocolTrajectoriesIndexesWithMMSI(\
        path = utils.queryPath(root_folder+"LearningResult"), \
        file_name = "endpoints_to_protocol_trajectories", \
        endpoints = endpoints, \
        endpoints_cluster_dict = endpoints_cluster_dict)
def main(): metric_to_use = int( raw_input("use metric?\n" + "1. l2\n" + "2. center of mass\n")) root_folder = "tankers/out_sample_test" """read centroids""" centroids = None if (metric_to_use == 1): centroids = writeToCSV.loadData( "tankers/cleanedData/centroids_arr_l2.npz") elif (metric_to_use == 2): centroids = writeToCSV.loadData( "tankers/cleanedData/centroids_arr_center_mass.npz") """Extract endpoints, trajectories, augmentation""" filenames = [ "9050462.csv", "9259769.csv", "9327138.csv", "9408475.csv", "9417464.csv", "9548440.csv" ] # for out sample test # filenames = ["9408475.csv"] endpoints = None all_OD_trajectories = [] """Do the augmentation if not yet done""" if (not os.path.exists(root_folder + "/all_OD_trajectories_with_1D_data.npz")): for i in range(0, len(filenames)): this_vessel_trajectory_points = writeToCSV.readDataFromCSV( root_folder + "/cleanedData", filenames[i]) # Extract end points, along with MMSI this_vessel_endpoints = np.asarray( trajectory_modeller.extractEndPoints( writeToCSV.readDataFromCSVWithMMSI( root_folder + "/cleanedData", filenames[i]))) # Save end points, along with MMSI writeToCSV.writeDataToCSVWithMMSI( \ this_vessel_endpoints, \ utils.queryPath(root_folder + "/endpoints"), \ "{filename}_endpoints".format(filename = filenames[i][:filenames[i].find(".")])) print "this_vessel_endpoints.shape:", this_vessel_endpoints.shape # Append to the total end points if (endpoints is None): endpoints = this_vessel_endpoints else: endpoints = np.concatenate((endpoints, this_vessel_endpoints), axis=0) for s in range(0, len(this_vessel_endpoints) - 1): originLatitude = this_vessel_endpoints[s][ utils.dataDict["latitude"]] originLongtitude = this_vessel_endpoints[s][ utils.dataDict["longitude"]] origin_ts = this_vessel_endpoints[s][utils.dataDict["ts"]] endLatitude = this_vessel_endpoints[s + 1][ utils.dataDict["latitude"]] endLongtitude = this_vessel_endpoints[s + 1][ utils.dataDict["longitude"]] end_ts = this_vessel_endpoints[s + 
1][utils.dataDict["ts"]] """Extracting trajectory between a pair of OD""" print "\n\nextracting endpoints between ", s, " and ", s + 1 OD_trajectories, OD_trajectories_lat_lon = trajectory_modeller.extractTrajectoriesUntilOD(\ this_vessel_trajectory_points, \ origin_ts, \ originLatitude, \ originLongtitude, \ end_ts, \ endLatitude, \ endLongtitude, \ show = False, save = True, clean = False, \ fname = filenames[i][:filenames[i].find(".")] + "_trajectory_between_endpoint{s}_and{e}".format(s = s, e = s + 1), \ path = utils.queryPath(root_folder + "/plots")) # there will be one trajectory between each OD assert (len(OD_trajectories) > 0), "OD_trajectories extracted must have length > 0" print "number of trajectory points extracted : ", len( OD_trajectories[0]) if ( len(OD_trajectories[0]) > 2 ): # more than just the origin and destination endpoints along the trajectory writeToCSV.writeDataToCSV( \ data = OD_trajectories_lat_lon[0], path = utils.queryPath(root_folder + "/trajectories"), \ file_name = "{filename}_trajectory_endpoint_{s}_to_{e}".format(filename = filenames[i][:filenames[i].find(".")], \ s = s, \ e = s + 1)) """ Interpolation based on pure geographical trajectory, ignore temporal information """ interpolated_OD_trajectories = interpolator.geographicalTrajetoryInterpolation( OD_trajectories) plotter.plotListOfTrajectories( \ interpolated_OD_trajectories, \ show = False, \ clean = True, \ save = True, \ fname = filenames[i][:filenames[i].find(".")] + "_interpolated_algo_3_between_endpoint{s}_and{e}".format(\ s = s, \ e = s + 1), \ path = utils.queryPath(root_folder + "/plots")) """ Interpolation of 1D data: speed, rate_of_turn, etc; interpolated_OD_trajectories / OD_trajectories are both in X, Y coordinates """ if (len(interpolated_OD_trajectories) > 0): interpolated_OD_trajectories[0] = interpolator.interpolate1DFeatures( \ interpolated_OD_trajectories[0], \ OD_trajectories[0]) # change X, Y coordinate to Lat, Lon interpolated_OD_trajectories_lat_lon = 
trajectory_modeller.convertListOfTrajectoriesToLatLon( \ originLatitude, originLongtitude, interpolated_OD_trajectories) if (len(interpolated_OD_trajectories_lat_lon) > 0): # since there should be only one trajectory between each pair of OD all_OD_trajectories.append( interpolated_OD_trajectories_lat_lon[0]) else: print "no trajectories extracted between endpoints ", s, " and ", s + 1 plt.clf() assert ( not endpoints is None ), "Error!: No endpoints extracted from the historial data of vessels" + "_".join( filenames) print "Final endpoints.shape:", endpoints.shape print "number of interpolated all_OD_trajectories:", len( all_OD_trajectories) all_OD_trajectories = utils.removeErrorTrajectoryFromList( all_OD_trajectories) writeToCSV.saveData(all_OD_trajectories, root_folder + "/all_OD_trajectories_with_1D_data") else: all_OD_trajectories = writeToCSV.loadData( root_folder + "/all_OD_trajectories_with_1D_data.npz") """convert Lat, Lon to XY for displaying""" all_OD_trajectories_XY = trajectory_modeller.convertListOfTrajectoriesToXY( utils.CENTER_LAT_SG, utils.CENTER_LON_SG, all_OD_trajectories) plotter.plotListOfTrajectories(\ all_OD_trajectories_XY, \ show = True, \ clean = True, \ save = False, \ fname = "out_sample_tanker_all_OD_trajectories", path = utils.queryPath(root_folder + "/plots")) """Test distance to cluster centroids""" centroids_XY = trajectory_modeller.convertListOfTrajectoriesToXY(\ utils.CENTER_LAT_SG, utils.CENTER_LON_SG, centroids) for i in range(0, len(all_OD_trajectories_XY)): this_tr_XY = all_OD_trajectories_XY[i] if (metric_to_use == 1): this_tr_centroids_dist, according_pattern_index = minDistanceAgainstCentroids(\ this_tr_XY, centroids_XY, clustering_worker.trajectoryDissimilarityL2) print "augmented trajectories[{i}]".format(i = i), \ "'s best l2 distance is against cluster centroids[{i}], = ".format(i = according_pattern_index), \ this_tr_centroids_dist, ", max allowed distance = ", 1000 elif (metric_to_use == 2): this_tr_centroids_dist, 
according_pattern_index = minDistanceAgainstCentroids(\ this_tr_XY, centroids_XY, clustering_worker.trajectoryDissimilarityCenterMass) print "augmented trajectories[{i}]".format(i = i), \ "'s best center of mass distance is against cluster centroids[{i}], = ".format(i = according_pattern_index), \ this_tr_centroids_dist, ", max allowed distance = ", 1.5 # plotter.plotFeatureSpace(centroids[according_pattern_index]) # plotter.plotFeatureSpace(\ # trajectory_modeller.convertListOfTrajectoriesToLatLon(utils.CENTER_LAT_SG, utils.CENTER_LON_SG, [this_tr_XY])[0]) return
def main(): # assume that the input data are sorted utils.GEOSCALE = 600000.0 record_mmsi = True # filename = "3916119.csv" # filename = "1000019.csv" # filename = "9261126.csv" # ship type is 40, High Speed Craft # fileNames = ["8514019.csv", "9116943.csv", "9267118.csv", "9443140.csv", "9383986.csv", "9343340.csv", "9417464.csv", "9664225.csv", "9538440.csv", "9327138.csv"] root_folder = raw_input("Input name for root_folder:") out_sample_test = raw_input("Out Sample Test?(y/n)") == "y" input_path = "{folder}/input/".format(folder = root_folder + ("/out_sample_test" if (out_sample_test) else "")) fileNames = [] for input_filename in os.listdir(input_path): if (input_filename.find(".csv") != -1): fileNames.append(input_filename) if (out_sample_test): foldername = "{root_folder}/out_sample_test/cleanedData".format(root_folder = root_folder) else: foldername = "{root_folder}/cleanedData".format(root_folder = root_folder) utils.queryPath(foldername) aggregateData = None for index in range(0, len(fileNames)): filename = fileNames[index] data = [] countSpeedGreaterThan10 = 0 maxSpeed = 0 with open('{input_path}/{filename}'.format(input_path = input_path, filename = filename), 'rU') as csvfile: reader = csv.DictReader(csvfile, dialect=csv.excel_tab, delimiter = ',') # skip the first iteration # iterrows = iter(reader) # next(iterrows) for row in reader: if(int(row["message_type"]) == 1 or int(row["message_type"]) == 2 or int(row["message_type"]) == 3): # if the Lat Lon info is not available, skip if(float(row["latitude"])/utils.GEOSCALE == 91 or float(row["longitude"])/utils.GEOSCALE == 181): continue time_str = row['timeStamp'] timestamp = time.strptime(time_str, "%Y-%m-%dT%H:%M:%SZ") # parse the time dt_seconds = datetime.datetime(*timestamp[:6]).strftime("%s") # print x, y #Speed over ground speedOverGround = float(row["speed_over_ground"])/10.0 if(speedOverGround > 1): countSpeedGreaterThan10 += 1 if(speedOverGround > maxSpeed and speedOverGround != 102.3): #1023 
indicates speed not available maxSpeed = speedOverGround trajectory_point = [ \ int(row["navigation_status"]), \ float(row["rate_of_turn"]), \ speedOverGround,float(row["latitude"])/utils.GEOSCALE, \ float(row["longitude"])/utils.GEOSCALE, \ float(row["course_over_ground"]), \ float(row["true_heading"]), \ int(dt_seconds)] if (record_mmsi): trajectory_point.append(long(row["mmsi"])) data.append(trajectory_point) data = np.asarray(data) print "before cleaning data.shape:", data.shape # Clean out data that has trasient movement that does not make sense: for example, shift 1 km in 1 minute or soemthing print "Count of Time instances that speed is > 10 knot:", countSpeedGreaterThan10 print "maxSpeed:", maxSpeed i = 1 while(i < data.shape[0]): print "checking:", i if(isErrorData(data[i-1], data[i], maxSpeed)): data = np.delete(data, i, 0) else: i += 1 print "after cleaning error data.shape:", data.shape if(aggregateData is None): aggregateData = data else: aggregateData = np.concatenate((aggregateData, data), axis=0) # writeToCSV.saveArray(data, "{foldername}/{f}".format(foldername = foldername, f = filename[0:filename.find(".")])) writeToCSV.writeDataToCSVWithMMSI(data, path = foldername, file_name = filename[0:filename.find(".")]) print "aggregateData.shape:", aggregateData.shape # writeToCSV.saveArray(aggregateData, "{foldername}/{f}".format(foldername = foldername, f = "aggregateData")) # writeToCSV.writeDataToCSV(aggregateData, foldername, "aggregateData") writeToCSV.writeDataToCSVWithMMSI(aggregateData, foldername, "aggregateData_with_mmsi") # xy_coordinate = [item[3:5] for item in data] # xy_coordinate = np.asarray(xy_coordinate) # print xy_coordinate.shape c # plt.scatter([item[3] for item in data], [item[4] for item in data]) # plt.savefig("vessel_points.png") # plt.show() return
def main(): metric_to_use = int(raw_input("use metric?\n" + "1. l2\n" + "2. center of mass\n")) root_folder = "tankers/out_sample_test" """read centroids""" centroids = None if metric_to_use == 1: centroids = writeToCSV.loadData("tankers/cleanedData/centroids_arr_l2.npz") elif metric_to_use == 2: centroids = writeToCSV.loadData("tankers/cleanedData/centroids_arr_center_mass.npz") """Extract endpoints, trajectories, augmentation""" filenames = [ "9050462.csv", "9259769.csv", "9327138.csv", "9408475.csv", "9417464.csv", "9548440.csv", ] # for out sample test # filenames = ["9408475.csv"] endpoints = None all_OD_trajectories = [] """Do the augmentation if not yet done""" if not os.path.exists(root_folder + "/all_OD_trajectories_with_1D_data.npz"): for i in range(0, len(filenames)): this_vessel_trajectory_points = writeToCSV.readDataFromCSV(root_folder + "/cleanedData", filenames[i]) # Extract end points, along with MMSI this_vessel_endpoints = np.asarray( trajectory_modeller.extractEndPoints( writeToCSV.readDataFromCSVWithMMSI(root_folder + "/cleanedData", filenames[i]) ) ) # Save end points, along with MMSI writeToCSV.writeDataToCSVWithMMSI( this_vessel_endpoints, utils.queryPath(root_folder + "/endpoints"), "{filename}_endpoints".format(filename=filenames[i][: filenames[i].find(".")]), ) print "this_vessel_endpoints.shape:", this_vessel_endpoints.shape # Append to the total end points if endpoints is None: endpoints = this_vessel_endpoints else: endpoints = np.concatenate((endpoints, this_vessel_endpoints), axis=0) for s in range(0, len(this_vessel_endpoints) - 1): originLatitude = this_vessel_endpoints[s][utils.dataDict["latitude"]] originLongtitude = this_vessel_endpoints[s][utils.dataDict["longitude"]] origin_ts = this_vessel_endpoints[s][utils.dataDict["ts"]] endLatitude = this_vessel_endpoints[s + 1][utils.dataDict["latitude"]] endLongtitude = this_vessel_endpoints[s + 1][utils.dataDict["longitude"]] end_ts = this_vessel_endpoints[s + 1][utils.dataDict["ts"]] 
"""Extracting trajectory between a pair of OD""" print "\n\nextracting endpoints between ", s, " and ", s + 1 OD_trajectories, OD_trajectories_lat_lon = trajectory_modeller.extractTrajectoriesUntilOD( this_vessel_trajectory_points, origin_ts, originLatitude, originLongtitude, end_ts, endLatitude, endLongtitude, show=False, save=True, clean=False, fname=filenames[i][: filenames[i].find(".")] + "_trajectory_between_endpoint{s}_and{e}".format(s=s, e=s + 1), path=utils.queryPath(root_folder + "/plots"), ) # there will be one trajectory between each OD assert len(OD_trajectories) > 0, "OD_trajectories extracted must have length > 0" print "number of trajectory points extracted : ", len(OD_trajectories[0]) if ( len(OD_trajectories[0]) > 2 ): # more than just the origin and destination endpoints along the trajectory writeToCSV.writeDataToCSV( data=OD_trajectories_lat_lon[0], path=utils.queryPath(root_folder + "/trajectories"), file_name="{filename}_trajectory_endpoint_{s}_to_{e}".format( filename=filenames[i][: filenames[i].find(".")], s=s, e=s + 1 ), ) """ Interpolation based on pure geographical trajectory, ignore temporal information """ interpolated_OD_trajectories = interpolator.geographicalTrajetoryInterpolation(OD_trajectories) plotter.plotListOfTrajectories( interpolated_OD_trajectories, show=False, clean=True, save=True, fname=filenames[i][: filenames[i].find(".")] + "_interpolated_algo_3_between_endpoint{s}_and{e}".format(s=s, e=s + 1), path=utils.queryPath(root_folder + "/plots"), ) """ Interpolation of 1D data: speed, rate_of_turn, etc; interpolated_OD_trajectories / OD_trajectories are both in X, Y coordinates """ if len(interpolated_OD_trajectories) > 0: interpolated_OD_trajectories[0] = interpolator.interpolate1DFeatures( interpolated_OD_trajectories[0], OD_trajectories[0] ) # change X, Y coordinate to Lat, Lon interpolated_OD_trajectories_lat_lon = trajectory_modeller.convertListOfTrajectoriesToLatLon( originLatitude, originLongtitude, 
interpolated_OD_trajectories ) if len(interpolated_OD_trajectories_lat_lon) > 0: # since there should be only one trajectory between each pair of OD all_OD_trajectories.append(interpolated_OD_trajectories_lat_lon[0]) else: print "no trajectories extracted between endpoints ", s, " and ", s + 1 plt.clf() assert not endpoints is None, "Error!: No endpoints extracted from the historial data of vessels" + "_".join( filenames ) print "Final endpoints.shape:", endpoints.shape print "number of interpolated all_OD_trajectories:", len(all_OD_trajectories) all_OD_trajectories = utils.removeErrorTrajectoryFromList(all_OD_trajectories) writeToCSV.saveData(all_OD_trajectories, root_folder + "/all_OD_trajectories_with_1D_data") else: all_OD_trajectories = writeToCSV.loadData(root_folder + "/all_OD_trajectories_with_1D_data.npz") """convert Lat, Lon to XY for displaying""" all_OD_trajectories_XY = trajectory_modeller.convertListOfTrajectoriesToXY( utils.CENTER_LAT_SG, utils.CENTER_LON_SG, all_OD_trajectories ) plotter.plotListOfTrajectories( all_OD_trajectories_XY, show=True, clean=True, save=False, fname="out_sample_tanker_all_OD_trajectories", path=utils.queryPath(root_folder + "/plots"), ) """Test distance to cluster centroids""" centroids_XY = trajectory_modeller.convertListOfTrajectoriesToXY( utils.CENTER_LAT_SG, utils.CENTER_LON_SG, centroids ) for i in range(0, len(all_OD_trajectories_XY)): this_tr_XY = all_OD_trajectories_XY[i] if metric_to_use == 1: this_tr_centroids_dist, according_pattern_index = minDistanceAgainstCentroids( this_tr_XY, centroids_XY, clustering_worker.trajectoryDissimilarityL2 ) print "augmented trajectories[{i}]".format( i=i ), "'s best l2 distance is against cluster centroids[{i}], = ".format( i=according_pattern_index ), this_tr_centroids_dist, ", max allowed distance = ", 1000 elif metric_to_use == 2: this_tr_centroids_dist, according_pattern_index = minDistanceAgainstCentroids( this_tr_XY, centroids_XY, 
clustering_worker.trajectoryDissimilarityCenterMass ) print "augmented trajectories[{i}]".format( i=i ), "'s best center of mass distance is against cluster centroids[{i}], = ".format( i=according_pattern_index ), this_tr_centroids_dist, ", max allowed distance = ", 1.5 # plotter.plotFeatureSpace(centroids[according_pattern_index]) # plotter.plotFeatureSpace(\ # trajectory_modeller.convertListOfTrajectoriesToLatLon(utils.CENTER_LAT_SG, utils.CENTER_LON_SG, [this_tr_XY])[0]) return