def getSpeeds(data):
    data = np.asarray(data)
    speeds = data[:, utils.dataDict["speed_over_ground"]]
    timeAxis = []
    timeAxis.append(0)
    startTime = data[0][utils.dataDict["ts"]]
    distanceAxis = []
    distanceAxis.append(0)
    startPoint = [
        data[0][utils.dataDict["latitude"]],
        data[0][utils.dataDict["longitude"]]
    ]
    for i in range(1, len(data)):
        dt_secs = data[i][utils.dataDict["ts"]] - startTime
        timeAxis.append(dt_secs)
        distanceAxis.append(
            np.linalg.norm([
                utils.LatLonToXY(startPoint[0], startPoint[1],
                                 data[i][utils.dataDict["latitude"]],
                                 data[i][utils.dataDict["longitude"]])
            ], 2))

    i = 0
    while (i < len(speeds)):
        if (speeds[i] == 102.3):  # remove the not available ones
            speeds = np.delete(speeds, i, 0)
        else:
            i += 1

    timeAxis = np.asarray(timeAxis)
    return speeds, timeAxis, distanceAxis
def convertListOfTrajectoriesToXY(originLatitude, originLongtitude, listOfTrajectories):
	for i in range(0, len(listOfTrajectories)):
		for j in range(0, len(listOfTrajectories[i])):
			x, y = utils.LatLonToXY(originLatitude, originLongtitude, listOfTrajectories[i][j][utils.dataDict["latitude"]], listOfTrajectories[i][j][utils.dataDict["longitude"]])
			listOfTrajectories[i][j][utils.data_dict_x_y_coordinate["y"]] = y
			listOfTrajectories[i][j][utils.data_dict_x_y_coordinate["x"]] = x
	return listOfTrajectories
def getSpeedAccelerations(data):
    accelerations = []  # acceleration will be in m/s^2
    timeAxis = []  # time axis in seconds
    startTime = data[0][utils.dataDict["ts"]]
    distanceAxis = []
    startPoint = [
        data[0][utils.dataDict["latitude"]],
        data[0][utils.dataDict["longitude"]]
    ]
    for i in range(1, len(data)):
        curSpeed = data[i][utils.dataDict["speed_over_ground"]]
        prevSpeed = data[i - 1][utils.dataDict["speed_over_ground"]]
        dt_secs = data[i][utils.dataDict["ts"]] - data[i -
                                                       1][utils.dataDict["ts"]]
        if (curSpeed == 102.3 or prevSpeed == 102.3):
            continue
        if (dt_secs == 0):
            continue
        accelerations.append(
            (utils.convertKnotToMeterPerSec(curSpeed) -
             utils.convertKnotToMeterPerSec(prevSpeed)) / float(dt_secs))
        timeAxis.append(data[i][utils.dataDict["ts"]] - startTime)
        distanceAxis.append(
            np.linalg.norm([
                utils.LatLonToXY(startPoint[0], startPoint[1],
                                 data[i][utils.dataDict["latitude"]],
                                 data[i][utils.dataDict["longitude"]])
            ], 2))

    accelerations = np.asarray(accelerations)
    timeAxis = np.asarray(timeAxis)
    distanceAxis = np.asarray(distanceAxis)
    return accelerations, timeAxis, distanceAxis
def endPointMatchTrajectoryCentroid(endpoint, centroid, reference_lat, reference_lon):
	assert (len(centroid) > 0), "cluster centroid must be non empty"
	x, y = utils.LatLonToXY(reference_lat,reference_lon,endpoint[utils.dataDict["latitude"]], endpoint[utils.dataDict["longitude"]])
	centroid_start_x = centroid[0][utils.data_dict_x_y_coordinate["x"]]
	centroid_start_y = centroid[0][utils.data_dict_x_y_coordinate["y"]]
	if (np.linalg.norm([x - centroid_start_x, y - centroid_start_y], 2) < 20 * utils.NEIGHBOURHOOD_ENDPOINT):
		return True
	else:
		return False
Esempio n. 5
0
def computeVesselMinDistanceMatrix(data, TIME_WINDOW=3600):
    """
	data: trajectory points with mmsi ids attached and sorted using ts
	"""
    mmsi_set = getSetOfMMSI(data)
    mmsi_list_dict = {item: list(mmsi_set).index(item)
                      for item in mmsi_set
                      }  # dictionary of mmsi to its index in the list
    print mmsi_list_dict
    distance_matrix = np.zeros(shape=(len(mmsi_set), len(mmsi_set)))
    vessel_distance_speed_dict = {
    }  # [vesselid1_vesselid2: [SpeedDistanceTuple([distanace, relative_speed])]]
    for i in range(0, distance_matrix.shape[0]):
        for j in range(i + 1, distance_matrix.shape[1]):
            distance_matrix[i][j] = sys.maxint
            distance_matrix[j][i] = distance_matrix[i][j]

            vessel_distance_speed_dict["{id1}_{id2}".format( \
             id1 = min(list(mmsi_set)[i],list(mmsi_set)[j]) , \
             id2 = max(list(mmsi_set)[i],list(mmsi_set)[j]))] = []

    i = 0
    while (i + 1 < len(data)):
        print "checking start index:", i, " total data length:", len(data)
        start_ts = data[i][utils.dataDict["ts"]]
        this_set_data_end_index = i + 1
        while (this_set_data_end_index < len(data)
               and data[this_set_data_end_index][utils.dataDict["ts"]] <
               start_ts + TIME_WINDOW):
            this_set_data_end_index += 1

        # check this set
        if (this_set_data_end_index - i > 1):
            # pairwise check
            for p in range(i, this_set_data_end_index):
                for q in range(i + 1, this_set_data_end_index):
                    if(data[p][utils.dataDict["mmsi"]] != data[q][utils.dataDict["mmsi"]] and \
                     data[p][utils.dataDict["course_over_ground"]] != utils.UNKNOWN_COURSE_OVER_GROUND and \
                     data[q][utils.dataDict["course_over_ground"]] != utils.UNKNOWN_COURSE_OVER_GROUND): # two vessels, both has direction info

                        this_distance = np.linalg.norm([utils.LatLonToXY ( \
                         lat1 = data[p][utils.dataDict["latitude"]], \
                         lon1 = data[p][utils.dataDict["longitude"]], \
                         lat2 = data[q][utils.dataDict["latitude"]], \
                         lon2 = data[q][utils.dataDict["longitude"]])], 2)

                        v_p_magnitude = data[p][
                            utils.dataDict["speed_over_ground"]]
                        v_q_magnitude = data[p][
                            utils.dataDict["speed_over_ground"]]

                        theta_p = data[p][
                            utils.dataDict["course_over_ground"]] / 10.0 / (
                                180 / math.pi
                            )  # to the true north, let y axis to be towards north
                        theta_q = data[q][utils.dataDict[
                            "course_over_ground"]] / 10.0 / (180 / math.pi)

                        v_p = np.array([
                            v_p_magnitude * math.sin(theta_p),
                            v_p_magnitude * math.cos(theta_p)
                        ])
                        v_q = np.array([
                            v_q_magnitude * math.sin(theta_q),
                            v_q_magnitude * math.cos(theta_q)
                        ])

                        p_index = mmsi_list_dict[data[p][
                            utils.dataDict["mmsi"]]]
                        q_index = mmsi_list_dict[data[q][
                            utils.dataDict["mmsi"]]]
                        # record the distance in the dict if close say, < 1km
                        if (this_distance < 1):
                            vessel_distance_speed_dict["{id1}_{id2}".format(\
                             id1 = long(min(data[p][utils.dataDict["mmsi"]], data[q][utils.dataDict["mmsi"]])) , \
                             id2 = long(max(data[p][utils.dataDict["mmsi"]], data[q][utils.dataDict["mmsi"]]))) ].append( \
                             utils.SpeedDistanceTuple(distance = this_distance, speed = np.linalg.norm([v_q - v_p], 2)))

                        if (this_distance < distance_matrix[p_index][q_index]):
                            distance_matrix[p_index][q_index] = this_distance
                            distance_matrix[q_index][
                                p_index] = distance_matrix[p_index][
                                    q_index]  # make sure symmetric
                            if (this_distance == 0
                                ):  # logging the collision case
                                print "vessel ", data[p][utils.dataDict["mmsi"]], " and vessel ", data[q][utils.dataDict["mmsi"]], \
                                "clashes at ", data[p][utils.dataDict["ts"]], "/", data[q][utils.dataDict["ts"]]

        # update indexes
        i = this_set_data_end_index

    print "final min_distance_matrix:\n", distance_matrix
    return mmsi_list_dict, distance_matrix, vessel_distance_speed_dict
def extractTrajectoriesUntilOD(data, originTS, originLatitude, originLongtitude, endTS, endLatitude, endLongtitude, show = True, save = False, clean = False, fname = "", path = "plots"):
	"""
	returns: OD_trajectories: in x,y coordinate;
			 OD_trajectories_lat_lon: in lat, lon coordinate;
	"""
	
	maxSpeed = 0
	for i in range(0, data.shape[0]):
		speed_over_ground = data[i][utils.dataDict["speed_over_ground"]]
		if(speed_over_ground > maxSpeed and speed_over_ground != 102.3): #1023 indicates speed not available
			maxSpeed = speed_over_ground
	print "This tanker maxSpeed:", maxSpeed, " knot"
	
	OD_trajectories = [] # origin destination endpoints trajectory
	i = 0
	while(i< data.shape[0]):
		cur_pos = data[i]
		if(utils.nearOrigin( \
			originLatitude, \
			originLongtitude, \
			cur_pos[utils.dataDict["latitude"]], \
			cur_pos[utils.dataDict["longitude"]], \
			thresh = 0.0) and \
			cur_pos[utils.dataDict["ts"]] == originTS): # must be exact point

			this_OD_trajectory = []
			this_OD_trajectory.append(cur_pos)
			i += 1
			while(i < data.shape[0] and \
				(not utils.nearOrigin( \
					endLatitude, \
					endLongtitude, \
					data[i][utils.dataDict["latitude"]], \
					data[i][utils.dataDict["longitude"]], \
					thresh = 0.0))):
				this_OD_trajectory.append(data[i])
				i += 1
			if(i < data.shape[0]):
				this_OD_trajectory.append(data[i]) # append the destination endpoint
			this_OD_trajectory = np.asarray(this_OD_trajectory) # make it to be an np 2D array

			""" box/radius approach in cleaning of points around origin"""
			j = 1
			print "checking points around origin:", j
			while(j < this_OD_trajectory.shape[0] and \
				utils.nearOrigin( \
					originLatitude, \
					originLongtitude, \
					this_OD_trajectory[j][utils.dataDict["latitude"]], \
					this_OD_trajectory[j][utils.dataDict["longitude"]], \
					thresh = utils.NEIGHBOURHOOD_ORIGIN)):
				j += 1
			print "last point around origin:", j
			this_OD_trajectory_around_origin = this_OD_trajectory[0:j]

			"""Take the box mean, treat timestamp as averaged as well"""
			this_OD_trajectory_mean_origin = boxMeanTrajectoryPoints(this_OD_trajectory_around_origin, originLatitude, originLongtitude)
			print "mean start point x,y : ", utils.LatLonToXY( \
				originLatitude, \
				originLongtitude, \
				this_OD_trajectory_mean_origin[utils.dataDict["latitude"]], \
				this_OD_trajectory_mean_origin[utils.dataDict["longitude"]])
			OD_trajectories.append(np.insert(this_OD_trajectory[j:],0,this_OD_trajectory_mean_origin, axis = 0))
			break  # only one trajectory per pair OD, since OD might be duplicated
		i += 1

	OD_trajectories = np.array(OD_trajectories)
	OD_trajectories_lat_lon = copy.deepcopy(OD_trajectories)
	for i in range(0, len(OD_trajectories)):
		for j in range(0, len(OD_trajectories[i])):
			x, y = utils.LatLonToXY(originLatitude, originLongtitude, OD_trajectories[i][j][utils.dataDict["latitude"]], OD_trajectories[i][j][utils.dataDict["longitude"]])
			OD_trajectories[i][j][utils.data_dict_x_y_coordinate["y"]] = y
			OD_trajectories[i][j][utils.data_dict_x_y_coordinate["x"]] = x
		# plotting purpose
		plt.scatter(OD_trajectories[i][0:len(OD_trajectories[i]),utils.data_dict_x_y_coordinate["x"]], \
			OD_trajectories[i][0:len(OD_trajectories[i]),utils.data_dict_x_y_coordinate["y"]])
	if(not plt.gca().yaxis_inverted()):
		plt.gca().invert_yaxis()
	if(save):
		plt.savefig("./{path}/{fname}.png".format(path = path, fname = fname))
	if(show):
		plt.show()
	if(clean):
		plt.clf()

	return OD_trajectories, OD_trajectories_lat_lon
def executeClustering(root_folder, all_OD_trajectories_XY, reference_lat, reference_lon, filenames):
	fname = "{root_folder}_dissimilarity_l2_cophenetic_distance".format(root_folder = root_folder)
	# fname = "10_tankers_dissimilarity_l2_inconsistent_refined_endpoints"
	# fname = "10_tankers_dissimilarity_l2_cophenetic_distance_refined_endpoints"
	# fname = "10_tankers_dissimilarity_center_mass_cophenetic_distance_refined_endpoints"

	# fname = "10_tankers_dissimilarity_l2_inconsistent"
	# fname = "10_tankers_dissimilarity_l2_all_K"
	# fname = "10_tankers_dissimilarity_center_mass"
	# fname = "10_tankers_dissimilarity_center_mass_cophenetic_distance_cleaned"
	# fname = "10_tankers_dissimilarity_center_mass_inconsistent_cleaned"
	

	opt_cluster_label , cluster_labels, CH_indexes = clustering_worker.clusterTrajectories( \
		trajectories  = all_OD_trajectories_XY, \
		fname = fname, \
		path = utils.queryPath("tankers/cluster_result/{folder}".format(folder = fname)), \
		metric_func = clustering_worker.trajectoryDissimilarityL2, \
		# metric_func = clustering_worker.trajectoryDissimilarityCenterMass, \
		
		# user_distance_matrix = writeToCSV.loadData(root_folder + \
			# "/cluster_result/10_tankers_dissimilarity_center_mass/10_tankers_dissimilarity_center_mass_cleaned.npz"), \

		# user_distance_matrix = writeToCSV.loadData(root_folder + \
			# "/cluster_result/10_tankers_dissimilarity_l2_cophenetic_distance_cleaned/10_tankers_dissimilarity_l2_cophenetic_distance_cleaned.npz"), \
		
		# user_distance_matrix = writeToCSV.loadData(root_folder + \
		# 	"/cluster_result/10_tankers_dissimilarity_l2_cophenetic_distance_refined_endpoints" + \
		# 	"/10_tankers_dissimilarity_l2_cophenetic_distance_refined_endpoints.npz"), \

		# user_distance_matrix = writeToCSV.loadData(root_folder + \
			# "/cluster_result/10_tankers_dissimilarity_center_mass_cophenetic_distance_refined_endpoints" + \
			# "/10_tankers_dissimilarity_center_mass_cophenetic_distance_refined_endpoints.npz"), \

		criterion = 'distance')

	print "opt_cluster_label:", opt_cluster_label
	print "opt_num_cluster:", len(set(opt_cluster_label))


	# print "distance between 1 and 4, should be quite small:", clustering_worker.trajectoryDissimilarityL2( \
	# 	all_OD_trajectories_XY[1], all_OD_trajectories_XY[4])
	# print "distance between 0 and 4, should be quite large:", clustering_worker.trajectoryDissimilarityL2( \
	# 	all_OD_trajectories_XY[0], all_OD_trajectories_XY[4])
	# print "center of mass measure distance between 1 and 4, should be quite small:", clustering_worker.trajectoryDissimilarityCenterMass( \
	# 	all_OD_trajectories_XY[1], all_OD_trajectories_XY[4])
	# print "center of mass measure distance between 0 and 4, should be quite large:", clustering_worker.trajectoryDissimilarityCenterMass( \
	# 	all_OD_trajectories_XY[0], all_OD_trajectories_XY[4])
	# print "matrix:\n", clustering_worker.getTrajectoryDistanceMatrix(\
	# 	all_OD_trajectories_XY, \
	# 	metric_func = clustering_worker.trajectoryDissimilarityL2)
	# plotter.plotListOfTrajectories(all_OD_trajectories_XY, show = True, clean = True, save = False, fname = "")
	
	"""Construct the endpoints to representative trajectory mapping"""
	endpoints = None
	for filename in filenames:
		this_vessel_endpoints = writeToCSV.readDataFromCSVWithMMSI( \
		root_folder + "/endpoints", \
		"{filename}_endpoints.csv".format(filename = filename[:filename.find(".")]))

		# Append to the total end points
		if(endpoints is None):
			endpoints = this_vessel_endpoints
		else:
			endpoints = np.concatenate((endpoints, this_vessel_endpoints), axis=0)

	cluster_centroids = clustering_worker.getClusterCentroids(opt_cluster_label, all_OD_trajectories_XY)
	cluster_centroids_lat_lon = {} # [cluster_label : centroid] dictionary
	for cluster_label, centroid in cluster_centroids.iteritems():
		cluster_centroids_lat_lon[cluster_label] = convertListOfTrajectoriesToLatLon(reference_lat, reference_lon, \
			[copy.deepcopy(centroid)])[0]
		# writeToCSV.writeDataToCSV(np.asarray(cluster_centroids_lat_lon[cluster_label]), root_folder + "/cleanedData/DEBUGGING", \
		# "refined_centroid_{i}".format(i = cluster_label))

	# flatten
	cluster_centroids_lat_lon_flattened = [point for cluster_label, centroid in cluster_centroids_lat_lon.iteritems() \
	for point in centroid]
	writeToCSV.writeDataToCSV(np.asarray(cluster_centroids_lat_lon_flattened), root_folder + "/cleanedData", \
		"centroids_" + fname)

	"""array of centroids written to .npz"""
	writeToCSV.saveData([centroid for cluster_label, centroid in cluster_centroids_lat_lon.iteritems()], \
		root_folder + "/cleanedData/centroids_arr")

	# raise ValueError("purpose stop for clusering only")

	"""DEBUGGING,using unrefined data"""
	# point_to_examine = (1.2625833, 103.6827)
	# point_to_examine_XY = utils.LatLonToXY(reference_lat,reference_lon,point_to_examine[0], point_to_examine[1])
	# augmented_trajectories_from_point_to_examine_index = []
	# augmented_trajectories_from_point_to_examine = []
	# for i in range(0, len(all_OD_trajectories_XY)):
	# 	trajectory = all_OD_trajectories_XY[i]
	# 	if (np.linalg.norm([ \
	# 		point_to_examine_XY[0] - trajectory[0][utils.data_dict_x_y_coordinate["x"]], \
	# 		point_to_examine_XY[1] - trajectory[0][utils.data_dict_x_y_coordinate["y"]]], 2) < utils.NEIGHBOURHOOD_ENDPOINT):
	# 		augmented_trajectories_from_point_to_examine_index.append(i)
	# 		augmented_trajectories_from_point_to_examine.append(trajectory)
	# 		print "augmented_trajectories_from_point_to_examine_index:", augmented_trajectories_from_point_to_examine_index, \
	# 		"starting pos:", trajectory[0][utils.data_dict_x_y_coordinate["x"]], trajectory[0][utils.data_dict_x_y_coordinate["y"]] 
	# print "augmented_trajectories_from_point_to_examine_index:", augmented_trajectories_from_point_to_examine_index



	# augmented_trajectories_from_point_to_examine = convertListOfTrajectoriesToLatLon(reference_lat, reference_lon, copy.deepcopy(augmented_trajectories_from_point_to_examine))
	# for t in range(0, len(augmented_trajectories_from_point_to_examine)):
	# 	writeToCSV.writeDataToCSV(np.asarray(augmented_trajectories_from_point_to_examine[t]), root_folder + "/cleanedData/DEBUGGING", \
	# 	"DEBUGGING_augmented_{t}".format(t = augmented_trajectories_from_point_to_examine_index[t]))


	# augmented_trajectories_from_point_to_examine_clusters = []
	# for i in augmented_trajectories_from_point_to_examine_index:
	# 	augmented_trajectories_from_point_to_examine_clusters.append(opt_cluster_label[i])
	# augmented_trajectories_from_point_to_examine_clusters_unique = list(set(augmented_trajectories_from_point_to_examine_clusters))


	# class_trajectories_dict = clustering_worker.formClassTrajectoriesDict(opt_cluster_label, all_OD_trajectories_XY)

	# for i in augmented_trajectories_from_point_to_examine_clusters_unique:
	# 	writeToCSV.writeDataToCSV(np.asarray(cluster_centroids_lat_lon[i]), root_folder + "/cleanedData/DEBUGGING", \
	# 	"DEBUGGING_centroid_{i}".format(i = i))
	# 	print "cluster_centroids[{i}], starting point:".format(i = i), cluster_centroids[i][0]

	# 	"""save all trajectories under this cluster i """
	# 	class_trajectories = class_trajectories_dict[i]
	# 	class_trajectories_lat_lon = convertListOfTrajectoriesToLatLon(reference_lat, reference_lon, copy.deepcopy(class_trajectories))
	# 	for j in range(0, len(class_trajectories_lat_lon)):
	# 		print "class_trajectories[{i}], starting point:".format(i = i), class_trajectories[j][0]
	# 		writeToCSV.writeDataToCSV(np.asarray(class_trajectories_lat_lon[j]), \
	# 			utils.queryPath(root_folder + "/cleanedData/DEBUGGING/CLASS{i}".format(i = i)) , \
	# 			"DEBUGGING_class_{i}_trajectory_{j}".format(i = i , j = j))

	"""END DEBUGGING"""


	endpoints_cluster_dict = endPointsToRepresentativeTrajectoryMapping(\
		endpoints, \
		all_OD_trajectories_XY , \
		opt_cluster_label, \
		reference_lat, \
		reference_lon)

	empty_endpoints = []
	augmented_index_to_extra_label_mapping = {} # mapping from normal index to appended index in all_protocol_trajectories
	cluster_label_to_cluster_size = {} # 'cluster size' of the appended augmented trajectory in all_protocol_trajectories
	
	all_protocol_trajectories = [] # indexed by cluster label (offset by 1, cluster 1 -> all_protocol_trajectories[0])
	for label in range(np.min(opt_cluster_label), np.max(opt_cluster_label) + 1):
		assert (label in cluster_centroids_lat_lon), "{label} is supposed to be in the cluster_centroids_lat_lon dict".format(label = label)
		all_protocol_trajectories.append(cluster_centroids_lat_lon[label])
		cluster_label_to_cluster_size[label - 1] = len(np.where(opt_cluster_label == label)[0])
	assert(np.sum([size for label, size in cluster_label_to_cluster_size.iteritems()]) == len(opt_cluster_label)), "sum of individual label size should == total count"

	"""
	assign augmented trajectories to empty endpoints: True/False
	"""	
	assign_augmented_to_empty_enpoints_flag = False
	DEBUG_APPEND_INDEXS = []
	if (assign_augmented_to_empty_enpoints_flag):
		for endpoint_str, endpoint_tuple_list in endpoints_cluster_dict.iteritems():
			endpoint_starting_clusters = [item.cluster for item in endpoint_tuple_list] # get the list of cluster_labels of centroids to a certain endpoint

			if (len(endpoint_starting_clusters) == 0):
				"""If no centroid assigned, then assign the original augmented trajectory"""
				this_empty_endpoint = lookForEndPoints(endpoints, endpoint_str) # endpoints is in lat, lon
				if (this_empty_endpoint is None):
					raise ValueError("Error! should always be able to map back endpoints, but {p} is not found".format(p = endpoint_str))
				empty_endpoints.append(this_empty_endpoint)

				point_to_examine_XY = utils.LatLonToXY(reference_lat,reference_lon, \
					this_empty_endpoint[utils.dataDict["latitude"]], this_empty_endpoint[utils.dataDict["longitude"]])
				augmented_trajectories_from_point_to_examine_index = []
				augmented_trajectories_from_point_to_examine = []
				for i in range(0, len(all_OD_trajectories_XY)):
					trajectory = all_OD_trajectories_XY[i]
					if (np.linalg.norm([ \
						point_to_examine_XY[0] - trajectory[0][utils.data_dict_x_y_coordinate["x"]], \
						point_to_examine_XY[1] - trajectory[0][utils.data_dict_x_y_coordinate["y"]]], 2) < utils.NEIGHBOURHOOD_ENDPOINT):
						augmented_trajectories_from_point_to_examine_index.append(i)
						augmented_trajectories_from_point_to_examine.append(trajectory)
						# print "this found augmented_trajectories_from_point_to_examine_index:", \
						# augmented_trajectories_from_point_to_examine_index, \
						# "starting pos:", \
						# trajectory[0][utils.data_dict_x_y_coordinate["x"]], \
						# trajectory[0][utils.data_dict_x_y_coordinate["y"]] 
				print "all indexes (w.r.t all_OD_trajectories_XY) for this_empty_endpoint:", augmented_trajectories_from_point_to_examine_index

				DEBUG_APPEND_INDEXS.append(augmented_trajectories_from_point_to_examine_index)

				"""Append augmented_trajectories_from_point_to_examine to end of array of centroids and give extra label"""
				for augmented_index in augmented_trajectories_from_point_to_examine_index:
					if (not augmented_index in augmented_index_to_extra_label_mapping): 
						# if this normal trajectory is not appened, append it and mark in the augmented_index_to_extra_label_mapping
						augmented_index_to_extra_label_mapping[augmented_index] = len(all_protocol_trajectories)
						cluster_label_to_cluster_size[augmented_index_to_extra_label_mapping[augmented_index]] = 1
						all_protocol_trajectories.append(\
							convertListOfTrajectoriesToLatLon(reference_lat, reference_lon, \
								[copy.deepcopy(all_OD_trajectories_XY[augmented_index])])[0])
					else:
						cluster_label_to_cluster_size[augmented_index_to_extra_label_mapping[augmented_index]] += 1

					endpoints_cluster_dict[endpoint_str].append(utils.ClusterCentroidTuple(\
						cluster = augmented_index_to_extra_label_mapping[augmented_index], \
						centroid = all_protocol_trajectories[augmented_index_to_extra_label_mapping[augmented_index]]))

	"""Asserting and Saving of info for Agent Based Simulator"""
	assert (len(set([index for index_list in DEBUG_APPEND_INDEXS for index in index_list])) == \
		len(all_protocol_trajectories) - len(set(opt_cluster_label))), \
	"size of appended augmented trajectories should == len(DEBUG_APPEND_INDEXS)" 

	for index in range(0, len(all_protocol_trajectories)):
		assert(index in cluster_label_to_cluster_size), "all_protocol_trajectories's index mapping to cluster should be complete"
	
	for label, size in cluster_label_to_cluster_size.iteritems():
		print "label, size:", label, size

	print "number of endpoints that do not have clusters assigned to:", len(empty_endpoints)
	print "total number of endpoints:", len(endpoints)
	writeToCSV.writeDataToCSVWithMMSI(np.asarray(endpoints), root_folder + "/endpoints", "all_endpoints_with_MMSI")
	writeToCSV.writeDataToCSV(np.asarray(empty_endpoints), root_folder + "/cleanedData", \
		"non_starting_endpoints_{root_folder}_dissimilarity_l2_cophenetic_distance_cleaned".format(root_folder = root_folder))
	writeToCSV.saveData([endpoints_cluster_dict], \
		filename = root_folder + "/cleanedData" + "/endpoints_cluster_dict" + fname)

	"""write all the all_protocol_trajectories for DEBUGGING purpose"""
	for i in range(0, len(all_protocol_trajectories)):
		protocol_trajectory = all_protocol_trajectories[i]
		writeToCSV.writeDataToCSV(\
			np.asarray(protocol_trajectory), \
			utils.queryPath(root_folder + "/cleanedData/DEBUGGING/ALL_PROTOCOLS_PATTERN_ONLY"), \
			"all_protocol_{i}".format(i = i))

	"""Save related csv files for Agent Based Simulator"""
	writeToCSV.writeAllProtocolTrajectories(\
		path = utils.queryPath(root_folder+"LearningResult"), \
		file_name = "protocol_trajectories_with_cluster_size", \
		all_protocol_trajectories = all_protocol_trajectories, \
		cluster_label_to_cluster_size = cluster_label_to_cluster_size)

	writeToCSV.writeEndPointsToProtocolTrajectoriesIndexesWithMMSI(\
		path = utils.queryPath(root_folder+"LearningResult"), \
		file_name = "endpoints_to_protocol_trajectories", \
		endpoints = endpoints, \
		endpoints_cluster_dict = endpoints_cluster_dict)
def getDistance(point1, point2):
	dx, dy = utils.LatLonToXY(point1[utils.dataDict["latitude"]], point1[utils.dataDict["longitude"]], point2[utils.dataDict["latitude"]], point2[utils.dataDict["longitude"]])
	return (np.linalg.norm([dx,dy],2))