Exemple #1
0
def main():
    metric_to_use = int(
        raw_input("use metric?\n" + "1. l2\n" + "2. center of mass\n"))
    root_folder = "tankers/out_sample_test"
    """read centroids"""
    centroids = None
    if (metric_to_use == 1):
        centroids = writeToCSV.loadData(
            "tankers/cleanedData/centroids_arr_l2.npz")
    elif (metric_to_use == 2):
        centroids = writeToCSV.loadData(
            "tankers/cleanedData/centroids_arr_center_mass.npz")
    """Extract endpoints, trajectories, augmentation"""
    filenames = [
        "9050462.csv", "9259769.csv", "9327138.csv", "9408475.csv",
        "9417464.csv", "9548440.csv"
    ]  # for out sample test
    # filenames = ["9408475.csv"]
    endpoints = None
    all_OD_trajectories = []
    """Do the augmentation if not yet done"""
    if (not os.path.exists(root_folder +
                           "/all_OD_trajectories_with_1D_data.npz")):
        for i in range(0, len(filenames)):
            this_vessel_trajectory_points = writeToCSV.readDataFromCSV(
                root_folder + "/cleanedData", filenames[i])
            # Extract end points, along with MMSI
            this_vessel_endpoints = np.asarray(
                trajectory_modeller.extractEndPoints(
                    writeToCSV.readDataFromCSVWithMMSI(
                        root_folder + "/cleanedData", filenames[i])))
            # Save end points, along with MMSI
            writeToCSV.writeDataToCSVWithMMSI( \
             this_vessel_endpoints, \
             utils.queryPath(root_folder + "/endpoints"), \
             "{filename}_endpoints".format(filename = filenames[i][:filenames[i].find(".")]))
            print "this_vessel_endpoints.shape:", this_vessel_endpoints.shape

            # Append to the total end points
            if (endpoints is None):
                endpoints = this_vessel_endpoints
            else:
                endpoints = np.concatenate((endpoints, this_vessel_endpoints),
                                           axis=0)

            for s in range(0, len(this_vessel_endpoints) - 1):
                originLatitude = this_vessel_endpoints[s][
                    utils.dataDict["latitude"]]
                originLongtitude = this_vessel_endpoints[s][
                    utils.dataDict["longitude"]]
                origin_ts = this_vessel_endpoints[s][utils.dataDict["ts"]]

                endLatitude = this_vessel_endpoints[s + 1][
                    utils.dataDict["latitude"]]
                endLongtitude = this_vessel_endpoints[s + 1][
                    utils.dataDict["longitude"]]
                end_ts = this_vessel_endpoints[s + 1][utils.dataDict["ts"]]
                """Extracting trajectory between a pair of OD"""
                print "\n\nextracting endpoints between ", s, " and ", s + 1
                OD_trajectories, OD_trajectories_lat_lon = trajectory_modeller.extractTrajectoriesUntilOD(\
                 this_vessel_trajectory_points, \
                 origin_ts, \
                 originLatitude, \
                 originLongtitude, \
                 end_ts, \
                 endLatitude, \
                 endLongtitude, \
                 show = False, save = True, clean = False, \
                 fname = filenames[i][:filenames[i].find(".")] + "_trajectory_between_endpoint{s}_and{e}".format(s = s, e = s + 1), \
                 path = utils.queryPath(root_folder + "/plots"))
                # there will be one trajectory between each OD
                assert (len(OD_trajectories) >
                        0), "OD_trajectories extracted must have length > 0"
                print "number of trajectory points extracted : ", len(
                    OD_trajectories[0])

                if (
                        len(OD_trajectories[0]) > 2
                ):  # more than just the origin and destination endpoints along the trajectory
                    writeToCSV.writeDataToCSV( \
                     data = OD_trajectories_lat_lon[0],
                     path = utils.queryPath(root_folder + "/trajectories"), \
                     file_name = "{filename}_trajectory_endpoint_{s}_to_{e}".format(filename = filenames[i][:filenames[i].find(".")], \
                      s = s, \
                      e = s + 1))
                    """
					Interpolation based on pure geographical trajectory, ignore temporal information
					"""
                    interpolated_OD_trajectories = interpolator.geographicalTrajetoryInterpolation(
                        OD_trajectories)
                    plotter.plotListOfTrajectories( \
                     interpolated_OD_trajectories, \
                     show = False, \
                     clean = True, \
                     save = True, \
                     fname = filenames[i][:filenames[i].find(".")] + "_interpolated_algo_3_between_endpoint{s}_and{e}".format(\
                      s = s, \
                      e = s + 1), \
                     path = utils.queryPath(root_folder + "/plots"))
                    """
					Interpolation of 1D data: speed, rate_of_turn, etc; interpolated_OD_trajectories / OD_trajectories are both in X, Y coordinates
					"""
                    if (len(interpolated_OD_trajectories) > 0):
                        interpolated_OD_trajectories[0] = interpolator.interpolate1DFeatures( \
                         interpolated_OD_trajectories[0], \
                         OD_trajectories[0])

                    # change X, Y coordinate to Lat, Lon
                    interpolated_OD_trajectories_lat_lon = trajectory_modeller.convertListOfTrajectoriesToLatLon( \
                     originLatitude, originLongtitude, interpolated_OD_trajectories)
                    if (len(interpolated_OD_trajectories_lat_lon) > 0):
                        # since there should be only one trajectory between each pair of OD
                        all_OD_trajectories.append(
                            interpolated_OD_trajectories_lat_lon[0])
                else:
                    print "no trajectories extracted between endpoints ", s, " and ", s + 1
                    plt.clf()

        assert (
            not endpoints is None
        ), "Error!: No endpoints extracted from the historial data of vessels" + "_".join(
            filenames)
        print "Final endpoints.shape:", endpoints.shape
        print "number of interpolated all_OD_trajectories:", len(
            all_OD_trajectories)
        all_OD_trajectories = utils.removeErrorTrajectoryFromList(
            all_OD_trajectories)
        writeToCSV.saveData(all_OD_trajectories,
                            root_folder + "/all_OD_trajectories_with_1D_data")
    else:
        all_OD_trajectories = writeToCSV.loadData(
            root_folder + "/all_OD_trajectories_with_1D_data.npz")
    """convert Lat, Lon to XY for displaying"""
    all_OD_trajectories_XY = trajectory_modeller.convertListOfTrajectoriesToXY(
        utils.CENTER_LAT_SG, utils.CENTER_LON_SG, all_OD_trajectories)
    plotter.plotListOfTrajectories(\
     all_OD_trajectories_XY, \
     show = True, \
     clean = True, \
     save = False, \
     fname = "out_sample_tanker_all_OD_trajectories", path = utils.queryPath(root_folder + "/plots"))
    """Test distance to cluster centroids"""
    centroids_XY = trajectory_modeller.convertListOfTrajectoriesToXY(\
     utils.CENTER_LAT_SG, utils.CENTER_LON_SG, centroids)

    for i in range(0, len(all_OD_trajectories_XY)):
        this_tr_XY = all_OD_trajectories_XY[i]
        if (metric_to_use == 1):
            this_tr_centroids_dist, according_pattern_index = minDistanceAgainstCentroids(\
             this_tr_XY, centroids_XY, clustering_worker.trajectoryDissimilarityL2)
            print "augmented trajectories[{i}]".format(i = i), \
            "'s best l2 distance is against cluster centroids[{i}], = ".format(i = according_pattern_index), \
            this_tr_centroids_dist, ", max allowed distance  = ", 1000
        elif (metric_to_use == 2):
            this_tr_centroids_dist, according_pattern_index = minDistanceAgainstCentroids(\
             this_tr_XY, centroids_XY, clustering_worker.trajectoryDissimilarityCenterMass)
            print "augmented trajectories[{i}]".format(i = i), \
            "'s best center of mass distance is against cluster centroids[{i}], = ".format(i = according_pattern_index), \
            this_tr_centroids_dist, ", max allowed distance  = ", 1.5

        # plotter.plotFeatureSpace(centroids[according_pattern_index])
        # plotter.plotFeatureSpace(\
        # trajectory_modeller.convertListOfTrajectoriesToLatLon(utils.CENTER_LAT_SG, utils.CENTER_LON_SG, [this_tr_XY])[0])

    return
def main():
	root_folder = raw_input("Input the root_folder name:")
	
	"""
	Firstly, extract all .csv input file names from {root_folder}/input/*.csv
	"""
	# filenames = ["8514019.csv", "9116943.csv", "9267118.csv", "9443140.csv", "9383986.csv", "9343340.csv", "9417464.csv", "9664225.csv", "9538440.csv", "9327138.csv"]
	# filenames = ["9664225.csv"]
	# filenames = ["8514019.csv"]
	filenames = []
	for input_filename in os.listdir("{root_folder}/input/".format(root_folder = root_folder)):
		if (input_filename.find(".csv") != -1):
			filenames.append(input_filename)

	"""
	Get min distance between vessels
	"""
	need_compute_mindistance = raw_input("Need to compute min_distance_matrix for vessel interaction? (y/n) :") == 'y'
	if (need_compute_mindistance):
		"""sort the aggregateData with MMSI based on TS"""
		data_with_mmsi = writeToCSV.readDataFromCSVWithMMSI(path = root_folder + "/cleanedData", filename = "aggregateData_with_mmsi.csv")
		data_with_mmsi_sorted = compute_mindistance.sortDataBasedOnTS(data_with_mmsi)
		writeToCSV.writeDataToCSVWithMMSI(data_with_mmsi_sorted, root_folder + "/cleanedData", "aggregateData_with_mmsi_sorted")

		"""Apply the computing of min distance using a timed window"""
		data_with_mmsi_sorted = writeToCSV.readDataFromCSVWithMMSI(path = root_folder + "/cleanedData", filename = "aggregateData_with_mmsi_sorted.csv")
		mmsi_set = compute_mindistance.getSetOfMMSI(data_with_mmsi_sorted)
		print mmsi_set
		print list(mmsi_set)

		start_time = time.time()
		mmsi_list_dict, min_distance_matrix, vessel_distance_speed_dict = \
		compute_mindistance.computeVesselMinDistanceMatrix(data_with_mmsi_sorted, TIME_WINDOW = 1800)

		writeToCSV.saveData([{ \
			'mmsi_list_dict': mmsi_list_dict, \
			'min_distance_matrix': min_distance_matrix, \
			'vessel_distance_speed_dict': vessel_distance_speed_dict
			}], filename = root_folder + "/cleanedData" + "/min_distance_matrix_with_mmsi_time_window_1800_sec")

		print "time spent:", time.time() - start_time

		"""From already computed"""	
		# min_distance_matrix_result = writeToCSV.loadData(\
		# 	root_folder + "/cleanedData" + "/min_distance_matrix_with_mmsi_time_window_1800_sec.npz")
		# print "min_distance_matrix_result type:\n", type(min_distance_matrix_result)
		# mmsi_list_dict = min_distance_matrix_result[0]["mmsi_list_dict"]
		# min_distance_matrix = min_distance_matrix_result[0]["min_distance_matrix"]
		# vessel_distance_speed_dict = min_distance_matrix_result[0]["vessel_distance_speed_dict"]
		# print "min_distance_matrix loaded:\n", min_distance_matrix
		# min_of_min_distance = sys.maxint
		# for i in range(0, min_distance_matrix.shape[0]):
		# 	for j in range(i + 1, min_distance_matrix.shape[1]):
		# 		if (min_distance_matrix[i][j] < min_of_min_distance):
		# 			min_of_min_distance = min_distance_matrix[i][j]
		# print "min_distance_matrix min of 10 tankers:", min_of_min_distance

		"""write min distance records for Agent Based Simulator"""
		writeToCSV.writeVesselSpeedToDistance(\
			path = utils.queryPath(root_folder+"LearningResult"),\
			file_name = "vessel_speed_to_distance", \
			vessel_distance_speed_dict = vessel_distance_speed_dict)
		writeToCSV.writeVesselMinDistanceMatrix(\
			path = utils.queryPath(root_folder+"LearningResult"), \
			file_name = "vessel_min_distance_matrix", \
			mmsi_list_dict = mmsi_list_dict, \
			min_distance_matrix = min_distance_matrix)
		writeToCSV.writeMMSIs(\
			path = utils.queryPath(root_folder+"LearningResult"), \
			file_name = "mmsi_list", \
			mmsi_list = [key for key, index in mmsi_list_dict.iteritems()])

	"""
	Test Clustering
	"""
	# trajectories_to_cluster = writeToCSV.loadData(root_folder + "/" + "all_OD_trajectories_with_1D_data_refined.npz")
	# # trajectories_to_cluster = writeToCSV.loadData(root_folder + "/" + "all_OD_trajectories_cleaned.npz")
	# # trajectories_to_cluster = writeToCSV.loadData(root_folder + "/" + "all_OD_trajectories_9664225.npz")
	# print "trajectories_to_cluster.shape: ", trajectories_to_cluster.shape
	# print "type(trajectories_to_cluster): ", type(trajectories_to_cluster)
	# print "len(trajectories_to_cluster): ", len(trajectories_to_cluster)
	
	# # convert Lat, Lon to XY for clustering
	# all_OD_trajectories_XY = convertListOfTrajectoriesToXY(utils.CENTER_LAT_SG, utils.CENTER_LON_SG, trajectories_to_cluster)
	# executeClustering(root_folder = root_folder, \
	# 	all_OD_trajectories_XY = all_OD_trajectories_XY, \
	# 	reference_lat = utils.CENTER_LAT_SG, \
	# 	reference_lon = utils.CENTER_LON_SG, \
	#   filenames = filenames)
	# raise ValueError("purpose stop for testing clustering")


	"""
	plot out the value space of the features, speed, accelerations, etc, for the aggregateData
	"""
	# filename = "aggregateData.npz"
	# path = "tankers/cleanedData"
	# data = writeToCSV.loadArray("{p}/{f}".format(p = path, f=filename))
	# for trajectory in trajectories_to_cluster:
		# plotter.plotFeatureSpace(trajectory)
	# raise ValueError("For plotting feature space only")

	"""
	Read the cleaned .csv input files form {root_folder}/cleanedData/
	Extract endpoints
	"""
	endpoints = None
	all_OD_trajectories = []
	utils.queryPath("{root_folder}/endpoints".format(root_folder = root_folder))
	utils.queryPath("{root_folder}/trajectories".format(root_folder = root_folder))
	
	for i in range(0, len(filenames)):
		this_vessel_trajectory_points = writeToCSV.readDataFromCSV(root_folder + "/cleanedData", filenames[i])
		# Extract end points, along with MMSI
		this_vessel_endpoints = np.asarray(extractEndPoints(writeToCSV.readDataFromCSVWithMMSI(root_folder + "/cleanedData", filenames[i])))
		# Save end points, along with MMSI
		writeToCSV.writeDataToCSVWithMMSI( \
			this_vessel_endpoints, \
			root_folder + "/endpoints", \
			"{filename}_endpoints".format(filename = filenames[i][:filenames[i].find(".")]))
		print "this_vessel_endpoints.shape:", this_vessel_endpoints.shape

		# Append to the total end points
		if(endpoints is None):
			endpoints = this_vessel_endpoints
		else:
			endpoints = np.concatenate((endpoints, this_vessel_endpoints), axis=0)

		for s in range (0, len(this_vessel_endpoints) - 1):
			originLatitude = this_vessel_endpoints[s][utils.dataDict["latitude"]]
			originLongtitude = this_vessel_endpoints[s][utils.dataDict["longitude"]]
			origin_ts = this_vessel_endpoints[s][utils.dataDict["ts"]]

			endLatitude = this_vessel_endpoints[s + 1][utils.dataDict["latitude"]]
			endLongtitude = this_vessel_endpoints[s + 1][utils.dataDict["longitude"]]	
			end_ts = this_vessel_endpoints[s + 1][utils.dataDict["ts"]]
			
			"""Extracting trajectory between a pair of OD"""
			print "\n\nextracting endpoints between ", s, " and ", s + 1
			OD_trajectories, OD_trajectories_lat_lon = extractTrajectoriesUntilOD(\
				this_vessel_trajectory_points, \
				origin_ts, \
				originLatitude, \
				originLongtitude, \
				end_ts, \
				endLatitude, \
				endLongtitude, \
				show = False, save = True, clean = False, \
				fname = filenames[i][:filenames[i].find(".")] + "_trajectory_between_endpoint{s}_and{e}".format(s = s, e = s + 1))
				# there will be one trajectory between each OD		
			assert (len(OD_trajectories) > 0), "OD_trajectories extracted must have length > 0"
			print "number of trajectory points extracted : ", len(OD_trajectories[0])

			if(len(OD_trajectories[0]) > 2): # more than just the origin and destination endpoints along the trajectory
				writeToCSV.writeDataToCSV( \
					data = OD_trajectories_lat_lon[0],
					path = root_folder + "/trajectories", \
					file_name = "{filename}_trajectory_endpoint_{s}_to_{e}".format(filename = filenames[i][:filenames[i].find(".")], \
						s = s, \
						e = s + 1))
				"""
				Interpolation based on pure geographical trajectory, ignore temporal information
				"""
				interpolated_OD_trajectories = interpolator.geographicalTrajetoryInterpolation(OD_trajectories)
				plotter.plotListOfTrajectories( \
					interpolated_OD_trajectories, \
					show = False, \
					clean = True, \
					save = True, \
					fname = filenames[i][:filenames[i].find(".")] + "_interpolated_algo_3_between_endpoint{s}_and{e}".format(\
						s = s, \
						e = s + 1))
				
				"""
				Interpolation of 1D data: speed, rate_of_turn, etc; interpolated_OD_trajectories / OD_trajectories are both in X, Y coordinates
				"""
				if(len(interpolated_OD_trajectories) > 0):
					interpolated_OD_trajectories[0] = interpolator.interpolate1DFeatures( \
						interpolated_OD_trajectories[0], \
						OD_trajectories[0])

				# change X, Y coordinate to Lat, Lon
				interpolated_OD_trajectories_lat_lon = convertListOfTrajectoriesToLatLon( \
					originLatitude, originLongtitude, interpolated_OD_trajectories)
				if(len(interpolated_OD_trajectories_lat_lon) > 0):
					# since there should be only one trajectory between each pair of OD
					all_OD_trajectories.append(interpolated_OD_trajectories_lat_lon[0])
			else:
				print "no trajectories extracted between endpoints ", s , " and ", s + 1
				plt.clf()

	assert (not endpoints is None), "Error!: No endpoints extracted from the historial data of vessels" + "_".join(filenames)
	print "Final endpoints.shape:", endpoints.shape
	print "number of interpolated all_OD_trajectories:", len(all_OD_trajectories)

	"""
	save the augmented trajectories between endpoints as npz data file and the plot
	"""
	# remove error trajectories that are too far from Singapore
	all_OD_trajectories = utils.removeErrorTrajectoryFromList(all_OD_trajectories)
	writeToCSV.saveData(all_OD_trajectories, root_folder + "/all_OD_trajectories_with_1D_data")
	# convert Lat, Lon to XY for displaying
	all_OD_trajectories_XY = convertListOfTrajectoriesToXY(utils.CENTER_LAT_SG, utils.CENTER_LON_SG, all_OD_trajectories)
	plotter.plotListOfTrajectories(all_OD_trajectories_XY, show = False, clean = True, save = True, \
		fname = "{root_folder}_all_OD_trajectories".format(root_folder = root_folder))


	"""
	Execute Clustering
	"""
	executeClustering(root_folder = root_folder, \
		all_OD_trajectories_XY = all_OD_trajectories_XY, \
		reference_lat = utils.CENTER_LAT_SG, \
		reference_lon = utils.CENTER_LON_SG, \
		filenames = filenames)
def main():
    metric_to_use = int(raw_input("use metric?\n" + "1. l2\n" + "2. center of mass\n"))
    root_folder = "tankers/out_sample_test"
    """read centroids"""
    centroids = None
    if metric_to_use == 1:
        centroids = writeToCSV.loadData("tankers/cleanedData/centroids_arr_l2.npz")
    elif metric_to_use == 2:
        centroids = writeToCSV.loadData("tankers/cleanedData/centroids_arr_center_mass.npz")

    """Extract endpoints, trajectories, augmentation"""
    filenames = [
        "9050462.csv",
        "9259769.csv",
        "9327138.csv",
        "9408475.csv",
        "9417464.csv",
        "9548440.csv",
    ]  # for out sample test
    # filenames = ["9408475.csv"]
    endpoints = None
    all_OD_trajectories = []
    """Do the augmentation if not yet done"""
    if not os.path.exists(root_folder + "/all_OD_trajectories_with_1D_data.npz"):
        for i in range(0, len(filenames)):
            this_vessel_trajectory_points = writeToCSV.readDataFromCSV(root_folder + "/cleanedData", filenames[i])
            # Extract end points, along with MMSI
            this_vessel_endpoints = np.asarray(
                trajectory_modeller.extractEndPoints(
                    writeToCSV.readDataFromCSVWithMMSI(root_folder + "/cleanedData", filenames[i])
                )
            )
            # Save end points, along with MMSI
            writeToCSV.writeDataToCSVWithMMSI(
                this_vessel_endpoints,
                utils.queryPath(root_folder + "/endpoints"),
                "{filename}_endpoints".format(filename=filenames[i][: filenames[i].find(".")]),
            )
            print "this_vessel_endpoints.shape:", this_vessel_endpoints.shape

            # Append to the total end points
            if endpoints is None:
                endpoints = this_vessel_endpoints
            else:
                endpoints = np.concatenate((endpoints, this_vessel_endpoints), axis=0)

            for s in range(0, len(this_vessel_endpoints) - 1):
                originLatitude = this_vessel_endpoints[s][utils.dataDict["latitude"]]
                originLongtitude = this_vessel_endpoints[s][utils.dataDict["longitude"]]
                origin_ts = this_vessel_endpoints[s][utils.dataDict["ts"]]

                endLatitude = this_vessel_endpoints[s + 1][utils.dataDict["latitude"]]
                endLongtitude = this_vessel_endpoints[s + 1][utils.dataDict["longitude"]]
                end_ts = this_vessel_endpoints[s + 1][utils.dataDict["ts"]]

                """Extracting trajectory between a pair of OD"""
                print "\n\nextracting endpoints between ", s, " and ", s + 1
                OD_trajectories, OD_trajectories_lat_lon = trajectory_modeller.extractTrajectoriesUntilOD(
                    this_vessel_trajectory_points,
                    origin_ts,
                    originLatitude,
                    originLongtitude,
                    end_ts,
                    endLatitude,
                    endLongtitude,
                    show=False,
                    save=True,
                    clean=False,
                    fname=filenames[i][: filenames[i].find(".")]
                    + "_trajectory_between_endpoint{s}_and{e}".format(s=s, e=s + 1),
                    path=utils.queryPath(root_folder + "/plots"),
                )
                # there will be one trajectory between each OD
                assert len(OD_trajectories) > 0, "OD_trajectories extracted must have length > 0"
                print "number of trajectory points extracted : ", len(OD_trajectories[0])

                if (
                    len(OD_trajectories[0]) > 2
                ):  # more than just the origin and destination endpoints along the trajectory
                    writeToCSV.writeDataToCSV(
                        data=OD_trajectories_lat_lon[0],
                        path=utils.queryPath(root_folder + "/trajectories"),
                        file_name="{filename}_trajectory_endpoint_{s}_to_{e}".format(
                            filename=filenames[i][: filenames[i].find(".")], s=s, e=s + 1
                        ),
                    )
                    """
					Interpolation based on pure geographical trajectory, ignore temporal information
					"""
                    interpolated_OD_trajectories = interpolator.geographicalTrajetoryInterpolation(OD_trajectories)
                    plotter.plotListOfTrajectories(
                        interpolated_OD_trajectories,
                        show=False,
                        clean=True,
                        save=True,
                        fname=filenames[i][: filenames[i].find(".")]
                        + "_interpolated_algo_3_between_endpoint{s}_and{e}".format(s=s, e=s + 1),
                        path=utils.queryPath(root_folder + "/plots"),
                    )

                    """
					Interpolation of 1D data: speed, rate_of_turn, etc; interpolated_OD_trajectories / OD_trajectories are both in X, Y coordinates
					"""
                    if len(interpolated_OD_trajectories) > 0:
                        interpolated_OD_trajectories[0] = interpolator.interpolate1DFeatures(
                            interpolated_OD_trajectories[0], OD_trajectories[0]
                        )

                        # change X, Y coordinate to Lat, Lon
                    interpolated_OD_trajectories_lat_lon = trajectory_modeller.convertListOfTrajectoriesToLatLon(
                        originLatitude, originLongtitude, interpolated_OD_trajectories
                    )
                    if len(interpolated_OD_trajectories_lat_lon) > 0:
                        # since there should be only one trajectory between each pair of OD
                        all_OD_trajectories.append(interpolated_OD_trajectories_lat_lon[0])
                else:
                    print "no trajectories extracted between endpoints ", s, " and ", s + 1
                    plt.clf()

        assert not endpoints is None, "Error!: No endpoints extracted from the historial data of vessels" + "_".join(
            filenames
        )
        print "Final endpoints.shape:", endpoints.shape
        print "number of interpolated all_OD_trajectories:", len(all_OD_trajectories)
        all_OD_trajectories = utils.removeErrorTrajectoryFromList(all_OD_trajectories)
        writeToCSV.saveData(all_OD_trajectories, root_folder + "/all_OD_trajectories_with_1D_data")
    else:
        all_OD_trajectories = writeToCSV.loadData(root_folder + "/all_OD_trajectories_with_1D_data.npz")

    """convert Lat, Lon to XY for displaying"""
    all_OD_trajectories_XY = trajectory_modeller.convertListOfTrajectoriesToXY(
        utils.CENTER_LAT_SG, utils.CENTER_LON_SG, all_OD_trajectories
    )
    plotter.plotListOfTrajectories(
        all_OD_trajectories_XY,
        show=True,
        clean=True,
        save=False,
        fname="out_sample_tanker_all_OD_trajectories",
        path=utils.queryPath(root_folder + "/plots"),
    )

    """Test distance to cluster centroids"""
    centroids_XY = trajectory_modeller.convertListOfTrajectoriesToXY(
        utils.CENTER_LAT_SG, utils.CENTER_LON_SG, centroids
    )

    for i in range(0, len(all_OD_trajectories_XY)):
        this_tr_XY = all_OD_trajectories_XY[i]
        if metric_to_use == 1:
            this_tr_centroids_dist, according_pattern_index = minDistanceAgainstCentroids(
                this_tr_XY, centroids_XY, clustering_worker.trajectoryDissimilarityL2
            )
            print "augmented trajectories[{i}]".format(
                i=i
            ), "'s best l2 distance is against cluster centroids[{i}], = ".format(
                i=according_pattern_index
            ), this_tr_centroids_dist, ", max allowed distance  = ", 1000
        elif metric_to_use == 2:
            this_tr_centroids_dist, according_pattern_index = minDistanceAgainstCentroids(
                this_tr_XY, centroids_XY, clustering_worker.trajectoryDissimilarityCenterMass
            )
            print "augmented trajectories[{i}]".format(
                i=i
            ), "'s best center of mass distance is against cluster centroids[{i}], = ".format(
                i=according_pattern_index
            ), this_tr_centroids_dist, ", max allowed distance  = ", 1.5

            # plotter.plotFeatureSpace(centroids[according_pattern_index])
            # plotter.plotFeatureSpace(\
            # trajectory_modeller.convertListOfTrajectoriesToLatLon(utils.CENTER_LAT_SG, utils.CENTER_LON_SG, [this_tr_XY])[0])

    return