def compute_dis_matrix():
    """Build the square station-to-station distance matrix.

    Row and column indices follow the iteration order of the dictionary
    returned by ``sp.get_station_dic()``. For every ordered pair of distinct
    station ids the distance is looked up with
    ``sp.get_shortest_path_from_stat_id`` against the table loaded by
    ``sp.get_all_stations_spt_dic_from_file()``. When that lookup yields
    ``None`` as the distance, it is recomputed from the station names with
    ``internal_get_spt_from_stat_name``.

    Returns:
        A list of lists of size ``n x n`` (``n`` = number of stations) whose
        diagonal is ``0``.
    """
    station_dic = sp.get_station_dic()
    stat_ids = list(station_dic)
    size = len(stat_ids)
    dist_matrix = [[0] * size for _ in range(size)]
    stations_shortest_path_dic = sp.get_all_stations_spt_dic_from_file()

    stat_id_to_index = {stat_id: idx for idx, stat_id in enumerate(stat_ids)}

    for stat_id1 in stat_ids:
        for stat_id2 in stat_ids:
            # Compare by value: identity (`is`) only works by accident when
            # both loops happen to yield the very same key objects.
            if stat_id1 == stat_id2:
                continue

            # The file-based lookup returns (distance, path) ...
            distance, _ = sp.get_shortest_path_from_stat_id(
                stat_id1, stat_id2, station_dic,
                stations_shortest_path_dic)
            if distance is None:
                print("Coundn't find distance in file, calculate again")
                # ... while the fallback returns (path, distance).
                _, distance = internal_get_spt_from_stat_name(
                    station_dic[stat_id1]['name'],
                    station_dic[stat_id2]['name'])

            idx1 = stat_id_to_index[stat_id1]
            idx2 = stat_id_to_index[stat_id2]
            # Both mirrored cells are written on every visit, so for each
            # unordered pair the value from the later visit is the one kept.
            dist_matrix[idx1][idx2] = distance
            dist_matrix[idx2][idx1] = distance
    return dist_matrix
Beispiel #2
0
def computeDisMetrix():
    """Build the square distance matrix between stations, keyed by name.

    Station names are read from ``get_station_dic()`` and each distinct name
    is given the next free matrix index. For every ordered pair of distinct
    names the distance is looked up with ``sp.get_shortest_path`` (using the
    station info from ``sp.preprocess()`` and the table from
    ``sp.get_all_stations_spt_dic_from_file()``). When that lookup yields
    ``None`` as the distance, it is recomputed with
    ``internal_get_spt_from_stat_name``.

    Returns:
        A list of lists of size ``n x n`` (``n`` = number of distinct station
        names) whose diagonal is ``0``.
    """
    staion_dic = get_station_dic()

    # Index by distinct name. Using a per-station running counter would hand
    # out indices beyond the matrix size as soon as two stations share a name.
    id_name = {}
    for station_info in staion_dic.values():
        id_name.setdefault(station_info['name'], len(id_name))

    size = len(id_name)
    dis_metrix = [[0] * size for _ in range(size)]

    # Only the station info part of the preprocess result is needed here.
    _, station_info_dic = sp.preprocess()
    stations_shortest_path_dic = sp.get_all_stations_spt_dic_from_file()

    for station, row in id_name.items():
        for station1, col in id_name.items():
            # Compare names by value, not by object identity.
            if station == station1:
                continue

            print(station + "#" + station1)
            # The file-based lookup returns (distance, path) ...
            distance, _ = sp.get_shortest_path(
                station, station1, station_info_dic,
                stations_shortest_path_dic)
            if distance is None:
                # ... while the fallback returns (path, distance).
                _, distance = internal_get_spt_from_stat_name(
                    station, station1)

            dis_metrix[row][col] = distance
            dis_metrix[col][row] = distance

    return dis_metrix
def plot_stations_cluster(k):
    """Cluster the station samples with KMeans and plot them on the road map.

    The samples come from ``get_stat_samples_from_dic`` applied to
    ``sp.get_station_dic()``. The figure is saved to
    ``../resources/img/<k>_clusters_stations.png`` and then shown.

    Args:
        k: Number of clusters passed to ``KMeans``.
    """
    stations = sp.get_station_dic()
    samples = np.array(get_stat_samples_from_dic(stations))

    model = KMeans(n_clusters=k, random_state=100)
    labels = model.fit_predict(samples)

    # Road network as the background layer, stations coloured by cluster.
    plt.rcParams['figure.figsize'] = (15, 10)
    roads = gpd.read_file(files.roads_pads_network_utm_geojson)
    roads.plot(color='black')
    plt.scatter(samples[:, 0], samples[:, 1], c=labels)

    # Disabled debugging aid: label every station with its name.
    annotate_station_names = False
    if annotate_station_names:
        for info in stations.values():
            name = info['name']
            easting = info['road_coordinates'][0]
            northing = info['road_coordinates'][1]
            plt.annotate(name,
                         xy=(easting, northing),
                         xytext=(easting + 10, northing + 10))

    # Mark the cluster centres on top of everything else.
    centers = model.cluster_centers_
    plt.scatter(centers[:, 0], centers[:, 1],
                marker='x', s=169, linewidths=3, color='r', zorder=10)

    plt.title('Stations with ' + str(k) + ' clusters')
    plt.xlabel('Easting [m]', fontsize=13)
    plt.ylabel('Northing [m]', fontsize=13)

    target = '../resources/img/' + str(k) + '_clusters_stations.png'
    plt.savefig(target, dpi=1000)
    plt.show()
def plot_clustering_from_dic(cluster_stat_dic):
    """Plot the stations of each cluster on top of the road network.

    One scatter call is issued per cluster. The figure is saved to
    ``../resources/img/stations_name_clustering.png`` and then shown.

    Args:
        cluster_stat_dic: Mapping from cluster key to an entry whose
            ``'stations'`` item lists station ids; each id is looked up in
            ``sp.get_station_dic()`` for its ``'coordinates'``.
    """
    stat_info_dic = sp.get_station_dic()
    plt.rcParams['figure.figsize'] = (12, 9)
    roads = gpd.read_file(files.roads_pads_network_utm_geojson)
    roads.plot(color='black')

    # Disabled debugging aids, kept as switches.
    label_cluster_once = False
    label_each_station = False
    label_each_point = False

    for cluster, entry in cluster_stat_dic.items():
        members = entry['stations']

        # One scatter per cluster.
        points = np.array(
            [stat_info_dic[member]['coordinates'] for member in members])
        plt.scatter(points[:, 0], points[:, 1])

        if label_cluster_once:
            # Put the cluster key next to the first station only.
            easting, northing = stat_info_dic[members[0]]['coordinates'][:2]
            plt.annotate(cluster,
                         xy=(easting, northing),
                         xytext=(easting + 20, northing + 20),
                         fontsize=15)

        if label_each_station:
            # "<station id>(<cluster>)" next to every station.
            for member in members:
                easting, northing = stat_info_dic[member]['coordinates'][:2]
                plt.annotate(member + "(" + cluster + ")",
                             xy=(easting, northing),
                             xytext=(easting + 10, northing + 10))

        if label_each_point:
            # Cluster key next to every plotted point.
            for point in points:
                plt.annotate(cluster,
                             xy=(point[0], point[1]),
                             xytext=(point[0] + 10, point[1] + 10))

    plt.title('Stations with ' + str(len(cluster_stat_dic)) + ' clusters')
    plt.xlabel('Easting [m]', fontsize=13)
    plt.ylabel('Northing [m]', fontsize=13)
    plt.savefig('../resources/img/stations_name_clustering.png', dpi=1000)
    plt.show()
def determine_k_by_elbow(max_k=20):
    """Plot the elbow curve used to choose the number of KMeans clusters.

    For each ``k`` in ``1 .. max_k - 1`` a KMeans model is fitted to the
    station samples and the distortion is recorded, i.e. the mean Euclidean
    distance from each sample to its nearest cluster centre. The curve is
    saved to ``../resources/img/elbow_method.png`` and then shown.

    Args:
        max_k: Exclusive upper bound of the tested cluster counts; also the
            right limit of the x axis. Defaults to 20.
    """
    stat_dic = sp.get_station_dic()
    X = np.array(get_stat_samples_from_dic(stat_dic))

    K = range(1, max_k)
    distortions = []
    for k in K:
        # `fit` returns the fitted estimator, so one fit per k is enough;
        # fitting a second time only repeats the same work.
        kmeanModel = KMeans(n_clusters=k).fit(X)
        nearest = np.min(
            cdist(X, kmeanModel.cluster_centers_, 'euclidean'), axis=1)
        distortions.append(sum(nearest) / X.shape[0])

    plt.plot(K, distortions, 'bx-')
    plt.xlabel('k')
    plt.ylabel('Distortion')
    plt.title('The Elbow Method showing the optimal k')
    plt.xlim([0, max_k])
    plt.savefig('../resources/img/elbow_method.png', dpi=1000)
    plt.show()
def get_cluster_dic():
    """Return ``cluster_stat_dic``, running the clustering first if it is empty.

    ``cluster_stat_dic`` and ``cluster_num`` are not assigned in this
    function, so they are read from an enclosing (module-level) scope.
    As a side effect the globals ``dist_matrix`` and ``stat_list`` are
    (re)assigned whenever the clustering is run.

    NOTE(review): nothing in this function writes to ``cluster_stat_dic``;
    presumably ``agglomerative_clustering`` fills it as a side effect --
    TODO confirm, otherwise the empty dictionary is returned unchanged.
    """
    global stat_list
    global dist_matrix
    # Non-empty result already available: return it without recomputing.
    if len(cluster_stat_dic) != 0:
        return cluster_stat_dic
    station_dic = sp.get_station_dic()
    dist_matrix = compute_dis_matrix()

    # Station ids as an array, in the dictionary's key order.
    stat_list = np.array(list(station_dic.keys()))

    # The returned labels are not used further in this function.
    cluster_labels = agglomerative_clustering(cluster_num, dist_matrix)

    # Disabled code that would build the result from the labels locally:
    #    cluster_stat_dic = {}
    #    for i in range(0, cluster_number):
    #        cluster_stat_dic[str(i)] = {}
    #        cluster_stat_dic[str(i)]['stations'] = stat_list[cluster_labels==i]

    #print(cluster_stat_dic)
    #print(cluster_stat_dic['0'])
    #print(cluster_stat_dic)
    return cluster_stat_dic
Beispiel #7
0
#itemlist = [([0] * len(time_station)) for i in range(len(time_station))]
#i = 0
#for stat in time_station:
#	j = 0
#	for stat1 in time_station[stat]:
#		if i == j:
#			j += 1
#		itemlist[i][j] = time_station[stat][stat1]
#		j += 1
#	i += 1
#
#print(itemlist)
#print(len(itemlist))

# create scatter plot
staion_dic = get_station_dic()

loc = []
xmin = 5000000
xmax = 0
ymin = 50000000
ymax = 0
for station in staion_dic:
    loc.append(staion_dic[station]['coordinates'])
    if staion_dic[station]['coordinates'][0] < xmin:
        xmin = staion_dic[station]['coordinates'][0]

    if staion_dic[station]['coordinates'][0] > xmax:
        xmax = staion_dic[station]['coordinates'][0]

    if staion_dic[station]['coordinates'][1] < ymin: