def returnPopularitydistanceGraphLocations(D_results):
    '''
    Build popularity-vs-distance scatter charts for the AS-IS and TO-BE scenarios.

    Parameters
    ----------
    D_results : pandas.DataFrame
        Dataframe containing the columns:
        idNode : node id defined from a graph (as-is scenario)
        popularity : popularity of the idNode (as-is scenario)
        distance : distance from the input-output
        new_idNode : idNode to-be (to-be scenario)
        new_distance : distance of the new_idNode from the input-output (to-be scenario)

    Returns
    -------
    figure_out : dict
        Dictionary of figures with the charts, keyed 'asis' and 'tobe'.
    '''

    def _popularity_distance_chart(node_col, dist_col, title):
        # aggregate per node, drop popularity outliers via IQR, then plot
        grouped = D_results.groupby([node_col]).agg({'popularity': ['sum'], dist_col: ['mean']}).reset_index()
        grouped.columns = ['idNode', 'popularity', 'distance']
        grouped, _ = cleanUsingIQR(grouped, features=['popularity'])
        fig = plt.figure()
        plt.scatter(grouped['popularity'], grouped['distance'])
        plt.xlabel('Popularity')
        plt.ylabel('Distance')
        plt.title(title)
        return fig

    figure_out = {}

    # as-is scenario
    D_results['distance'] = D_results['distance'].astype(float)
    figure_out['asis'] = _popularity_distance_chart('idNode', 'distance', "AS-IS Scenario")

    # to-be scenario
    D_results['new_distance'] = D_results['new_distance'].astype(float)
    figure_out['tobe'] = _popularity_distance_chart('new_idNode', 'new_distance', "TO-BE Scenario")

    return figure_out
def asisTobeBubblePopDist(D_results, cleanData=False):
    '''
    Plot popularity against distance for the as-is and to-be configurations.

    D_results must hold idNode/popularity/distance (as-is) and
    new_idNode/new_distance (to-be). If cleanData is True, popularity
    outliers are removed with the IQR rule before plotting.

    Returns a dictionary of figures keyed 'pop_dist_asis' and 'pop_dist_tobe'.
    '''
    output_figures = {}
    if cleanData:
        D_results, _ = cleanUsingIQR(D_results, ['popularity'])

    def _distance_popularity_chart(node_col, dist_col, title):
        # aggregate popularity (sum) and distance (mean) per node, then scatter
        grouped = D_results.groupby([node_col]).agg({'popularity': ['sum'], dist_col: ['mean']}).reset_index()
        grouped.columns = ['idNode', 'popularity', 'distance']
        fig = plt.figure()
        plt.scatter(grouped['distance'], grouped['popularity'])
        plt.xlabel('Distance (m)')
        plt.ylabel('Popularity')
        plt.title(title)
        return fig

    # AS-IS graph
    D_results['distance'] = D_results['distance'].astype(float)
    output_figures['pop_dist_asis'] = _distance_popularity_chart('idNode', 'distance', "AS-IS configuration")

    # TO-BE graph
    D_results['new_distance'] = D_results['new_distance'].astype(float)
    output_figures['pop_dist_tobe'] = _distance_popularity_chart('new_idNode', 'new_distance', "TO-BE configuration")

    return output_figures
def import_graph_drive(D_node, latCol, lonCol, D_plant, plantLatitude, plantLongitude, cleanOutliers=False):
    '''
    Import a drivable road network (via osmnx) covering all nodes and plants.

    D_node is the table containing the nodes of the network;
    latCol / lonCol are the latitude and longitude attributes of the node collection.
    D_plant is the table containing the plants of the network;
    plantLatitude / plantLongitude are the latitude and longitude attributes of the plant collection.
    cleanOutliers is True to remove latitude and longitude outliers using IQR.

    Returns the road graph and a dataframe with the coverage tuple.
    '''
    coverage_stats = (1, np.nan)

    # remove latitude and longitude outliers, tracking the coverage
    if cleanOutliers:
        D_node, coverage_stats = cleanUsingIQR(D_node, [latCol, lonCol])

    # bounding box over every node and plant location
    latitudes = list(D_node[latCol]) + list(D_plant[plantLatitude])
    longitudes = list(D_node[lonCol]) + list(D_plant[plantLongitude])

    # osmnx bbox order: north, south, east, west
    G = ox.graph_from_bbox(max(latitudes), min(latitudes), max(longitudes), min(longitudes), network_type='drive')

    return G, pd.DataFrame(coverage_stats)
def returnbubbleGraphAsIsToBe(D_results, cleanData=False):
    '''
    Return the graph with storage plant layout and picking bubbles

    Parameters
    ----------
    D_results : pandas.DataFrame
        Movements with as-is coordinates (loccodex, loccodey), to-be
        coordinates (loccodexTOBE, loccodeyTOBE) and a popularity column.
    cleanData : bool, optional
        If True, popularity outliers are removed with the IQR rule.
        The default is False.

    Returns
    -------
    figure_out : dict
        Dictionary of output figures keyed 'pick_layout_asis' and
        'pick_layout_tobe'.
    '''

    def normaliseVector(x):
        # FIX: guard the zero-range case (all values equal) which previously
        # divided by zero and produced NaN bubble sizes; return a constant
        # vector instead, consistent with scaleSize in spaceProductivity
        if min(x) == max(x):
            return pd.Series(1.0, index=x.index)
        return (x - min(x)) / (max(x) - min(x))

    figure_out = {}
    if cleanData:
        D_results, _ = cleanUsingIQR(D_results, ['popularity'])

    # graph as/is: one bubble per location, sized by total popularity
    D_graph = D_results.groupby(['loccodex', 'loccodey'])['popularity'].agg(['sum']).reset_index()
    D_graph['size'] = normaliseVector(D_graph['sum']) * 100
    fig1 = plt.figure()
    plt.scatter(D_graph.loccodex, D_graph.loccodey, D_graph['size'])
    plt.title("Warehouse as-is")
    figure_out['pick_layout_asis'] = fig1

    # graph to/be: same chart on the to-be coordinates
    D_graph = D_results.groupby(['loccodexTOBE', 'loccodeyTOBE'])['popularity'].agg(['sum']).reset_index()
    D_graph['size'] = normaliseVector(D_graph['sum']) * 100
    fig2 = plt.figure()
    plt.scatter(D_graph.loccodexTOBE, D_graph.loccodeyTOBE, D_graph['size'])
    plt.title("Warehouse to-be")
    figure_out['pick_layout_tobe'] = fig2

    return figure_out
def spaceProductivity(D_movements, variableToPlot, inout_column, x_col, y_col, z_col, graphType='2D', cleanData=False):
    '''
    Parameters
    ----------
    D_movements : TYPE pandas dataframe
        DESCRIPTION. pandas dataframe with movements; must contain a 'PERIOD' column
    variableToPlot : string
        DESCRIPTION. string with the column to plot, or "popularity" for movement count
    inout_column : TYPE string
        DESCRIPTION. string of the column with inout ('+' inbound, '-' outbound)
    x_col : TYPE string
        DESCRIPTION. string of the column with x coordinates
    y_col : TYPE string
        DESCRIPTION. string of the column with y coordinates
    z_col : TYPE string
        DESCRIPTION. string of the column with z coordinates
    graphType : TYPE string, optional
        DESCRIPTION. The default is '2D'. 2D or 3D depending on the graph type
    cleanData : TYPE boolean, optional
        DESCRIPTION. The default is False. if True, IQR is used to clean
        popularity of each location

    Returns
    -------
    figure_output : TYPE dict
        DESCRIPTION. dictionary of output figures (one per period and direction)
    '''

    def scaleSize(series):
        # map values to [0, 1]; a constant series maps to all ones
        if min(series) == max(series):
            return [1 for i in range(0, len(series))]
        else:
            return (series - min(series)) / (max(series) - min(series))

    def _renderFigures(D_direction, direction_label, key_prefix):
        # render one figure per period for a single flow direction
        if len(D_direction) == 0:
            return
        # clean data
        if cleanData:
            D_warehouse_grouped, _ = cleanUsingIQR(D_direction, features=['POPULARITY'], capacityField=[])
        else:
            D_warehouse_grouped = D_direction
        # create figures
        for period in set(D_warehouse_grouped['PERIOD']):
            # .copy(): the boolean filter returns a view-like slice; writing
            # SIZE into it would trigger SettingWithCopy behaviour
            D_filtered = D_warehouse_grouped[D_warehouse_grouped['PERIOD'] == period].copy()
            D_filtered['SIZE'] = scaleSize(D_filtered['POPULARITY'])
            # scale size
            D_filtered['SIZE'] = 100 * D_filtered['SIZE']
            # graphType 2-Dimensional
            if graphType == '2D':
                fig = plt.figure()
                plt.scatter(D_filtered['LOCCODEX'], D_filtered['LOCCODEY'], D_filtered['SIZE'], c=D_filtered['SIZE'])
                plt.colorbar()
                plt.title(f"Warehouse {direction_label} productivity, period:{period}")
                plt.xlabel("Warehouse front (x)")
                plt.ylabel("Warehouse depth (y)")
                figure_output[f"{key_prefix}_productivity_2D_{period}"] = fig
            # graphtype 3-Dimensional
            elif graphType == '3D':
                fig = plt.figure()
                fig.add_subplot(111, projection='3d')
                plt.scatter(x=D_filtered['LOCCODEX'],
                            y=D_filtered['LOCCODEY'],
                            zs=D_filtered['LOCCODEZ'],
                            s=D_filtered['SIZE'],
                            c=D_filtered['SIZE'])
                plt.colorbar()
                plt.xlabel("Warehouse front (x)")
                plt.ylabel("Warehouse depth (y)")
                plt.title(f"Warehouse {direction_label} productivity, period:{period}")
                figure_output[f"{key_prefix}_productivity_3D_{period}"] = fig

    figure_output = {}

    # group data per period, direction and location
    if graphType == '3D':
        group_cols = ['PERIOD', inout_column, x_col, y_col, z_col]
        out_cols = ['PERIOD', 'INOUT', 'LOCCODEX', 'LOCCODEY', 'LOCCODEZ', 'POPULARITY']
    elif graphType == '2D':
        group_cols = ['PERIOD', inout_column, x_col, y_col]
        out_cols = ['PERIOD', 'INOUT', 'LOCCODEX', 'LOCCODEY', 'POPULARITY']
    if variableToPlot == 'popularity':
        D_mov = D_movements.groupby(group_cols).size().reset_index()
    else:
        D_mov = D_movements.groupby(group_cols).sum()[variableToPlot].reset_index()
    D_mov.columns = out_cols

    # split data into inbound and outbound.
    # FIX: the grouped frame's flag column was renamed to 'INOUT' above, so
    # filter on that name — filtering on inout_column raised a KeyError
    # whenever inout_column != 'INOUT'
    _renderFigures(D_mov[D_mov['INOUT'] == '+'], 'INBOUND', 'IN')
    _renderFigures(D_mov[D_mov['INOUT'] == '-'], 'OUTBOUND', 'OUT')

    return figure_output
def calculateMultipleOptimalLocation(D_table, timeColumns, distanceType, latCol, lonCol, codeCol_node, descrCol_node, cleanOutliers=False, k=1, method='kmeans'):
    '''
    Define k facility locations using a clustering (aggregation) method.

    Each row of D_table is a node of the network.
    timeColumns is the list of columns of the time horizon holding the flow data.
    latCol identifies the latitude of the node.
    lonCol identifies the longitude of the node.
    codeCol_node is a column with the code of the node (the same appearing in plantListName).
    descrCol_node is a column with the description of the node.
    cleanOutliers, if True, uses IQR to remove latitude and longitude outliers.
    k is the number of optimal points to define.
    method is the method to cluster the points: kmeans, gmm.
    distanceType selects the metric: rectangular, gravity or euclidean.

    Returns a dataframe D_res with the ID, LATITUDE, LONGITUDE and YEAR for
    each flow, adding the columns COST and FLOW representing the distance
    travelled (COST) and the flow intensity (FLOW); FLOW_norm scales the
    flows between 0 and 100. Returns a dataframe D_res_optimal with the
    optimal latitude and longitude for each time frame and cluster, with a
    COST and FLOW column, and a dictionary of coverage statistics.
    On an unknown method it prints a message and returns ([], [], []).
    '''
    # clean the data and compute the coverage statistics
    output_coverages = {}
    analysisFieldList = [latCol, lonCol]
    outputCoverages, _ = getCoverageStats(D_table, analysisFieldList, capacityField=timeColumns[0])
    D_table = D_table.dropna(subset=[latCol, lonCol])
    if cleanOutliers:
        D_table, coverages, = cleanUsingIQR(D_table, [latCol, lonCol])
        outputCoverages = (coverages[0] * outputCoverages[0], coverages[1] * outputCoverages[1])
    output_coverages['coverages'] = pd.DataFrame(outputCoverages)

    # replace the remaining nulls with zeros
    D_table = D_table.fillna(0)

    # identify the years from the time columns
    yearsColumns = timeColumns

    # cluster the points.
    # FIX: assign the raw label array, not pd.DataFrame(labels): after
    # dropna/IQR cleaning D_table's index has gaps, so the previous
    # index-aligned assignment scattered NaNs into CLUSTER and silently
    # dropped rows from every cluster
    if method == 'kmeans':
        km = cluster.KMeans(n_clusters=k).fit(D_table[[latCol, lonCol]])
        D_table['CLUSTER'] = km.labels_
    elif method == 'gmm':
        gmm = GaussianMixture(n_components=k, covariance_type='full').fit(D_table[[latCol, lonCol]])
        D_table['CLUSTER'] = gmm.predict(D_table[[latCol, lonCol]])
    else:
        print("No valid clustering method")
        return [], [], []

    # prepare the output frames
    D_res = pd.DataFrame(columns=[codeCol_node, descrCol_node, latCol, lonCol, 'YEAR', 'COST', 'CLUSTER'])
    D_res_optimal = pd.DataFrame(columns=['PERIOD', latCol, lonCol, 'YEAR', 'COST', 'FLOW', 'CLUSTER'])

    # analyse each cluster separately
    for cluster_id in set(D_table['CLUSTER']):
        D_table_filtered = D_table[D_table['CLUSTER'] == cluster_id]
        for year in yearsColumns:
            D_filter_columns = [codeCol_node, descrCol_node, latCol, lonCol, year, 'CLUSTER']
            D_filtered = D_table_filtered[D_filter_columns]
            D_filtered = D_filtered.rename(columns={year: 'FLOW'})
            D_filtered['YEAR'] = year
            # define the optimal location for this cluster/year and price
            # every node against it
            if distanceType.lower() == 'rectangular':
                lat_optimal, lon_optimal = optimalLocationRectangularDistance(D_filtered, latCol, lonCol, 'FLOW')
                D_filtered['COST'] = func_rectangularDistanceCost(D_filtered[lonCol], D_filtered[latCol], lon_optimal, lat_optimal, D_filtered['FLOW'])
            elif distanceType.lower() == 'gravity':
                lat_optimal, lon_optimal = optimalLocationGravityProblem(D_filtered, latCol, lonCol, 'FLOW')
                D_filtered['COST'] = func_gravityDistanceCost(D_filtered[lonCol], D_filtered[latCol], lon_optimal, lat_optimal, D_filtered['FLOW'])
            elif distanceType.lower() == 'euclidean':
                lat_optimal, lon_optimal = optimalLocationEuclideanDistance(D_filtered, latCol, lonCol, 'FLOW')
                D_filtered['COST'] = func_euclideanDistanceCost(D_filtered[lonCol], D_filtered[latCol], lon_optimal, lat_optimal, D_filtered['FLOW'])
            D_res = D_res.append(D_filtered)
            # NOTE(review): COST/FLOW totals below accumulate over every
            # cluster/year processed so far, not just the current one —
            # confirm this is intended
            D_res_optimal = D_res_optimal.append(pd.DataFrame([[f"OPTIMAL LOCATION YEAR: {year}",
                                                                lat_optimal,
                                                                lon_optimal,
                                                                year,
                                                                sum(D_res['COST']),
                                                                sum(D_res['FLOW']),
                                                                cluster_id
                                                                ]], columns=D_res_optimal.columns))

    #D_res['COST_norm']=(D_res['COST']-min(D_res['COST']))/(max(D_res['COST'])-min(D_res['COST']))*10
    D_res['FLOW_norm'] = (D_res['FLOW'] - min(D_res['FLOW'])) / (max(D_res['FLOW']) - min(D_res['FLOW'])) * 100
    D_res = D_res.rename(columns={'COST': 'COST_TOBE'})
    return D_res, D_res_optimal, output_coverages
def calculateOptimalLocation(D_table, timeColumns, distanceType, latCol, lonCol, codeCol_node, descrCol_node, cleanOutliers=False):
    '''
    Locate the single optimal facility position for each time frame.

    Each row of D_table is a node of the network.
    timeColumns is the list of columns of the time horizon holding the flow data.
    latCol / lonCol identify the latitude and longitude of the node.
    codeCol_node is a column with the code of the node (the same appearing in plantListName).
    descrCol_node is a column with the description of the node.
    cleanOutliers, if True, uses IQR to remove latitude and longitude outliers.
    distanceType selects the metric: rectangular, gravity or euclidean.

    Returns a dataframe D_res with the ID, LATITUDE, LONGITUDE and YEAR for
    each flow, with COST (distance travelled, renamed COST_TOBE) and FLOW
    (flow intensity); FLOW_norm scales the flows between 0 and 100. Also
    returns a dataframe D_res_optimal with the optimal latitude and longitude
    for each time frame (with total COST and FLOW) and a dictionary of
    coverage statistics.
    '''
    # clean the data and compute the coverage statistics
    output_coverages = {}
    stats, _ = getCoverageStats(D_table, [latCol, lonCol], capacityField=timeColumns[0])
    D_table = D_table.dropna(subset=[latCol, lonCol])
    if cleanOutliers:
        D_table, iqr_cov = cleanUsingIQR(D_table, [latCol, lonCol])
        stats = (iqr_cov[0] * stats[0], iqr_cov[1] * stats[1])
    output_coverages['coverages'] = pd.DataFrame(stats)

    # any flow value still null becomes zero
    D_table = D_table.fillna(0)

    # prepare the output frames
    D_res = pd.DataFrame(columns=[codeCol_node, descrCol_node, latCol, lonCol, 'YEAR', 'COST', ])
    D_res_optimal = pd.DataFrame(columns=['PERIOD', latCol, lonCol, 'YEAR', 'COST', 'FLOW'])

    metric = distanceType.lower()
    for year in timeColumns:
        yearly = D_table[[codeCol_node, descrCol_node, latCol, lonCol, year]]
        yearly = yearly.rename(columns={year: 'FLOW'})
        yearly['YEAR'] = year

        # locate the optimal point for this year and price every node against it
        if metric == 'rectangular':
            lat_optimal, lon_optimal = optimalLocationRectangularDistance(yearly, latCol, lonCol, 'FLOW')
            yearly['COST'] = func_rectangularDistanceCost(yearly[lonCol], yearly[latCol], lon_optimal, lat_optimal, yearly['FLOW'])
        elif metric == 'gravity':
            lat_optimal, lon_optimal = optimalLocationGravityProblem(yearly, latCol, lonCol, 'FLOW')
            yearly['COST'] = func_gravityDistanceCost(yearly[lonCol], yearly[latCol], lon_optimal, lat_optimal, yearly['FLOW'])
        elif metric == 'euclidean':
            lat_optimal, lon_optimal = optimalLocationEuclideanDistance(yearly, latCol, lonCol, 'FLOW')
            yearly['COST'] = func_euclideanDistanceCost(yearly[lonCol], yearly[latCol], lon_optimal, lat_optimal, yearly['FLOW'])

        D_res = D_res.append(yearly)
        optimal_row = pd.DataFrame([[f"OPTIMAL LOCATION YEAR: {year}",
                                     lat_optimal,
                                     lon_optimal,
                                     year,
                                     sum(D_res['COST']),
                                     sum(D_res['FLOW']),
                                     ]], columns=D_res_optimal.columns)
        D_res_optimal = D_res_optimal.append(optimal_row)

    #D_res['COST_norm']=(D_res['COST']-min(D_res['COST']))/(max(D_res['COST'])-min(D_res['COST']))*10
    D_res['FLOW_norm'] = (D_res['FLOW'] - min(D_res['FLOW'])) / (max(D_res['FLOW']) - min(D_res['FLOW'])) * 100
    D_res = D_res.rename(columns={'COST': 'COST_TOBE'})
    return D_res, D_res_optimal, output_coverages
def defineDistanceTableEstimator(D_mov, lonCol_From_mov, latCol_From_mov, lonCol_To_mov, latCol_To_mov, G, cleanOutliersCoordinates=False, capacityField='QUANTITY'):
    '''
    Benchmark euclidean/rectangular/gravity distance estimators against the
    real road distance computed on graph G.

    D_mov is the dataframe with movements.
    lonCol_From_mov / latCol_From_mov are the D_mov columns with the longitude
    and latitude of the loading node.
    lonCol_To_mov / latCol_To_mov are the D_mov columns with the longitude and
    latitude of the discharging node.
    G is a road graph obtained with osmnx.
    cleanOutliersCoordinates is True to remove outliers in latitude and longitude.
    capacityField is a field of capacity to measure the coverage statistics on it.

    Returns the distance table (one row per distinct origin-destination pair,
    with the real road distance and the three estimates) and a dataframe with
    the coverage statistics; prints the MSE of each estimator.
    '''
    # clean data and get coverages
    analysisFieldList = [lonCol_From_mov, latCol_From_mov, lonCol_To_mov, latCol_To_mov]
    coverages, _ = getCoverageStats(D_mov, analysisFieldList, capacityField=capacityField)
    D_dist = D_mov[[lonCol_From_mov, latCol_From_mov, lonCol_To_mov, latCol_To_mov]].drop_duplicates().dropna().reset_index()
    if cleanOutliersCoordinates:
        D_dist, coverages_outl = cleanUsingIQR(D_dist, [lonCol_From_mov, latCol_From_mov, lonCol_To_mov, latCol_To_mov])
        coverages = (coverages[0] * coverages_outl[0], coverages[1] * coverages_outl[1])
    df_coverages = pd.DataFrame(coverages)

    D_dist['REAL_DISTANCE'] = np.nan
    D_dist['MERCATOR_X_FROM'] = np.nan
    D_dist['MERCATOR_Y_FROM'] = np.nan
    D_dist['MERCATOR_X_TO'] = np.nan
    D_dist['MERCATOR_Y_TO'] = np.nan

    for index, row in D_dist.iterrows():
        # get the coordinates
        lonFrom = row[lonCol_From_mov]
        latFrom = row[latCol_From_mov]
        lonTo = row[lonCol_To_mov]
        latTo = row[latCol_To_mov]

        # snap both endpoints to the closest node on the graph and measure
        # the shortest road path between them
        node_from = ox.get_nearest_node(G, (latFrom, lonFrom), method='euclidean')
        node_to = ox.get_nearest_node(G, (latTo, lonTo), method='euclidean')
        length = nx.shortest_path_length(G=G, source=node_from, target=node_to, weight='length')
        # FIX: write via df.loc[row, col]; the original chained assignment
        # (df[col].loc[idx] = v) writes to a temporary under copy-on-write
        # pandas and the distances would remain NaN
        D_dist.loc[index, 'REAL_DISTANCE'] = length

        # convert into mercator coordinates
        x_merc_from, y_merc_from = mercatorProjection(latFrom, lonFrom)
        x_merc_to, y_merc_to = mercatorProjection(latTo, lonTo)
        D_dist.loc[index, 'MERCATOR_X_FROM'] = x_merc_from
        D_dist.loc[index, 'MERCATOR_Y_FROM'] = y_merc_from
        D_dist.loc[index, 'MERCATOR_X_TO'] = x_merc_to
        D_dist.loc[index, 'MERCATOR_Y_TO'] = y_merc_to

    # distance estimates; the 1000 factor presumably converts km to metres
    # to match the graph's 'length' unit — TODO confirm against mercatorProjection
    D_dist['EUCLIDEAN_DISTANCE'] = 1000 * func_euclideanDistanceCost(D_dist['MERCATOR_X_FROM'], D_dist['MERCATOR_Y_FROM'], D_dist['MERCATOR_X_TO'], D_dist['MERCATOR_Y_TO'], 1)
    D_dist['RECTANGULAR_DISTANCE'] = 1000 * func_rectangularDistanceCost(D_dist['MERCATOR_X_FROM'], D_dist['MERCATOR_Y_FROM'], D_dist['MERCATOR_X_TO'], D_dist['MERCATOR_Y_TO'], 1)
    D_dist['GRAVITY_DISTANCE'] = 1000 * func_gravityDistanceCost(D_dist['MERCATOR_X_FROM'], D_dist['MERCATOR_Y_FROM'], D_dist['MERCATOR_X_TO'], D_dist['MERCATOR_Y_TO'], 1)

    # mean squared error of each estimator against the real road distance
    error_euclidean = mean_squared_error(D_dist['REAL_DISTANCE'], D_dist['EUCLIDEAN_DISTANCE'])
    error_rectangular = mean_squared_error(D_dist['REAL_DISTANCE'], D_dist['RECTANGULAR_DISTANCE'])
    error_gravity = mean_squared_error(D_dist['REAL_DISTANCE'], D_dist['GRAVITY_DISTANCE'])
    print(f"MSE EUCLIDEAN: {np.round(error_euclidean,2)}")
    print(f"MSE RECTANGULAR: {np.round(error_rectangular,2)}")
    print(f"MSE GRAVITY: {np.round(error_gravity,2)}")

    return D_dist, df_coverages