def mode_purpose_comparison(context, df_syn, df_act, suffix=None):
    """Plot the joint mode / purpose shares of the synthetic and actual trips.

    Parameters
    ----------
    context : object
        Passed through unchanged to ``myplottools.plot_comparison_bar``.
    df_syn : pandas.DataFrame
        Synthetic trips; the columns ``mode``, ``following_purpose`` and
        ``person_id`` are read.
    df_act : pandas.DataFrame
        Actual (survey) trips; the columns ``mode``, ``destination_purpose``
        and ``weight_person`` are read.  NOTE: the misspelled mode
        ``car_passanger`` is corrected in place in this frame, so the caller
        sees the corrected spelling afterwards.
    suffix : str, optional
        When truthy, appended to both the plot title and the image file name.
    """
    # Synthetic side: share of trips for every observed (mode, purpose) pair.
    syn_counts = df_syn.groupby(["mode", "following_purpose"])["person_id"].count()
    syn = syn_counts / syn_counts.sum()

    # Actual side: harmonise the misspelled mode first.  This deliberately
    # stays an in-place update because the same frame may be reused later.
    df_act.loc[df_act["mode"] == 'car_passanger', "mode"] = 'car_passenger'
    which = ["car", "car_passenger", "pt", "taxi", "walk"]

    # Filter with isin() rather than .loc[which]: a mode that happens to be
    # missing from a sub-population must not raise a KeyError.  Selecting the
    # weight column before summing avoids aggregating unrelated columns.
    in_scope = df_act[df_act["mode"].isin(which)]
    atypes = in_scope.groupby(["mode", "destination_purpose"])["weight_person"].sum()
    act = atypes / atypes.sum()

    # Bar labels span the full mode x purpose grid of the synthetic data.
    # Both series are aligned onto that grid so every label has exactly one
    # synthetic and one actual value (0 where a combination was not observed).
    label_index = pd.MultiIndex.from_product(syn_counts.index.levels)
    labels = [f"{mode} {purpose}" for mode, purpose in label_index]
    syn = syn.reindex(label_index, fill_value=0)
    act = act.reindex(label_index, fill_value=0)

    title_plot = "Synthetic and HTS Mode-Purpose Distribution"
    title_figure = "modepurpose"
    if suffix:
        title_plot += " - " + suffix
        title_figure += "_" + suffix
    title_figure += ".png"

    myplottools.plot_comparison_bar(
        context,
        imtitle=title_figure,
        plottitle=title_plot,
        ylabel="Percentage",
        xlabel="",
        lab=labels,
        actual=act.values.tolist(),
        synthetic=syn.values.tolist(),
        t=10,
        xticksrot=True,
    )
def activity_counts_comparison(context, all_CC, suffix=None):
    """Plot the distribution of the number of activities per activity chain.

    Parameters
    ----------
    context : object
        Passed through unchanged to ``myplottools.plot_comparison_bar``.
    all_CC : pandas.DataFrame
        One row per activity chain with the columns ``Chain`` (activities
        joined by ``-``), ``synthetic Count`` and ``actual Count``.  NaN
        counts are treated as 0.
    suffix : str, optional
        When truthy, appended to both the plot title and the image file name.
    """
    max_bucket = 7  # chains with 7 or more activities share the "7+" bucket

    # Pre-create every bucket so that a bucket without any chain shows up as
    # 0 in the plot instead of raising a KeyError when it is looked up.
    totals = {bucket: [0, 0] for bucket in range(max_bucket + 1)}

    for record in all_CC.to_dict('records'):
        chain = record["Chain"]
        syn_count = record["synthetic Count"]
        act_count = record["actual Count"]
        if np.isnan(syn_count):
            syn_count = 0
        if np.isnan(act_count):
            act_count = 0

        if chain in ("-", "h"):
            bucket = 0
        else:
            # The first and the last element of the chain are not counted.
            bucket = min(len(chain.split("-")) - 2, max_bucket)

        # A single-element chain other than "h" yields -1; as before, it does
        # not contribute to any plotted bucket.
        if bucket in totals:
            totals[bucket][0] += syn_count
            totals[bucket][1] += act_count

    buckets = list(range(max_bucket + 1))
    counts = pd.DataFrame(
        {
            "number": ["7+" if b == max_bucket else str(b) for b in buckets],
            "synthetic Count": [totals[b][0] for b in buckets],
            "actual Count": [totals[b][1] for b in buckets],
        },
        index=buckets,
    )

    # Convert absolute counts into percentages for plotting.
    for column in ("synthetic Count", "actual Count"):
        counts[column] = counts[column] / counts[column].sum() * 100

    title_plot = "Synthetic and HTS activity counts comparison"
    title_figure = "activitycounts"
    if suffix:
        title_plot += " - " + suffix
        title_figure += "_" + suffix
    title_figure += ".png"

    myplottools.plot_comparison_bar(
        context,
        imtitle=title_figure,
        plottitle=title_plot,
        ylabel="Percentage",
        xlabel="Number of activities in the activity chain",
        lab=counts["number"],
        actual=counts["actual Count"],
        synthetic=counts["synthetic Count"],
    )
def activity_chains_comparison(context, all_CC, suffix=None):
    """Plot the share of every activity chain, synthetic versus actual.

    Parameters
    ----------
    context : object
        Passed through unchanged to ``myplottools.plot_comparison_bar``.
    all_CC : pandas.DataFrame
        One row per activity chain with the columns ``Chain``,
        ``synthetic Count`` and ``actual Count``.  The frame is not modified.
    suffix : str, optional
        When truthy, appended to both the plot title and the image file name.
    """
    # Work on a copy: converting the counts to percentages must not overwrite
    # the caller's raw counts, which may be reused for other comparisons.
    shares = all_CC.copy()
    for column in ("synthetic Count", "actual Count"):
        shares[column] = shares[column] / shares[column].sum() * 100

    # Most frequent chains (in the actual data) first.
    shares = shares.sort_values(by=['actual Count'], ascending=False)

    title_plot = "Synthetic and HTS activity chain comparison"
    title_figure = "activitychains"
    if suffix:
        title_plot += " - " + suffix
        title_figure += "_" + suffix
    title_figure += ".png"

    myplottools.plot_comparison_bar(
        context,
        imtitle=title_figure,
        plottitle=title_plot,
        ylabel="Percentage",
        xlabel="Activity chain",
        lab=shares["Chain"],
        actual=shares["actual Count"],
        synthetic=shares["synthetic Count"],
    )
def pipeline_menwomen(df_syn, df_act_trips, df_act_persons, gender, context):
    """Produce the chain, mode-purpose and distance comparison plots for one gender.

    Parameters
    ----------
    df_syn : geopandas.GeoDataFrame (presumably - ``.geometry.length`` is used)
        Synthetic trips.  NOTE: a ``crowfly_distance`` column (in km, assuming
        the geometry is expressed in metres - TODO confirm) is added in place.
    df_act_trips, df_act_persons : pandas.DataFrame
        Actual trips and persons, handed to
        ``process_actual_activity_chain_counts``.
    gender : str
        Used as suffix of every image file name and plot title.
    context : object
        Passed through unchanged to the ``myplottools`` functions.
    """
    # ------------------------------------------------------------------
    # 1. Activity chains
    # ------------------------------------------------------------------
    CC = process_synthetic_activity_chain_counts(df_syn)
    act_CC, amdf = process_actual_activity_chain_counts(df_act_trips, df_act_persons)

    all_CC = CC.merge(act_CC, on="Chain", how="left")

    # Convert counts to percentages and put the most frequent chains first.
    for column in ("synthetic Count", "actual Count"):
        all_CC[column] = all_CC[column] / all_CC[column].sum() * 100
    all_CC = all_CC.sort_values(by=['actual Count'], ascending=False)

    myplottools.plot_comparison_bar(
        context,
        imtitle="activitychains_" + gender + ".png",
        plottitle="Synthetic and HTS activity chain comparison - " + gender,
        ylabel="Percentage",
        xlabel="Activity chain",
        lab=all_CC["Chain"],
        actual=all_CC["actual Count"],
        synthetic=all_CC["synthetic Count"],
    )

    # ------------------------------------------------------------------
    # 2. Mode / purpose distribution
    # ------------------------------------------------------------------
    syn_counts = df_syn.groupby(["mode", "destination_purpose"])["person_id"].count()
    syn = syn_counts / syn_counts.sum()

    which = ["car", "car_passenger", "pt", "bike", "walk"]
    # isin() instead of .loc[which]: a mode missing from this sub-population
    # must not raise a KeyError.  The weight column is selected before
    # summing so that unrelated columns are never aggregated.
    in_scope = amdf[amdf["mode"].isin(which)]
    atypes = in_scope.groupby(["mode", "destination_purpose"])["weight_person"].sum()
    act = atypes / atypes.sum()

    # Labels span the full mode x purpose grid of the synthetic data; both
    # series are aligned onto it so that labels and bars always match
    # (0 where a combination was not observed).
    label_index = pd.MultiIndex.from_product(syn_counts.index.levels)
    labels = [f"{mode} {purpose}" for mode, purpose in label_index]
    syn = syn.reindex(label_index, fill_value=0)
    act = act.reindex(label_index, fill_value=0)

    myplottools.plot_comparison_bar(
        context,
        imtitle="modepurpose_" + gender + ".png",
        plottitle="Synthetic and HTS Mode-Purpose Distribution - " + gender,
        ylabel="Percentage",
        xlabel="",
        lab=labels,
        actual=act.values.tolist(),
        synthetic=syn.values.tolist(),
        t=10,
        xticksrot=True,
    )

    # ------------------------------------------------------------------
    # 3. Crowfly distances
    # ------------------------------------------------------------------
    # The factor 0.001 converts to km, assuming coordinates are in metres.
    amdf["crowfly_distance"] = 0.001 * np.sqrt(
        (amdf["origin_x"] - amdf["destination_x"]) ** 2
        + (amdf["origin_y"] - amdf["destination_y"]) ** 2
    )
    df_syn["crowfly_distance"] = df_syn.geometry.length
    df_syn["crowfly_distance"] = df_syn["crowfly_distance"] * 0.001

    # Only crowfly distances shorter than 25 km are considered.
    max_distance_km = 25
    df2 = df_syn[df_syn["crowfly_distance"] < max_distance_km]
    # Copy the filtered slice before adding a column to it, so the assignment
    # is not a chained assignment on a view of ``amdf``.
    amdf2 = amdf[amdf["crowfly_distance"] < max_distance_km].copy()

    # Weighted mean distance per purpose for the actual data, plain mean for
    # the synthetic data.
    amdf2["x"] = amdf2["weight_person"] * amdf2["crowfly_distance"]
    sums = amdf2.groupby(["destination_purpose"])[["x", "weight_person"]].sum()
    act = sums["x"] / sums["weight_person"]
    syn = df2.groupby(["destination_purpose"])["crowfly_distance"].mean()

    myplottools.plot_comparison_bar(
        context,
        imtitle="distancepurpose_" + gender + ".png",
        plottitle="Crowfly distance - " + gender,
        ylabel="Mean crowfly distance [km]",
        xlabel="",
        lab=syn.index,
        actual=act,
        synthetic=syn,
        t=None,
        xticksrot=True,
    )

    # Histograms and CDFs, by purpose and by mode, all with the same binning.
    distribution_plots = (
        (myplottools.plot_comparison_hist_purpose, "distance_purpose_hist_"),
        (myplottools.plot_comparison_hist_mode, "distance_mode_hist_"),
        (myplottools.plot_comparison_cdf_purpose, "distance_purpose_cdf_"),
        (myplottools.plot_comparison_cdf_mode, "distance_mode_cdf_"),
    )
    for plot_function, prefix in distribution_plots:
        plot_function(
            context,
            prefix + gender + ".png",
            amdf2,
            df2,
            bins=np.linspace(0, max_distance_km, 120),
            dpi=300,
            cols=3,
            rows=2,
        )
def _select_sex_age(df, sex, min_age=18, max_age=40):
    """Return the rows of ``df`` with the given ``sex`` and ``min_age <= age <= max_age``."""
    mask = (df["sex"] == sex) & (df["age"] >= min_age) & (df["age"] <= max_age)
    return df[mask]


def _merged_chain_counts(df_syn, df_syn_no_trip, df_act, df_act_no_trip, df_aux):
    """Build the merged synthetic / actual activity chain count table.

    People without any trip are added as the stay-at-home chain ``"h"``:
    one per row on the synthetic side, weighted by ``weight_person`` on the
    actual side.
    """
    syn_CC = myutils.process_synthetic_activity_chain_counts(df_syn)
    syn_CC.loc[len(syn_CC) + 1] = pd.Series(
        {"Chain": "h", "synthetic Count": df_syn_no_trip.shape[0]}
    )

    act_CC = myutils.process_actual_activity_chain_counts(df_act, df_aux)
    act_CC.loc[len(act_CC) + 1] = pd.Series(
        {
            "Chain": "h",
            "actual Count": np.sum(df_act_no_trip["weight_person"].values.tolist()),
        }
    )

    # Left merge: only chains present in the synthetic data are kept.
    return pd.merge(syn_CC, act_CC, on="Chain", how="left")


def _compare_distance_by_purpose(context, df_syn, df_act, imtitle, plottitle, suffix=None):
    """Plot the mean crowfly distance per purpose, then all distance distributions."""
    df_syn_dist = compute_distances_synthetic(df_syn)
    df_act_dist = compute_distances_actual(df_act)

    # Weighted mean for the actual data, plain mean for the synthetic data.
    # Columns are selected before aggregating so unrelated columns are never
    # summed or averaged.
    df_act_dist["x"] = df_act_dist["weight_person"] * df_act_dist["crowfly_distance"]
    sums = df_act_dist.groupby(["destination_purpose"])[["x", "weight_person"]].sum()
    act = sums["x"] / sums["weight_person"]
    syn = df_syn_dist.groupby(["following_purpose"])["crowfly_distance"].mean()

    # The labels are always taken from the synthetic series being plotted.
    myplottools.plot_comparison_bar(
        context,
        imtitle=imtitle,
        plottitle=plottitle,
        ylabel="Mean crowfly distance [km]",
        xlabel="",
        lab=syn.index,
        actual=act,
        synthetic=syn,
        t=None,
        xticksrot=True,
    )

    if suffix is None:
        all_the_plot_distances(context, df_act_dist, df_syn_dist)
    else:
        all_the_plot_distances(context, df_act_dist, df_syn_dist, suffix=suffix)


def _plot_home_education_means(context, imtitle, df_syn, df_act, baseline, groups):
    """Plot average home-to-education distances for "All" plus each group.

    ``baseline`` is the ``(synthetic, actual, actual_weights)`` triple for the
    whole population.  ``groups`` is a sequence of ``(suffix, label, select)``
    where ``select(df)`` returns the boolean row mask of the group.
    """
    syn_0, act_0, act_w0 = baseline
    syn_means = [np.mean(syn_0)]
    act_means = [np.average(act_0, weights=act_w0)]
    labels = ["All"]

    for suffix, label, select in groups:
        syn, act, act_w = compare_dist_educ(
            context, df_syn[select(df_syn)], df_act[select(df_act)], suffix=suffix
        )
        syn_means.append(np.average(syn))
        act_means.append(np.average(act, weights=act_w))
        labels.append(label)

    myplottools.plot_comparison_bar(
        context,
        imtitle,
        "Average distances from home to education",
        "Average distance [km]",
        "Population group",
        labels,
        act_means,
        syn_means,
    )


def execute(context):
    """Run every synthetic-versus-actual comparison and produce the plots.

    The steps are: activity chains, mode / purpose distribution and crowfly
    distances for the whole population; the same three for men and women aged
    18 to 40; and average home-to-education distances by age class, residence
    area and gender.

    Parameters
    ----------
    context : object
        Passed through unchanged to the data import and plotting functions.
    """
    df_syn, df_syn_no_trip = import_data_synthetic(context)
    df_act, df_act_no_trip = import_data_actual(context)
    df_aux = aux_data_frame(df_act)

    # 1. ACTIVITY CHAINS
    all_CC = _merged_chain_counts(df_syn, df_syn_no_trip, df_act, df_act_no_trip, df_aux)
    activity_chains_comparison(context, all_CC)
    activity_counts_comparison(context, all_CC)
    activity_counts_per_purpose(context, all_CC)

    # 2. MODE AND DESTINATION PURPOSE
    mode_purpose_comparison(context, df_syn, df_act)

    # 3. CROWFLY DISTANCES
    _compare_distance_by_purpose(
        context, df_syn, df_act, "distancepurpose.png", "Crowfly distance"
    )

    # 3.4 Distance from home to education (whole population baseline)
    syn_0, act_0, act_w0 = compare_dist_educ(context, df_syn, df_act)
    baseline = (syn_0, act_0, act_w0)

    # 4. Same comparisons for men and women separately, aged 18 to 40
    # 4.1 Create the sub-populations
    subsets = []
    for suffix, sex in (("men", "male"), ("women", "female")):
        act_subset = _select_sex_age(df_act, sex)
        subsets.append(
            {
                "suffix": suffix,
                "syn": _select_sex_age(df_syn, sex),
                "syn_no_trip": _select_sex_age(df_syn_no_trip, sex),
                "act": act_subset,
                "aux": aux_data_frame(act_subset),
                "act_no_trip": _select_sex_age(df_act_no_trip, sex),
            }
        )

    # 4.2 Activity chains
    for subset in subsets:
        subset["all_CC"] = _merged_chain_counts(
            subset["syn"],
            subset["syn_no_trip"],
            subset["act"],
            subset["aux"],
        ) if False else _merged_chain_counts(
            subset["syn"],
            subset["syn_no_trip"],
            subset["act"],
            subset["act_no_trip"],
            subset["aux"],
        )
    for subset in subsets:
        activity_chains_comparison(context, subset["all_CC"], subset["suffix"])
    for subset in subsets:
        activity_counts_comparison(context, subset["all_CC"], subset["suffix"])
    for subset in subsets:
        activity_counts_per_purpose(context, subset["all_CC"], subset["suffix"])

    # 4.3 Mode-purpose comparison
    for subset in subsets:
        mode_purpose_comparison(context, subset["syn"], subset["act"], subset["suffix"])

    # 4.4 Distance-purpose comparison
    for subset in subsets:
        suffix = subset["suffix"]
        _compare_distance_by_purpose(
            context,
            subset["syn"],
            subset["act"],
            "distancepurpose_" + suffix + ".png",
            "Crowfly distances - " + suffix,
            suffix=suffix,
        )

    # 5. Distance from home to education according to age
    age_groups = []
    for low, high in [(0, 14), (15, 18), (19, 24), (25, 1000)]:
        # 1000 is the open-ended upper bound of the last class.
        label = "25 +" if high == 1000 else str(low) + " to " + str(high) + " y. o."
        age_groups.append(
            (
                "aged " + str(low) + " to " + str(high),
                label,
                # Bind the bounds as defaults so each selector keeps its own.
                lambda df, low=low, high=high: (df["age"] >= low) & (df["age"] <= high),
            )
        )
    _plot_home_education_means(
        context, "avdisthomeeduc - age.png", df_syn, df_act, baseline, age_groups
    )

    # 6. Distance from home to education according to residence area
    # The suffix strings (including their double space) are kept verbatim
    # because they are handed on to compare_dist_educ.
    area_groups = [
        (
            suffix,
            label,
            lambda df, area=area: df["residence_area_index"] == area,
        )
        for area, suffix, label in (
            (1, "agents living in  rest of the state", "rest of the state"),
            (2, "agents living in  city", "city of Sao Paulo"),
            (3, "agents living in  downtown", "downtown"),
        )
    ]
    _plot_home_education_means(
        context, "avdisthomeeduc - area.png", df_syn, df_act, baseline, area_groups
    )

    # 7. Distance from home to education according to gender
    gender_groups = [
        (gender, gender, lambda df, gender=gender: df["sex"] == gender)
        for gender in ("male", "female")
    ]
    _plot_home_education_means(
        context, "avdisthomeeduc - gender.png", df_syn, df_act, baseline, gender_groups
    )
def activity_counts_per_purpose(context, all_CC, suffix=None):
    """Plot how often each purpose occurs a given number of times within a chain.

    For every chain, the activities between the first and the last one are
    inspected.  Each purpose occurring in them feeds one bucket: ``"<p> - 1
    time"``, ``"<p> - 2 times"`` (only for ``h``, ``w`` and ``e``) or
    ``"Other"`` for every remaining case.  A chain containing several purposes
    contributes its counts to several buckets.

    Parameters
    ----------
    context : object
        Passed through unchanged to ``myplottools.plot_comparison_bar``.
    all_CC : pandas.DataFrame
        One row per activity chain with the columns ``Chain`` (activities
        joined by ``-``), ``synthetic Count`` and ``actual Count``.  NaN
        counts are treated as 0.
    suffix : str, optional
        When truthy, appended to both the plot title and the image file name.
    """
    purposes = ['h', 'w', 'e', 's', 'l', 'o']
    # Purposes for which a double occurrence gets its own bucket.
    own_double_bucket = ('h', 'w', 'e')
    other = "Other"

    totals = {}
    for record in all_CC.to_dict('records'):
        chain = record["Chain"]
        if chain in ("-", "h"):
            continue  # nothing between the first and the last activity

        syn_count = record["synthetic Count"]
        act_count = record["actual Count"]
        if np.isnan(syn_count):
            syn_count = 0
        if np.isnan(act_count):
            act_count = 0

        inner_activities = chain.split("-")[1:-1]
        for purpose in purposes:
            occurrences = inner_activities.count(purpose)
            if occurrences == 0:
                continue

            if occurrences >= 3 or (occurrences == 2 and purpose not in own_double_bucket):
                identifier = other
            elif occurrences == 1:
                identifier = purpose + " - 1 time"
            else:
                identifier = purpose + " - 2 times"

            bucket = totals.setdefault(identifier, [0, 0])
            bucket[0] += syn_count
            bucket[1] += act_count

    identifiers = list(totals)
    counts = pd.DataFrame(
        {
            "number": identifiers,
            "synthetic Count": [totals[key][0] for key in identifiers],
            "actual Count": [totals[key][1] for key in identifiers],
        },
        index=identifiers,
    )

    # Convert absolute counts into percentages and sort by actual share.
    for column in ("synthetic Count", "actual Count"):
        counts[column] = counts[column] / counts[column].sum() * 100
    counts = counts.sort_values(by=['actual Count'], ascending=False)

    # Keep the catch-all bucket at the very end - but only when it exists:
    # dropping a label that is absent from the index raises a KeyError.
    if other in counts.index:
        order = counts.index.drop(other).tolist() + [other]
        counts = counts.reindex(order)

    title_plot = "Activity counts per purpose comparison"
    title_figure = "activitycountspurpose"
    if suffix:
        title_plot += " - " + suffix
        title_figure += "_" + suffix
    title_figure += ".png"

    myplottools.plot_comparison_bar(
        context,
        imtitle=title_figure,
        plottitle=title_plot,
        ylabel="Percentage",
        xlabel="Activities with the same purpose in the activity chain",
        lab=counts["number"],
        actual=counts["actual Count"],
        synthetic=counts["synthetic Count"],
        t=20,
    )