def run():
    a2_dir = charts_dir + '/b_aggregated_a2 number of immediate decision'
    check_dir_create(a2_dir)
    for LABEL, label, fn in [('AP', 'ap', ap_tm_num_dur_fare_fn),
                             ('NS', 'ns', ns_tm_num_dur_fare_fn)]:
        whole_trips = pd.read_csv(fn)
        Y09 = whole_trips[(whole_trips['yy'] == 9)]
        Y10 = whole_trips[(whole_trips['yy'] == 10)]
        #
        Y09_gb, Y10_gb = Y09.groupby(['trip-mode']), Y10.groupby(['trip-mode'])
        Y09_tm_num, Y10_tm_num = Y09_gb.sum()['total-num'], Y10_gb.sum()['total-num']
        _data = [[int(Y09_tm_num[DIn_PIn]), int(Y10_tm_num[DIn_PIn])],
                 [int(Y09_tm_num[DIn_POut]), int(Y10_tm_num[DIn_POut])]]
        #
        row_labels, col_labels = ['Pick up in %s' % LABEL, 'Pick up out %s' % LABEL], ['Y2009', 'Y2010']
        bar_table((8, 6), '', '', row_labels, col_labels, _data,
                  a2_dir + '/table_decision_at_%s' % label)
        #
        per_data1 = [_data[1][0] / float(_data[0][0] + _data[1][0]),
                     _data[0][0] / float(_data[0][0] + _data[1][0])]
        per_data2 = [_data[1][1] / float(_data[0][1] + _data[1][1]),
                     _data[0][1] / float(_data[0][1] + _data[1][1])]
        one_pie_chart('', per_data1, ['Pick up out %s' % LABEL, 'Pick up in %s' % LABEL],
                      a2_dir + '/Y2009_decision_at_%s' % label)
        one_pie_chart('', per_data2, ['Pick up out %s' % LABEL, 'Pick up in %s' % LABEL],
                      a2_dir + '/Y2010_decision_at_%s' % label)
        #
        two_pie_chart(['Pick up out %s' % LABEL, 'Pick up in %s' % LABEL],
                      "Y2009", per_data1, "Y2010", per_data2,
                      a2_dir + '/decision_at_%s' % label)
def run():
    a1_dir = charts_dir + '/a_overall_a1 fare and duration per trip'
    check_dir_create(a1_dir)
    for fn, l, x0_label in [(ap_tm_num_dur_fare_fn, 'ap', 'Airport'),
                            (ns_tm_num_dur_fare_fn, 'ns', 'Night safari')]:
        trip_df = pd.read_csv(fn)
        gb = trip_df.groupby('trip-mode')
        #
        # calculate statistics
        #
        in_num = gb.sum()['total-num'][DIn_PIn] + gb.sum()['total-num'][DOut_PIn]
        in_fare = (gb.sum()['total-fare'][DIn_PIn] + gb.sum()['total-fare'][DOut_PIn]) / float(CENT)
        in_dur = (gb.sum()['total-dur'][DIn_PIn] + gb.sum()['total-dur'][DOut_PIn]) / float(SEC60)
        #
        in_fare_per_trip = in_fare / float(in_num)
        in_dur_per_trip = in_dur / float(in_num)
        #
        out_num = gb.sum()['total-num'][DIn_POut] + gb.sum()['total-num'][DOut_POut]
        out_fare = (gb.sum()['total-fare'][DIn_POut] + gb.sum()['total-fare'][DOut_POut]) / float(CENT)
        out_dur = (gb.sum()['total-dur'][DIn_POut] + gb.sum()['total-dur'][DOut_POut]) / float(SEC60)
        #
        out_fare_per_trip = out_fare / float(out_num)
        out_dur_per_trip = out_dur / float(out_num)
        #
        # charts
        #
        _data = [in_fare_per_trip, out_fare_per_trip]
        simple_barchart([x0_label, 'Other areas'], 'S$', _data,
                        a1_dir + '/fare_per_trip_%s' % l)
        #
        _data = [in_dur_per_trip, out_dur_per_trip]
        simple_barchart([x0_label, 'Other areas'], 'Minute', _data,
                        a1_dir + '/dur_per_trip_%s' % l)
def run():
    check_dir_create(dpaths['baseline', '2009', 'countGraph'])
    #
    yyyy = '20%02d' % 9
    for tfZ_TP_fn in get_all_files(tfZ_TP_dpath, '%s%s*.csv' % (tfZ_TP_prefix, yyyy)):
        tfZ_TP_fpath = '%s/%s' % (tfZ_TP_dpath, tfZ_TP_fn)
        process_file(tfZ_TP_fpath)
def run():
    init_multiprocessor(11)
    count_num_jobs = 0
    #
    # for tm in ['spendingTime', 'roamingTime']:
    for tm in ['spendingTime']:
        for year in ['2009', '2010', '2011', '2012']:
            gt_dpath = dpaths[tm, year, 'groupTrips']
            gt_prefix = prefixs[tm, year, 'groupTrips']
            check_dir_create(gt_dpath)
            # gs_dpath = dpaths[tm, year, 'groupShifts']
            # for dpath in [gt_dpath, gs_dpath]:
            #     check_dir_create(dpath)
            #
            gp_dpath = dpaths[tm, year, 'groupPartition']
            gp_prefix = prefixs[tm, year, 'groupPartition']
            gp_summary_fpath = '%s/%ssummary.csv' % (gp_dpath, gp_prefix)
            #
            gs_df = pd.read_csv(gp_summary_fpath)
            for gn in gs_df['groupName'].values:
                igG = ig.Graph.Read_Pickle('%s/%s%s.pkl' % (gp_dpath, gp_prefix, gn))
                groupDrivers = set()
                for e in igG.es:
                    did0, did1 = [igG.vs[nIndex]['name'] for nIndex in e.tuple]
                    groupDrivers.add(did0)
                    groupDrivers.add(did1)
                # process_file(tm, year, gn, groupDrivers)
                put_task(process_file, [tm, year, gn, groupDrivers])
                count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
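# NOTE: the run() scripts in this repo lean on init_multiprocessor(), put_task()
# and end_multiprocessor() from taxi_common, whose source is not included in this
# excerpt.  The following is only a minimal sketch, assuming they wrap
# multiprocessing.Pool in the obvious way; the real implementation may differ.
import multiprocessing

_pool, _results = None, []

def init_multiprocessor(num_workers):
    # create a worker pool that put_task() will submit jobs to
    global _pool
    _pool = multiprocessing.Pool(processes=num_workers)

def put_task(func, args):
    # submit one asynchronous job; args is the argument list for func
    _results.append(_pool.apply_async(func, tuple(args)))

def end_multiprocessor(count_num_jobs):
    # block until all count_num_jobs submitted jobs have finished
    assert count_num_jobs == len(_results)
    for r in _results:
        r.get()
    _pool.close()
    _pool.join()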
def run():
    tm = 'spendingTime'
    # for year in ['2009', '2010', '2011', '2012']:
    for year in ['2009']:
        gds_dpath = dpaths[tm, year, 'groupDayStats']
        check_dir_create(gds_dpath)
        #
        process_file(tm, year)
def run():
    check_dir_create(statisticsAllDrivers_ap_dpath)
    #
    # find_intelligentDrivers()
    # gen_summary()
    #
    # gen_summary2010()
    only_1001()
def run():
    check_dir_create(statisticsAllDrivers_ap_dpath)
    #
    # process_tripBased()
    # filter_tripBased()
    #
    # process_dayBased()
    # filter_dayBased()
    #
    process_monthBased()
def run():
    a1_dir = charts_dir + '/b_aggregated_a1 monthly fare'
    check_dir_create(a1_dir)
    #
    Y09, Y10 = load_pickle_file(driver_monthly_fare_fn)
    num_bin = 50
    #
    print 't statistics %.3f, p-value %.3f' % (stats.ttest_ind(Y09, Y10, equal_var=False))
    #
    one_histogram((8, 6), '', 'Fare (S$)', 'Probability', num_bin, Y09,
                  a1_dir + '/Y2009_monthly_fares')
    one_histogram((8, 6), '', 'Fare (S$)', 'Probability', num_bin, Y10,
                  a1_dir + '/Y2010_monthly_fares')
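# Note on the t-test above: scipy's stats.ttest_ind() returns a
# (t-statistic, p-value) pair, which is why it can be fed straight into the
# two-placeholder format string; equal_var=False requests Welch's t-test,
# i.e. the 2009 and 2010 fare samples are not assumed to have equal variance.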
def run():
    ir = 'influenceGraph'
    #
    tm = 'spendingTime'
    for year in ['2009', '2010', '2011', '2012']:
        check_dir_create(dpaths[tm, year, ir])
    #
    yyyy = '20%02d' % 9
    for tfZ_TP_fn in get_all_files(tfZ_TP_dpath, '%s%s*.csv' % (tfZ_TP_prefix, yyyy)):
        tfZ_TP_fpath = '%s/%s' % (tfZ_TP_dpath, tfZ_TP_fn)
        process_file(tfZ_TP_fpath)
def run():
    ir = 'influenceGraph'
    #
    # for tm in ['spendingTime', 'roamingTime']:
    for tm in ['spendingTime']:
        for year in ['2009', '2010', '2011', '2012']:
            check_dir_create(dpaths[tm, year, ir])
        yyyy = '20%02d' % 9
        for tfZ_TP_fn in get_all_files(tfZ_TP_dpath, '%s%s*.csv' % (tfZ_TP_prefix, yyyy)):
            tfZ_TP_fpath = '%s/%s' % (tfZ_TP_dpath, tfZ_TP_fn)
            process_file(tfZ_TP_fpath)
def run():
    check_dir_create(statisticsAllDrivers_ns_dpath)
    #
    # process_tripBased()
    # filter_tripBased()
    # process_dayBased()
    filter_dayBased()
    # aggregate_monthBased()
    # aggregate_yearBased()
def run():
    check_dir_create(ss_trips_dpath)
    #
    init_multiprocessor(11)
    count_num_jobs = 0
    y = 9
    for m in range(1, 13):
        yymm = '%02d%02d' % (y, m)
        if yymm in ['0912', '1010']:
            continue
        put_task(process_month, [yymm])
        count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    check_dir_create(productivity_dir)
    #
    init_multiprocessor(11)
    count_num_jobs = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:
                continue
            # process_files('1007')
            put_task(process_files, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    check_dir_create(trips_dpath)
    #
    init_multiprocessor(11)
    count_num_jobs = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:  # both years data are corrupted
                continue
            put_task(tripMode_prevTripTime, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    check_dir_create(shift_pro_dur_dir)
    #
    init_multiprocessor(11)
    count_num_jobs = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:
                continue
            # process_file(yymm)
            put_task(process_file, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    a5_table_dir = tables_dir + '/c_individual_a5 multivariate regression'
    check_dir_create(a5_table_dir)
    for loc, stat_fn in [('ap', ftd_driver_stats_ap_fn),
                         ('ns', ftd_driver_stats_ns_fn)]:
        sys.stdout = open('%s/%s' % (a5_table_dir, 'mr-%s.txt' % loc), 'w')
        df = pd.read_csv(stat_fn)
        diff_df_cn = [cn for cn in df.columns.values if cn.startswith('diff')]
        diff_df = df[diff_df_cn]
        for i, cn in enumerate(diff_df_cn):
            other_cns = diff_df_cn[:]
            other_cns.pop(i)
            formula = '%s ~ ' % cn + ' + '.join(other_cns)
            est = smf.ols(formula, data=diff_df).fit()
            print est.summary()
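# Note on the regression loop above: for every 'diff*' column a patsy formula
# of the form 'diff_x ~ diff_y + diff_z + ...' is built, so statsmodels'
# smf.ols() regresses that column on all the remaining diff columns in turn,
# and each OLS summary is written to the per-location mr-*.txt file via the
# redirected sys.stdout.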
def run():
    check_dir_create(logs_last_day_dir)
    #
    init_multiprocessor(8)
    count_num_jobs = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:  # both years data_20160826 are corrupted
                continue
            # process_file(yymm)
            put_task(process_file, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    for dpath in [ssDriverTrip_dpath, ssDriverShiftProDur_dpath,
                  ssDriverEP_ap_dpath, ssDriverEP_ns_dpath]:
        check_dir_create(dpath)
    #
    init_multiprocessor(11)
    count_num_jobs = 0
    for y in xrange(10, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:
                continue
            # process_files(yymm)
            put_task(process_files, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    for dpath in [economicProfit_ap_dpath, economicProfit_ns_dpath]:
        check_dir_create(dpath)
    init_multiprocessor(6)
    count_num_jobs = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:
                continue
            # process_files(yymm)
            put_task(process_files, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    check_dir_create(logs_dir)
    #
    init_multiprocessor(8)
    count_num_jobs = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:  # both years data_20160826 are corrupted
                continue
            # process_files(yymm)
            put_task(process_file, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    check_dir_create(ap_crossing_dir)
    check_dir_create(ns_crossing_dir)
    #
    init_multiprocessor(11)
    count_num_jobs = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:  # both years data are corrupted
                continue
            # process_file(yymm)
            put_task(process_file, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    for dpath in [queueingTime_ap_dpath, queueingTime_ns_dpath]:
        check_dir_create(dpath)
    #
    init_multiprocessor(6)
    count_num_jobs = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:  # both years data are corrupted
                continue
            # process_file(yymm)
            put_task(process_file, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    for dpath in [
            # statisticsSsDrivers_ap_dpath,
            statisticsSsDrivers_ns_dpath
            ]:
        check_dir_create(dpath)
    #
    ssDrivers = set()
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:  # both years data are corrupted
                continue
            ssDrivers = ssDrivers.union(
                load_pickle_file('%s/%s%s.pkl' % (ss_drivers_dpath, ss_drivers_prefix, yymm)))
    #
    for all_dpath, ss_dpath in [
            # (statisticsAllDrivers_ap_dpath, statisticsSsDrivers_ap_dpath),
            (statisticsAllDrivers_ns_dpath, statisticsSsDrivers_ns_dpath)
            ]:
        for all_prefix, ss_prefix in [
                # (statisticsAllDriversDay_ap_prefix, statisticsSsDriversDay_ap_prefix),
                (statisticsAllDriversDay_ns1517_prefix, statisticsSsDriversDay_ns1517_prefix),
                (statisticsAllDriversDay_ns2023_prefix, statisticsSsDriversDay_ns2023_prefix),
                # (statisticsAllDriversMonth_ap_prefix, statisticsSsDriversMonth_ap_prefix),
                (statisticsAllDriversMonth_ns1517_prefix, statisticsSsDriversMonth_ns1517_prefix),
                (statisticsAllDriversMonth_ns2023_prefix, statisticsSsDriversMonth_ns2023_prefix),
                # (statisticsAllDriversTrip_ap_prefix, statisticsSsDriversTrip_ap_prefix),
                (statisticsAllDriversTrip_ns1517_prefix, statisticsSsDriversTrip_ns1517_prefix),
                (statisticsAllDriversTrip_ns2023_prefix, statisticsSsDriversTrip_ns2023_prefix),
                ]:
            for fn in get_all_files(all_dpath, '%s*' % all_prefix):
                period = fn[:-len('.csv')].split('-')[2]
                with open('%s/%s' % (all_dpath, fn), 'rt') as r_csvfile:
                    reader = csv.reader(r_csvfile)
                    header = reader.next()
                    hid = {h: i for i, h in enumerate(header)}
                    with open('%s/%s%s.csv' % (ss_dpath, ss_prefix, period), 'wt') as w_csvfile:
                        writer = csv.writer(w_csvfile)
                        writer.writerow(header)
                        for row in reader:
                            did = int(row[hid['driverID']])
                            if did not in ssDrivers:
                                continue
                            writer.writerow(row)
def run():
    a2_dir = charts_dir + '/a_overall_a2 statistics for time slots'
    check_dir_create(a2_dir)
    #
    whole_df = pd.read_csv(ap_tm_num_dur_fare_fn)
    ap_df = whole_df[(whole_df['trip-mode'] == DIn_PIn) | (whole_df['trip-mode'] == DOut_PIn)]
    whole_df = pd.read_csv(ns_tm_num_dur_fare_fn)
    ns_df = whole_df[(whole_df['trip-mode'] == DIn_PIn) | (whole_df['trip-mode'] == DOut_PIn)]
    for df, num_unit, num_chart_fn, fare_unit, fare_chart_fn in [
            (whole_df, 1000, a2_dir + '/timeslot_wh_num', 1000000, a2_dir + '/timeslot_wh_fare'),
            (ap_df, 1000, a2_dir + '/timeslot_ap_num', 1000, a2_dir + '/timeslot_ap_fare'),
            (ns_df, 1000, a2_dir + '/timeslot_ns_num', 1000, a2_dir + '/timeslot_ns_fare')]:
        df_gb = df.groupby(['hh', 'day-of-week'])
        #
        # Total number of trips
        #
        hour_dow_totalNumTrip = df_gb.sum()['total-num'].to_frame('total-num-trip').reset_index()
        xs = range(len(TIME_SLOTS))
        yss = [[0] * len(TIME_SLOTS) for _ in DAY_OF_WEEK]
        for hour, dow, totalNumTrip in hour_dow_totalNumTrip.values:
            yss[DAY_OF_WEEK.index(dow)][hour] += totalNumTrip / float(num_unit)
        #
        multiple_line_chart((12, 6), '', 'Time slot', 'Unit %s' % format(num_unit, ",d"),
                            (xs, 0), yss, DAY_OF_WEEK, 'upper left', num_chart_fn)
        #
        # Total fare of trips
        #
        hour_dow_totalFare = df_gb.sum()['total-fare'].to_frame('total-fare').reset_index()
        xs = range(len(TIME_SLOTS))
        yss = [[0] * len(TIME_SLOTS) for _ in DAY_OF_WEEK]
        for hour, dow, totalFare in hour_dow_totalFare.values:
            yss[DAY_OF_WEEK.index(dow)][hour] += (totalFare / float(CENT)) / float(fare_unit)
        #
        multiple_line_chart((12, 6), '', 'Time slot', 'S$ %s' % format(fare_unit, ",d"),
                            (xs, 0), yss, DAY_OF_WEEK, 'upper left', fare_chart_fn)
def run():
    for dpath in [log_dpath, log_last_day_dpath,
                  crossingTime_ap_dpath, crossingTime_ns_dpath]:
        check_dir_create(dpath)
    #
    init_multiprocessor(11)
    count_num_jobs = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:  # both years data_20160826 are corrupted
                continue
            put_task(log_location_labeling, [yymm])
            # put_task(log_last_day, [yymm])
            # put_task(process_file, [yymm])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    init_multiprocessor(6)
    count_num_jobs = 0
    tm = 'spendingTime'
    # for year in ['2009', '2010', '2011', '2012']:
    for year in ['2009']:
        gds_dpath = dpaths[tm, year, 'groupDriverStats']
        check_dir_create(gds_dpath)
        #
        gm_dpath = dpaths[tm, year, 'groupMarginal']
        gm_prefix = prefixs[tm, year, 'groupMarginal']
        for fn in get_all_files(gm_dpath, '%s*.csv' % gm_prefix):
            _, _, _, gn = fn[:-len('.csv')].split('-')
            # process_file(tm, year, gn)
            put_task(process_file, [tm, year, gn])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    for dpath in [productivity_dpath, shiftProDur_dpath]:
        check_dir_create(dpath)
    #
    # init_multiprocessor(11)
    # count_num_jobs = 0
    # for y in xrange(9, 11):
    #     for m in xrange(1, 13):
    #         yymm = '%02d%02d' % (y, m)
    #         if yymm in ['0912', '1010']:
    #             continue
    #         # process_file(yymm)
    #         # put_task(productive_duration, [yymm])
    #         put_task(process_files, [yymm])
    #         count_num_jobs += 1
    # end_multiprocessor(count_num_jobs)
    #
    summary()
def run():
    global a3_dir
    a3_dir = charts_dir + '/b_aggregated_a3 queueing time'
    check_dir_create(a3_dir)
    #
    # Airport
    #
    Y09, Y10 = pd.read_csv(Y09_ap_trips), pd.read_csv(Y10_ap_trips)
    draw_cumulative_histogram(Y09, Y10, 'ap')
    monthly_queueing_time_in_only(Y09, Y10, 'ap', TIME_SLOTS)
    monthly_queueing_time_num_trips(Y09, Y10, 'ap', TIME_SLOTS)
    #
    # Night safari
    #
    Y09, Y10 = pd.read_csv(Y09_ns_trips), pd.read_csv(Y10_ns_trips)
    Y09, Y10 = Y09[(Y09['hh'] > 18)], Y10[(Y10['hh'] > 18)]
    draw_cumulative_histogram(Y09, Y10, 'ns')
    monthly_queueing_time_in_only(Y09, Y10, 'ns', range(19, 24))
    monthly_queueing_time_num_trips(Y09, Y10, 'ns', range(19, 24))
def run():
    init_multiprocessor(6)
    count_num_jobs = 0
    for tm in ['spendingTime']:
        # for year in ['2009', '2010', '2011', '2012']:
        for year in ['2009']:
            gm_dpath = dpaths[tm, year, 'groupMarginal']
            check_dir_create(gm_dpath)
            #
            gp_dpath = dpaths[tm, year, 'groupPartition']
            gp_prefix = prefixs[tm, year, 'groupPartition']
            for fn in get_all_files(gp_dpath, '%s*.pkl' % gp_prefix):
                _, _, _, gn = fn[:-len('.pkl')].split('-')
                if gn == 'drivers' or gn == 'original':
                    continue
                # process_file(tm, year, gn)
                put_task(process_file, [tm, year, gn])
                count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    check_dir_create(tfZ_TP_dpath)
    numWorker = 6
    init_multiprocessor(numWorker)
    count_num_jobs = 0
    numReducers = numWorker * 10
    #
    yyyy = '20%02d' % 12
    logger.info('loading driversRelations %s' % yyyy)
    driversRelations = load_pickle_file(driversRelations_fpaths[yyyy])
    whole_drivers = driversRelations.keys()
    driver_subsets = [[] for _ in range(numReducers)]
    for i, did in enumerate(whole_drivers):
        driver_subsets[i % numReducers].append(did)
    for i, driver_subset in enumerate(driver_subsets):
        # process_files(yyyy, i, driver_subset, driversRelations)
        pickUp_drivers = set()
        for did1 in driver_subset:
            pickUp_drivers = pickUp_drivers.union(driversRelations[did1])
        put_task(process_files, [yyyy, i, driver_subset, pickUp_drivers])
        count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
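# Illustration of the round-robin partitioning used above: driver IDs are dealt
# out modulo numReducers, so each subset receives every numReducers-th driver.
# Toy values only; the real driver IDs come from the driversRelations pickle.
whole_drivers = range(10)          # e.g. 10 hypothetical driver IDs
numReducers = 3
driver_subsets = [[] for _ in range(numReducers)]
for i, did in enumerate(whole_drivers):
    driver_subsets[i % numReducers].append(did)
# driver_subsets == [[0, 3, 6, 9], [1, 4, 7], [2, 5, 8]]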
def run():
    init_multiprocessor(6)
    count_num_jobs = 0
    tm = 'spendingTime'
    # for tm in ['spendingTime', 'roamingTime']:
    # for year in ['2009', '2010', '2011', '2012']:
    for year in ['2009']:
        gz_dpath = dpaths[tm, year, 'groupZones']
        check_dir_create(gz_dpath)
        #
        gt_dpath = dpaths[tm, year, 'groupTrips']
        gt_prefix = prefixs[tm, year, 'groupTrips']
        for fn in get_all_files(gt_dpath, '%s*' % gt_prefix):
            if len(fn[:-len('.csv')].split('-')) != 4:
                continue
            _, _, _, gn = fn[:-len('.csv')].split('-')
            if gn == 'X':
                continue
            gt_fpath = '%s/%s' % (gt_dpath, fn)
            # process_file(tm, year, gt_fpath)
            put_task(process_file, [tm, year, gt_fpath])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
def run():
    init_multiprocessor(6)
    count_num_jobs = 0
    tm = 'baseline'
    # for tm in ['spendingTime', 'roamingTime']:
    # for year in ['2009', '2010', '2011', '2012']:
    for year in ['2009']:
        gz_dpath = dpaths[tm, year, 'groupZones']
        check_dir_create(gz_dpath)
        #
        gt_dpath = dpaths[tm, year, 'groupTrips']
        gt_prefix = prefixs[tm, year, 'groupTrips']
        for fn in get_all_files(gt_dpath, '%s*' % gt_prefix):
            if len(fn[:-len('.csv')].split('-')) != 4:
                continue
            _, _, _, gn = fn[:-len('.csv')].split('-')
            if gn == 'X':
                continue
            gt_fpath = '%s/%s' % (gt_dpath, fn)
            # process_file(tm, year, gt_fpath)
            put_task(process_file, [tm, year, gt_fpath])
            count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
import os, sys
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/../..')
#
from information_boards import taxi_data, summary_dir
from taxi_common.file_handling_functions import check_dir_create
#
trips_dpath, trip_prefix = '%s/%s' % (taxi_data, 'trips'), 'trip-'
#
overall_summary_dpath = '%s/%s' % (summary_dir, 'overall_analysis')
check_dir_create(overall_summary_dpath)
ap_tm_num_dur_fare_fpath = '%s/%s' % (overall_summary_dpath, 'ap-tm-num-dur-fare.csv')
ns_tm_num_dur_fare_fpath = '%s/%s' % (overall_summary_dpath, 'ns-tm-num-dur-fare.csv')
#
NUM, DUR, FARE = range(3)
def run(yymm):
    check_dir_create(prevDriversDefined_dpath)
    process_month(yymm)
from __future__ import division
import os
import sys
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/../..')
#
from taxi_common.file_handling_functions import check_dir_create
#
from information_boards.__init__ import taxi_data, summary_dir
from information_boards.old_codes.a_overall_analysis import trips_dpath

check_dir_create(taxi_data)
check_dir_create(summary_dir)
#
# Log
#
logs_dir, log_prefix = '%s/%s' % (taxi_data, 'logs'), 'log-'
logs_last_day_dir, log_last_day_prefix = '%s/%s' % (logs_dir, 'logs_last_day'), 'log-last-day-'
ap_crossing_dir, ap_crossing_prefix = '%s/%s' % (logs_dir, 'ap_crossing'), 'ap-crossing-time-'
ns_crossing_dir, ns_crossing_prefix = '%s/%s' % (logs_dir, 'ns_crossing'), 'ns-crossing-time-'
#
# Trip
#
ap_trips_dir, ap_trip_prefix = '%s/%s' % (trips_dpath, 'ap_trips'), 'ap-trip-'
ns_trips_dir, ns_trip_prefix = '%s/%s' % (trips_dpath, 'ns_trips'), 'ns-trip-'
#
# Economic profit
#
ep_dir = '%s/%s' % (taxi_data, 'trips_wEP')
ap_ep_dir, ap_ep_prefix = '%s/%s' % (ep_dir, 'ap_ep'), 'ap-ep-'
def run():
    print 'start'
    check_dir_create(com_dir)
    #
    yyyy = '2009'
    la_fn = '2009-CD(184)-N(7003)-E(5717371).pkl'
    la_fpath = '%s/%s' % (la_dir, la_fn)
    _, str_CD, _, _ = la_fn[:-len('.pkl')].split('-')
    CD = int(str_CD[len('CD('):-len(')')])
    print 'pickle file loading...'
    pairs_day_counting = load_pickle_file(la_fpath)
    print 'finished'
    for thD in [18, 36, 55, 73, 82, 92]:
        thD_dpath = '%s/%s' % (com_dir, '2009-CD(%d)-thD(%d)' % (CD, thD))
        check_dir_create(thD_dpath)
        summary_fpath = '%s/%s-CD(%d)-thD(%d)-community-summary.csv' % (thD_dpath, yyyy, CD, thD)
        glayout_fpath = '%s/%s-CD(%d)-thD(%d)-glayout.pkl' % (thD_dpath, yyyy, CD, thD)
        with open(summary_fpath, 'wb') as w_csvfile:
            writer = csv.writer(w_csvfile, lineterminator='\n')
            new_headers = ['com-name', 'num-nodes', 'num-edges',
                           'tie-strength(# of days encounter / # of drivers)']
            writer.writerow(new_headers)
        #
        nxG = nx.Graph()
        for (k0, k1), num_days in pairs_day_counting.iteritems():
            if num_days < thD:
                continue
            nxG.add_edge(k0, k1, weight=num_days)
        print 'Whole graph pickling ...', yyyy, CD, thD
        nx.write_gpickle(nxG, '%s/%s-CD(%d)-thD(%d)-whole-N(%d)-E(%d).pkl' %
                         (thD_dpath, yyyy, CD, thD, len(nxG.nodes()), len(nxG.edges())))
        n_label, n_comId = [], []
        nxId_igId = {}
        ig_nid = 0
        print 'Partitioning ...'
        partition = community.best_partition(nxG)
        for i, com in enumerate(set(partition.values())):
            list_nodes = [nodes for nodes in partition.keys() if partition[nodes] == com]
            print i, 'Saving sub-graph ...'
            sub_nxG = nxG.subgraph(list_nodes)
            com_name = 'COM(%d)' % i
            com_fpath = '%s/%s-CD(%d)-thD(%d)-%s-N(%d)-E(%d).pkl' % (
                thD_dpath, yyyy, CD, thD, com_name, len(sub_nxG.nodes()), len(sub_nxG.edges()))
            nx.write_gpickle(sub_nxG, com_fpath)
            _, _, weight = zip(*list(sub_nxG.edges_iter(data='weight', default=1)))
            num_nodes, num_edges = len(sub_nxG), len(weight)
            with open(summary_fpath, 'a') as w_csvfile:
                writer = csv.writer(w_csvfile, lineterminator='\n')
                writer.writerow([com_name, num_nodes, num_edges, sum(weight) / float(num_nodes)])
            #
            print i, 'labeling...'
            for n in sub_nxG.nodes():
                n_label.append(n)
                n_comId.append(i)
                nxId_igId[n] = ig_nid
                ig_nid += 1
        #
        if len(nxG.nodes()) < 1000:
            print 'Layout calculating...'
            print datetime.datetime.now()
            Edges = [(nxId_igId[n0], nxId_igId[n1]) for (n0, n1) in nxG.edges()]
            print 'finish edge converting', len(Edges)
            print datetime.datetime.now()
            igG = ig.Graph(Edges, directed=False)
            layt = igG.layout('kk', dim=3)
            print 'finish layout calculation'
            print datetime.datetime.now()
            #
            save_pickle_file(glayout_fpath, [n_label, n_comId, layt, Edges])
        else:
            save_pickle_file(glayout_fpath, [])
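# Minimal illustration of the Louvain partitioning step used above, assuming the
# python-louvain package (imported as `community`): best_partition() returns a
# {node: community_id} dict, which run() then inverts into per-community node
# lists.  The toy graph and node names below are made up for the example.
import networkx as nx
import community

g = nx.Graph()
g.add_edge('a', 'b', weight=5)
g.add_edge('b', 'c', weight=4)
g.add_edge('x', 'y', weight=7)
partition = community.best_partition(g)   # e.g. {'a': 0, 'b': 0, 'c': 0, 'x': 1, 'y': 1}
for com in set(partition.values()):
    print com, [n for n in partition if partition[n] == com]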
import csv, datetime

logger = get_logger()
numWorker = 6
numReducers = numWorker * 10
#
year = '20%02d' % 9
depVar = 'roamingTime'
# depVar = 'interTravelTime'
#
if_dpath = dpaths['prevDrivers']
if_prefixs = prefixs['prevDrivers']
of_dpath = dpaths[depVar, 'priorPresence']
of_prefixs = prefixs[depVar, 'priorPresence']
try:
    check_dir_create(of_dpath)
except OSError:
    pass


def run(moduloIndex):
    logger.info('loading driversRelations %s; %s' % (year, depVar))
    superSet_fpath = '%s/%sFiltered-superSet-%s%s.pkl' % (if_dpath, depVar, if_prefixs, year)
    driversRelations = load_pickle_file(superSet_fpath)
    whole_drivers = driversRelations.keys()
    driver_subsets = [[] for _ in range(numReducers)]
    for i, did in enumerate(whole_drivers):
        driver_subsets[i % numReducers].append(did)
    for i, driver_subset in enumerate(driver_subsets):
        if i % numWorker != moduloIndex:
            continue
logger = get_logger()
numWorker = 64
#
year = '20%02d' % 9
depVar = 'roamingTime'
# depVar = 'interTravelTime'
#
if_dpath = dpaths[depVar, 'individual']
if_prefix = prefixs[depVar, 'individual']
of_dpath = dpaths[depVar, 'indPartition']
of_prefix = prefixs[depVar, 'indPartition']
try:
    check_dir_create(of_dpath)
except OSError:
    pass


def run():
    gp_summary_fpath = '%s/%ssummary.csv' % (of_dpath, of_prefix)
    gp_original_fpath = '%s/%soriginal.pkl' % (of_dpath, of_prefix)
    gp_drivers_fpath = '%s/%sdrivers.pkl' % (of_dpath, of_prefix)
    #
    with open(gp_summary_fpath, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile, lineterminator='\n')
        writer.writerow(['groupName', 'numDrivers', 'numRelations', 'graphComplexity',
                         'tieStrength', 'contribution', 'benCon'])
import os, sys
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/..')
#
from taxi_common.__init__ import get_taxi_home_path
taxi_home = get_taxi_home_path()
#
from taxi_common.file_handling_functions import check_dir_create
taxi_data = os.path.dirname(os.path.realpath(__file__)) + '/data_20160826'
check_dir_create(taxi_data)
#
logs_dir = taxi_data + '/logs'
trips_dir = taxi_data + '/trips'
for _dir in [logs_dir, trips_dir]:
    check_dir_create(_dir)

FREE = 0
HOUR1, HOUR12 = 1, 12
grid_info_fn = 'hl_vl_zones.pkl'
#
get_processed_log_fn = lambda time_from, time_to: 'processed-log-%s-%s.csv' % (
    get_str_timeformat(time_from), get_str_timeformat(time_to))
get_processed_trip_fn = lambda time_from, time_to: 'processed-trip-%s-%s.csv' % (
    get_str_timeformat(time_from), get_str_timeformat(time_to))


def get_str_timeformat(time_tuple):
    return str(time_tuple[0]) + ''.join(['%02d' % d for d in time_tuple[1:]])
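# Usage example for the filename helpers above, assuming the time tuples are of
# the form (year, month, day, hour): each tuple is flattened to 'YYYYMMDDHH',
# so the call below prints 'processed-log-2009010100-2009013123.csv'.
print get_processed_log_fn((2009, 1, 1, 0), (2009, 1, 31, 23))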
import os, sys
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/..')
#
from taxi_common.__init__ import get_taxi_home_path
taxi_home = get_taxi_home_path()
#
from taxi_common.file_handling_functions import check_dir_create
taxi_data = os.path.dirname(os.path.realpath(__file__)) + '/z_data'
try:
    check_dir_create(taxi_data)
except OSError:
    pass

shift_dpath, shift_prefix = '/home/sfcheng/toolbox/results', 'shift-hour-state-'

dpaths, prefixs = {}, {}
for irName in ['roamingNinterTravel', 'prevDrivers', 'driverLog', 'driverTrip']:
    dpaths[irName] = '%s/%s' % (taxi_data, irName)
    prefixs[irName] = '%s-' % irName
for depVar in ['roamingTime', 'interTravelTime']:
    for irName in ['priorPresence', 'sigRelation', 'individual', 'indPartition',
                   'influenceGraph', 'graphPartition', 'comTrips', 'comEvolution']:
        dpaths[depVar, irName] = '%s/%s/%s' % (taxi_data, depVar, irName)
        prefixs[depVar, irName] = '%s-%s-' % (depVar, irName)
#
MON, TUE, WED, THR, FRI, SAT, SUN = range(7)
AM10, PM8 = 10, 20
MINUTES40 = 40 * 60