def run():
    """Summarise per-(month, driver) airport-trip fares for Y09 and Y10.

    Reads the two airport-trip CSVs, drops trips with no matched driver
    (did == -1), sums fares per (mm, did) group, converts cent amounts to
    dollars, and pickles the two per-year fare lists.
    """
    check_dir_create(summary_dir)
    #
    Y09 = pd.read_csv("%s/%s" % (airport_trips_dir, "Y09-airport-trip.csv"))
    Y10 = pd.read_csv("%s/%s" % (airport_trips_dir, "Y10-airport-trip.csv"))
    # did == -1 marks trips that could not be attributed to a driver
    Y09 = Y09[(Y09["did"] != -1)]
    Y10 = Y10[(Y10["did"] != -1)]
    #
    Y09_mm_did_gb, Y10_mm_did_gb = Y09.groupby(["mm", "did"]), Y10.groupby(["mm", "did"])
    # Iterate the fare Series directly; the intermediate list() copy was
    # unnecessary.  NOTE(review): assumes CENT (or the fare column) is a
    # float so "/" is true division under Python 2 -- confirm.
    Y09_ap_fares = [x / CENT for x in Y09_mm_did_gb.sum()["fare"]]
    Y10_ap_fares = [x / CENT for x in Y10_mm_did_gb.sum()["fare"]]
    #
    save_pickle_file(driver_monthly_fare_ap_trips, [Y09_ap_fares, Y10_ap_fares])
def run():
    """Pickle per-(month, driver) airport-trip fare totals, in dollars."""
    check_dir_create(summary_dir)
    #
    y09_trips = pd.read_csv('%s/%s' % (airport_trips_dir, 'Y09-airport-trip.csv'))
    y10_trips = pd.read_csv('%s/%s' % (airport_trips_dir, 'Y10-airport-trip.csv'))
    # keep only trips attributed to a known driver
    y09_trips = y09_trips[y09_trips['did'] != -1]
    y10_trips = y10_trips[y10_trips['did'] != -1]
    #
    fares_per_year = []
    for trips in (y09_trips, y10_trips):
        grouped_fare_sum = trips.groupby(['mm', 'did']).sum()['fare']
        fares_per_year.append([fare / CENT for fare in list(grouped_fare_sum)])
    y09_fares, y10_fares = fares_per_year
    #
    save_pickle_file(driver_monthly_fare_ap_trips, [y09_fares, y10_fares])
def run():
    """Collect per-driver total monthly fares (whole trips) for 2009/2010.

    Skips months 0912 and 1010 (excluded from the study), drops trips with
    no matched driver (did == -1), converts cent fares to dollars, and
    pickles the two per-year lists.
    """
    check_dir_create(summary_dir)
    #
    Y09_driver_total_monthly_fare, Y10_driver_total_monthly_fare = [], []
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = "%02d%02d" % (y, m)
            if yymm in ("0912", "1010"):  # months with no usable data
                continue
            trip_df = pd.read_csv("%s/whole-trip-%s.csv" % (trips_dir, yymm))
            trip_df = trip_df[(trip_df["did"] != -1)]
            # iterate the per-driver fare Series directly; the list() copy
            # was an unnecessary materialization
            fares = [x / CENT for x in trip_df.groupby(["did"]).sum()["fare"]]
            if y == 9:
                Y09_driver_total_monthly_fare += fares
            else:
                Y10_driver_total_monthly_fare += fares
    save_pickle_file(monthly_fare_summary,
                     [Y09_driver_total_monthly_fare, Y10_driver_total_monthly_fare])
def run():
    """Build the (measure, year) -> monthly-data table and run both analyses."""
    check_dir_create(tables_dir)
    write_text_file(ftd_overall_analysis, 'Init', True)
    #
    # Load the pickled monthly measures
    #
    gen_prod09, gen_prod10 = load_picle_file(ftd_general_prod_mb)
    ((apin_prod09, apin_eco09),
     (apin_prod10, apin_eco10),
     (apout_prod09, apout_eco09),
     (apout_prod10, apout_eco10)) = load_picle_file(ftd_ap_prod_eco_prof_mb)
    ((nsin_prod09, nsin_eco09),
     (nsin_prod10, nsin_eco10),
     (nsout_prod09, nsout_eco09),
     (nsout_prod10, nsout_eco10)) = load_picle_file(ftd_ns_prod_eco_prof_mb)
    #
    # (Measure, Year) -> data, built entry by entry
    measures = {}
    measures[('General prod.', 'Y09')] = gen_prod09
    measures[('General prod.', 'Y10')] = gen_prod10
    measures[('AP in prod.', 'Y09')] = apin_prod09
    measures[('AP in prod.', 'Y10')] = apin_prod10
    measures[('AP out prod.', 'Y09')] = apout_prod09
    measures[('AP out prod.', 'Y10')] = apout_prod10
    measures[('AP in eco.', 'Y09')] = apin_eco09
    measures[('AP in eco.', 'Y10')] = apin_eco10
    measures[('AP out eco.', 'Y09')] = apout_eco09
    measures[('AP out eco.', 'Y10')] = apout_eco10
    measures[('NS in prod.', 'Y09')] = nsin_prod09
    measures[('NS in prod.', 'Y10')] = nsin_prod10
    measures[('NS out prod.', 'Y09')] = nsout_prod09
    measures[('NS out prod.', 'Y10')] = nsout_prod10
    measures[('NS in eco.', 'Y09')] = nsin_eco09
    measures[('NS in eco.', 'Y10')] = nsin_eco10
    measures[('NS out eco.', 'Y09')] = nsout_eco09
    measures[('NS out eco.', 'Y10')] = nsout_eco10
    #
    both_year_comparison(measures)
    draw_month_change(measures)
def run():
    """Assemble the (measure, year) monthly table, then compare and plot."""
    check_dir_create(tables_dir)
    write_text_file(ftd_overall_analysis, 'Init', True)
    #
    # Load data
    #
    prod_gen_09, prod_gen_10 = load_picle_file(ftd_general_prod_mb)
    ap_pickle = load_picle_file(ftd_ap_prod_eco_prof_mb)
    (prod_apin_09, eco_apin_09), (prod_apin_10, eco_apin_10), \
        (prod_apout_09, eco_apout_09), (prod_apout_10, eco_apout_10) = ap_pickle
    ns_pickle = load_picle_file(ftd_ns_prod_eco_prof_mb)
    (prod_nsin_09, eco_nsin_09), (prod_nsin_10, eco_nsin_10), \
        (prod_nsout_09, eco_nsout_09), (prod_nsout_10, eco_nsout_10) = ns_pickle
    #
    # (Measure, Year): data
    table = {
        ('General prod.', 'Y09'): prod_gen_09,
        ('General prod.', 'Y10'): prod_gen_10,
        ('AP in prod.', 'Y09'): prod_apin_09,
        ('AP in prod.', 'Y10'): prod_apin_10,
        ('AP out prod.', 'Y09'): prod_apout_09,
        ('AP out prod.', 'Y10'): prod_apout_10,
        ('AP in eco.', 'Y09'): eco_apin_09,
        ('AP in eco.', 'Y10'): eco_apin_10,
        ('AP out eco.', 'Y09'): eco_apout_09,
        ('AP out eco.', 'Y10'): eco_apout_10,
        ('NS in prod.', 'Y09'): prod_nsin_09,
        ('NS in prod.', 'Y10'): prod_nsin_10,
        ('NS out prod.', 'Y09'): prod_nsout_09,
        ('NS out prod.', 'Y10'): prod_nsout_10,
        ('NS in eco.', 'Y09'): eco_nsin_09,
        ('NS in eco.', 'Y10'): eco_nsin_10,
        ('NS out eco.', 'Y09'): eco_nsout_09,
        ('NS out eco.', 'Y10'): eco_nsout_10,
    }
    #
    both_year_comparison(table)
    draw_month_change(table)
def run():
    """Gather per-driver total monthly fares (in dollars) for 2009 and 2010."""
    check_dir_create(summary_dir)
    #
    fares_by_year = {9: [], 10: []}
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:
                continue
            df = pd.read_csv('%s/whole-trip-%s.csv' % (trips_dir, yymm))
            # drop trips without a matched driver
            df = df[df['did'] != -1]
            per_driver_fare = df.groupby(['did']).sum()['fare']
            fares_by_year[y] += [fare / CENT for fare in list(per_driver_fare)]
    save_pickle_file(monthly_fare_summary,
                     [fares_by_year[9], fares_by_year[10]])
def run(): check_dir_create(summary_dir) # cur_timestamp = datetime.datetime(2008, 12, 31, 23) last_timestamp = datetime.datetime(2011, 1, 1, 0) hp_summary, time_period_order = {}, [] while cur_timestamp < last_timestamp: cur_timestamp += datetime.timedelta(hours=1) yyyy, mm, dd, hh = cur_timestamp.year, cur_timestamp.month, cur_timestamp.day, cur_timestamp.hour if yyyy == 2009 and mm == 12: continue if yyyy == 2010 and mm == 10: continue k = (str(yyyy - 2000), str(mm), str(dd), str(hh)) hp_summary[k] = [0 for _ in range(len([GEN_DUR, GEN_FARE, \ AP_DUR, AP_FARE, AP_QUEUE, \ NS_DUR, NS_FARE, NS_QUEUE]))] time_period_order.append(k) # yy_l, mm_l, dd_l, hh_l = 'yy', 'mm', 'dd', 'hh' # General for fn in get_all_files(general_dur_fare_dir, general_dur_fare_prefix, '.csv'): print fn with open('%s/%s' % (general_dur_fare_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h: i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[ hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][GEN_DUR] += eval(row[hid['gen-duration']]) hp_summary[k][GEN_FARE] += eval(row[hid['gen-fare']]) # Aiport for fn in get_all_files(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix, '.csv'): print fn with open('%s/%s' % (ap_dur_fare_q_time_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h: i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[ hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][AP_DUR] += eval(row[hid['ap-duration']]) hp_summary[k][AP_FARE] += eval(row[hid['ap-fare']]) hp_summary[k][AP_QUEUE] += eval(row[hid['ap-queue-time']]) # Night Safari for fn in get_all_files(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix, '.csv'): print fn with open('%s/%s' % (ns_dur_fare_q_time_dir, fn), 'rb') as 
r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h: i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[ hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][NS_DUR] += eval(row[hid['ns-duration']]) hp_summary[k][NS_FARE] += eval(row[hid['ns-fare']]) hp_summary[k][NS_QUEUE] += eval(row[hid['ns-queue-time']]) # Summary print 'summary' zero_dur = [] with open(hourly_productivities, 'wt') as w_csvfile: writer = csv.writer(w_csvfile) header = [ 'yy', 'mm', 'dd', 'hh', 'gen-duration', 'gen-fare', 'ap-duration', 'ap-fare', 'ap-queue-time', 'ns-duration', 'ns-fare', 'ns-queue-time', 'gen-productivity', 'ap-productivity', 'ap-out-productivity', 'ns-productivity', 'ns-out-productivity' ] writer.writerow(header) for k in time_period_order: gen_dur, gen_fare, \ ap_dur, ap_fare, ap_queue, \ ns_dur, ns_fare, ns_queue = hp_summary[k] yy, mm, dd, hh = k # try: gen_prod = gen_fare / gen_dur except ZeroDivisionError: gen_prod = -1 zero_dur.append([GENERAL, k]) try: ap_prod = ap_fare / (ap_dur + ap_queue) except ZeroDivisionError: ap_prod = -1 zero_dur.append([AIRPORT, k]) ap_out_prod = (gen_fare - ap_fare) / (gen_dur - (ap_dur + ap_queue)) try: ns_prod = ns_fare / (ns_dur + ns_queue) except ZeroDivisionError: ns_prod = -1 zero_dur.append([NIGHTSAFARI, k]) ns_out_prod = (gen_fare - ns_fare) / (gen_dur - (ns_dur + ns_queue)) # writer.writerow([ yy, mm, dd, hh, gen_dur, gen_fare, ap_dur, ap_fare, ap_queue, ns_dur, ns_fare, ns_queue, gen_prod, ap_prod, ap_out_prod, ns_prod, ns_out_prod ]) # save_pickle_file(zero_duration_time_slots, zero_dur)
def run():
    """Year-over-year (Y10 minus Y09) per-driver group analysis, AP and NS."""
    def delta(base, other):
        # per-key difference: other[k] - base[k], keyed by base's keys
        return dict((k, other[k] - v) for k, v in base.iteritems())

    check_dir_create(tables_dir)
    write_text_file(ftd_group_analysis, 'Init', True)
    #
    # Load data_20160826
    #
    gen_ap09, gen_ap10 = load_picle_file(ftd_general_prod_db_for_ap)
    gen_ns09, gen_ns10 = load_picle_file(ftd_general_prod_db_for_ns)
    ((apin09_prod, apin09_eco), (apin10_prod, apin10_eco),
     (apout09_prod, apout09_eco), (apout10_prod, apout10_eco)) = \
        load_picle_file(ftd_ap_prod_eco_prof_db)
    ((nsin09_prod, nsin09_eco), (nsin10_prod, nsin10_eco),
     (nsout09_prod, nsout09_eco), (nsout10_prod, nsout10_eco)) = \
        load_picle_file(ftd_ns_prod_eco_prof_db)
    #
    diffs_for_ap = {
        'diff_g_prod_for_ap': delta(gen_ap09, gen_ap10),
        #
        'diff_apin_prod': delta(apin09_prod, apin10_prod),
        'diff_apout_prod': delta(apout09_prod, apout10_prod),
        'diff_apin_eco': delta(apin09_eco, apin10_eco),
        'diff_apout_eco': delta(apout09_eco, apout10_eco),
    }
    diffs_for_ns = {
        'diff_g_prod_for_ns': delta(gen_ns09, gen_ns10),
        #
        'diff_nsin_prod': delta(nsin09_prod, nsin10_prod),
        'diff_nsout_prod': delta(nsout09_prod, nsout10_prod),
        'diff_nsin_eco': delta(nsin09_eco, nsin10_eco),
        'diff_nsout_eco': delta(nsout09_eco, nsout10_eco),
    }
    #
    write_text_file(
        ftd_group_analysis,
        'Airport analysis ---------------------------------------------------')
    an_analysis(diffs_for_ap)
    write_text_file(
        ftd_group_analysis,
        '\nNight safari analysis ---------------------------------------------------'
    )
    ns_analysis(diffs_for_ns)
def run(): check_dir_create(summary_dir) # cur_timestamp = datetime.datetime(2008, 12, 31, 23) last_timestamp = datetime.datetime(2011, 1, 1, 0) hp_summary, time_period_order = {}, [] while cur_timestamp < last_timestamp: cur_timestamp += datetime.timedelta(hours=1) yyyy, mm, dd, hh = cur_timestamp.year, cur_timestamp.month, cur_timestamp.day, cur_timestamp.hour if yyyy == 2009 and mm == 12: continue if yyyy == 2010 and mm == 10: continue k = (str(yyyy - 2000), str(mm), str(dd), str(hh)) hp_summary[k] = [0 for _ in range(len([GEN_DUR, GEN_FARE, \ AP_DUR, AP_FARE, AP_QUEUE, \ NS_DUR, NS_FARE, NS_QUEUE]))] time_period_order.append(k) # yy_l, mm_l, dd_l, hh_l = 'yy', 'mm', 'dd', 'hh' # General for fn in get_all_files(general_dur_fare_dir, general_dur_fare_prefix, '.csv'): print fn with open('%s/%s' % (general_dur_fare_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h : i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][GEN_DUR] += eval(row[hid['gen-duration']]) hp_summary[k][GEN_FARE] += eval(row[hid['gen-fare']]) # Aiport for fn in get_all_files(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix, '.csv'): print fn with open('%s/%s' % (ap_dur_fare_q_time_dir, fn), 'rb') as r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h : i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][AP_DUR] += eval(row[hid['ap-duration']]) hp_summary[k][AP_FARE] += eval(row[hid['ap-fare']]) hp_summary[k][AP_QUEUE] += eval(row[hid['ap-queue-time']]) # Night Safari for fn in get_all_files(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix, '.csv'): print fn with open('%s/%s' % (ns_dur_fare_q_time_dir, fn), 'rb') as 
r_csvfile: reader = csv.reader(r_csvfile) headers = reader.next() hid = {h : i for i, h in enumerate(headers)} for row in reader: yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]] k = (yy, mm, dd, hh) if not hp_summary.has_key(k): continue hp_summary[k][NS_DUR] += eval(row[hid['ns-duration']]) hp_summary[k][NS_FARE] += eval(row[hid['ns-fare']]) hp_summary[k][NS_QUEUE] += eval(row[hid['ns-queue-time']]) # Summary print 'summary' zero_dur = [] with open(hourly_productivities, 'wt') as w_csvfile: writer = csv.writer(w_csvfile) header = ['yy', 'mm', 'dd', 'hh', 'gen-duration', 'gen-fare', 'ap-duration', 'ap-fare', 'ap-queue-time', 'ns-duration', 'ns-fare', 'ns-queue-time', 'gen-productivity', 'ap-productivity', 'ap-out-productivity', 'ns-productivity', 'ns-out-productivity'] writer.writerow(header) for k in time_period_order: gen_dur, gen_fare, \ ap_dur, ap_fare, ap_queue, \ ns_dur, ns_fare, ns_queue = hp_summary[k] yy, mm, dd, hh = k # try: gen_prod = gen_fare / gen_dur except ZeroDivisionError: gen_prod = -1 zero_dur.append([GENERAL, k]) try: ap_prod = ap_fare / (ap_dur + ap_queue) except ZeroDivisionError: ap_prod = -1 zero_dur.append([AIRPORT, k]) ap_out_prod = (gen_fare - ap_fare) / (gen_dur - (ap_dur + ap_queue)) try: ns_prod = ns_fare / (ns_dur + ns_queue) except ZeroDivisionError: ns_prod = -1 zero_dur.append([NIGHTSAFARI, k]) ns_out_prod = (gen_fare - ns_fare) / (gen_dur - (ns_dur + ns_queue)) # writer.writerow([yy, mm, dd, hh, gen_dur, gen_fare, ap_dur, ap_fare, ap_queue, ns_dur, ns_fare, ns_queue, gen_prod, ap_prod, ap_out_prod, ns_prod, ns_out_prod]) # save_pickle_file(zero_duration_time_slots, zero_dur)