def process_file(fn): _, yymm = fn[:-len('.csv')].split('-') # print 'handle the file; %s' % yymm logging_msg('handle the file; %s' % yymm) vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not = {}, {} vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not = {}, {} if yymm not in ['0901', '1001', '1011']: path_to_last_day_csv_file = None temp_csv_files = get_all_files(log_last_day_dir, '', '.csv') prev_fn = None y, m = int(yymm[:2]), int(yymm[2:]) prev_m = m - 1 prev_yymm = '%02d%02d' % (y, prev_m) for temp_fn in temp_csv_files: if temp_fn.startswith('log-last-day-%s' % prev_yymm): prev_fn = temp_fn break assert prev_fn, yymm path_to_last_day_csv_file = '%s/%s' % (log_last_day_dir, prev_fn) vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not, vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not = \ record_crossing_time(path_to_last_day_csv_file, vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not, vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not) path_to_csv_file = '%s/%s' % (logs_dir, fn) vehicle_ap_crossing_time_from_out_to_in, _, vehicle_ns_crossing_time_from_out_to_in, _ = \ record_crossing_time(path_to_csv_file, vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not, vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not) # save_pickle_file('%s/ap-crossing-time-%s.pkl' % (logs_dir, yymm), vehicle_ap_crossing_time_from_out_to_in) save_pickle_file('%s/ns-crossing-time-%s.pkl' % (logs_dir, yymm), vehicle_ns_crossing_time_from_out_to_in) print 'end the file; %s' % yymm logging_msg('end the file; %s' % yymm)
def process_file(fn):
    """Extract full-time-driver shift rows for one month.

    `fn` is a shift CSV named '<a>-<b>-<yymm>.csv'.  Rows whose vehicle was
    driven by more than one driver are skipped; the remainder is written to
    shift-full-time-<yymm>.csv and the driver ids are pickled.
    """
    _, _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    # vehicle-id -> set of driver-ids, produced by the shift-pro-dur pass
    is_driver_vehicle = load_picle_file('%s/driver-vehicle-%s.pkl' % (shifts_dir, yymm))
    full_drivers = set()
    with open('%s/%s' % (shifts_dir, fn), 'rt') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_yy, id_mm, id_dd, id_hh = headers.index('yy'), headers.index('mm'), headers.index('dd'), headers.index('hh')
        id_vid, id_did = headers.index('vid'), headers.index('did')
        id_pro_dur, id_x_pro_dur = headers.index('pro-dur'), headers.index('x-pro-dur')
        with open('%s/shift-full-time-%s.csv' % (full_shift_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            new_headers = ['year', 'month', 'day', 'hour', 'vehicle-id', 'driver-id', 'productive-duration', 'x-productive-duration']
            writer.writerow(new_headers)
            for row in reader:
                # a vehicle shared by several drivers is not a full-time shift
                if len(is_driver_vehicle[row[id_vid]]) > 1:
                    continue
                writer.writerow([row[id_yy], row[id_mm], row[id_dd], row[id_hh], row[id_vid], row[id_did], row[id_pro_dur], row[id_x_pro_dur]])
                full_drivers.add(row[id_did])
    save_pickle_file('%s/full-time-drivers-%s.pkl' % (full_shift_dir, yymm), full_drivers)
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
def drivers():
    """Tally extreme drivers' start-location choices per state and pickle them.

    A state is (day-of-week, hour, previous-trip-end-location).  For each
    state the function counts how often the next trip started inside vs
    outside the airport and stores the two shares (as '%.2f' strings) in
    extreme_drivers_policy.pkl.
    """
    policies = {}
    for fn in get_all_files(for_full_driver_dir, 'diff-pin-eco-extreme-drivers-trip-', '.csv'):
        _, _, _, _, _, _, yymm = fn[:-len('.csv')].split('-')
        with open('%s/%s' % (for_full_driver_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            id_ptet, id_ptel = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
            id_sl = headers.index('start-location')
            for row in reader:
                prev_tetime_datetime = datetime.datetime.fromtimestamp(int(row[id_ptet]))
                s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
                s3 = row[id_ptel]
                if (s1, s2, s3) not in policies:
                    policies[(s1, s2, s3)] = [0, 0]
                i = index_IN_OUT_AP[row[id_sl]]
                policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # BUG FIX: v[0] / (v[0] + v[1]) is Python 2 *integer* division, which
        # truncated every share to 0.00 or 1.00; divide by a float instead.
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('extreme_drivers_policy.pkl', op_policies)
def run():
    """Pickle per-driver monthly airport-trip fares for 2009 and 2010."""
    check_dir_create(summary_dir)
    #
    # load each yearly airport-trip table and drop rows without a driver id
    frames = {}
    for tag, csv_name in (("Y09", "Y09-airport-trip.csv"), ("Y10", "Y10-airport-trip.csv")):
        df = pd.read_csv("%s/%s" % (airport_trips_dir, csv_name))
        frames[tag] = df[df["did"] != -1]
    #
    # sum fares per (month, driver) and convert from cents
    fares = {}
    for tag in ("Y09", "Y10"):
        grouped_fare = frames[tag].groupby(["mm", "did"]).sum()["fare"]
        fares[tag] = [fare / CENT for fare in list(grouped_fare)]
    #
    save_pickle_file(driver_monthly_fare_ap_trips, [fares["Y09"], fares["Y10"]])
def run():
    """Summarise each driver's monthly airport-trip fares and pickle the lists."""
    check_dir_create(summary_dir)
    # keep only trips with a known driver
    y09_df = pd.read_csv('%s/%s' % (airport_trips_dir, 'Y09-airport-trip.csv'))
    y09_df = y09_df[y09_df['did'] != -1]
    y10_df = pd.read_csv('%s/%s' % (airport_trips_dir, 'Y10-airport-trip.csv'))
    y10_df = y10_df[y10_df['did'] != -1]
    # total fare per (month, driver), cents converted via CENT
    y09_fares = [f / CENT for f in list(y09_df.groupby(['mm', 'did']).sum()['fare'])]
    y10_fares = [f / CENT for f in list(y10_df.groupby(['mm', 'did']).sum()['fare'])]
    save_pickle_file(driver_monthly_fare_ap_trips, [y09_fares, y10_fares])
def q_learning():
    """Aggregate monthly Q policies into per-state vote shares and pickle them.

    Every monthly Q pickle votes, per state (day-of-week, hour, in/out
    airport), for the action with the larger Q value.  The output maps each
    state to the ('%.2f', '%.2f') shares of IN_AP / OUT_AP votes.
    """
    policies_dir = for_learning_dir + '/%s' % ('ALPHA-0.10-GAMMA-0.50')
    policies = {}
    for fn in get_all_files(policies_dir, 'ALPHA-', '.pkl'):
        Qsa_value, _ = load_picle_file(policies_dir + '/%s' % fn)
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    if (s1, s2, s3) not in policies:
                        policies[(s1, s2, s3)] = [0, 0]
                    i = index_IN_OUT_AP[IN_AP] if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else index_IN_OUT_AP[OUT_AP]
                    policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # BUG FIX: the shares were computed with Python 2 integer division,
        # which truncated them to 0.00/1.00; divide by a float instead.
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('q_learning_policy.pkl', op_policies)
def process_files(yymm):
    """Compare extreme drivers' economic profit against the learned greedy policy.

    Replays diff-pin-eco-extreme-drivers-trip-<yymm>.csv, computing each
    trip's economic profit (fare minus the opportunity cost of the alternative
    action) and accumulating it over all trips ("whole") and over trips whose
    chosen action matches the greedy Q policy ("sub").  Saves
    [whole_rev, whole_count, sub_rev, sub_count] to comparision-<yymm>.pkl.
    """
    Qsa_value, state_action_fare_dur = load_picle_file('%s/ALPHA-0.10-GAMMA-0.50/ALPHA-0.10-GAMMA-0.50-q-value-fare-dur-%s.pkl'%(for_learning_dir,yymm))
    # greedy action per state derived from the learned Q values
    argmax_as = {}
    for s1 in DAY_OF_WEEK:
        for s2 in TIME_SLOTS:
            for s3 in [IN_AP, OUT_AP]:
                argmax_as[(s1, s2, s3)] = IN_AP if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else OUT_AP
    #
    whole_rev, sub_rev = 0, 0
    whole_count, sub_count = 0,0
    count = 0
    with open('%s/diff-pin-eco-extreme-drivers-trip-%s.csv' % (for_full_driver_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index('start-location'),
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        for row in reader:
            prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime])
            setup_time = stime - prev_tetime
            # state: (day of week, hour, previous trip end location)
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            # action actually taken: where the new trip started
            a = row[id_sloc]
            #
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            # opportunity cost: the alternative action's average fare rate
            # applied to the time this trip consumed (0 when no data yet)
            alter_a = OUT_AP if a == IN_AP else IN_AP
            if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * state_action_fare_dur[(s1, s2, s3, alter_a)][0] / state_action_fare_dur[(s1, s2, s3, alter_a)][1]
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1
            if argmax_as[(s1, s2, s3)] == a:
                sub_rev += economic_profit
                sub_count += 1
            count += 1
            if count % MOD_STAN == 0:
                print '%s, %d' % (yymm, count)
                logging_msg('%s, %d' % (yymm, count))
    save_pickle_file('%s/comparision-%s.pkl'%(for_full_driver_dir, yymm), [whole_rev, whole_count, sub_rev, sub_count])
def run():
    """Pickle each driver's total monthly fare, split into 2009 and 2010 lists."""
    check_dir_create(summary_dir)
    #
    Y09_driver_total_monthly_fare, Y10_driver_total_monthly_fare = [], []
    for y in xrange(9, 11):
        # alias the year's accumulator once instead of branching per month
        target = Y09_driver_total_monthly_fare if y == 9 else Y10_driver_total_monthly_fare
        for m in xrange(1, 13):
            yymm = "%02d%02d" % (y, m)
            if yymm in ["0912", "1010"]:  # months missing from the data set
                continue
            trip_df = pd.read_csv("%s/whole-trip-%s.csv" % (trips_dir, yymm))
            trip_df = trip_df[trip_df["did"] != -1]
            monthly_totals = trip_df.groupby(["did"]).sum()["fare"]
            for total in list(monthly_totals):
                target.append(total / CENT)
    save_pickle_file(monthly_fare_summary, [Y09_driver_total_monthly_fare, Y10_driver_total_monthly_fare])
def q_learning():
    """Aggregate monthly Q policies into per-state vote shares and pickle them.

    Every monthly Q pickle votes, per state (day-of-week, hour, in/out
    airport), for the action with the larger Q value.  The output maps each
    state to the ('%.2f', '%.2f') shares of IN_AP / OUT_AP votes.
    """
    policies_dir = for_learning_dir + '/%s' % ('ALPHA-0.10-GAMMA-0.50')
    policies = {}
    for fn in get_all_files(policies_dir, 'ALPHA-', '.pkl'):
        Qsa_value, _ = load_picle_file(policies_dir + '/%s' % fn)
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    if (s1, s2, s3) not in policies:
                        policies[(s1, s2, s3)] = [0, 0]
                    i = index_IN_OUT_AP[IN_AP] if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else index_IN_OUT_AP[OUT_AP]
                    policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # BUG FIX: the shares were computed with Python 2 integer division,
        # which truncated them to 0.00/1.00; divide by a float instead.
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('q_learning_policy.pkl', op_policies)
def drivers():
    """Tally extreme drivers' start-location choices per state and pickle them.

    A state is (day-of-week, hour, previous-trip-end-location).  For each
    state the function counts how often the next trip started inside vs
    outside the airport and stores the two shares (as '%.2f' strings) in
    extreme_drivers_policy.pkl.
    """
    policies = {}
    for fn in get_all_files(for_full_driver_dir, 'diff-pin-eco-extreme-drivers-trip-', '.csv'):
        _, _, _, _, _, _, yymm = fn[:-len('.csv')].split('-')
        with open('%s/%s' % (for_full_driver_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            id_ptet, id_ptel = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
            id_sl = headers.index('start-location')
            for row in reader:
                prev_tetime_datetime = datetime.datetime.fromtimestamp(int(row[id_ptet]))
                s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
                s3 = row[id_ptel]
                if (s1, s2, s3) not in policies:
                    policies[(s1, s2, s3)] = [0, 0]
                i = index_IN_OUT_AP[row[id_sl]]
                policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # BUG FIX: v[0] / (v[0] + v[1]) is Python 2 *integer* division, which
        # truncated every share to 0.00 or 1.00; divide by a float instead.
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('extreme_drivers_policy.pkl', op_policies)
def run():
    """Collect per-driver monthly fare totals for 2009/2010 and pickle them."""
    check_dir_create(summary_dir)
    #
    # accumulate one fare list per year, keyed by the two-digit year
    fares_by_year = {9: [], 10: []}
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            if yymm in ['0912', '1010']:  # months missing from the data set
                continue
            df = pd.read_csv('%s/whole-trip-%s.csv' % (trips_dir, yymm))
            df = df[df['did'] != -1]
            fares_by_year[y] += [x / CENT for x in list(df.groupby(['did']).sum()['fare'])]
    save_pickle_file(monthly_fare_summary, [fares_by_year[9], fares_by_year[10]])
def process_file(fn):
    """Condense one month's raw shift CSV into per-hour productive durations.

    Writes shift-pro-dur-<yymm>.csv with the summed productive-state duration
    per (hour, vehicle, driver) row, and pickles the vehicle -> {drivers} map
    used later to identify full-time (single-driver) vehicles.
    """
    _, _, _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    driver_vehicle = {}
    # status codes whose dur<code> columns count as productive time
    productive_state = ['dur%d' % x for x in [0, 3, 4, 5, 6, 7, 8, 9, 10]]
    with open('%s/%s' % (shifts_dir, fn), 'rt') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        hid = {h : i for i, h in enumerate(headers)}
        with open('%s/shift-pro-dur-%s.csv' % (shift_pro_dur_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            new_headers = ['yy', 'mm', 'dd', 'hh', 'vid', 'did', 'pro-dur']
            writer.writerow(new_headers)
            for row in reader:
                vid, did = row[hid['vehicle-id']], row[hid['driver-id']]
                productive_duration = sum(int(row[hid[dur]]) for dur in productive_state)
                # [-2:] keeps only the two-digit year
                writer.writerow([row[hid['year']][-2:], row[hid['month']], row[hid['day']], row[hid['hour']], vid, did, productive_duration])
                driver_vehicle.setdefault(vid, set()).add(did)
    save_pickle_file('%s/driver-vehicle-%s.pkl' % (shifts_dir, yymm), driver_vehicle)
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
def process_files(yymm, q_lerning_ended_dir):
    """Evaluate every not-yet-evaluated learned policy for one month.

    For each ALPHA/GAMMA directory without a results-<yymm>.pkl, load its Q
    table, derive the greedy policy, replay whole-trip-<yymm>.csv accumulating
    economic profit over all trips and over trips matching each policy, and
    save [whole_rev, whole_count, sub_rev, sub_count] per policy.
    """
    candi_pkl_files = []
    for dn in q_lerning_ended_dir:
        # skip directories whose result for this month already exists
        if os.path.exists('%s/%s/results-%s.pkl' % (for_learning_dir, dn, yymm)):
            continue
        candi_pkl_files.append('%s/%s/%s-q-value-fare-dur-%s.pkl' % (for_learning_dir, dn, dn, yymm))
    result_pkls = [os.path.dirname(pkl_path) + '/results-%s.pkl'% yymm for pkl_path in candi_pkl_files]
    #
    # greedy action per state, one dict per candidate policy
    list_argmax_as = []
    state_action_fare_dur = None  # retains the table of the *last* policy loaded
    for pkl_file_path in candi_pkl_files:
        Qsa_value, state_action_fare_dur = load_picle_file(pkl_file_path)
        argmax_as = {}
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    argmax_as[(s1, s2, s3)] = IN_AP if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else OUT_AP
        list_argmax_as.append(argmax_as)
    #
    whole_rev, whole_count = 0, 0
    list_sub_rev, list_sub_count = [0 for _ in xrange(len(candi_pkl_files))], [0 for _ in xrange(len(candi_pkl_files))]
    count = 0
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index('start-location'),
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        for row in reader:
            prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime])
            setup_time = stime - prev_tetime
            # state: (day of week, hour, previous trip end location)
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            # action actually taken: where the new trip started
            a = row[id_sloc]
            #
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            # opportunity cost based on the alternative action's fare rate.
            # NOTE(review): this uses the fare/duration table of the last
            # policy loaded above — confirm that is intended when several
            # candidate policies are evaluated at once.
            alter_a = OUT_AP if a == IN_AP else IN_AP
            if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * state_action_fare_dur[(s1, s2, s3, alter_a)][0] / state_action_fare_dur[(s1, s2, s3, alter_a)][1]
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1
            for i, argmax_as in enumerate(list_argmax_as):
                if argmax_as[(s1, s2, s3)] == a:
                    list_sub_rev[i] += economic_profit
                    list_sub_count[i] += 1
            count += 1
            if count % MOD_STAN == 0:
                print '%s, %d' % (yymm, count)
                logging_msg('%s, %d' % (yymm, count))
                # periodic checkpoint of the partial results
                for i in xrange(len(result_pkls)):
                    result_fn = result_pkls[i]
                    save_pickle_file(result_fn, [whole_rev, whole_count, list_sub_rev[i], list_sub_count[i]])
    # final results
    for i in xrange(len(result_pkls)):
        result_fn = result_pkls[i]
        save_pickle_file(result_fn, [whole_rev, whole_count, list_sub_rev[i], list_sub_count[i]])
def run():
    """Aggregate hourly durations/fares/queue times and write productivity rows.

    Builds an hourly slot table covering 2009-2010 (excluding 2009-12 and
    2010-10, which are missing from the data), accumulates general / airport /
    Night Safari figures from the per-month CSVs, then writes one productivity
    row per slot and pickles the slots that had zero duration.
    """
    check_dir_create(summary_dir)
    #
    # build the hourly time-slot table; keys are ('yy','mm','dd','hh') strings
    cur_timestamp = datetime.datetime(2008, 12, 31, 23)
    last_timestamp = datetime.datetime(2011, 1, 1, 0)
    hp_summary, time_period_order = {}, []
    while cur_timestamp < last_timestamp:
        cur_timestamp += datetime.timedelta(hours=1)
        yyyy, mm, dd, hh = cur_timestamp.year, cur_timestamp.month, cur_timestamp.day, cur_timestamp.hour
        if yyyy == 2009 and mm == 12:
            continue
        if yyyy == 2010 and mm == 10:
            continue
        k = (str(yyyy - 2000), str(mm), str(dd), str(hh))
        hp_summary[k] = [0 for _ in range(len([GEN_DUR, GEN_FARE, \
                                               AP_DUR, AP_FARE, AP_QUEUE, \
                                               NS_DUR, NS_FARE, NS_QUEUE]))]
        time_period_order.append(k)
    #
    yy_l, mm_l, dd_l, hh_l = 'yy', 'mm', 'dd', 'hh'
    # General
    for fn in get_all_files(general_dur_fare_dir, general_dur_fare_prefix, '.csv'):
        print fn
        with open('%s/%s' % (general_dur_fare_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h: i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k):
                    continue  # row falls in an excluded month
                hp_summary[k][GEN_DUR] += eval(row[hid['gen-duration']])
                hp_summary[k][GEN_FARE] += eval(row[hid['gen-fare']])
    # Aiport
    for fn in get_all_files(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix, '.csv'):
        print fn
        with open('%s/%s' % (ap_dur_fare_q_time_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h: i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k):
                    continue
                hp_summary[k][AP_DUR] += eval(row[hid['ap-duration']])
                hp_summary[k][AP_FARE] += eval(row[hid['ap-fare']])
                hp_summary[k][AP_QUEUE] += eval(row[hid['ap-queue-time']])
    # Night Safari
    for fn in get_all_files(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix, '.csv'):
        print fn
        with open('%s/%s' % (ns_dur_fare_q_time_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h: i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k):
                    continue
                hp_summary[k][NS_DUR] += eval(row[hid['ns-duration']])
                hp_summary[k][NS_FARE] += eval(row[hid['ns-fare']])
                hp_summary[k][NS_QUEUE] += eval(row[hid['ns-queue-time']])
    # Summary
    print 'summary'
    zero_dur = []
    with open(hourly_productivities, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = [
            'yy', 'mm', 'dd', 'hh',
            'gen-duration', 'gen-fare',
            'ap-duration', 'ap-fare', 'ap-queue-time',
            'ns-duration', 'ns-fare', 'ns-queue-time',
            'gen-productivity', 'ap-productivity', 'ap-out-productivity',
            'ns-productivity', 'ns-out-productivity'
        ]
        writer.writerow(header)
        for k in time_period_order:
            gen_dur, gen_fare, \
            ap_dur, ap_fare, ap_queue, \
            ns_dur, ns_fare, ns_queue = hp_summary[k]
            yy, mm, dd, hh = k
            # productivity = fare per unit of (duration + queueing); -1 marks
            # slots with zero duration, which are also collected in zero_dur
            try:
                gen_prod = gen_fare / gen_dur
            except ZeroDivisionError:
                gen_prod = -1
                zero_dur.append([GENERAL, k])
            try:
                ap_prod = ap_fare / (ap_dur + ap_queue)
            except ZeroDivisionError:
                ap_prod = -1
                zero_dur.append([AIRPORT, k])
            # NOTE(review): the *-out-productivity divisions below are not
            # guarded against a zero denominator, unlike the ones above —
            # confirm the denominators can never be zero here
            ap_out_prod = (gen_fare - ap_fare) / (gen_dur - (ap_dur + ap_queue))
            try:
                ns_prod = ns_fare / (ns_dur + ns_queue)
            except ZeroDivisionError:
                ns_prod = -1
                zero_dur.append([NIGHTSAFARI, k])
            ns_out_prod = (gen_fare - ns_fare) / (gen_dur - (ns_dur + ns_queue))
            #
            writer.writerow([
                yy, mm, dd, hh,
                gen_dur, gen_fare,
                ap_dur, ap_fare, ap_queue,
                ns_dur, ns_fare, ns_queue,
                gen_prod, ap_prod, ap_out_prod,
                ns_prod, ns_out_prod
            ])
    #
    save_pickle_file(zero_duration_time_slots, zero_dur)
def process_files(yymm, q_lerning_ended_dir):
    """Evaluate every not-yet-evaluated learned policy for one month.

    For each ALPHA/GAMMA directory without a results-<yymm>.pkl, load its Q
    table, derive the greedy policy, replay whole-trip-<yymm>.csv accumulating
    economic profit over all trips and over trips matching each policy, and
    save [whole_rev, whole_count, sub_rev, sub_count] per policy.
    """
    candi_pkl_files = []
    for dn in q_lerning_ended_dir:
        # skip directories whose result for this month already exists
        if os.path.exists('%s/%s/results-%s.pkl' % (for_learning_dir, dn, yymm)):
            continue
        candi_pkl_files.append('%s/%s/%s-q-value-fare-dur-%s.pkl' % (for_learning_dir, dn, dn, yymm))
    result_pkls = [
        os.path.dirname(pkl_path) + '/results-%s.pkl' % yymm
        for pkl_path in candi_pkl_files
    ]
    #
    # greedy action per state, one dict per candidate policy
    list_argmax_as = []
    state_action_fare_dur = None  # retains the table of the *last* policy loaded
    for pkl_file_path in candi_pkl_files:
        Qsa_value, state_action_fare_dur = load_picle_file(pkl_file_path)
        argmax_as = {}
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    argmax_as[(s1, s2, s3)] = IN_AP if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else OUT_AP
        list_argmax_as.append(argmax_as)
    #
    whole_rev, whole_count = 0, 0
    list_sub_rev, list_sub_count = [0 for _ in xrange(len(candi_pkl_files))], [
        0 for _ in xrange(len(candi_pkl_files))
    ]
    count = 0
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index('start-location'),
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        for row in reader:
            prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime])
            setup_time = stime - prev_tetime
            # state: (day of week, hour, previous trip end location)
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            # action actually taken: where the new trip started
            a = row[id_sloc]
            #
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            # opportunity cost based on the alternative action's fare rate.
            # NOTE(review): this uses the fare/duration table of the last
            # policy loaded above — confirm that is intended when several
            # candidate policies are evaluated at once.
            alter_a = OUT_AP if a == IN_AP else IN_AP
            if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * state_action_fare_dur[(s1, s2, s3, alter_a)][0] / state_action_fare_dur[(s1, s2, s3, alter_a)][1]
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1
            for i, argmax_as in enumerate(list_argmax_as):
                if argmax_as[(s1, s2, s3)] == a:
                    list_sub_rev[i] += economic_profit
                    list_sub_count[i] += 1
            count += 1
            if count % MOD_STAN == 0:
                print '%s, %d' % (yymm, count)
                logging_msg('%s, %d' % (yymm, count))
                # periodic checkpoint of the partial results
                for i in xrange(len(result_pkls)):
                    result_fn = result_pkls[i]
                    save_pickle_file(result_fn, [whole_rev, whole_count, list_sub_rev[i], list_sub_count[i]])
    # final results
    for i in xrange(len(result_pkls)):
        result_fn = result_pkls[i]
        save_pickle_file(result_fn, [whole_rev, whole_count, list_sub_rev[i], list_sub_count[i]])
def run():
    """Aggregate hourly durations/fares/queue times and write productivity rows.

    Builds an hourly slot table covering 2009-2010 (excluding 2009-12 and
    2010-10, which are missing from the data), accumulates general / airport /
    Night Safari figures from the per-month CSVs, then writes one productivity
    row per slot and pickles the slots that had zero duration.
    """
    check_dir_create(summary_dir)
    #
    # build the hourly time-slot table; keys are ('yy','mm','dd','hh') strings
    cur_timestamp = datetime.datetime(2008, 12, 31, 23)
    last_timestamp = datetime.datetime(2011, 1, 1, 0)
    hp_summary, time_period_order = {}, []
    while cur_timestamp < last_timestamp:
        cur_timestamp += datetime.timedelta(hours=1)
        yyyy, mm, dd, hh = cur_timestamp.year, cur_timestamp.month, cur_timestamp.day, cur_timestamp.hour
        if yyyy == 2009 and mm == 12:
            continue
        if yyyy == 2010 and mm == 10:
            continue
        k = (str(yyyy - 2000), str(mm), str(dd), str(hh))
        hp_summary[k] = [0 for _ in range(len([GEN_DUR, GEN_FARE, \
                                               AP_DUR, AP_FARE, AP_QUEUE, \
                                               NS_DUR, NS_FARE, NS_QUEUE]))]
        time_period_order.append(k)
    #
    yy_l, mm_l, dd_l, hh_l = 'yy', 'mm', 'dd', 'hh'
    # General
    for fn in get_all_files(general_dur_fare_dir, general_dur_fare_prefix, '.csv'):
        print fn
        with open('%s/%s' % (general_dur_fare_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h : i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k):
                    continue  # row falls in an excluded month
                hp_summary[k][GEN_DUR] += eval(row[hid['gen-duration']])
                hp_summary[k][GEN_FARE] += eval(row[hid['gen-fare']])
    # Aiport
    for fn in get_all_files(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix, '.csv'):
        print fn
        with open('%s/%s' % (ap_dur_fare_q_time_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h : i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k):
                    continue
                hp_summary[k][AP_DUR] += eval(row[hid['ap-duration']])
                hp_summary[k][AP_FARE] += eval(row[hid['ap-fare']])
                hp_summary[k][AP_QUEUE] += eval(row[hid['ap-queue-time']])
    # Night Safari
    for fn in get_all_files(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix, '.csv'):
        print fn
        with open('%s/%s' % (ns_dur_fare_q_time_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h : i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k):
                    continue
                hp_summary[k][NS_DUR] += eval(row[hid['ns-duration']])
                hp_summary[k][NS_FARE] += eval(row[hid['ns-fare']])
                hp_summary[k][NS_QUEUE] += eval(row[hid['ns-queue-time']])
    # Summary
    print 'summary'
    zero_dur = []
    with open(hourly_productivities, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = ['yy', 'mm', 'dd', 'hh',
                  'gen-duration', 'gen-fare',
                  'ap-duration', 'ap-fare', 'ap-queue-time',
                  'ns-duration', 'ns-fare', 'ns-queue-time',
                  'gen-productivity', 'ap-productivity', 'ap-out-productivity',
                  'ns-productivity', 'ns-out-productivity']
        writer.writerow(header)
        for k in time_period_order:
            gen_dur, gen_fare, \
            ap_dur, ap_fare, ap_queue, \
            ns_dur, ns_fare, ns_queue = hp_summary[k]
            yy, mm, dd, hh = k
            # productivity = fare per unit of (duration + queueing); -1 marks
            # slots with zero duration, which are also collected in zero_dur
            try:
                gen_prod = gen_fare / gen_dur
            except ZeroDivisionError:
                gen_prod = -1
                zero_dur.append([GENERAL, k])
            try:
                ap_prod = ap_fare / (ap_dur + ap_queue)
            except ZeroDivisionError:
                ap_prod = -1
                zero_dur.append([AIRPORT, k])
            # NOTE(review): the *-out-productivity divisions below are not
            # guarded against a zero denominator, unlike the ones above —
            # confirm the denominators can never be zero here
            ap_out_prod = (gen_fare - ap_fare) / (gen_dur - (ap_dur + ap_queue))
            try:
                ns_prod = ns_fare / (ns_dur + ns_queue)
            except ZeroDivisionError:
                ns_prod = -1
                zero_dur.append([NIGHTSAFARI, k])
            ns_out_prod = (gen_fare - ns_fare) / (gen_dur - (ns_dur + ns_queue))
            #
            writer.writerow([yy, mm, dd, hh,
                             gen_dur, gen_fare,
                             ap_dur, ap_fare, ap_queue,
                             ns_dur, ns_fare, ns_queue,
                             gen_prod, ap_prod, ap_out_prod,
                             ns_prod, ns_out_prod])
    #
    save_pickle_file(zero_duration_time_slots, zero_dur)
def process_file(ALPHA, GAMMA, ALPHA_GAMMA_dir, yymm):
    """Run one month of Q-learning over the whole-trip log.

    A state is (day-of-week, hour, in/out airport at the previous trip's end)
    and the action is the next trip's start location.  Q(s, a) is updated per
    trip with reward = fare - opportunity cost; learning continues from the
    previous month's pickle when one exists.  The updated Q table and the
    per-(state, action) fare/duration sums are pickled for <yymm>.
    """
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    print yymm
    # previous month to continue learning from; '0912' and '1010' are missing
    # from the data, hence the special cases
    if yymm == '0901':
        prev_yymm = None
    elif yymm == '1001':
        prev_yymm = '0911'
    elif yymm == '1011':
        prev_yymm = '1009'
    else:
        yy, mm = int(yymm[:2]), int(yymm[2:])
        prev_yymm = '%02d%02d' % (yy, mm - 1)
    #
    if not prev_yymm:
        # first month of data: initialise Q and the fare/duration sums to zero
        Qsa_value, state_action_fare_dur = {}, {}
        locations = [IN_AP, OUT_AP]
        actions = [IN_AP, OUT_AP]
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in locations:
                    for a in actions:
                        Qsa_value[(s1, s2, s3, a)] = 0
                        state_action_fare_dur[(s1, s2, s3, a)] = [0, 0]
    else:
        Qsa_value, state_action_fare_dur = load_picle_file('%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' % (ALPHA_GAMMA_dir, ALPHA, GAMMA, prev_yymm))
    #
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index('start-location'),
        id_etime, id_eloc = headers.index('end-time'), headers.index('end-location'),
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        #
        count = 0
        for row in reader:
            prev_tetime, stime, etime = eval(row[id_prev_tetime]), eval(row[id_stime]), eval(row[id_etime])
            setup_time = stime - prev_tetime
            # drop inconsistent rows and breaks longer than two hours
            if setup_time < 0 or HOUR * 2 < setup_time:
                continue
            # current state: when/where the previous trip ended
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            # next state: when/where this trip ended
            etime_datetime = datetime.datetime.fromtimestamp(etime)
            new_s1, new_s2 = etime_datetime.strftime("%a"), etime_datetime.hour
            new_s3 = row[id_eloc]
            # action: where this trip started
            a = row[id_sloc]
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            # accumulate fare and time spent for the (state, action) pair
            state_action_fare_dur[(s1, s2, s3, a)][0] += fare
            state_action_fare_dur[(s1, s2, s3, a)][1] += setup_time + dur
            # max over actions of Q(next state, action)
            if Qsa_value[(new_s1, new_s2, new_s3, IN_AP)] > Qsa_value[(new_s1, new_s2, new_s3, OUT_AP)] :
                future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3, IN_AP)]
            else:
                future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3, OUT_AP)]
            # reward: fare minus the opportunity cost of forgoing the
            # alternative action (its average fare rate times the time spent)
            alter_a = OUT_AP if a == IN_AP else IN_AP
            if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * state_action_fare_dur[(s1, s2, s3, alter_a)][0] / state_action_fare_dur[(s1, s2, s3, alter_a)][1]
            qrs = fare - op_cost + GAMMA * future_max_q_value
            # standard Q-learning update
            Qsa_value[(s1, s2, s3, a)] = \
                (1 - ALPHA) * Qsa_value[(s1, s2, s3, a)] + ALPHA * qrs
            count += 1
            if count % MOD_STAN == 0:
                print '%s, %d' % (yymm, count)
                logging_msg('%s, %d' % (yymm, count))
                # periodic checkpoint so a crash does not lose the month
                save_pickle_file('%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' % (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm), [Qsa_value, state_action_fare_dur])
    # final save for the month
    save_pickle_file('%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' % (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm), [Qsa_value, state_action_fare_dur])
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
def process_file(ALPHA, GAMMA, ALPHA_GAMMA_dir, yymm):
    """Run one month of Q-learning over the whole-trip log.

    A state is (day-of-week, hour, in/out airport at the previous trip's end)
    and the action is the next trip's start location.  Q(s, a) is updated per
    trip with reward = fare - opportunity cost; learning continues from the
    previous month's pickle when one exists.  The updated Q table and the
    per-(state, action) fare/duration sums are pickled for <yymm>.
    """
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    print yymm
    # previous month to continue learning from; '0912' and '1010' are missing
    # from the data, hence the special cases
    if yymm == '0901':
        prev_yymm = None
    elif yymm == '1001':
        prev_yymm = '0911'
    elif yymm == '1011':
        prev_yymm = '1009'
    else:
        yy, mm = int(yymm[:2]), int(yymm[2:])
        prev_yymm = '%02d%02d' % (yy, mm - 1)
    #
    if not prev_yymm:
        # first month of data: initialise Q and the fare/duration sums to zero
        Qsa_value, state_action_fare_dur = {}, {}
        locations = [IN_AP, OUT_AP]
        actions = [IN_AP, OUT_AP]
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in locations:
                    for a in actions:
                        Qsa_value[(s1, s2, s3, a)] = 0
                        state_action_fare_dur[(s1, s2, s3, a)] = [0, 0]
    else:
        Qsa_value, state_action_fare_dur = load_picle_file(
            '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' %
            (ALPHA_GAMMA_dir, ALPHA, GAMMA, prev_yymm))
    #
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index('start-location'),
        id_etime, id_eloc = headers.index('end-time'), headers.index('end-location'),
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        #
        count = 0
        for row in reader:
            prev_tetime, stime, etime = eval(row[id_prev_tetime]), eval(row[id_stime]), eval(row[id_etime])
            setup_time = stime - prev_tetime
            # drop inconsistent rows and breaks longer than two hours
            if setup_time < 0 or HOUR * 2 < setup_time:
                continue
            # current state: when/where the previous trip ended
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            # next state: when/where this trip ended
            etime_datetime = datetime.datetime.fromtimestamp(etime)
            new_s1, new_s2 = etime_datetime.strftime("%a"), etime_datetime.hour
            new_s3 = row[id_eloc]
            # action: where this trip started
            a = row[id_sloc]
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            # accumulate fare and time spent for the (state, action) pair
            state_action_fare_dur[(s1, s2, s3, a)][0] += fare
            state_action_fare_dur[(s1, s2, s3, a)][1] += setup_time + dur
            # max over actions of Q(next state, action)
            if Qsa_value[(new_s1, new_s2, new_s3, IN_AP)] > Qsa_value[(new_s1, new_s2, new_s3, OUT_AP)]:
                future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3, IN_AP)]
            else:
                future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3, OUT_AP)]
            # reward: fare minus the opportunity cost of forgoing the
            # alternative action (its average fare rate times the time spent)
            alter_a = OUT_AP if a == IN_AP else IN_AP
            if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * state_action_fare_dur[(s1, s2, s3, alter_a)][0] / state_action_fare_dur[(s1, s2, s3, alter_a)][1]
            qrs = fare - op_cost + GAMMA * future_max_q_value
            # standard Q-learning update
            Qsa_value[(s1, s2, s3, a)] = \
                (1 - ALPHA) * Qsa_value[(s1, s2, s3, a)] + ALPHA * qrs
            count += 1
            if count % MOD_STAN == 0:
                print '%s, %d' % (yymm, count)
                logging_msg('%s, %d' % (yymm, count))
                # periodic checkpoint so a crash does not lose the month
                save_pickle_file(
                    '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' %
                    (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm),
                    [Qsa_value, state_action_fare_dur])
    # final save for the month
    save_pickle_file(
        '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' %
        (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm),
        [Qsa_value, state_action_fare_dur])
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
def run():
    """Compute per-driver average productivities / economic profits and pickle them.

    Restricts to drivers present in both 2009 and 2010 in all three tables
    (general, previous-trip-ended-in-airport, previous-trip-ended-out-of-
    airport), averages each driver's productivity and economic profit,
    converts units (per-hour scaling, cents -> dollars), and saves everything
    to productivities_ext.pkl.
    """
    Y09_general = pd.read_csv('%s/Y09-individual-general.csv' % (individual_detail_dir))
    Y10_general = pd.read_csv('%s/Y10-individual-general.csv' % (individual_detail_dir))
    Y09_prev_in = pd.read_csv('%s/Y09-individual-prev-in-ap.csv' % (individual_detail_dir))
    Y10_prev_in = pd.read_csv('%s/Y10-individual-prev-in-ap.csv' % (individual_detail_dir))
    Y09_prev_out = pd.read_csv('%s/Y09-individual-prev-out-ap.csv' % (individual_detail_dir))
    Y10_prev_out = pd.read_csv('%s/Y10-individual-prev-out-ap.csv' % (individual_detail_dir))
    #
    # drop outlier productivities and non-positive opportunity costs
    Y09_prev_in = Y09_prev_in[(Y09_prev_in['ap-prod'] < PROD_LIMIT)]
    Y09_prev_in = Y09_prev_in[(0 < Y09_prev_in['op-cost'])]
    Y10_prev_in = Y10_prev_in[(Y10_prev_in['ap-prod'] < PROD_LIMIT)]
    Y10_prev_in = Y10_prev_in[(0 < Y10_prev_in['op-cost'])]
    #
    Y09_prev_out = Y09_prev_out[(Y09_prev_out['ap-prod'] < PROD_LIMIT)]
    Y09_prev_out = Y09_prev_out[(0 < Y09_prev_out['op-cost'])]
    Y10_prev_out = Y10_prev_out[(Y10_prev_out['ap-prod'] < PROD_LIMIT)]
    Y10_prev_out = Y10_prev_out[(0 < Y10_prev_out['op-cost'])]
    # both years: keep only drivers appearing in 2009 AND 2010 in every table
    Y09_general_did, Y10_general_did = set(Y09_general['did']), set(Y10_general['did'])
    general_both_years_full_drivers = Y09_general_did.intersection(Y10_general_did)
    Y09_prev_in_did, Y10_prev_in_did = set(Y09_prev_in['did']), set(Y10_prev_in['did'])
    prev_in_both_years_full_drivers = Y09_prev_in_did.intersection(Y10_prev_in_did)
    Y09_prev_out_did, Y10_prev_out_did = set(Y09_prev_out['did']), set(Y10_prev_out['did'])
    prev_out_both_years_full_drivers = Y09_prev_out_did.intersection(Y10_prev_out_did)
    subset_drivers = general_both_years_full_drivers.intersection(prev_in_both_years_full_drivers)
    subset_drivers = list(subset_drivers.intersection(prev_out_both_years_full_drivers))
    #
    Y09_general = Y09_general[Y09_general['did'].isin(subset_drivers)]
    Y10_general = Y10_general[Y10_general['did'].isin(subset_drivers)]
    Y09_prev_in = Y09_prev_in[Y09_prev_in['did'].isin(subset_drivers)]
    Y10_prev_in = Y10_prev_in[Y10_prev_in['did'].isin(subset_drivers)]
    Y09_prev_out = Y09_prev_out[Y09_prev_out['did'].isin(subset_drivers)]
    Y10_prev_out = Y10_prev_out[Y10_prev_out['did'].isin(subset_drivers)]
    #
    # general productivity: mean total-prod per driver, scaled to per-hour
    # and converted from cents
    Y09_general_gb, Y10_general_gb = Y09_general.groupby(['did']), Y10_general.groupby(['did'])
    Y09_driver_general_prod = Y09_general_gb.mean()['total-prod'].to_frame('avg_total_prod').reset_index()
    Y10_driver_general_prod = Y10_general_gb.mean()['total-prod'].to_frame('avg_total_prod').reset_index()
    Y09_driver_genprod_hour = {did : total_prod * HOUR / CENT for did, total_prod in Y09_driver_general_prod.values}
    Y10_driver_genprod_hour = {did : total_prod * HOUR / CENT for did, total_prod in Y10_driver_general_prod.values}
    #
    # previous-trip-ended-IN-airport: mean airport productivity and economic profit
    Y09_prev_in_gb, Y10_prev_in_gb = Y09_prev_in.groupby(['did']), Y10_prev_in.groupby(['did'])
    Y09_prev_in_driver_ap_prod = Y09_prev_in_gb.mean()['ap-prod'].to_frame('avg_ap_prod').reset_index()
    Y10_prev_in_driver_ap_prod = Y10_prev_in_gb.mean()['ap-prod'].to_frame('avg_ap_prod').reset_index()
    Y09_prev_in_driver_eco_pro = Y09_prev_in_gb.mean()['ap-eco-profit'].to_frame('avg_eco_pro').reset_index()
    Y10_prev_in_driver_eco_pro = Y10_prev_in_gb.mean()['ap-eco-profit'].to_frame('avg_eco_pro').reset_index()
    #
    Y09_pin_driver_aprod_hour = {did : ap_prod * HOUR / CENT for did, ap_prod in Y09_prev_in_driver_ap_prod.values}
    Y10_pin_driver_aprod_hour = {did : ap_prod * HOUR / CENT for did, ap_prod in Y10_prev_in_driver_ap_prod.values}
    Y09_pin_driver_epro_month = {did : eco_pro / CENT for did, eco_pro in Y09_prev_in_driver_eco_pro.values}
    Y10_pin_driver_epro_month = {did : eco_pro / CENT for did, eco_pro in Y10_prev_in_driver_eco_pro.values}
    #
    # previous-trip-ended-OUT-of-airport: same aggregation
    Y09_prev_out_gb, Y10_prev_out_gb = Y09_prev_out.groupby(['did']), Y10_prev_out.groupby(['did'])
    Y09_prev_out_driver_ap_prod = Y09_prev_out_gb.mean()['ap-prod'].to_frame('avg_ap_prod').reset_index()
    Y10_prev_out_driver_ap_prod = Y10_prev_out_gb.mean()['ap-prod'].to_frame('avg_ap_prod').reset_index()
    Y09_prev_out_driver_eco_pro = Y09_prev_out_gb.mean()['ap-eco-profit'].to_frame('avg_eco_pro').reset_index()
    Y10_prev_out_driver_eco_pro = Y10_prev_out_gb.mean()['ap-eco-profit'].to_frame('avg_eco_pro').reset_index()
    #
    Y09_pout_driver_aprod_hour = {did : ap_prod * HOUR / CENT for did, ap_prod in Y09_prev_out_driver_ap_prod.values}
    Y10_pout_driver_aprod_hour = {did : ap_prod * HOUR / CENT for did, ap_prod in Y10_prev_out_driver_ap_prod.values}
    Y09_pout_driver_epro_month = {did : eco_pro / CENT for did, eco_pro in Y09_prev_out_driver_eco_pro.values}
    Y10_pout_driver_epro_month = {did : eco_pro / CENT for did, eco_pro in Y10_prev_out_driver_eco_pro.values}
    save_pickle_file('%s/productivities_ext.pkl' % (individual_detail_dir),
                     [subset_drivers,
                      Y09_driver_genprod_hour, Y10_driver_genprod_hour,
                      Y09_pin_driver_aprod_hour, Y10_pin_driver_aprod_hour,
                      Y09_pout_driver_aprod_hour, Y10_pout_driver_aprod_hour,
                      Y09_pin_driver_epro_month, Y10_pin_driver_epro_month,
                      Y09_pout_driver_epro_month, Y10_pout_driver_epro_month
                      ])