def test():
    extreme_drivers_policy = load_picle_file('extreme_drivers_policy.pkl')
    q_learning_policy = load_picle_file('q_learning_policy.pkl')
    for s1 in DAY_OF_WEEK:
        for s2 in TIME_SLOTS:
            k = (s1, s2, IN_AP)
            if extreme_drivers_policy.has_key(k) and q_learning_policy.has_key(k):
                print k, extreme_drivers_policy[k], q_learning_policy[k]
    print ''
    for s1 in DAY_OF_WEEK:
        for s2 in TIME_SLOTS:
            k = (s1, s2, OUT_AP)
            if extreme_drivers_policy.has_key(k) and q_learning_policy.has_key(k):
                print k, extreme_drivers_policy[k], q_learning_policy[k] 
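These snippets all rely on the project's own pickle helpers, load_picle_file and save_pickle_file (the spelling comes from the project's supports.handling_pkl module and is kept as-is). Their implementation is not shown here; a minimal sketch, assuming they are thin wrappers around cPickle, could look like this:

# Hypothetical sketch of the pickle helpers imported from supports.handling_pkl;
# the real implementations are not part of these examples.
try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle

def load_picle_file(fpath):
    # load and return whatever object was stored in the pickle file
    with open(fpath, 'rb') as f:
        return pickle.load(f)

def save_pickle_file(fpath, obj):
    # persist obj to fpath using the highest available pickle protocol
    with open(fpath, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)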
Example #2
def process_files(yymm):
    #     Qsa_value, _ = load_picle_file('%s/q-value-fare-dur-%s.pkl' % (for_learning_dir, yymm))
    # NOTE: the hard-coded absolute path below ignores the yymm argument
    Qsa_value, _ = load_picle_file(
        '/Users/JerryHan88/taxi/for_learning/ALPHA-0.70-GAMMA-0.30/ALPHA-0.70-GAMMA-0.30-q-value-fare-dur-0904.pkl'
    )
    decision_inAP, decision_outAP = [], []
    for s1 in DAY_OF_WEEK:
        for s2 in TIME_SLOTS:
            for s3 in [IN_AP, OUT_AP]:
                #                 print (s1, s2, s3), Qsa_value[(s1, s2, s3, IN_AP)], Qsa_value[(s1, s2, s3, OUT_AP)]
                max_a = IN_AP if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(
                    s1, s2, s3, OUT_AP)] else OUT_AP
                c = 0 if max_a == IN_AP else 1
                if s3 == IN_AP:
                    decision_inAP.append([s1, s2, c])
                else:
                    decision_outAP.append([s1, s2, c])
    for s1, s2, a in decision_inAP:
        print 'inAP', s1, s2, a
    for s1, s2, a in decision_outAP:
        print 'outAP', s1, s2, a
    _data = [[(s2, id_dow[s1], a) for s1, s2, a in decision_inAP],
             [(s2, id_dow[s1], a) for s1, s2, a in decision_outAP]]
    #     titles = ['Decision in the airport', 'Decision outside of the airport']
    #     grid_charts(('24 Time slots', []), ('', DAY_OF_WEEK), ['In Airport', 'Out Airport'], titles, _data, '%s/q-value-fare-dur-%s.pdf' % (for_learning_dir, yymm))
    one_grid_chart(('Time slot', []), ('', DAY_OF_WEEK),
                   ['In Airport', 'Out Airport'], '',
                   [(s2, id_dow[s1], a)
                    for s1, s2, a in decision_inAP], 'decision_inAP_q_e')
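one_grid_chart is a project-specific plotting helper that is not defined in these snippets. Judging from its call sites, it takes an (x-label, x-ticks) pair, a (y-label, y-ticks) pair, legend labels, a title, a list of (x, y, category) tuples, and an output file name. A rough matplotlib-based sketch under those assumptions (the argument layout is inferred and may not match the original):

# Hypothetical stand-in for the project's one_grid_chart helper.
import matplotlib.pyplot as plt

def one_grid_chart(x_axis, y_axis, legend_labels, title, data, fname):
    # x_axis/y_axis are (label, tick-labels) pairs; data holds (x, y, category) tuples
    x_label, _ = x_axis
    y_label, y_ticks = y_axis
    fig, ax = plt.subplots()
    categories = list(legend_labels) + ['N/A']
    colors = ['blue', 'red', 'gray']
    for c, (color, label) in enumerate(zip(colors, categories)):
        xs = [x for x, y, a in data if a == c]
        ys = [y for x, y, a in data if a == c]
        if xs:
            ax.scatter(xs, ys, color=color, label=label, marker='s')
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    if y_ticks:
        ax.set_yticks(range(len(y_ticks)))
        ax.set_yticklabels(y_ticks)
    ax.set_title(title)
    ax.legend()
    fig.savefig('%s.pdf' % fname)
    plt.close(fig)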
Example #5
def process_file(fn):
    _, _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    is_driver_vehicle = load_picle_file('%s/driver-vehicle-%s.pkl' % (shifts_dir, yymm))
    full_drivers = set()
    with open('%s/%s' % (shifts_dir, fn), 'rt') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_yy, id_mm, id_dd, id_hh = headers.index('yy'), headers.index('mm'), headers.index('dd'), headers.index('hh')
        id_vid, id_did = headers.index('vid'), headers.index('did')
        id_pro_dur, id_x_pro_dur = headers.index('pro-dur'), headers.index('x-pro-dur')
        with open('%s/shift-full-time-%s.csv' % (full_shift_dir, yymm), 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            new_headers = ['year', 'month', 'day', 'hour', 'vehicle-id', 'driver-id', 'productive-duration', 'x-productive-duration']
            writer.writerow(new_headers)
            for row in reader:
                if len(is_driver_vehicle[row[id_vid]]) > 1:
                    continue
                writer.writerow([row[id_yy], row[id_mm], row[id_dd], row[id_hh], row[id_vid], row[id_did], row[id_pro_dur], row[id_x_pro_dur]])
                full_drivers.add(row[id_did])
    save_pickle_file('%s/full-time-drivers-%s.pkl' % (full_shift_dir, yymm), full_drivers)
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
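is_driver_vehicle presumably maps a vehicle id to the set of driver ids observed on it, so rows are kept only for vehicles with a single driver, i.e. the full-time drivers collected into full_drivers. A toy illustration of that filter, with made-up ids:

# Toy illustration of the single-driver filter above (ids are made up).
is_driver_vehicle = {'V100': set(['D1']), 'V200': set(['D2', 'D3'])}
for vid in ['V100', 'V200']:
    if len(is_driver_vehicle[vid]) > 1:
        print vid, 'skipped: shared vehicle'
    else:
        print vid, 'kept: single (full-time) driver'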
Example #6
def run():
    op_policies_q_learning = load_picle_file(
        '/Users/JerryHan88/git/workspace_SMU/taxi_py/learning/q_learning_policy.pkl'
    )
    op_policies_ext_drivers = load_picle_file(
        '/Users/JerryHan88/git/workspace_SMU/taxi_py/learning/extreme_drivers_policy.pkl'
    )

    decision_inAP_Q, decision_outAP_Q = [], []
    decision_inAP_D, decision_outAP_D = [], []
    for s1 in DAY_OF_WEEK:
        for s2 in TIME_SLOTS:
            for s3 in [IN_AP, OUT_AP]:
                p_IN_AP, p_OUT_AP = op_policies_q_learning[(s1, s2, s3)]
                max_a_Q = IN_AP if eval(p_IN_AP) >= eval(p_OUT_AP) else OUT_AP
                c = 0 if max_a_Q == IN_AP else 1
                if s3 == IN_AP:
                    decision_inAP_Q.append([s1, s2, c])
                else:
                    decision_outAP_Q.append([s1, s2, c])
                #
                try:
                    p_IN_AP, p_OUT_AP = op_policies_ext_drivers[(s1, s2, s3)]
                    max_a_D = IN_AP if eval(p_IN_AP) >= eval(
                        p_OUT_AP) else OUT_AP
                    c = 0 if max_a_D == IN_AP else 1
                except KeyError:
                    c = 2
                    print s1, s2, s3
                if s3 == IN_AP:
                    decision_inAP_D.append([s1, s2, c])
                else:
                    decision_outAP_D.append([s1, s2, c])
    inAP_Q = [(s2, id_dow[s1], a) for s1, s2, a in decision_inAP_Q]
    outAP_Q = [(s2, id_dow[s1], a) for s1, s2, a in decision_outAP_Q]
    inAP_D = [(s2, id_dow[s1], a) for s1, s2, a in decision_inAP_D]
    outAP_D = [(s2, id_dow[s1], a) for s1, s2, a in decision_outAP_D]

    d = [inAP_Q, outAP_Q, inAP_D, outAP_D]

    n = [
        'decision_inAP_Q', 'decision_outAP_Q', 'decision_inAP_D',
        'decision_outAP_D'
    ]
    for i in xrange(4):
        one_grid_chart(('Time slot', []), ('', DAY_OF_WEEK),
                       ['Go to AP', 'Go to OA'], '', d[i], n[i])
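For reference, q_learning_policy.pkl and extreme_drivers_policy.pkl map a state (day-of-week, time-slot, location) to a pair of probability strings formatted with '%.2f' (see q_learning() below), which is why the comparison above goes through eval(). A toy illustration with made-up keys:

# Toy illustration of the policy dictionaries loaded above; the keys and
# values shown here are made up.
toy_policy = {('Mon', 9, 'IN_AP'): ('0.70', '0.30'),
              ('Mon', 9, 'OUT_AP'): ('0.20', '0.80')}
for state, (p_in, p_out) in toy_policy.iteritems():
    # float() would do the same job as eval() for these numeric strings
    best = 'IN_AP' if float(p_in) >= float(p_out) else 'OUT_AP'
    print state, '->', best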
Example #8
def run():
    dir_path = '/Users/JerryHan88/taxi/full_drivers_trips_q_comparision'
    pickle_files = get_all_files(dir_path, 'comparision-', '.pkl')
    for pkl_file in pickle_files:
        yymm = pkl_file[:-len('.pkl')].split('-')[-1]
        yy, mm = int(yymm[:2]), int(yymm[2:])
        whole_rev, whole_count, sub_rev, sub_count = load_picle_file(
            '%s/comparision-%s.pkl' % (dir_path, yymm))
        # NOTE: fn (the output CSV path) is expected to be defined at module scope
        with open(fn, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(
                [yy, mm, whole_rev / whole_count, sub_rev / sub_count])
Example #11
def test():
    def difference(data0, data1):
        diff = {}
        for k, v in data0.iteritems():
            diff[k] = data1[k] - v
        return diff
    def ordering(dids_values):
        order_v_did = []
        for did, v in dids_values.iteritems():
            order_v_did.append([v, did])
        order_v_did.sort()
        order_v_did.reverse()
        return order_v_did
    def find_extreme_range(order_v_did):
        # keep drivers whose improvement is more than two standard deviations above the mean
        values = [v for v, _ in order_v_did]
        mu, std = np.mean(values), np.std(values)
        i = 0
        while order_v_did[i][0] > mu + std * 2.0:
            i += 1
        return (0, float(i) / len(order_v_did))  # float() guards against Python 2 integer division
    both_years_full_drivers, \
    Y09_driver_genprod_hour, Y10_driver_genprod_hour, \
    Y09_pin_driver_aprod_hour, Y10_pin_driver_aprod_hour, \
    Y09_pout_driver_aprod_hour, Y10_pout_driver_aprod_hour, \
    Y09_pin_driver_epro_month, Y10_pin_driver_epro_month, \
    Y09_pout_driver_epro_month, Y10_pout_driver_epro_month = load_picle_file('%s/productivities_ext.pkl' % (individual_detail_dir))
    #
    diff_general_prod = difference(Y09_driver_genprod_hour, Y10_driver_genprod_hour)
    diff_pin_prod = difference(Y09_pin_driver_aprod_hour, Y10_pin_driver_aprod_hour)
    diff_pout_prod = difference(Y09_pout_driver_aprod_hour, Y10_pout_driver_aprod_hour)
    diff_pin_eco = difference(Y09_pin_driver_epro_month, Y10_pin_driver_epro_month)
    diff_pout_eco = difference(Y09_pout_driver_epro_month, Y10_pout_driver_epro_month)
    
    order_v_did = ordering(diff_pin_eco)
    print len(diff_pin_eco)
    r1, r2 = find_extreme_range(order_v_did)
    extreme_drivers = [int(did) for _, did in order_v_did[int(r1 * len(order_v_did)):int(r2 * len(order_v_did))]]
    for fn in get_all_files(for_full_driver_dir, 'full-drivers-trips-', '.csv'):
        _, _, _, yymm = fn[:-len('.csv')].split('-')
        with open('%s/%s' % (for_full_driver_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            id_did = headers.index('did')
            with open('%s/diff-pin-eco-extreme-drivers-trip-%s.csv' % (for_full_driver_dir, yymm), 'wt') as w_csvfile:
                writer = csv.writer(w_csvfile)
                writer.writerow(headers)
                for row in reader:
                    did = int(row[id_did])
                    if did not in extreme_drivers:
                        continue
                    writer.writerow(row)
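The extreme-driver selection above keeps the drivers whose year-over-year improvement is more than two standard deviations above the mean. A small self-contained example of the same cutoff, with made-up driver ids and improvement values:

# Self-contained illustration of the two-sigma cutoff used in test();
# the driver ids and values below are made up.
import numpy as np

toy_diff = {1001: 10.0, 1002: 0.1, 1003: 0.2, 1004: 0.1, 1005: 0.0,
            1006: 0.1, 1007: 0.2, 1008: 0.1, 1009: 0.0, 1010: 0.1}
order_v_did = sorted(((v, did) for did, v in toy_diff.items()), reverse=True)
values = [v for v, _ in order_v_did]
mu, std = np.mean(values), np.std(values)
i = 0
while i < len(order_v_did) and order_v_did[i][0] > mu + std * 2.0:
    i += 1
extreme_drivers = [did for _, did in order_v_did[:i]]
print extreme_drivers   # [1001]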
def q_learning():
    policies_dir = for_learning_dir + '/%s' % ('ALPHA-0.10-GAMMA-0.50')
    policies = {}
    for fn in get_all_files(policies_dir, 'ALPHA-', '.pkl'):
        Qsa_value, _ = load_picle_file(policies_dir + '/%s' % fn)
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    if not policies.has_key((s1, s2, s3)): 
                        policies[(s1, s2, s3)] = [0, 0]
                    i = index_IN_OUT_AP[IN_AP] if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else index_IN_OUT_AP[OUT_AP]
                    policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # float() avoids Python 2 integer division truncating the vote shares
        op_policies[k] = ('%.2f' % (float(v[0]) / (v[0] + v[1])), '%.2f' % (float(v[1]) / (v[0] + v[1])))
    save_pickle_file('q_learning_policy.pkl', op_policies)     
Example #14
def process_files(yymm):
    Qsa_value, state_action_fare_dur = load_picle_file('%s/ALPHA-0.10-GAMMA-0.50/ALPHA-0.10-GAMMA-0.50-q-value-fare-dur-%s.pkl'%(for_learning_dir,yymm))
    argmax_as = {}
    for s1 in DAY_OF_WEEK:
        for s2 in TIME_SLOTS:
            for s3 in [IN_AP, OUT_AP]:
                argmax_as[(s1, s2, s3)] = IN_AP if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)] else OUT_AP
    #
    whole_rev, sub_rev = 0, 0
    whole_count, sub_count = 0, 0
    count = 0
    with open('%s/diff-pin-eco-extreme-drivers-trip-%s.csv' % (for_full_driver_dir, yymm), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_prev_tetime, id_prev_teloc = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index('start-location')
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        for row in reader:
            prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime]) 
            setup_time = stime - prev_tetime
            #
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            #
            a = row[id_sloc]
            #
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            alter_a = OUT_AP if a == IN_AP else IN_AP
            if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * state_action_fare_dur[(s1, s2, s3, alter_a)][0] / state_action_fare_dur[(s1, s2, s3, alter_a)][1] 
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1
            if argmax_as[(s1, s2, s3)] == a:
                sub_rev += economic_profit
                sub_count += 1
            count += 1
            if count % MOD_STAN == 0:
                print '%s, %d' % (yymm, count)
                logging_msg('%s, %d' % (yymm, count))
    save_pickle_file('%s/comparision-%s.pkl'%(for_full_driver_dir, yymm), [whole_rev, whole_count, sub_rev, sub_count])
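The economic profit above is the fare minus an opportunity cost: the time spent on the trip (setup plus duration), priced at the alternative action's average fare rate, where state_action_fare_dur[(s1, s2, s3, alter_a)] accumulates [total fare, total time]. A small worked example with made-up numbers:

# Worked example of the opportunity-cost calculation (numbers are made up).
setup_time, dur, fare = 300.0, 1200.0, 18.0            # seconds, seconds, dollars
alt_total_fare, alt_total_time = 90000.0, 7200000.0    # accumulated fare and time for the alternative action
alt_fare_rate = alt_total_fare / alt_total_time        # 0.0125 dollars per second
op_cost = (setup_time + dur) * alt_fare_rate           # 1500 s * 0.0125 = 18.75
economic_profit = fare - op_cost                       # 18.0 - 18.75 = -0.75
print economic_profit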
Example #16
def process_files(ALPHA, GAMMA):
    ALPHA_GAMMA_dir = for_learning_dir + '/ALPHA-%.2f-GAMMA-%.2f' % (ALPHA,
                                                                     GAMMA)
    if not os.path.exists(ALPHA_GAMMA_dir):
        return None
    pickle_files = get_all_files(ALPHA_GAMMA_dir,
                                 'ALPHA-%.2f-GAMMA-%.2f' % (ALPHA, GAMMA),
                                 '.pkl')
    for pkl_file in pickle_files:
        yymm = pkl_file[:-len('.pkl')].split('-')[-1]
        yy, mm = int(yymm[:2]), int(yymm[2:])
        whole_rev, whole_count, sub_rev, sub_count = load_picle_file(
            '%s/results-%s.pkl' % (ALPHA_GAMMA_dir, yymm))
        # NOTE: fn (the output CSV path) is expected to be defined at module scope
        with open(fn, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow([
                ALPHA, GAMMA, yy, mm, whole_rev / whole_count,
                sub_rev / sub_count
            ])
Example #17
def process_file(ALPHA, GAMMA, ALPHA_GAMMA_dir, yymm):
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    print yymm
    if yymm == '0901':
        prev_yymm = None
    elif yymm == '1001':
        prev_yymm = '0911'
    elif yymm == '1011':
        prev_yymm = '1009'
    else:
        yy, mm = int(yymm[:2]), int(yymm[2:])
        prev_yymm = '%02d%02d' % (yy, mm - 1)
    #
    if not prev_yymm:
        Qsa_value, state_action_fare_dur = {}, {}
        locations = [IN_AP, OUT_AP]
        actions = [IN_AP, OUT_AP]
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in locations:
                    for a in actions:
                        Qsa_value[(s1, s2, s3, a)] = 0
                        state_action_fare_dur[(s1, s2, s3, a)] = [0, 0]
    else:
        Qsa_value, state_action_fare_dur = load_picle_file(
            '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' %
            (ALPHA_GAMMA_dir, ALPHA, GAMMA, prev_yymm))
    #

    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm),
              'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_prev_tetime, id_prev_teloc = headers.index(
            'prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index(
            'start-location')
        id_etime, id_eloc = headers.index('end-time'), headers.index(
            'end-location')
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        #
        count = 0
        for row in reader:
            prev_tetime, stime, etime = eval(row[id_prev_tetime]), eval(
                row[id_stime]), eval(row[id_etime])
            setup_time = stime - prev_tetime
            #
            if setup_time < 0 or HOUR * 2 < setup_time:
                continue
            #
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime(
                "%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            #
            etime_datetime = datetime.datetime.fromtimestamp(etime)
            new_s1, new_s2 = etime_datetime.strftime("%a"), etime_datetime.hour
            new_s3 = row[id_eloc]
            #
            a = row[id_sloc]
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            #
            state_action_fare_dur[(s1, s2, s3, a)][0] += fare
            state_action_fare_dur[(s1, s2, s3, a)][1] += setup_time + dur
            #
            if Qsa_value[(new_s1, new_s2, new_s3, IN_AP)] > Qsa_value[(
                    new_s1, new_s2, new_s3, OUT_AP)]:
                future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3, IN_AP)]
            else:
                future_max_q_value = Qsa_value[(new_s1, new_s2, new_s3,
                                                OUT_AP)]
            #
            alter_a = OUT_AP if a == IN_AP else IN_AP
            if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * state_action_fare_dur[
                    (s1, s2, s3, alter_a)][0] / state_action_fare_dur[
                        (s1, s2, s3, alter_a)][1]
            qrs = fare - op_cost + GAMMA * future_max_q_value
            Qsa_value[(s1, s2, s3, a)] = \
                (1 - ALPHA) * Qsa_value[(s1, s2, s3, a)] + ALPHA * qrs
            count += 1
            if count % MOD_STAN == 0:
                print '%s, %d' % (yymm, count)
                logging_msg('%s, %d' % (yymm, count))
                save_pickle_file(
                    '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' %
                    (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm),
                    [Qsa_value, state_action_fare_dur])
        save_pickle_file(
            '%s/ALPHA-%.2f-GAMMA-%.2f-q-value-fare-dur-%s.pkl' %
            (ALPHA_GAMMA_dir, ALPHA, GAMMA, yymm),
            [Qsa_value, state_action_fare_dur])
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
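The update inside the loop is the standard Q-learning rule, with ALPHA as the learning rate, GAMMA as the discount factor, and economic profit (fare minus the alternative action's opportunity cost) as the reward: Q(s, a) <- (1 - ALPHA) * Q(s, a) + ALPHA * (r + GAMMA * max_a' Q(s', a')). A stripped-down sketch of that update on a toy two-state Q-table:

# Minimal sketch of the Q-learning update used above, on a toy Q-table.
ALPHA, GAMMA = 0.1, 0.5
IN_AP, OUT_AP = 'IN_AP', 'OUT_AP'

Q = {(s, a): 0.0 for s in ['s0', 's1'] for a in [IN_AP, OUT_AP]}

def q_update(s, a, reward, new_s):
    # future value is the best action available from the next state
    future_max = max(Q[(new_s, IN_AP)], Q[(new_s, OUT_AP)])
    Q[(s, a)] = (1 - ALPHA) * Q[(s, a)] + ALPHA * (reward + GAMMA * future_max)

q_update('s0', IN_AP, reward=5.0, new_s='s1')
print Q[('s0', IN_AP)]   # 0.5 == 0.1 * (5.0 + 0.5 * 0.0)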
Example #18
def process_files(yymm, q_lerning_ended_dir):
    candi_pkl_files = []
    for dn in q_lerning_ended_dir:
        if os.path.exists('%s/%s/results-%s.pkl' %
                          (for_learning_dir, dn, yymm)):
            continue
        candi_pkl_files.append('%s/%s/%s-q-value-fare-dur-%s.pkl' %
                               (for_learning_dir, dn, dn, yymm))
    result_pkls = [
        os.path.dirname(pkl_path) + '/results-%s.pkl' % yymm
        for pkl_path in candi_pkl_files
    ]
    #
    list_argmax_as = []
    state_action_fare_dur = None
    for pkl_file_path in candi_pkl_files:
        Qsa_value, state_action_fare_dur = load_picle_file(pkl_file_path)
        argmax_as = {}
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    argmax_as[(s1, s2, s3)] = IN_AP if Qsa_value[(
                        s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3,
                                                          OUT_AP)] else OUT_AP
        list_argmax_as.append(argmax_as)
    #
    whole_rev, whole_count = 0, 0
    list_sub_rev, list_sub_count = [0 for _ in xrange(len(candi_pkl_files))], [
        0 for _ in xrange(len(candi_pkl_files))
    ]

    count = 0
    with open('%s/whole-trip-%s.csv' % (for_learning_dir, yymm),
              'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        id_prev_tetime, id_prev_teloc = headers.index(
            'prev-trip-end-time'), headers.index('prev-trip-end-location')
        id_stime, id_sloc = headers.index('start-time'), headers.index(
            'start-location')
        id_dur, id_fare = headers.index('duration'), headers.index('fare')
        for row in reader:
            prev_tetime, stime = eval(row[id_prev_tetime]), eval(row[id_stime])
            setup_time = stime - prev_tetime
            #
            prev_tetime_datetime = datetime.datetime.fromtimestamp(prev_tetime)
            s1, s2 = prev_tetime_datetime.strftime(
                "%a"), prev_tetime_datetime.hour
            s3 = row[id_prev_teloc]
            #
            a = row[id_sloc]
            #
            dur, fare = eval(row[id_dur]), eval(row[id_fare])
            alter_a = OUT_AP if a == IN_AP else IN_AP
            if state_action_fare_dur[(s1, s2, s3, alter_a)][1] == 0:
                op_cost = 0
            else:
                op_cost = (setup_time + dur) * state_action_fare_dur[
                    (s1, s2, s3, alter_a)][0] / state_action_fare_dur[
                        (s1, s2, s3, alter_a)][1]
            economic_profit = fare - op_cost
            #
            whole_rev += economic_profit
            whole_count += 1

            for i, argmax_as in enumerate(list_argmax_as):
                if argmax_as[(s1, s2, s3)] == a:
                    list_sub_rev[i] += economic_profit
                    list_sub_count[i] += 1
            count += 1
            if count % MOD_STAN == 0:
                print '%s, %d' % (yymm, count)
                logging_msg('%s, %d' % (yymm, count))
                for i in xrange(len(result_pkls)):
                    result_fn = result_pkls[i]
                    save_pickle_file(result_fn, [
                        whole_rev, whole_count, list_sub_rev[i],
                        list_sub_count[i]
                    ])
    for i in xrange(len(result_pkls)):
        result_fn = result_pkls[i]
        save_pickle_file(
            result_fn,
            [whole_rev, whole_count, list_sub_rev[i], list_sub_count[i]])
from __future__ import division
#
import os, sys
sys.path.append(os.getcwd() + '/..')
#
from supports.handling_pkl import load_picle_file
from supports.etc_functions import get_all_files, remove_creat_dir
from supports._setting import merged_trip_dir, for_full_driver_dir, individual_detail_dir
from supports.logger import logging_msg
from supports.location_check import is_in_airport
from supports._setting import OUT_AP
from supports.multiprocess import init_multiprocessor, put_task, end_multiprocessor
#
import csv
#
full_time_drivers, _, _, _, _, _, _, _, _, _, _ = load_picle_file('%s/productivities_ext.pkl' % (individual_detail_dir))
driver_full_or_not = [False] * (max(full_time_drivers) + 1)
for did in full_time_drivers:
    driver_full_or_not[int(did)] = True
check_progress = 10000000
#
def run():
    remove_creat_dir(for_full_driver_dir)
    init_multiprocessor()
    count_num_jobs = 0
    for fn in get_all_files(merged_trip_dir, 'trips', '.csv'):
#         process_file(fn)
        put_task(process_file, [fn])
        count_num_jobs += 1
    end_multiprocessor(count_num_jobs)
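init_multiprocessor, put_task, and end_multiprocessor come from the project's supports.multiprocess module, which is not shown in these examples. A minimal sketch of how such helpers could be built on multiprocessing.Pool, purely as an assumption about their behavior:

# Hypothetical sketch of the supports.multiprocess helpers used above;
# the real module is not part of these examples.
import multiprocessing

_pool = None
_results = []

def init_multiprocessor(num_workers=None):
    global _pool
    _pool = multiprocessing.Pool(num_workers or multiprocessing.cpu_count())

def put_task(func, args):
    # schedule func(*args) on the worker pool
    _results.append(_pool.apply_async(func, args))

def end_multiprocessor(expected_num_jobs):
    assert expected_num_jobs == len(_results)
    _pool.close()
    _pool.join()
    # surface any exceptions raised inside the workers
    for r in _results:
        r.get()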
Example #23
def process_files(yymm):
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    init_csv_files(yymm)
    #
    full_dids = sorted([
        eval(x) for x in load_picle_file('%s/%s%s.pkl' %
                                         (full_shift_dir,
                                          monthly_full_did_prefix, yymm))
    ])
    s_df = pd.read_csv('%s/%s%s.csv' % (full_shift_dir, sh_full_prefix, yymm))
    trip_df = pd.read_csv('%s/%s%s.csv' % (trips_dir, trip_prefix, yymm))
    ap_trip_df = pd.read_csv('%s/%s%s.csv' %
                             (airport_trips_dir, ap_trip_op_ep_prefix, yymm))
    #
    yy, mm = int(yymm[:2]), int(yymm[2:])
    for did in full_dids:
        # General
        did_sh = s_df[(s_df['driver-id'] == did)]
        pro_dur = sum(did_sh['productive-duration']) * SEC
        did_wt = trip_df[(trip_df['did'] == did)]
        total_fare = sum(did_wt['fare'])
        if pro_dur > 0 and total_fare != 0:
            total_prod = total_fare / pro_dur
            with open(
                    '%s/%s%s.csv' %
                (individual_detail_dir, general_prefix, yymm),
                    'a') as w_csvfile:
                writer = csv.writer(w_csvfile)
                writer.writerow([yy, mm, did, pro_dur, total_fare, total_prod])
        #
        did_ap = ap_trip_df[(ap_trip_df['did'] == did)]
        prev_in_ap_trip = did_ap[(did_ap['trip-mode'] == DInAP_PInAP)]
        prev_out_ap_trip = did_ap[(did_ap['trip-mode'] == DOutAP_PInAP)]
        #
        if len(did_ap) != 0:
            # prev in ap trip
            ap_qu, ap_dur = sum(prev_in_ap_trip['queue-time']), sum(
                prev_in_ap_trip['duration'])
            ap_fare = sum(prev_in_ap_trip['fare'])
            ap_op_cost, ap_eco_profit = sum(prev_in_ap_trip['op-cost']), sum(
                prev_in_ap_trip['economic'])
            if ap_qu + ap_dur > 0 and ap_fare != 0:
                ap_prod = ap_fare / (ap_qu + ap_dur)
                with open(
                        '%s/%s%s.csv' %
                    (individual_detail_dir, prev_in_ap_prefix, yymm),
                        'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    writer.writerow([
                        yy, mm, did, ap_qu, ap_dur, ap_fare, ap_prod,
                        ap_op_cost, ap_eco_profit
                    ])
            #
            # prev out ap trip
            ap_qu, ap_dur = sum(prev_out_ap_trip['queue-time']), sum(
                prev_out_ap_trip['duration'])
            ap_fare = sum(prev_out_ap_trip['fare'])
            ap_op_cost, ap_eco_profit = sum(prev_out_ap_trip['op-cost']), sum(
                prev_out_ap_trip['economic'])
            if ap_qu + ap_dur > 0 and ap_fare != 0:
                ap_prod = ap_fare / (ap_qu + ap_dur)
                with open(
                        '%s/%s%s.csv' %
                    (individual_detail_dir, prev_out_ap_prefix, yymm),
                        'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    writer.writerow([
                        yy, mm, did, ap_qu, ap_dur, ap_fare, ap_prod,
                        ap_op_cost, ap_eco_profit
                    ])
    print 'End the file; %s' % yymm
    logging_msg('End the file; %s' % yymm)
Example #24
def process_file(fn):
    _, _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    ap_pkl_files = get_all_files(logs_dir, 'ap-crossing-time-', '.pkl')
    ap_pkl_file_path = None
    for pkl_fn in ap_pkl_files:
        _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-')
        if pkl_yymm == yymm:
            ap_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn)
            break
    else:
        assert False, yymm
    ap_crossing_times = load_picle_file(ap_pkl_file_path)
    #
    ns_pkl_files = get_all_files(logs_dir, 'ns-crossing-time-', '.pkl')
    ns_pkl_file_path = None
    for pkl_fn in ns_pkl_files:
        _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-')
        if pkl_yymm == yymm:
            ns_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn)
            break
    else:
        assert False, yymm
    ns_crossing_times = load_picle_file(ns_pkl_file_path)
    #
    init_csv_files(yymm)

    with open('%s/%s' % (trips_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        header_id = {h: i for i, h in enumerate(headers)}
        for row in reader:
            tid, did = row[header_id['tid']], row[header_id['did']]
            et, duration = row[header_id['end-time']], row[
                header_id['duration']]
            fare = row[header_id['fare']]
            #
            ap_tm, ns_tm = int(row[header_id['ap-trip-mode']]), int(
                row[header_id['ns-trip-mode']])
            vid, st, prev_tet = row[header_id['vid']], eval(
                row[header_id['start-time']]), eval(
                    row[header_id['prev-trip-end-time']])
            #
            is_ap_trip, is_ns_trip = False, False
            #
            if ap_tm == DInAP_PInAP:
                is_ap_trip = True
                ap_join_queue_time = prev_tet
            elif ap_tm == DOutAP_PInAP:
                is_ap_trip = True
                try:
                    i = bisect(ap_crossing_times[vid], st)
                except KeyError:
                    logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']]))
                    continue
                ap_join_queue_time = ap_crossing_times[vid][
                    i - 1] if i != 0 else ap_crossing_times[vid][0]
            if is_ap_trip:
                with open('%s/airport-trip-%s.csv' % (airport_trips_dir, yymm),
                          'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    ap_queue_time = st - ap_join_queue_time
                    new_row = [
                        tid, vid, did, st, et, duration, fare, prev_tet, ap_tm,
                        ap_join_queue_time, ap_queue_time
                    ]
                    writer.writerow(new_row)
            #
            if ns_tm == DInNS_PInNS:
                is_ns_trip = True
                ns_join_queue_time = prev_tet
            elif ns_tm == DOutNS_PInNS:
                is_ns_trip = True
                try:
                    i = bisect(ns_crossing_times[vid], st)
                except KeyError:
                    logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']]))
                    continue
                ns_join_queue_time = ns_crossing_times[vid][
                    i - 1] if i != 0 else ns_crossing_times[vid][0]
            if is_ns_trip:
                with open(
                        '%s/nightsafari-trip-%s.csv' %
                    (nightsafari_trips_dir, yymm), 'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    ns_queue_time = st - ns_join_queue_time
                    new_row = [
                        tid, vid, did, st, et, duration, fare, prev_tet, ns_tm,
                        ns_join_queue_time, ns_queue_time
                    ]
                    writer.writerow(new_row)
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
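The bisect() calls above locate the most recent recorded boundary crossing before the trip's start time; that crossing is treated as the moment the driver joined the queue. A small illustration with made-up timestamps:

# Toy illustration of the bisect-based queue-join lookup (timestamps are made up).
from bisect import bisect

crossing_times = [1000, 2000, 3000]   # sorted crossing timestamps for one vehicle
st = 2500                             # trip start time
i = bisect(crossing_times, st)        # i == 2
join_queue_time = crossing_times[i - 1] if i != 0 else crossing_times[0]
queue_time = st - join_queue_time     # 2500 - 2000 = 500
print join_queue_time, queue_time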