def process_file(fn):
    _, yymm = fn[:-len('.csv')].split('-')
    #
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not = {}, {}
    vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not = {}, {}
    if yymm not in ['0901', '1001', '1011']:
        path_to_last_day_csv_file = None
        temp_csv_files = get_all_files(log_last_day_dir, '', '.csv')
        prev_fn = None
        y, m = int(yymm[:2]), int(yymm[2:])
        prev_m = m - 1
        prev_yymm = '%02d%02d' % (y, prev_m)
        for temp_fn in temp_csv_files:
            if temp_fn.startswith('log-last-day-%s' % prev_yymm):
                prev_fn = temp_fn
                break
        assert prev_fn, yymm
        path_to_last_day_csv_file = '%s/%s' % (log_last_day_dir, prev_fn)
        vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not, vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not = \
                        record_crossing_time(path_to_last_day_csv_file, vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not,
                                             vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not)
    path_to_csv_file = '%s/%s' % (logs_dir, fn)
    vehicle_ap_crossing_time_from_out_to_in, _, vehicle_ns_crossing_time_from_out_to_in, _ = \
            record_crossing_time(path_to_csv_file, vehicle_ap_crossing_time_from_out_to_in, vehicle_last_log_ap_or_not,
                                 vehicle_ns_crossing_time_from_out_to_in, vehicle_last_log_ns_or_not)
    #
    save_pickle_file('%s/ap-crossing-time-%s.pkl' % (logs_dir, yymm),
                     vehicle_ap_crossing_time_from_out_to_in)
    save_pickle_file('%s/ns-crossing-time-%s.pkl' % (logs_dir, yymm),
                     vehicle_ns_crossing_time_from_out_to_in)
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
def drivers():
    """Tally extreme drivers' start-location choices per
    (day-of-week, hour, prev-trip-end-location) state and pickle the
    observed choice ratios.

    Fix: the ratios used plain ``/`` on ints, which truncates to 0 or 1
    under Python 2 integer division; ``float()`` forces true division.
    Also replaces deprecated ``dict.has_key``.
    """
    policies = {}
    for fn in get_all_files(for_full_driver_dir,
                            'diff-pin-eco-extreme-drivers-trip-', '.csv'):
        _, _, _, _, _, _, yymm = fn[:-len('.csv')].split('-')
        with open('%s/%s' % (for_full_driver_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            id_ptet, id_ptel = headers.index(
                'prev-trip-end-time'), headers.index('prev-trip-end-location')
            id_sl = headers.index('start-location')
            for row in reader:
                prev_tetime_datetime = datetime.datetime.fromtimestamp(
                    int(row[id_ptet]))
                s1, s2 = prev_tetime_datetime.strftime(
                    "%a"), prev_tetime_datetime.hour
                s3 = row[id_ptel]
                if (s1, s2, s3) not in policies:
                    policies[(s1, s2, s3)] = [0, 0]
                i = index_IN_OUT_AP[row[id_sl]]
                policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # float() guards against Py2 integer division truncating the ratios
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('extreme_drivers_policy.pkl', op_policies)
def run():
    """Recreate the shift-duration dir and queue every shift-hour-state
    csv for parallel processing."""
    remove_creat_dir(shift_pro_dur_dir)
    init_multiprocessor()
    shift_files = get_all_files(shifts_dir, 'shift-hour-state-', '.csv')
    for shift_fn in shift_files:
        put_task(process_file, [shift_fn])
    end_multiprocessor(len(shift_files))
def run():
    """Recreate the full-driver dir and fan the merged trip csvs out to
    worker processes."""
    remove_creat_dir(for_full_driver_dir)
    init_multiprocessor()
    trip_files = get_all_files(merged_trip_dir, 'trips', '.csv')
    for trip_fn in trip_files:
        put_task(process_file, [trip_fn])
    end_multiprocessor(len(trip_files))
def run():
    """Rebuild the airport and night-safari trip dirs and dispatch one
    task per whole-trip csv."""
    remove_creat_dir(airport_trips_dir)
    remove_creat_dir(nightsafari_trips_dir)
    whole_trip_files = get_all_files(trips_dir, 'whole-trip-', '.csv')
    init_multiprocessor()
    for trip_fn in whole_trip_files:
        put_task(process_file, [trip_fn])
    end_multiprocessor(len(whole_trip_files))
# Beispiel #6
def run():
    """Recreate the trips dir and queue each merged-trip csv as a
    separate worker task."""
    remove_creat_dir(trips_dir)
    merged_files = get_all_files(merged_trip_dir, 'trips', '.csv')
    init_multiprocessor()
    for merged_fn in merged_files:
        put_task(process_file, [merged_fn])
    end_multiprocessor(len(merged_files))
def run():
    """Rebuild the hourly summary dir and queue every whole-trip csv."""
    remove_creat_dir(hourly_summary)
    source_files = get_all_files(trips_dir, 'whole-trip-', '.csv')
    #
    init_multiprocessor()
    for source_fn in source_files:
        put_task(process_file, [source_fn])
    end_multiprocessor(len(source_files))
# Beispiel #8
def run():
    """Recreate the AP/NS trip dirs and dispatch all whole-trip csvs to
    the worker pool."""
    remove_creat_dir(airport_trips_dir)
    remove_creat_dir(nightsafari_trips_dir)
    init_multiprocessor()
    job_files = get_all_files(trips_dir, 'whole-trip-', '.csv')
    for job_fn in job_files:
        put_task(process_file, [job_fn])
    end_multiprocessor(len(job_files))
def run():
    """Recreate the learning dir and queue the merged trip csvs for
    processing."""
    remove_creat_dir(for_learning_dir)
    init_multiprocessor()
    merged_files = get_all_files(merged_trip_dir, 'trips', '.csv')
    for merged_fn in merged_files:
        put_task(process_file, [merged_fn])
    end_multiprocessor(len(merged_files))
# Beispiel #10
def run():
    """Rebuild hourly_summary and fan out one worker task per
    whole-trip csv."""
    remove_creat_dir(hourly_summary)
    init_multiprocessor()
    csv_names = get_all_files(trips_dir, 'whole-trip-', '.csv')
    for csv_name in csv_names:
        put_task(process_file, [csv_name])
    end_multiprocessor(len(csv_names))
def run():
    """Append monthly whole-vs-sub revenue-per-trip averages to a csv.

    NOTE(review): ``fn`` is never defined in this function, so
    ``open(fn, 'a')`` raises NameError unless a module-level ``fn``
    happens to exist -- the intended output path should be confirmed.
    """
    dir_path = '/Users/JerryHan88/taxi/full_drivers_trips_q_comparision'
    pickle_files = get_all_files(dir_path, 
                                 'comparision-', '.pkl')
    for pkl_file in pickle_files:
        # trailing '-yymm' of the pickle name identifies the month
        yymm = pkl_file[:-len('.pkl')].split('-')[-1]
        yy, mm = int(yymm[:2]), int(yymm[2:])
        whole_rev, whole_count, sub_rev, sub_count = load_picle_file('%s/comparision-%s.pkl' % (dir_path, yymm))
        with open(fn, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow([yy, mm, whole_rev / whole_count, sub_rev / sub_count])
# Beispiel #12
def run():
    """Append monthly whole-vs-sub revenue-per-trip averages to a csv.

    NOTE(review): ``fn`` is never defined in this function, so
    ``open(fn, 'a')`` raises NameError unless a module-level ``fn``
    happens to exist -- the intended output path should be confirmed.
    """
    dir_path = '/Users/JerryHan88/taxi/full_drivers_trips_q_comparision'
    pickle_files = get_all_files(dir_path, 'comparision-', '.pkl')
    for pkl_file in pickle_files:
        # trailing '-yymm' of the pickle name identifies the month
        yymm = pkl_file[:-len('.pkl')].split('-')[-1]
        yy, mm = int(yymm[:2]), int(yymm[2:])
        whole_rev, whole_count, sub_rev, sub_count = load_picle_file(
            '%s/comparision-%s.pkl' % (dir_path, yymm))
        with open(fn, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow(
                [yy, mm, whole_rev / whole_count, sub_rev / sub_count])
def process_files(ALPHA, GAMMA):
    """Append per-month whole-vs-sub revenue averages for one
    (ALPHA, GAMMA) learning run to a csv; returns None early when the
    run's directory does not exist.

    NOTE(review): ``fn`` is not defined in this scope; ``open(fn, 'a')``
    will raise NameError unless a module-level ``fn`` exists -- confirm
    the intended output path.
    """
    ALPHA_GAMMA_dir = for_learning_dir + '/ALPHA-%.2f-GAMMA-%.2f' % (ALPHA, GAMMA)
    if not os.path.exists(ALPHA_GAMMA_dir):
        return None
    pickle_files = get_all_files(ALPHA_GAMMA_dir, 'ALPHA-%.2f-GAMMA-%.2f' % (ALPHA, GAMMA), '.pkl')
    for pkl_file in pickle_files:
        # trailing '-yymm' of the pickle name identifies the month
        yymm = pkl_file[:-len('.pkl')].split('-')[-1]
        yy, mm = int(yymm[:2]), int(yymm[2:])
        whole_rev, whole_count, sub_rev, sub_count = load_picle_file('%s/results-%s.pkl' % (ALPHA_GAMMA_dir, yymm))
        with open(fn, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow([ALPHA, GAMMA, yy, mm, whole_rev / whole_count, sub_rev / sub_count])
def run():
    """Dispatch each airport-trip csv to the worker pool, logging any
    dispatch failure before re-raising."""
    init_multiprocessor()
    airport_files = get_all_files(airport_trips_dir, 'airport-trip-', '.csv')
    num_queued = 0
    for trip_fn in airport_files:
        try:
            put_task(process_file, [trip_fn])
        except Exception as _:
            logging_msg('Algorithm runtime exception (%s)\n' % (trip_fn) + format_exc())
            raise
        num_queued += 1
    end_multiprocessor(num_queued)
# Beispiel #15
def test():
    def difference(data0, data1):
        diff = {}
        for k, v in data0.iteritems():
            diff[k] = data1[k] - v
        return diff
    def ordering(dids_values):
        order_v_did = []
        for did, v in dids_values.iteritems():
            order_v_did.append([v, did])
        order_v_did.sort()
        order_v_did.reverse()
        return order_v_did
    def find_extreme_range(order_v_did):
        # more than mean's 50 percent
        values = [v for v, _ in order_v_did]
        mu, std = np.mean(values), np.std(values)
        i = 0
        while order_v_did[i][0] > mu + std * 2.0:
            i += 1
        return (0, i / len(order_v_did))
    both_years_full_drivers, \
    Y09_driver_genprod_hour, Y10_driver_genprod_hour, \
    Y09_pin_driver_aprod_hour, Y10_pin_driver_aprod_hour, \
    Y09_pout_driver_aprod_hour, Y10_pout_driver_aprod_hour, \
    Y09_pin_driver_epro_month, Y10_pin_driver_epro_month, \
    Y09_pout_driver_epro_month, Y10_pout_driver_epro_month = load_picle_file('%s/productivities_ext.pkl' % (individual_detail_dir))
    #
    diff_general_prod = difference(Y09_driver_genprod_hour, Y10_driver_genprod_hour)
    diff_pin_prod = difference(Y09_pin_driver_aprod_hour, Y10_pin_driver_aprod_hour)
    diff_pout_prod = difference(Y09_pout_driver_aprod_hour, Y10_pout_driver_aprod_hour)
    diff_pin_eco = difference(Y09_pin_driver_epro_month, Y10_pin_driver_epro_month)
    diff_pout_eco = difference(Y09_pout_driver_epro_month, Y10_pout_driver_epro_month)
    
    order_v_did = ordering(diff_pin_eco)
    print len(diff_pin_eco)
    r1, r2 = find_extreme_range(order_v_did)
    extreme_drivers = [int(did) for _, did in order_v_did[int(r1 * len(order_v_did)):int(r2 * len(order_v_did))]]
    for fn in get_all_files(for_full_driver_dir, 'full-drivers-trips-', '.csv'):
        _, _, _, yymm = fn[:-len('.csv')].split('-')
        with open('%s/%s' % (for_full_driver_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            id_did = headers.index('did')
            with open('%s/diff-pin-eco-extreme-drivers-trip-%s.csv' % (for_full_driver_dir, yymm), 'wt') as w_csvfile:
                writer = csv.writer(w_csvfile)
                writer.writerow(headers)
                for row in reader:
                    did = int(row[id_did])
                    if did not in extreme_drivers:
                        continue
                    writer.writerow(row)
# Beispiel #16
def run():
    """Recreate full_shift_dir and queue every shift csv, logging any
    dispatch failure before re-raising."""
    remove_creat_dir(full_shift_dir)
    init_multiprocessor()
    shift_files = get_all_files(shifts_dir, '', '.csv')
    queued = 0
    for shift_fn in shift_files:
        try:
            put_task(process_file, [shift_fn])
        except Exception as _:
            logging_msg('Algorithm runtime exception (%s)\n' % (shift_fn) + format_exc())
            raise
        queued += 1
    end_multiprocessor(queued)
# Beispiel #17
def run():
    """Queue every airport-trip csv, logging any dispatch failure
    before re-raising."""
    init_multiprocessor()
    queued = 0
    for trip_fn in get_all_files(airport_trips_dir, 'airport-trip-', '.csv'):
        try:
            put_task(process_file, [trip_fn])
        except Exception as _:
            logging_msg('Algorithm runtime exception (%s)\n' % (trip_fn) +
                        format_exc())
            raise
        queued += 1
    end_multiprocessor(queued)
def run():
    """Queue per-month aggregation for every (ALPHA, GAMMA) directory
    that finished all 22 months of Q-learning."""
    finished_dirs = []
    for dn in get_all_directories(for_learning_dir):
        if len(get_all_files(for_learning_dir + '/%s' % (dn), 'ALPHA-', '.pkl')) == 22:
            finished_dirs.append(dn)
    init_multiprocessor()
    num_tasks = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            # months excluded from the data set
            if yymm in ['0912', '1010']:
                continue
            put_task(process_files, [yymm, finished_dirs])
            num_tasks += 1
    end_multiprocessor(num_tasks)
def q_learning():
    """Derive a majority policy (stay in vs. leave the airport) from the
    learned Q-values of the ALPHA=0.10 / GAMMA=0.50 run and pickle it.

    Fix: the vote ratios used plain ``/`` on ints, truncating to 0 or 1
    under Python 2 integer division; ``float()`` forces true division.
    Also replaces deprecated ``dict.has_key``.
    """
    policies_dir = for_learning_dir + '/%s' % ('ALPHA-0.10-GAMMA-0.50')
    policies = {}
    for fn in get_all_files(policies_dir, 'ALPHA-', '.pkl'):
        Qsa_value, _ = load_picle_file(policies_dir + '/%s' % fn)
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    if (s1, s2, s3) not in policies:
                        policies[(s1, s2, s3)] = [0, 0]
                    # one vote per month for the action with the larger Q-value
                    if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)]:
                        i = index_IN_OUT_AP[IN_AP]
                    else:
                        i = index_IN_OUT_AP[OUT_AP]
                    policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # float() guards against Py2 integer division truncating the ratios
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('q_learning_policy.pkl', op_policies)
def q_learning():
    """Derive a majority policy (stay in vs. leave the airport) from the
    learned Q-values of the ALPHA=0.10 / GAMMA=0.50 run and pickle it.

    Fix: the vote ratios used plain ``/`` on ints, truncating to 0 or 1
    under Python 2 integer division; ``float()`` forces true division.
    Also replaces deprecated ``dict.has_key``.
    """
    policies_dir = for_learning_dir + '/%s' % ('ALPHA-0.10-GAMMA-0.50')
    policies = {}
    for fn in get_all_files(policies_dir, 'ALPHA-', '.pkl'):
        Qsa_value, _ = load_picle_file(policies_dir + '/%s' % fn)
        for s1 in DAY_OF_WEEK:
            for s2 in TIME_SLOTS:
                for s3 in [IN_AP, OUT_AP]:
                    if (s1, s2, s3) not in policies:
                        policies[(s1, s2, s3)] = [0, 0]
                    # one vote per month for the action with the larger Q-value
                    if Qsa_value[(s1, s2, s3, IN_AP)] >= Qsa_value[(s1, s2, s3, OUT_AP)]:
                        i = index_IN_OUT_AP[IN_AP]
                    else:
                        i = index_IN_OUT_AP[OUT_AP]
                    policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # float() guards against Py2 integer division truncating the ratios
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total),
                          '%.2f' % (v[1] / total))
    save_pickle_file('q_learning_policy.pkl', op_policies)
# Beispiel #21
def process_files(ALPHA, GAMMA):
    """Append per-month whole-vs-sub revenue averages for one
    (ALPHA, GAMMA) learning run to a csv; returns None early when the
    run's directory does not exist.

    NOTE(review): ``fn`` is not defined in this scope; ``open(fn, 'a')``
    will raise NameError unless a module-level ``fn`` exists -- confirm
    the intended output path.
    """
    ALPHA_GAMMA_dir = for_learning_dir + '/ALPHA-%.2f-GAMMA-%.2f' % (ALPHA,
                                                                     GAMMA)
    if not os.path.exists(ALPHA_GAMMA_dir):
        return None
    pickle_files = get_all_files(ALPHA_GAMMA_dir,
                                 'ALPHA-%.2f-GAMMA-%.2f' % (ALPHA, GAMMA),
                                 '.pkl')
    for pkl_file in pickle_files:
        # trailing '-yymm' of the pickle name identifies the month
        yymm = pkl_file[:-len('.pkl')].split('-')[-1]
        yy, mm = int(yymm[:2]), int(yymm[2:])
        whole_rev, whole_count, sub_rev, sub_count = load_picle_file(
            '%s/results-%s.pkl' % (ALPHA_GAMMA_dir, yymm))
        with open(fn, 'a') as w_csvfile:
            writer = csv.writer(w_csvfile)
            writer.writerow([
                ALPHA, GAMMA, yy, mm, whole_rev / whole_count,
                sub_rev / sub_count
            ])
def run():
    """Queue monthly aggregation tasks for every (ALPHA, GAMMA)
    directory in which all 22 months of Q-learning finished."""
    completed_dirs = []
    for dn in get_all_directories(for_learning_dir):
        pkl_count = len(get_all_files(for_learning_dir + '/%s' %
                                      (dn), 'ALPHA-', '.pkl'))
        if pkl_count == 22:
            completed_dirs.append(dn)
    init_multiprocessor()
    num_tasks = 0
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = '%02d%02d' % (y, m)
            # months excluded from the data set
            if yymm in ['0912', '1010']:
                continue
            put_task(process_files, [yymm, completed_dirs])
            num_tasks += 1
    end_multiprocessor(num_tasks)
def drivers():
    """Tally extreme drivers' start-location choices per
    (day-of-week, hour, prev-trip-end-location) state and pickle the
    observed choice ratios.

    Fix: the ratios used plain ``/`` on ints, which truncates to 0 or 1
    under Python 2 integer division; ``float()`` forces true division.
    Also replaces deprecated ``dict.has_key``.
    """
    policies = {}
    for fn in get_all_files(for_full_driver_dir, 'diff-pin-eco-extreme-drivers-trip-', '.csv'):
        _, _, _, _, _, _, yymm = fn[:-len('.csv')].split('-')
        with open('%s/%s' % (for_full_driver_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            id_ptet, id_ptel = headers.index('prev-trip-end-time'), headers.index('prev-trip-end-location')
            id_sl = headers.index('start-location')
            for row in reader:
                prev_tetime_datetime = datetime.datetime.fromtimestamp(int(row[id_ptet]))
                s1, s2 = prev_tetime_datetime.strftime("%a"), prev_tetime_datetime.hour
                s3 = row[id_ptel]
                if (s1, s2, s3) not in policies:
                    policies[(s1, s2, s3)] = [0, 0]
                i = index_IN_OUT_AP[row[id_sl]]
                policies[(s1, s2, s3)][i] += 1
    op_policies = {}
    for k, v in policies.iteritems():
        # float() guards against Py2 integer division truncating the ratios
        total = float(v[0] + v[1])
        op_policies[k] = ('%.2f' % (v[0] / total), '%.2f' % (v[1] / total))
    save_pickle_file('extreme_drivers_policy.pkl', op_policies)
# Beispiel #24
def process_file(fn):
    """Split one monthly whole-trip csv into airport and night-safari
    trip csvs, attaching the queue-join time and queue-wait time to
    each qualifying trip.

    NOTE(review): row fields are parsed with eval(); acceptable only
    because the csvs are produced by this pipeline -- never run this on
    untrusted input.
    """
    _, _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm
    logging_msg('handle the file; %s' % yymm)
    #
    # Locate this month's pickled airport queue-crossing times.
    ap_pkl_files = get_all_files(logs_dir, 'ap-crossing-time-', '.pkl')
    ap_pkl_file_path = None
    for pkl_fn in ap_pkl_files:
        _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-')
        if pkl_yymm == yymm:
            ap_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn)
            break
    else:
        # for-else: no pickle matched this month
        assert False, yymm
    ap_crossing_times = load_picle_file(ap_pkl_file_path)
    #
    # Same lookup for the night-safari queue-crossing times.
    ns_pkl_files = get_all_files(logs_dir, 'ns-crossing-time-', '.pkl')
    ns_pkl_file_path = None
    for pkl_fn in ns_pkl_files:
        _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-')
        if pkl_yymm == yymm:
            ns_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn)
            break
    else:
        # for-else: no pickle matched this month
        assert False, yymm
    ns_crossing_times = load_picle_file(ns_pkl_file_path)
    #
    init_csv_files(yymm)

    with open('%s/%s' % (trips_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        header_id = {h: i for i, h in enumerate(headers)}
        for row in reader:
            tid, did = row[header_id['tid']], row[header_id['did']]
            et, duration = row[header_id['end-time']], row[
                header_id['duration']]
            fare = row[header_id['fare']]
            #
            ap_tm, ns_tm = int(row[header_id['ap-trip-mode']]), int(
                row[header_id['ns-trip-mode']])
            vid, st, prev_tet = row[header_id['vid']], eval(
                row[header_id['start-time']]), eval(
                    row[header_id['prev-trip-end-time']])
            #
            is_ap_trip, is_ns_trip = False, False
            #
            if ap_tm == DInAP_PInAP:
                # driver was already inside AP: queue joined at prev trip end
                is_ap_trip = True
                ap_join_queue_time = prev_tet
            elif ap_tm == DOutAP_PInAP:
                # driver came from outside: use the last crossing before st
                is_ap_trip = True
                try:
                    i = bisect(ap_crossing_times[vid], st)
                except KeyError:
                    # vehicle has no recorded crossings; skip this trip
                    logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']]))
                    continue
                ap_join_queue_time = ap_crossing_times[vid][
                    i - 1] if i != 0 else ap_crossing_times[vid][0]
            if is_ap_trip:
                with open('%s/airport-trip-%s.csv' % (airport_trips_dir, yymm),
                          'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    ap_queue_time = st - ap_join_queue_time
                    new_row = [
                        tid, vid, did, st, et, duration, fare, prev_tet, ap_tm,
                        ap_join_queue_time, ap_queue_time
                    ]
                    writer.writerow(new_row)
            #
            if ns_tm == DInNS_PInNS:
                # driver was already inside NS: queue joined at prev trip end
                is_ns_trip = True
                ns_join_queue_time = prev_tet
            elif ns_tm == DOutNS_PInNS:
                # driver came from outside: use the last crossing before st
                is_ns_trip = True
                try:
                    i = bisect(ns_crossing_times[vid], st)
                except KeyError:
                    # vehicle has no recorded crossings; skip this trip
                    logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']]))
                    continue
                ns_join_queue_time = ns_crossing_times[vid][
                    i - 1] if i != 0 else ns_crossing_times[vid][0]
            if is_ns_trip:
                with open(
                        '%s/nightsafari-trip-%s.csv' %
                    (nightsafari_trips_dir, yymm), 'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    ns_queue_time = st - ns_join_queue_time
                    new_row = [
                        tid, vid, did, st, et, duration, fare, prev_tet, ns_tm,
                        ns_join_queue_time, ns_queue_time
                    ]
                    writer.writerow(new_row)
    print 'end the file; %s' % yymm
    logging_msg('end the file; %s' % yymm)
# Beispiel #25
def run():
    """Aggregate hourly duration/fare/queue-time totals for general,
    airport and night-safari trips over 2009-2010 (skipping 2009-12 and
    2010-10), write per-hour productivity rows to hourly_productivities,
    and pickle the time slots whose total duration was zero.

    NOTE(review): row fields are parsed with eval(); acceptable only for
    pipeline-produced csvs, never untrusted input.
    """
    check_dir_create(summary_dir)
    #
    # Pre-build one zeroed accumulator per hourly slot, in order.
    cur_timestamp = datetime.datetime(2008, 12, 31, 23)
    last_timestamp = datetime.datetime(2011, 1, 1, 0)
    hp_summary, time_period_order = {}, []
    while cur_timestamp < last_timestamp:
        cur_timestamp += datetime.timedelta(hours=1)
        yyyy, mm, dd, hh = cur_timestamp.year, cur_timestamp.month, cur_timestamp.day, cur_timestamp.hour
        if yyyy == 2009 and mm == 12: continue
        if yyyy == 2010 and mm == 10: continue
        k = (str(yyyy - 2000), str(mm), str(dd), str(hh))
        hp_summary[k] = [0 for _ in range(len([GEN_DUR, GEN_FARE, \
                                                AP_DUR, AP_FARE, AP_QUEUE, \
                                                NS_DUR, NS_FARE, NS_QUEUE]))]
        time_period_order.append(k)
        #
    yy_l, mm_l, dd_l, hh_l = 'yy', 'mm', 'dd', 'hh'
    # General
    for fn in get_all_files(general_dur_fare_dir, general_dur_fare_prefix,
                            '.csv'):
        print fn
        with open('%s/%s' % (general_dur_fare_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h: i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[
                    hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k): continue
                hp_summary[k][GEN_DUR] += eval(row[hid['gen-duration']])
                hp_summary[k][GEN_FARE] += eval(row[hid['gen-fare']])
    # Aiport
    for fn in get_all_files(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix,
                            '.csv'):
        print fn
        with open('%s/%s' % (ap_dur_fare_q_time_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h: i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[
                    hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k): continue
                hp_summary[k][AP_DUR] += eval(row[hid['ap-duration']])
                hp_summary[k][AP_FARE] += eval(row[hid['ap-fare']])
                hp_summary[k][AP_QUEUE] += eval(row[hid['ap-queue-time']])
    # Night Safari
    for fn in get_all_files(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix,
                            '.csv'):
        print fn
        with open('%s/%s' % (ns_dur_fare_q_time_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h: i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[
                    hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k): continue
                hp_summary[k][NS_DUR] += eval(row[hid['ns-duration']])
                hp_summary[k][NS_FARE] += eval(row[hid['ns-fare']])
                hp_summary[k][NS_QUEUE] += eval(row[hid['ns-queue-time']])
    # Summary
    print 'summary'
    zero_dur = []
    with open(hourly_productivities, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = [
            'yy', 'mm', 'dd', 'hh', 'gen-duration', 'gen-fare', 'ap-duration',
            'ap-fare', 'ap-queue-time', 'ns-duration', 'ns-fare',
            'ns-queue-time', 'gen-productivity', 'ap-productivity',
            'ap-out-productivity', 'ns-productivity', 'ns-out-productivity'
        ]
        writer.writerow(header)
        for k in time_period_order:
            gen_dur, gen_fare, \
            ap_dur, ap_fare, ap_queue, \
            ns_dur, ns_fare, ns_queue = hp_summary[k]
            yy, mm, dd, hh = k
            #
            # -1 marks slots with zero duration; they are also pickled below
            try:
                gen_prod = gen_fare / gen_dur
            except ZeroDivisionError:
                gen_prod = -1
                zero_dur.append([GENERAL, k])
            try:
                ap_prod = ap_fare / (ap_dur + ap_queue)
            except ZeroDivisionError:
                ap_prod = -1
                zero_dur.append([AIRPORT, k])
            # NOTE(review): unlike the ratios above, ap_out_prod and
            # ns_out_prod are not guarded against ZeroDivisionError
            ap_out_prod = (gen_fare - ap_fare) / (gen_dur -
                                                  (ap_dur + ap_queue))
            try:
                ns_prod = ns_fare / (ns_dur + ns_queue)
            except ZeroDivisionError:
                ns_prod = -1
                zero_dur.append([NIGHTSAFARI, k])
            ns_out_prod = (gen_fare - ns_fare) / (gen_dur -
                                                  (ns_dur + ns_queue))
            #
            writer.writerow([
                yy, mm, dd, hh, gen_dur, gen_fare, ap_dur, ap_fare, ap_queue,
                ns_dur, ns_fare, ns_queue, gen_prod, ap_prod, ap_out_prod,
                ns_prod, ns_out_prod
            ])
    #
    save_pickle_file(zero_duration_time_slots, zero_dur)
def run():
    """Aggregate hourly duration/fare/queue-time totals for general,
    airport and night-safari trips over 2009-2010 (skipping 2009-12 and
    2010-10), write per-hour productivity rows to hourly_productivities,
    and pickle the time slots whose total duration was zero.

    NOTE(review): row fields are parsed with eval(); acceptable only for
    pipeline-produced csvs, never untrusted input.
    """
    check_dir_create(summary_dir)
    #
    # Pre-build one zeroed accumulator per hourly slot, in order.
    cur_timestamp = datetime.datetime(2008, 12, 31, 23) 
    last_timestamp = datetime.datetime(2011, 1, 1, 0)
    hp_summary, time_period_order = {}, []
    while cur_timestamp < last_timestamp:
        cur_timestamp += datetime.timedelta(hours=1)
        yyyy, mm, dd, hh = cur_timestamp.year, cur_timestamp.month, cur_timestamp.day, cur_timestamp.hour
        if yyyy == 2009 and mm == 12: continue
        if yyyy == 2010 and mm == 10: continue
        k = (str(yyyy - 2000), str(mm), str(dd), str(hh))
        hp_summary[k] = [0 for _ in range(len([GEN_DUR, GEN_FARE, \
                                                AP_DUR, AP_FARE, AP_QUEUE, \
                                                NS_DUR, NS_FARE, NS_QUEUE]))]
        time_period_order.append(k)
        #
    yy_l, mm_l, dd_l, hh_l = 'yy', 'mm', 'dd', 'hh'
    # General
    for fn in get_all_files(general_dur_fare_dir, general_dur_fare_prefix, '.csv'):
        print fn 
        with open('%s/%s' % (general_dur_fare_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h : i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k): continue
                hp_summary[k][GEN_DUR] += eval(row[hid['gen-duration']])
                hp_summary[k][GEN_FARE] += eval(row[hid['gen-fare']])
    # Aiport
    for fn in get_all_files(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix, '.csv'):
        print fn
        with open('%s/%s' % (ap_dur_fare_q_time_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h : i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k): continue
                hp_summary[k][AP_DUR] += eval(row[hid['ap-duration']])
                hp_summary[k][AP_FARE] += eval(row[hid['ap-fare']])
                hp_summary[k][AP_QUEUE] += eval(row[hid['ap-queue-time']])
    # Night Safari
    for fn in get_all_files(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix, '.csv'):
        print fn
        with open('%s/%s' % (ns_dur_fare_q_time_dir, fn), 'rb') as r_csvfile:
            reader = csv.reader(r_csvfile)
            headers = reader.next()
            hid = {h : i for i, h in enumerate(headers)}
            for row in reader:
                yy, mm, dd, hh = row[hid[yy_l]], row[hid[mm_l]], row[hid[dd_l]], row[hid[hh_l]]
                k = (yy, mm, dd, hh)
                if not hp_summary.has_key(k): continue
                hp_summary[k][NS_DUR] += eval(row[hid['ns-duration']])
                hp_summary[k][NS_FARE] += eval(row[hid['ns-fare']])
                hp_summary[k][NS_QUEUE] += eval(row[hid['ns-queue-time']])
    # Summary
    print 'summary'
    zero_dur = []
    with open(hourly_productivities, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = ['yy', 'mm', 'dd', 'hh',
                    'gen-duration', 'gen-fare',
                    'ap-duration', 'ap-fare', 'ap-queue-time',
                    'ns-duration', 'ns-fare', 'ns-queue-time',
                    'gen-productivity',
                    'ap-productivity', 'ap-out-productivity',
                    'ns-productivity', 'ns-out-productivity']
        writer.writerow(header)
        for k in time_period_order:
            gen_dur, gen_fare, \
            ap_dur, ap_fare, ap_queue, \
            ns_dur, ns_fare, ns_queue = hp_summary[k]
            yy, mm, dd, hh = k
            #
            # -1 marks slots with zero duration; they are also pickled below
            try:
                gen_prod = gen_fare / gen_dur
            except ZeroDivisionError:
                gen_prod = -1
                zero_dur.append([GENERAL, k])
            try:
                ap_prod = ap_fare / (ap_dur + ap_queue)
            except ZeroDivisionError:
                ap_prod = -1
                zero_dur.append([AIRPORT, k])
            # NOTE(review): unlike the ratios above, ap_out_prod and
            # ns_out_prod are not guarded against ZeroDivisionError
            ap_out_prod = (gen_fare - ap_fare) / (gen_dur - (ap_dur + ap_queue))
            try:
                ns_prod = ns_fare / (ns_dur + ns_queue)
            except ZeroDivisionError:
                ns_prod = -1
                zero_dur.append([NIGHTSAFARI, k])
            ns_out_prod = (gen_fare - ns_fare) / (gen_dur - (ns_dur + ns_queue))
            #
            writer.writerow([yy, mm, dd, hh,
                            gen_dur, gen_fare,
                            ap_dur, ap_fare, ap_queue,
                            ns_dur, ns_fare, ns_queue,
                            gen_prod,
                            ap_prod, ap_out_prod,
                            ns_prod, ns_out_prod])
    #
    save_pickle_file(zero_duration_time_slots, zero_dur)    
def process_file(fn):
    """Split one monthly whole-trip csv into airport and night-safari
    trip csvs, attaching the queue-join time and queue-wait time to
    each qualifying trip.

    NOTE(review): row fields are parsed with eval(); acceptable only
    because the csvs are produced by this pipeline -- never run this on
    untrusted input.
    """
    _, _, yymm = fn[:-len('.csv')].split('-')
    print 'handle the file; %s' % yymm 
    logging_msg('handle the file; %s' % yymm)
    #
    # Locate this month's pickled airport queue-crossing times.
    ap_pkl_files = get_all_files(logs_dir, 'ap-crossing-time-', '.pkl')
    ap_pkl_file_path = None
    for pkl_fn in ap_pkl_files:
        _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-')
        if pkl_yymm == yymm:
            ap_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn)
            break
    else:
        # for-else: no pickle matched this month
        assert False, yymm
    ap_crossing_times = load_picle_file(ap_pkl_file_path)
    #
    # Same lookup for the night-safari queue-crossing times.
    ns_pkl_files = get_all_files(logs_dir, 'ns-crossing-time-', '.pkl')
    ns_pkl_file_path = None
    for pkl_fn in ns_pkl_files:
        _, _, _, pkl_yymm = pkl_fn[:-len('.pkl')].split('-')
        if pkl_yymm == yymm:
            ns_pkl_file_path = '%s/%s' % (logs_dir, pkl_fn)
            break
    else:
        # for-else: no pickle matched this month
        assert False, yymm
    ns_crossing_times = load_picle_file(ns_pkl_file_path)
    #
    init_csv_files(yymm)
    
    with open('%s/%s' % (trips_dir, fn), 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = reader.next()
        header_id = {h : i for i, h in enumerate(headers)}
        for row in reader:
            tid, did = row[header_id['tid']], row[header_id['did']]
            et, duration = row[header_id['end-time']], row[header_id['duration']]
            fare = row[header_id['fare']]
            #
            ap_tm, ns_tm = int(row[header_id['ap-trip-mode']]), int(row[header_id['ns-trip-mode']]) 
            vid, st, prev_tet = row[header_id['vid']], eval(row[header_id['start-time']]), eval(row[header_id['prev-trip-end-time']])
            #
            is_ap_trip, is_ns_trip = False, False 
            #
            if ap_tm == DInAP_PInAP:
                # driver was already inside AP: queue joined at prev trip end
                is_ap_trip = True
                ap_join_queue_time = prev_tet
            elif ap_tm == DOutAP_PInAP:
                # driver came from outside: use the last crossing before st
                is_ap_trip = True
                try:
                    i = bisect(ap_crossing_times[vid], st)
                except KeyError:
                    # vehicle has no recorded crossings; skip this trip
                    logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']]))
                    continue
                ap_join_queue_time = ap_crossing_times[vid][i - 1] if i != 0 else ap_crossing_times[vid][0]
            if is_ap_trip:
                with open('%s/airport-trip-%s.csv' % (airport_trips_dir, yymm), 'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    ap_queue_time = st - ap_join_queue_time
                    new_row = [tid, vid, did, st, et, duration, fare, prev_tet,
                                ap_tm, ap_join_queue_time, ap_queue_time]
                    writer.writerow(new_row)
            #
            if ns_tm == DInNS_PInNS:
                # driver was already inside NS: queue joined at prev trip end
                is_ns_trip = True
                ns_join_queue_time = prev_tet
            elif ns_tm == DOutNS_PInNS:
                # driver came from outside: use the last crossing before st
                is_ns_trip = True
                try:
                    i = bisect(ns_crossing_times[vid], st)
                except KeyError:
                    # vehicle has no recorded crossings; skip this trip
                    logging_msg('%s-tid-%s' % (yymm, row[header_id['tid']]))
                    continue
                ns_join_queue_time = ns_crossing_times[vid][i - 1] if i != 0 else ns_crossing_times[vid][0]
            if is_ns_trip:
                with open('%s/nightsafari-trip-%s.csv' % (nightsafari_trips_dir, yymm), 'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    ns_queue_time = st - ns_join_queue_time
                    new_row = [tid, vid, did, st, et, duration, fare, prev_tet,
                                ns_tm, ns_join_queue_time, ns_queue_time]
                    writer.writerow(new_row)        
    print 'end the file; %s' % yymm 
    logging_msg('end the file; %s' % yymm)
from __future__ import division
# Add the root path for packages I made
import os, sys  
sys.path.append(os.getcwd() + '/..')
#
from supports._setting import for_learning_dir
from supports.etc_functions import get_all_files
#
# Report Q-learning progress for every (ALPHA, GAMMA) grid point:
# prints 'None' for a missing directory, otherwise the counts of
# policy ('ALPHA-*') and result ('results-*') pickles it contains.
for i in xrange(11):
    for j in xrange(11):
        # true division is in effect (__future__ import above), so the
        # sweep covers 0.0, 0.1, ..., 1.0
        ALPHA, GAMMA = i / 10, j / 10 
        dn = for_learning_dir + '/ALPHA-%.2f-GAMMA-%.2f' % (ALPHA, GAMMA)
        print ALPHA, GAMMA, 
        if not os.path.exists(dn):
            print 'None'
            continue
        print len(get_all_files(dn, 'ALPHA-', '.pkl')), 
        print len(get_all_files(dn, 'results-', '.pkl'))