def run():
    """Pickle the per-(month, driver) airport-trip fare totals of Y09 and Y10.

    For each yearly airport-trip CSV: rows whose ``did`` equals -1 are
    dropped, the remaining rows are grouped by ``(mm, did)``, the ``fare``
    column of every group is summed and each sum is divided by ``CENT``.
    The two resulting lists are saved, Y09 first, to
    ``driver_monthly_fare_ap_trips``.
    """
    check_dir_create(summary_dir)
    # Read both yearly files first, then filter and aggregate them in turn.
    yearly_frames = [
        pd.read_csv("%s/%s" % (airport_trips_dir, csv_name))
        for csv_name in ("Y09-airport-trip.csv", "Y10-airport-trip.csv")
    ]
    fares_per_year = []
    for frame in yearly_frames:
        kept_rows = frame[(frame["did"] != -1)]
        group_sums = kept_rows.groupby(["mm", "did"]).sum()
        fares_per_year.append(
            [total / CENT for total in list(group_sums["fare"])])
    # Saved layout: [Y09 fares, Y10 fares]
    save_pickle_file(driver_monthly_fare_ap_trips, fares_per_year)
def run():
    """Save Y09 / Y10 airport-trip fares summed per month and driver.

    The result written to ``driver_monthly_fare_ap_trips`` is a two-element
    list ``[Y09 fares, Y10 fares]``; each element holds one value per
    ``(mm, did)`` group, expressed as ``sum(fare) / CENT``.
    """
    check_dir_create(summary_dir)
    #
    path_y09 = '%s/%s' % (airport_trips_dir, 'Y09-airport-trip.csv')
    path_y10 = '%s/%s' % (airport_trips_dir, 'Y10-airport-trip.csv')
    trips_y09 = pd.read_csv(path_y09)
    trips_y10 = pd.read_csv(path_y10)

    def fares_per_month_and_driver(trips):
        # Rows with did == -1 are excluded before aggregating.
        kept = trips[(trips['did'] != -1)]
        fare_sums = kept.groupby(['mm', 'did']).sum()['fare']
        return [fare_sum / CENT for fare_sum in list(fare_sums)]

    fares_y09 = fares_per_month_and_driver(trips_y09)
    fares_y10 = fares_per_month_and_driver(trips_y10)
    #
    save_pickle_file(driver_monthly_fare_ap_trips, [fares_y09, fares_y10])
def run():
    """Collect per-driver monthly fare totals for years 9 and 10 and pickle them.

    Every ``whole-trip-<yymm>.csv`` in ``trips_dir`` is read except the
    months "0912" and "1010". Rows whose ``did`` is -1 are dropped, ``fare``
    is summed per ``did`` and divided by ``CENT``. The per-month lists are
    concatenated per year and saved as ``[Y09 list, Y10 list]`` to
    ``monthly_fare_summary``.
    """
    check_dir_create(summary_dir)
    #
    excluded_months = ["0912", "1010"]
    fares_by_year = {9: [], 10: []}
    for y in xrange(9, 11):
        for m in xrange(1, 13):
            yymm = "%02d%02d" % (y, m)
            if yymm not in excluded_months:
                monthly_trips = pd.read_csv(
                    "%s/whole-trip-%s.csv" % (trips_dir, yymm))
                monthly_trips = monthly_trips[(monthly_trips["did"] != -1)]
                driver_sums = monthly_trips.groupby(["did"]).sum()["fare"]
                fares_by_year[y].extend(
                    [total / CENT for total in list(driver_sums)])
    save_pickle_file(monthly_fare_summary,
                     [fares_by_year[9], fares_by_year[10]])
Exemple #4
0
def run():
    """Load the monthly measures of both years and draw their month change.

    Three pickles are loaded (general productivity, airport and night-safari
    productivity / economic profit) and arranged in a dictionary keyed by
    ``(measure label, year label)`` which is handed to ``draw_month_change``.
    """
    check_dir_create(tables_dir)
    write_text_file(ftd_overall_analysis, 'Init', True)
    #
    # Load data
    #
    gen_y09, gen_y10 = load_picle_file(ftd_general_prod_mb)
    # Each of the next two pickles holds four (prod, eco_prof) pairs ordered
    # as: Y09 in, Y10 in, Y09 out, Y10 out.
    ((ap_in_prod_y09, ap_in_eco_y09),
     (ap_in_prod_y10, ap_in_eco_y10),
     (ap_out_prod_y09, ap_out_eco_y09),
     (ap_out_prod_y10, ap_out_eco_y10)) = load_picle_file(ftd_ap_prod_eco_prof_mb)
    ((ns_in_prod_y09, ns_in_eco_y09),
     (ns_in_prod_y10, ns_in_eco_y10),
     (ns_out_prod_y09, ns_out_eco_y09),
     (ns_out_prod_y10, ns_out_eco_y10)) = load_picle_file(ftd_ns_prod_eco_prof_mb)
    #
    # (measure label, Y09 data, Y10 data)
    measures = [
        ('General prod.', gen_y09, gen_y10),
        #
        ('AP in prod.', ap_in_prod_y09, ap_in_prod_y10),
        ('AP out prod.', ap_out_prod_y09, ap_out_prod_y10),
        ('AP in eco.', ap_in_eco_y09, ap_in_eco_y10),
        ('AP out eco.', ap_out_eco_y09, ap_out_eco_y10),
        #
        ('NS in prod.', ns_in_prod_y09, ns_in_prod_y10),
        ('NS out prod.', ns_out_prod_y09, ns_out_prod_y10),
        ('NS in eco.', ns_in_eco_y09, ns_in_eco_y10),
        ('NS out eco.', ns_out_eco_y09, ns_out_eco_y10),
    ]
    my_d = {}
    for label, y09_data, y10_data in measures:
        my_d[(label, 'Y09')] = y09_data
        my_d[(label, 'Y10')] = y10_data
    #
    # The both-year comparison step is currently disabled:
    #     both_year_comparison(my_d)
    draw_month_change(my_d)
def run():
    """Assemble the (measure, year) -> data table and plot the month change.

    Reads the general, airport (AP) and night-safari (NS) monthly pickles,
    stores every series under a ``(measure label, year label)`` key and
    passes the resulting dictionary to ``draw_month_change``.
    """
    check_dir_create(tables_dir)
    write_text_file(ftd_overall_analysis, 'Init', True)
    #
    # Load data
    #
    general_y09, general_y10 = load_picle_file(ftd_general_prod_mb)
    # Pair order inside the AP / NS pickles: Y09 in, Y10 in, Y09 out, Y10 out;
    # every pair is (productivity, economic profit).
    [(apin_prod_09, apin_eco_09),
     (apin_prod_10, apin_eco_10),
     (apout_prod_09, apout_eco_09),
     (apout_prod_10, apout_eco_10)] = load_picle_file(ftd_ap_prod_eco_prof_mb)
    [(nsin_prod_09, nsin_eco_09),
     (nsin_prod_10, nsin_eco_10),
     (nsout_prod_09, nsout_eco_09),
     (nsout_prod_10, nsout_eco_10)] = load_picle_file(ftd_ns_prod_eco_prof_mb)
    #
    # Keys are (Measure, Year)
    my_d = {}
    my_d[('General prod.', 'Y09')] = general_y09
    my_d[('General prod.', 'Y10')] = general_y10
    # Airport
    my_d[('AP in prod.', 'Y09')] = apin_prod_09
    my_d[('AP in prod.', 'Y10')] = apin_prod_10
    my_d[('AP out prod.', 'Y09')] = apout_prod_09
    my_d[('AP out prod.', 'Y10')] = apout_prod_10
    my_d[('AP in eco.', 'Y09')] = apin_eco_09
    my_d[('AP in eco.', 'Y10')] = apin_eco_10
    my_d[('AP out eco.', 'Y09')] = apout_eco_09
    my_d[('AP out eco.', 'Y10')] = apout_eco_10
    # Night safari
    my_d[('NS in prod.', 'Y09')] = nsin_prod_09
    my_d[('NS in prod.', 'Y10')] = nsin_prod_10
    my_d[('NS out prod.', 'Y09')] = nsout_prod_09
    my_d[('NS out prod.', 'Y10')] = nsout_prod_10
    my_d[('NS in eco.', 'Y09')] = nsin_eco_09
    my_d[('NS in eco.', 'Y10')] = nsin_eco_10
    my_d[('NS out eco.', 'Y09')] = nsout_eco_09
    my_d[('NS out eco.', 'Y10')] = nsout_eco_10
    #
    # Disabled step, kept for reference:
    #     both_year_comparison(my_d)
    draw_month_change(my_d)
def run():
    """Pickle per-driver monthly fare totals, one list per year.

    Months are visited in order from 0901 to 1012; '0912' and '1010' are
    skipped. For each remaining month the ``whole-trip`` CSV is read, rows
    with ``did`` equal to -1 are removed, ``fare`` is summed per ``did`` and
    scaled by ``1 / CENT``. The values are appended to the list of the
    month's year and ``[Y09 list, Y10 list]`` is saved to
    ``monthly_fare_summary``.
    """
    check_dir_create(summary_dir)
    #
    y09_fares, y10_fares = [], []
    # (year, 'yymm') for every month, year-major order
    periods = [(y, '%02d%02d' % (y, m))
               for y in xrange(9, 11)
               for m in xrange(1, 13)]
    for year, yymm in periods:
        if yymm in ['0912', '1010']:
            continue
        trips = pd.read_csv('%s/whole-trip-%s.csv' % (trips_dir, yymm))
        trips = trips[(trips['did'] != -1)]
        per_driver = trips.groupby(['did']).sum()
        scaled = [fare_sum / CENT for fare_sum in list(per_driver['fare'])]
        destination = y09_fares if year == 9 else y10_fares
        destination.extend(scaled)
    save_pickle_file(monthly_fare_summary, [y09_fares, y10_fares])
Exemple #7
0
def run():
    """Aggregate hourly measures and write the hourly productivity CSV.

    Steps:
      1. Create one zero-filled accumulator per hourly slot. Slot keys are
         string tuples ``(yy, mm, dd, hh)`` built without zero padding
         (``str(year - 2000)``, ``str(month)``, ...). The loop advances the
         timestamp before recording it, so slots run from 2009-01-01 00h
         through 2011-01-01 00h inclusive; December 2009 and October 2010
         are skipped.
      2. Add the duration / fare / queue-time columns of the general,
         airport and night-safari CSV files into the matching slots. Rows
         whose key is not a known slot are ignored.
      3. Write one row per slot (in chronological order) to
         ``hourly_productivities`` including the derived productivities,
         and pickle the list of zero-duration slots to
         ``zero_duration_time_slots``.

    A productivity whose denominator is zero is written as -1. For the
    general, airport and night-safari productivities such a slot is also
    recorded in the pickled list; the two "out" productivities only get
    the -1 marker so the pickled list keeps its existing content.
    """
    check_dir_create(summary_dir)
    #
    # One accumulator cell per measure index used below.
    num_measures = len([GEN_DUR, GEN_FARE,
                        AP_DUR, AP_FARE, AP_QUEUE,
                        NS_DUR, NS_FARE, NS_QUEUE])
    one_hour = datetime.timedelta(hours=1)
    cur_timestamp = datetime.datetime(2008, 12, 31, 23)
    last_timestamp = datetime.datetime(2011, 1, 1, 0)
    hp_summary, time_period_order = {}, []
    # NOTE(review): because the increment happens first, the final slot is
    # 2011-01-01 00h -- confirm that this trailing slot is intended.
    while cur_timestamp < last_timestamp:
        cur_timestamp += one_hour
        yyyy, mm, dd, hh = (cur_timestamp.year, cur_timestamp.month,
                            cur_timestamp.day, cur_timestamp.hour)
        if (yyyy, mm) in ((2009, 12), (2010, 10)):
            continue
        k = (str(yyyy - 2000), str(mm), str(dd), str(hh))
        hp_summary[k] = [0] * num_measures
        time_period_order.append(k)
    #
    time_labels = ('yy', 'mm', 'dd', 'hh')

    def accumulate(data_dir, prefix, column_slots):
        """Add the given CSV columns of every matching file into hp_summary.

        ``column_slots`` is a sequence of ``(csv column name, measure index)``.
        """
        for fn in get_all_files(data_dir, prefix, '.csv'):
            print(fn)
            with open('%s/%s' % (data_dir, fn), 'rb') as r_csvfile:
                reader = csv.reader(r_csvfile)
                hid = {h: i for i, h in enumerate(next(reader))}
                for row in reader:
                    k = tuple(row[hid[label]] for label in time_labels)
                    if k not in hp_summary:
                        continue
                    slot_values = hp_summary[k]
                    for column, slot in column_slots:
                        # NOTE(security): eval() executes whatever expression
                        # the cell contains. It is kept so each value retains
                        # its int/float type, but these CSV files must come
                        # from a trusted source (ast.literal_eval would be
                        # the safe alternative).
                        slot_values[slot] += eval(row[hid[column]])

    # General
    accumulate(general_dur_fare_dir, general_dur_fare_prefix,
               [('gen-duration', GEN_DUR), ('gen-fare', GEN_FARE)])
    # Airport
    accumulate(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix,
               [('ap-duration', AP_DUR), ('ap-fare', AP_FARE),
                ('ap-queue-time', AP_QUEUE)])
    # Night Safari
    accumulate(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix,
               [('ns-duration', NS_DUR), ('ns-fare', NS_FARE),
                ('ns-queue-time', NS_QUEUE)])
    # Summary
    print('summary')
    zero_dur = []
    with open(hourly_productivities, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = [
            'yy', 'mm', 'dd', 'hh', 'gen-duration', 'gen-fare', 'ap-duration',
            'ap-fare', 'ap-queue-time', 'ns-duration', 'ns-fare',
            'ns-queue-time', 'gen-productivity', 'ap-productivity',
            'ap-out-productivity', 'ns-productivity', 'ns-out-productivity'
        ]
        writer.writerow(header)
        for k in time_period_order:
            gen_dur, gen_fare, \
            ap_dur, ap_fare, ap_queue, \
            ns_dur, ns_fare, ns_queue = hp_summary[k]
            yy, mm, dd, hh = k
            #
            try:
                gen_prod = gen_fare / gen_dur
            except ZeroDivisionError:
                gen_prod = -1
                zero_dur.append([GENERAL, k])
            try:
                ap_prod = ap_fare / (ap_dur + ap_queue)
            except ZeroDivisionError:
                ap_prod = -1
                zero_dur.append([AIRPORT, k])
            # The "out" ratios divide by the time spent outside the location;
            # that is zero e.g. for a slot without any data, so they need the
            # same guard as the other ratios instead of aborting the run.
            try:
                ap_out_prod = (gen_fare - ap_fare) / (gen_dur -
                                                      (ap_dur + ap_queue))
            except ZeroDivisionError:
                ap_out_prod = -1
            try:
                ns_prod = ns_fare / (ns_dur + ns_queue)
            except ZeroDivisionError:
                ns_prod = -1
                zero_dur.append([NIGHTSAFARI, k])
            try:
                ns_out_prod = (gen_fare - ns_fare) / (gen_dur -
                                                      (ns_dur + ns_queue))
            except ZeroDivisionError:
                ns_out_prod = -1
            #
            writer.writerow([
                yy, mm, dd, hh, gen_dur, gen_fare, ap_dur, ap_fare, ap_queue,
                ns_dur, ns_fare, ns_queue, gen_prod, ap_prod, ap_out_prod,
                ns_prod, ns_out_prod
            ])
    #
    save_pickle_file(zero_duration_time_slots, zero_dur)
Exemple #8
0
def run():
    """Compute Y10 - Y09 differences per key and run the two group analyses.

    The general, airport and night-safari pickles are loaded, the per-key
    difference between the Y10 and the Y09 mapping is computed for every
    measure, and the resulting dictionaries are passed to ``an_analysis``
    (airport) and ``ns_analysis`` (night safari). Section headers are
    written to ``ftd_group_analysis`` before each analysis.
    """
    def difference(data0, data1):
        # data1[k] - data0[k] for every key of data0; a key missing from
        # data1 raises KeyError.
        return {key: data1[key] - base for key, base in data0.iteritems()}

    check_dir_create(tables_dir)
    write_text_file(ftd_group_analysis, 'Init', True)
    #
    # Load data_20160826
    #
    gen_ap_y09, gen_ap_y10 = load_picle_file(ftd_general_prod_db_for_ap)
    gen_ns_y09, gen_ns_y10 = load_picle_file(ftd_general_prod_db_for_ns)
    # Pair order in the next two pickles: Y09 in, Y10 in, Y09 out, Y10 out;
    # each pair is (productivity, economic profit).
    ((apin_prod_y09, apin_eco_y09),
     (apin_prod_y10, apin_eco_y10),
     (apout_prod_y09, apout_eco_y09),
     (apout_prod_y10, apout_eco_y10)) = load_picle_file(ftd_ap_prod_eco_prof_db)
    ((nsin_prod_y09, nsin_eco_y09),
     (nsin_prod_y10, nsin_eco_y10),
     (nsout_prod_y09, nsout_eco_y09),
     (nsout_prod_y10, nsout_eco_y10)) = load_picle_file(ftd_ns_prod_eco_prof_db)
    #
    # (result key, Y09 data, Y10 data)
    ap_measures = [
        ('diff_g_prod_for_ap', gen_ap_y09, gen_ap_y10),
        #
        ('diff_apin_prod', apin_prod_y09, apin_prod_y10),
        ('diff_apout_prod', apout_prod_y09, apout_prod_y10),
        ('diff_apin_eco', apin_eco_y09, apin_eco_y10),
        ('diff_apout_eco', apout_eco_y09, apout_eco_y10),
    ]
    diffs_for_ap = {}
    for result_key, before, after in ap_measures:
        diffs_for_ap[result_key] = difference(before, after)

    ns_measures = [
        ('diff_g_prod_for_ns', gen_ns_y09, gen_ns_y10),
        #
        ('diff_nsin_prod', nsin_prod_y09, nsin_prod_y10),
        ('diff_nsout_prod', nsout_prod_y09, nsout_prod_y10),
        ('diff_nsin_eco', nsin_eco_y09, nsin_eco_y10),
        ('diff_nsout_eco', nsout_eco_y09, nsout_eco_y10),
    ]
    diffs_for_ns = {}
    for result_key, before, after in ns_measures:
        diffs_for_ns[result_key] = difference(before, after)
    #
    ap_section_title = 'Airport analysis ---------------------------------------------------'
    write_text_file(ftd_group_analysis, ap_section_title)
    # NOTE(review): the airport analysis goes through `an_analysis` while the
    # night-safari one uses `ns_analysis` -- confirm the name is intended.
    an_analysis(diffs_for_ap)
    ns_section_title = '\nNight safari analysis ---------------------------------------------------'
    write_text_file(ftd_group_analysis, ns_section_title)
    ns_analysis(diffs_for_ns)
def run():
    """Summarize hourly durations, fares and queue times into productivities.

    * Hourly slots are keyed by the string tuple ``(yy, mm, dd, hh)`` (no
      zero padding). The timestamp is advanced before it is recorded, so
      the slots span 2009-01-01 00h to 2011-01-01 00h inclusive, without
      December 2009 and October 2010.
    * The general, airport and night-safari CSV files are read and their
      duration / fare / queue-time columns are added to the slot they
      belong to; rows for unknown slots are skipped.
    * One CSV row per slot is written to ``hourly_productivities`` and the
      zero-duration slots are pickled to ``zero_duration_time_slots``.

    Any productivity with a zero denominator is reported as -1. Only the
    general, airport and night-safari productivities add an entry to the
    pickled zero-duration list; the "out" productivities just get -1.
    """
    check_dir_create(summary_dir)
    #
    measure_count = len([GEN_DUR, GEN_FARE,
                         AP_DUR, AP_FARE, AP_QUEUE,
                         NS_DUR, NS_FARE, NS_QUEUE])
    hour_step = datetime.timedelta(hours=1)
    cur_timestamp = datetime.datetime(2008, 12, 31, 23)
    last_timestamp = datetime.datetime(2011, 1, 1, 0)
    hp_summary, time_period_order = {}, []
    # NOTE(review): incrementing before use makes 2011-01-01 00h the last
    # slot -- confirm that this extra hour is wanted.
    while cur_timestamp < last_timestamp:
        cur_timestamp += hour_step
        yyyy, mm = cur_timestamp.year, cur_timestamp.month
        if yyyy == 2009 and mm == 12:
            continue
        if yyyy == 2010 and mm == 10:
            continue
        k = (str(yyyy - 2000), str(mm),
             str(cur_timestamp.day), str(cur_timestamp.hour))
        hp_summary[k] = [0] * measure_count
        time_period_order.append(k)
    #
    yy_l, mm_l, dd_l, hh_l = 'yy', 'mm', 'dd', 'hh'

    def add_csv_measures(data_dir, prefix, targets):
        """Sum CSV columns into hp_summary.

        ``targets`` lists ``(column name, measure index)`` pairs; every
        '.csv' file returned by ``get_all_files(data_dir, prefix, '.csv')``
        is processed.
        """
        for fn in get_all_files(data_dir, prefix, '.csv'):
            print(fn)
            with open('%s/%s' % (data_dir, fn), 'rb') as r_csvfile:
                reader = csv.reader(r_csvfile)
                headers = next(reader)
                hid = {h: i for i, h in enumerate(headers)}
                for row in reader:
                    k = (row[hid[yy_l]], row[hid[mm_l]],
                         row[hid[dd_l]], row[hid[hh_l]])
                    if k not in hp_summary:
                        continue
                    for column, index in targets:
                        # NOTE(security): eval() runs the cell content as
                        # code; retained to keep each value's int/float
                        # type, so the input files must be trusted
                        # (ast.literal_eval is the safe alternative).
                        hp_summary[k][index] += eval(row[hid[column]])

    # General
    add_csv_measures(general_dur_fare_dir, general_dur_fare_prefix,
                     [('gen-duration', GEN_DUR), ('gen-fare', GEN_FARE)])
    # Airport
    add_csv_measures(ap_dur_fare_q_time_dir, ap_dur_fare_q_time_prefix,
                     [('ap-duration', AP_DUR), ('ap-fare', AP_FARE),
                      ('ap-queue-time', AP_QUEUE)])
    # Night Safari
    add_csv_measures(ns_dur_fare_q_time_dir, ns_dur_fare_q_time_prefix,
                     [('ns-duration', NS_DUR), ('ns-fare', NS_FARE),
                      ('ns-queue-time', NS_QUEUE)])
    # Summary
    print('summary')
    zero_dur = []
    with open(hourly_productivities, 'wt') as w_csvfile:
        writer = csv.writer(w_csvfile)
        header = ['yy', 'mm', 'dd', 'hh',
                  'gen-duration', 'gen-fare',
                  'ap-duration', 'ap-fare', 'ap-queue-time',
                  'ns-duration', 'ns-fare', 'ns-queue-time',
                  'gen-productivity',
                  'ap-productivity', 'ap-out-productivity',
                  'ns-productivity', 'ns-out-productivity']
        writer.writerow(header)
        for k in time_period_order:
            gen_dur, gen_fare, \
            ap_dur, ap_fare, ap_queue, \
            ns_dur, ns_fare, ns_queue = hp_summary[k]
            yy, mm, dd, hh = k
            #
            try:
                gen_prod = gen_fare / gen_dur
            except ZeroDivisionError:
                gen_prod = -1
                zero_dur.append([GENERAL, k])
            try:
                ap_prod = ap_fare / (ap_dur + ap_queue)
            except ZeroDivisionError:
                ap_prod = -1
                zero_dur.append([AIRPORT, k])
            # Guarded like the other ratios: the remaining duration is zero
            # for slots without data, which previously aborted the run.
            try:
                ap_out_prod = (gen_fare - ap_fare) / (gen_dur - (ap_dur + ap_queue))
            except ZeroDivisionError:
                ap_out_prod = -1
            try:
                ns_prod = ns_fare / (ns_dur + ns_queue)
            except ZeroDivisionError:
                ns_prod = -1
                zero_dur.append([NIGHTSAFARI, k])
            try:
                ns_out_prod = (gen_fare - ns_fare) / (gen_dur - (ns_dur + ns_queue))
            except ZeroDivisionError:
                ns_out_prod = -1
            #
            writer.writerow([yy, mm, dd, hh,
                             gen_dur, gen_fare,
                             ap_dur, ap_fare, ap_queue,
                             ns_dur, ns_fare, ns_queue,
                             gen_prod,
                             ap_prod, ap_out_prod,
                             ns_prod, ns_out_prod])
    #
    save_pickle_file(zero_duration_time_slots, zero_dur)