def run():
    """Write Y2010-vs-Y2009 t-test tables for the 'ap' and 'ns' data sets.

    For each label, the measure names are taken from the header row of the
    corresponding "both" stat CSV (every column after 'yy', 'mm', 'did').
    For every measure, the '<measure>-mean' column of the monthly stat CSV
    is split on the 'yy' column (9 vs. 10), the two groups are averaged with
    ``np.mean`` and compared with ``stats.ttest_ind``.  One table per label
    is written to ``<a1_table_dir>/t-test-<label>.txt``.
    """
    a1_table_dir = tables_dir + '/c_individual_a1 both year t test'
    remove_create_dir(a1_table_dir)
    #
    key_columns = ['yy', 'mm', 'did']
    # NOTE: the 'Measeure' spelling is part of the emitted table and is kept
    # unchanged so existing outputs stay comparable.
    table_headers = ['Measeure', 'Diff.', 'Y2010', 'Y2009', 't-statistic', 'p-value']
    for label, stat_both_fn, monthly_stat_fn in [('ap', ftd_stat_ap_both_fn, ftd_monthly_stats_ap_fn),
                                                 ('ns', ftd_stat_ns_both_fn, ftd_monthly_stats_ns_fn)]:
        # Only the header row of the "both" file is needed.
        with open(stat_both_fn, 'rb') as r_csvfile:
            headers = next(csv.reader(r_csvfile))
        column_names = headers[len(key_columns):]
        Y09Y10_df = pd.read_csv(monthly_stat_fn)
        Y09_df = Y09Y10_df[Y09Y10_df['yy'] == 9]
        Y10_df = Y09Y10_df[Y09Y10_df['yy'] == 10]
        #
        _table = PrettyTable(list(table_headers))
        # Left-align the label column and right-align the numeric columns.
        # Previously the loop also covered the label column, which silently
        # overrode its left alignment.
        _table.align[table_headers[0]] = 'l'
        for header in table_headers[1:]:
            _table.align[header] = 'r'

        for cn in column_names:
            Y10_values, Y09_values = Y10_df[cn + '-mean'], Y09_df[cn + '-mean']
            Y10_mean, Y09_mean = np.mean(Y10_values), np.mean(Y09_values)
            t_stats, p_value = stats.ttest_ind(Y10_values, Y09_values)
            _table.add_row([cn, Y10_mean - Y09_mean,
                            Y10_mean, Y09_mean, t_stats, p_value])
        write_text_file(a1_table_dir + '/t-test-%s.txt' % label, _table.get_string(), is_first=True)
def run():
    """Write Y2010-vs-Y2009 t-test tables for the 'ap' and 'ns' data sets.

    For each label, the measure names are taken from the header row of the
    corresponding "both" stat CSV (every column after 'yy', 'mm', 'did').
    For every measure, the '<measure>-mean' column of the monthly stat CSV
    is split on the 'yy' column (9 vs. 10), the two groups are averaged with
    ``np.mean`` and compared with ``stats.ttest_ind``.  One table per label
    is written to ``<a1_table_dir>/t-test-<label>.txt``.
    """
    a1_table_dir = tables_dir + '/c_individual_a1 both year t test'
    remove_create_dir(a1_table_dir)
    #
    key_columns = ['yy', 'mm', 'did']
    # NOTE: the 'Measeure' spelling is part of the emitted table and is kept
    # unchanged so existing outputs stay comparable.
    table_headers = [
        'Measeure', 'Diff.', 'Y2010', 'Y2009', 't-statistic', 'p-value'
    ]
    for label, stat_both_fn, monthly_stat_fn in [
        ('ap', ftd_stat_ap_both_fn, ftd_monthly_stats_ap_fn),
        ('ns', ftd_stat_ns_both_fn, ftd_monthly_stats_ns_fn)
    ]:
        # Only the header row of the "both" file is needed.
        with open(stat_both_fn, 'rb') as r_csvfile:
            headers = next(csv.reader(r_csvfile))
        column_names = headers[len(key_columns):]
        Y09Y10_df = pd.read_csv(monthly_stat_fn)
        Y09_df = Y09Y10_df[Y09Y10_df['yy'] == 9]
        Y10_df = Y09Y10_df[Y09Y10_df['yy'] == 10]
        #
        _table = PrettyTable(list(table_headers))
        # Left-align the label column and right-align the numeric columns.
        # Previously the loop also covered the label column, which silently
        # overrode its left alignment.
        _table.align[table_headers[0]] = 'l'
        for header in table_headers[1:]:
            _table.align[header] = 'r'

        for cn in column_names:
            Y10_values, Y09_values = Y10_df[cn + '-mean'], Y09_df[cn + '-mean']
            Y10_mean, Y09_mean = np.mean(Y10_values), np.mean(Y09_values)
            t_stats, p_value = stats.ttest_ind(Y10_values, Y09_values)
            _table.add_row([
                cn, Y10_mean - Y09_mean, Y10_mean, Y09_mean, t_stats, p_value
            ])
        write_text_file(a1_table_dir + '/t-test-%s.txt' % label,
                        _table.get_string(),
                        is_first=True)
def run():
    """Recreate ap_ep_dir and ns_ep_dir, then queue process_files per month.

    Month keys are 'yymm' strings for years 09-10 and months 01-12; the keys
    '0912' and '1010' are skipped.  The number of queued tasks is handed to
    end_multiprocessor.
    """
    remove_create_dir(ap_ep_dir)
    remove_create_dir(ns_ep_dir)
    #
    init_multiprocessor(3)  # presumably the worker count -- confirm in helper
    skipped_months = ('0912', '1010')
    num_jobs = 0
    for year in xrange(9, 11):
        for month in xrange(1, 13):
            yymm = '%02d%02d' % (year, month)
            if yymm not in skipped_months:
                # For a serial run, call process_files(yymm) directly instead.
                put_task(process_files, [yymm])
                num_jobs += 1
    end_multiprocessor(num_jobs)
def run():
    """Draw the monthly queueing-time line chart for the 'ap' and 'ns' data.

    For each label the monthly stat CSV is loaded, the 'pin-qu-mean' and
    'pout-qu-mean' columns are divided by SEC60, and both series are passed
    to multiple_line_chart, which is given
    ``<a4_chart_dir>/queuing-time-<label>`` as its last argument.
    """
    a4_chart_dir = charts_dir + '/c_individual_a4 monthly queueing time'
    remove_create_dir(a4_chart_dir)
    #
    # 'yymm' tick labels: 0901-0911 and 1001-1012 without 1010 (22 entries).
    months = ['09%02d' % m for m in range(1, 12)] + \
             ['10%02d' % m for m in range(1, 13) if m != 10]
    measure_names = ['pin-qu', 'pout-qu']
    data_sets = [('ap', ftd_monthly_stats_ap_fn),
                 ('ns', ftd_monthly_stats_ns_fn)]
    for label, monthly_stat_fn in data_sets:
        monthly_df = pd.read_csv(monthly_stat_fn)
        series = [(monthly_df['%s-mean' % name] / float(SEC60)).values
                  for name in measure_names]
        out_path = a4_chart_dir + '/queuing-time-%s' % label
        multiple_line_chart((15, 7.5), '', 'Year and Month', 'Minute', (months, 20),
                            series, measure_names, 'upper left', out_path)
def run():
    """Draw the monthly productivity line chart for the 'ap' and 'ns' data.

    For each label the monthly stat CSV is loaded, the 'gen-prod-mean',
    'pin-prod-mean' and 'pout-prod-mean' columns are multiplied by SEC3600
    and divided by CENT, and the three series are passed to
    multiple_line_chart, which is given
    ``<a2_chart_dir>/productivity-<label>`` as its last argument.
    """
    a2_chart_dir = charts_dir + '/c_individual_a2 monthly productivity'
    remove_create_dir(a2_chart_dir)
    #
    # 'yymm' tick labels: 0901-0911 and 1001-1012 without 1010 (22 entries).
    months = ['09%02d' % m for m in range(1, 12)] + \
             ['10%02d' % m for m in range(1, 13) if m != 10]
    measure_names = ['gen-prod', 'pin-prod', 'pout-prod']
    data_sets = [('ap', ftd_monthly_stats_ap_fn),
                 ('ns', ftd_monthly_stats_ns_fn)]
    for label, monthly_stat_fn in data_sets:
        monthly_df = pd.read_csv(monthly_stat_fn)
        series = [(monthly_df['%s-mean' % name] * SEC3600 / float(CENT)).values
                  for name in measure_names]
        out_path = a2_chart_dir + '/productivity-%s' % label
        multiple_line_chart((15, 7.5), '', 'Year and Month', '$S/Hour', (months, 20),
                            series, measure_names, 'upper left', out_path)
def run():
    """Recreate ap_ep_dir and ns_ep_dir, then queue process_files per month.

    Every 'yymm' key for years 09-10 and months 01-12 is queued except
    '0912' and '1010'; the count of queued tasks goes to end_multiprocessor.
    """
    for ep_dir in (ap_ep_dir, ns_ep_dir):
        remove_create_dir(ep_dir)
    #
    init_multiprocessor(3)  # presumably the worker count -- confirm in helper
    excluded = ('0912', '1010')
    candidates = ['%02d%02d' % (yy, mm)
                  for yy in xrange(9, 11)
                  for mm in xrange(1, 13)]
    queued = [yymm for yymm in candidates if yymm not in excluded]
    for yymm in queued:
        # For a serial run, call process_files(yymm) directly instead.
        put_task(process_files, [yymm])
    end_multiprocessor(len(queued))
Esempio n. 7
0
def process_files(yymm):
    """Split one month of raw logs into per-time-slot CSVs of FREE-state rows.

    Reads ``<taxi_home>/20<yy>/<mm>/logs/logs-<yymm>-normal.csv``.  Rows whose
    driver id is '-1' are ignored.  For every remaining row whose state equals
    FREE, ``[time, i, j, did, FREE-dur]`` is appended to
    ``<logs_dir>/<yymm>/<YYYYMMDD>-<slot>.csv`` where:

    * the date and ``slot = int(hour / HOUR1)`` come from the row's own
      timestamp (``datetime.fromtimestamp(time)``),
    * ``i`` / ``j`` are ``bisect`` positions (minus one) of the longitude /
      latitude in the grid returned by ``get_sg_grid_xy_points()``,
    * ``FREE-dur`` is the time elapsed since the same driver's previous log
      row of any state (0 for the driver's first row).

    The output directory is recreated first, and the file for the first slot
    of the month is always created, even when it receives no rows.

    Fix over the previous version: on a slot change the old code re-ran
    ``init_processed_file`` with the *stale* slot start, which reopened that
    slot's file in 'wt' mode (truncating rows already written) and kept
    appending later rows to a file labelled with the previous slot.  The file
    is now chosen from each row's timestamp and each slot file is created at
    most once per run, so no separate last-slot special case is needed.

    :param yymm: month key such as '0901' (two-digit year + two-digit month).
    """
    yymm_logs_dir = logs_dir + '/%s' % yymm
    remove_create_dir(yymm_logs_dir)
    slot_files = {}  # (year, month, day, slot) -> path of the file created in this run

    def init_processed_file(h_dt):
        # Return the CSV path for h_dt's slot, writing the header row the
        # first time the slot is seen (never truncating an existing slot file).
        slot = (h_dt.year, h_dt.month, h_dt.day, int(h_dt.hour / float(HOUR1)))
        if slot not in slot_files:
            processed_fn = yymm_logs_dir + '/%d%02d%02d-%d.csv' % slot
            with open(processed_fn, 'wt') as w_csvfile:
                writer = csv.writer(w_csvfile)
                writer.writerow(['time', 'i', 'j', 'did', 'FREE-dur'])
            slot_files[slot] = processed_fn
        return slot_files[slot]

    yy, mm = yymm[:2], yymm[2:]
    fn = '%s/20%s/%s/logs/logs-%s-normal.csv' % (taxi_home, yy, mm, yymm)

    with open(fn, 'rb') as r_csvfile:
        reader = csv.reader(r_csvfile)
        headers = next(reader)
        # e.g. {'longitude': 3, 'state': 6, 'vehicle-id': 1, 'time': 0, 'latitude': 4, 'speed': 5, 'driver-id': 2}
        hid = {h: i for i, h in enumerate(headers)}
        # Keep the previous behavior of always creating the month's first slot file.
        init_processed_file(datetime.datetime(2000 + int(yy), int(mm), 1, 0))
        drivers_lts = {}  # driver id -> timestamp of that driver's last log row
        x_points, y_points = get_sg_grid_xy_points()
        for row in reader:
            did = row[hid['driver-id']]
            if did == '-1':
                continue
            # NOTE(review): eval() on CSV fields executes arbitrary expressions.
            # It is kept so the numeric type of `t` (and hence the text written
            # to the output) is unchanged; replace it with a strict numeric
            # parser if the log files are not fully trusted.
            t = eval(row[hid['time']])
            longitude, latitude = eval(row[hid['longitude']]), eval(row[hid['latitude']])
            state = int(row[hid['state']])
            # First sighting of a driver yields a duration of 0.
            state_duration = t - drivers_lts.get(did, t)
            if state == FREE:
                i, j = bisect(x_points, longitude) - 1, bisect(y_points, latitude) - 1
                cur_dt = datetime.datetime.fromtimestamp(t)
                processed_fn = init_processed_file(cur_dt)
                with open(processed_fn, 'a') as w_csvfile:
                    writer = csv.writer(w_csvfile)
                    writer.writerow([t, i, j, did, state_duration])
            drivers_lts[did] = t
Esempio n. 8
0
def run():
    """Recreate the five ftd stat directories, then queue process_files per month.

    Every 'yymm' key for years 09-10 and months 01-12 is queued except
    '0912' and '1010'; the count of queued tasks goes to end_multiprocessor.
    """
    stat_dirs = (ftd_gen_stat_dir,
                 ftd_prev_in_ap_stat_dir, ftd_prev_out_ap_stat_dir,
                 ftd_prev_in_ns_stat_dir, ftd_prev_out_ns_stat_dir)
    for stat_dir in stat_dirs:
        remove_create_dir(stat_dir)
    #
    init_multiprocessor()
    excluded = ('0912', '1010')
    num_jobs = 0
    for year in xrange(9, 11):
        for month in xrange(1, 13):
            yymm = '%02d%02d' % (year, month)
            if yymm not in excluded:
                # For a serial run, call process_files(yymm) directly instead.
                put_task(process_files, [yymm])
                num_jobs += 1
    end_multiprocessor(num_jobs)
Esempio n. 9
0
def run():
    """Recreate ftd_trips_dir and ftd_shift_dir, then queue process_files per month.

    Every 'yymm' key for years 09-10 and months 01-12 is queued except
    '0912' and '1010'; the count of queued tasks goes to end_multiprocessor.
    """
    for out_dir in (ftd_trips_dir, ftd_shift_dir):
        remove_create_dir(out_dir)
    #
    init_multiprocessor(3)  # presumably the worker count -- confirm in helper
    excluded = ('0912', '1010')
    candidates = ['%02d%02d' % (yy, mm)
                  for yy in xrange(9, 11)
                  for mm in xrange(1, 13)]
    queued = [yymm for yymm in candidates if yymm not in excluded]
    for yymm in queued:
        # For a serial run, call process_files(yymm) directly instead.
        put_task(process_files, [yymm])
    end_multiprocessor(len(queued))
def run():
    """Draw the monthly economic-profit line chart for the 'ap' and 'ns' data.

    For each label the monthly stat CSV is loaded, the 'pin-eco-profit-mean'
    and 'pout-eco-profit-mean' columns are divided by CENT, and both series
    are passed to multiple_line_chart, which is given
    ``<a3_chart_dir>/eco-profits-<label>`` as its last argument.
    """
    a3_chart_dir = charts_dir + '/c_individual_a3 monthly economic profit'
    remove_create_dir(a3_chart_dir)
    #
    # 'yymm' tick labels; 0912 and 1010 do not appear in the list.
    months = ('0901 0902 0903 0904 0905 0906 0907 0908 0909 0910 0911 '
              '1001 1002 1003 1004 1005 1006 1007 1008 1009 1011 1012').split()
    measure_names = ['pin-eco-profit', 'pout-eco-profit']
    data_sets = [('ap', ftd_monthly_stats_ap_fn),
                 ('ns', ftd_monthly_stats_ns_fn)]
    for label, monthly_stat_fn in data_sets:
        monthly_df = pd.read_csv(monthly_stat_fn)
        series = [(monthly_df['%s-mean' % name] / float(CENT)).values
                  for name in measure_names]
        out_path = a3_chart_dir + '/eco-profits-%s' % label
        multiple_line_chart((15, 7.5), '', 'Year and Month', '$S',
                            (months, 20), series, measure_names,
                            'upper left', out_path)
Esempio n. 11
0
def run():
    """Draw the monthly queueing-time line chart for the 'ap' and 'ns' data.

    For each label the monthly stat CSV is loaded, the 'pin-qu-mean' and
    'pout-qu-mean' columns are divided by SEC60, and both series are passed
    to multiple_line_chart, which is given
    ``<a4_chart_dir>/queuing-time-<label>`` as its last argument.
    """
    a4_chart_dir = charts_dir + '/c_individual_a4 monthly queueing time'
    remove_create_dir(a4_chart_dir)
    #
    # 'yymm' tick labels; 0912 and 1010 do not appear in the list.
    months = ('0901 0902 0903 0904 0905 0906 0907 0908 0909 0910 0911 '
              '1001 1002 1003 1004 1005 1006 1007 1008 1009 1011 1012').split()
    measure_names = ['pin-qu', 'pout-qu']
    data_sets = [('ap', ftd_monthly_stats_ap_fn),
                 ('ns', ftd_monthly_stats_ns_fn)]
    for label, monthly_stat_fn in data_sets:
        monthly_df = pd.read_csv(monthly_stat_fn)
        series = [(monthly_df['%s-mean' % name] / float(SEC60)).values
                  for name in measure_names]
        out_path = a4_chart_dir + '/queuing-time-%s' % label
        multiple_line_chart((15, 7.5), '', 'Year and Month', 'Minute',
                            (months, 20), series, measure_names,
                            'upper left', out_path)