コード例 #1
0
def run():
    """Write per-(yy, mm) summary statistics for each input/output file pair.

    For each (input CSV, output CSV) pair this:

    1. passes the output path to ``remove_file``;
    2. reads the input CSV and treats every column after the first three
       as a metric column (assumes the leading columns are yy, mm, did --
       the slice is positional, so confirm against the input files);
    3. drops outlier rows one metric at a time, keeping only rows whose
       absolute z-score for that metric is below 3 (mean and std are
       recomputed on the already-filtered frame for each metric);
    4. groups the remaining rows by (yy, mm) and writes one CSV row per
       group with the count of ``did`` values followed by the mean, sum
       and std of every metric column.
    """
    for stat_both_fn, monthly_stats_fn in [
        (ftd_stat_ap_both_fn, ftd_monthly_stats_ap_fn),
        (ftd_stat_ns_both_fn, ftd_monthly_stats_ns_fn)
    ]:
        remove_file(monthly_stats_fn)
        #
        df = pd.read_csv(stat_both_fn)
        # Positional slice: everything after the three key columns.
        column_names = df.columns.values[len(['yy', 'mm', 'did']):]
        # remove outlier
        for cn in column_names:
            df = df[((df[cn] - df[cn].mean()) / float(df[cn].std())).abs() < 3]
        df_gb = df.groupby(['yy', 'mm'])
        # Aggregate once up front; these results do not depend on which
        # metric column is being emitted, so recomputing them per column
        # only repeats the same work.
        gb_mean, gb_sum, gb_std = df_gb.mean(), df_gb.sum(), df_gb.std()
        _data = [['yy', 'mm', 'num-drivers']]
        for v in df_gb.count().reset_index()[['yy', 'mm', 'did']].values:
            _data.append(list(v))
        for cn in column_names:
            _data[0] += ['%s-mean' % cn, '%s-sum' % cn, '%s-std' % cn]
            # Row i + 1 of _data corresponds to the i-th group (row 0 is
            # the header), so the aggregates line up by position.
            for i, x in enumerate(
                    zip(gb_mean[cn].values,
                        gb_sum[cn].values,
                        gb_std[cn].values)):
                _data[i + 1] += x

        with open(monthly_stats_fn, 'wt') as w_csvfile:
            csv.writer(w_csvfile).writerows(_data)
コード例 #2
0
def run():
    """Remove the per-entry files of ``_package`` and process each month.

    The last two fields of every ``_package`` entry are passed to
    ``remove_file``.  Afterwards ``process_files`` is called with each
    'yymm' key for years 09-10 and months 01-12, in order, except for
    '0912' and '1010', which are skipped.
    """
    for _first, _second, y09_path, y10_path in _package:
        remove_file(y09_path)
        remove_file(y10_path)
    # Month keys that are never processed.
    skipped = ['0912', '1010']
    for year in xrange(9, 11):
        for month in xrange(1, 13):
            yymm = '%02d%02d' % (year, month)
            if yymm not in skipped:
                process_files(yymm)
コード例 #3
0
def run():
    """Remove the per-entry files of ``_package`` and process each month.

    Each ``_package`` entry is unpacked into four fields; the third and
    fourth are handed to ``remove_file``.  ``process_files`` then runs
    once per 'yymm' key covering years 09-10 and months 01-12, leaving
    out '0912' and '1010'.
    """
    for _ignored_a, _ignored_b, first_target, second_target in _package:
        remove_file(first_target)
        remove_file(second_target)
    # Build every candidate key first, in year-major order.
    month_keys = ['%02d%02d' % (yy, mm)
                  for yy in xrange(9, 11)
                  for mm in xrange(1, 13)]
    excluded = ['0912', '1010']
    for month_key in month_keys:
        if month_key in excluded:
            continue
        process_files(month_key)
コード例 #4
0
def run():
    for Y09_stat_fn, Y10_stat_fn, driver_stats_fn in [
        (ftd_Y09_stat_ap_fn, ftd_Y10_stat_ap_fn, ftd_driver_stats_ap_fn),
        (ftd_Y09_stat_ns_fn, ftd_Y10_stat_ns_fn, ftd_driver_stats_ns_fn)
    ]:
        remove_file(driver_stats_fn)
        Y09_df, Y10_df = pd.read_csv(Y09_stat_fn), pd.read_csv(Y10_stat_fn)
        # remove outlier
        column_names = Y09_df.columns.values[len(['yy', 'mm', 'did']):]
        for cn in column_names:
            Y09_df = Y09_df[((Y09_df[cn] - Y09_df[cn].mean()) /
                             float(Y09_df[cn].std())).abs() < 3]
            Y10_df = Y10_df[((Y10_df[cn] - Y10_df[cn].mean()) /
                             float(Y10_df[cn].std())).abs() < 3]
        _data = [['did']]
        both_year_ftd = set(Y09_df['did']).intersection(set(Y10_df['did']))
        for cn in column_names:
            new_cn = cn.replace('-', '_')
            _data[0] += [
                'diff_%s' % new_cn,
                'Y10_%s_mean' % new_cn,
                'Y09_%s_mean' % new_cn,
                'Y10_%s_sum' % new_cn,
                'Y09_%s_sum' % new_cn,
                'Y10_%s_std' % new_cn,
                'Y09_%s_std' % new_cn
            ]
        print len(both_year_ftd)
        for i, did in enumerate(both_year_ftd):
            Y09_did_df, Y10_did_df = Y09_df[(Y09_df['did'] == did)], Y10_df[(
                Y10_df['did'] == did)]
            Y09_did_gb, Y10_did_gb = Y09_did_df.groupby(
                ['did']), Y10_did_df.groupby(['did'])
            _data.append([did])
            for j, cn in enumerate(column_names):
                for x in zip(
                        Y10_did_gb.mean()[cn].values -
                        Y09_did_gb.mean()[cn].values,
                        Y10_did_gb.mean()[cn].values,
                        Y09_did_gb.mean()[cn].values,
                        Y10_did_gb.sum()[cn].values,
                        Y09_did_gb.sum()[cn].values,
                        Y10_did_gb.std()[cn].values,
                        Y09_did_gb.std()[cn].values):
                    _data[-1] += x
            if i % 100 == 0:
                print '%d,' % i,
        #
        with open(driver_stats_fn, 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            for row in _data:
                writer.writerow(row)
        print ''
def run():
    """Regenerate the three productivity pickle files.

    All three target paths are first passed to ``remove_file``.  Then the
    results of ``general_productivity``,
    ``ap_productivity_economical_profit`` and
    ``ns_productivity_economical_profit`` are computed and saved, in that
    order, via ``save_pickle_file``.
    """
    stale_paths = (
        ftd_general_prod_mb,
        ftd_ap_prod_eco_prof_mb,
        ftd_ns_prod_eco_prof_mb,
    )
    for stale_path in stale_paths:
        remove_file(stale_path)
    # Each result is saved before the next one is computed.
    general_result = general_productivity()
    save_pickle_file(ftd_general_prod_mb, general_result)
    ap_result = ap_productivity_economical_profit()
    save_pickle_file(ftd_ap_prod_eco_prof_mb, ap_result)
    ns_result = ns_productivity_economical_profit()
    save_pickle_file(ftd_ns_prod_eco_prof_mb, ns_result)
コード例 #6
0
def run():
    """Write per-(yy, mm) summary statistics for each input/output file pair.

    For each (input CSV, output CSV) pair: the output path is passed to
    ``remove_file``; the input is read; every column after the first
    three is treated as a metric (assumes the leading columns are
    yy, mm, did -- the slice is positional, confirm against the inputs);
    rows whose absolute z-score for a metric is 3 or more are dropped,
    one metric at a time; and the remaining rows are grouped by (yy, mm).
    The output CSV has one row per group: yy, mm, the count of ``did``
    values, then mean/sum/std for every metric column.
    """
    file_pairs = [(ftd_stat_ap_both_fn, ftd_monthly_stats_ap_fn),
                  (ftd_stat_ns_both_fn, ftd_monthly_stats_ns_fn)]
    for stat_both_fn, monthly_stats_fn in file_pairs:
        remove_file(monthly_stats_fn)
        #
        df = pd.read_csv(stat_both_fn)
        # Positional slice: everything after the three key columns.
        column_names = df.columns.values[len(['yy', 'mm', 'did']):]
        # remove outlier (mean/std are recomputed after each filter step)
        for cn in column_names:
            df = df[((df[cn] - df[cn].mean()) / float(df[cn].std())).abs() < 3]
        df_gb = df.groupby(['yy', 'mm'])
        # The aggregates are the same for every metric column, so compute
        # them once here instead of once per column inside the loop.
        gb_mean = df_gb.mean()
        gb_sum = df_gb.sum()
        gb_std = df_gb.std()
        _data = [['yy', 'mm', 'num-drivers']]
        for v in df_gb.count().reset_index()[['yy', 'mm', 'did']].values:
            _data.append(list(v))
        for cn in column_names:
            _data[0] += ['%s-mean' % cn, '%s-sum' % cn, '%s-std' % cn]
            per_group = zip(gb_mean[cn].values, gb_sum[cn].values, gb_std[cn].values)
            # _data[0] is the header, so group i lives at index i + 1.
            for i, x in enumerate(per_group):
                _data[i + 1] += x

        with open(monthly_stats_fn, 'wt') as w_csvfile:
            csv.writer(w_csvfile).writerows(_data)
コード例 #7
0
def run():
    for Y09_stat_fn, Y10_stat_fn, driver_stats_fn in [(ftd_Y09_stat_ap_fn, ftd_Y10_stat_ap_fn, ftd_driver_stats_ap_fn), 
                            (ftd_Y09_stat_ns_fn, ftd_Y10_stat_ns_fn, ftd_driver_stats_ns_fn)]:
        remove_file(driver_stats_fn)
        Y09_df, Y10_df = pd.read_csv(Y09_stat_fn), pd.read_csv(Y10_stat_fn)
        # remove outlier
        column_names = Y09_df.columns.values[len(['yy','mm','did']):]
        for cn in column_names:
            Y09_df = Y09_df[((Y09_df[cn] - Y09_df[cn].mean()) / float(Y09_df[cn].std())).abs() < 3]
            Y10_df = Y10_df[((Y10_df[cn] - Y10_df[cn].mean()) / float(Y10_df[cn].std())).abs() < 3]
        _data = [['did']]
        both_year_ftd = set(Y09_df['did']).intersection(set(Y10_df['did']))
        for cn in column_names:
            new_cn = cn.replace('-', '_')
            _data[0] += ['diff_%s' % new_cn,
                         'Y10_%s_mean' % new_cn, 'Y09_%s_mean' % new_cn,
                         'Y10_%s_sum' % new_cn, 'Y09_%s_sum' % new_cn,
                         'Y10_%s_std' % new_cn, 'Y09_%s_std' % new_cn]
        print len(both_year_ftd)
        for i, did in enumerate(both_year_ftd):
            Y09_did_df, Y10_did_df = Y09_df[(Y09_df['did'] == did)], Y10_df[(Y10_df['did'] == did)]
            Y09_did_gb, Y10_did_gb = Y09_did_df.groupby(['did']), Y10_did_df.groupby(['did'])
            _data.append([did])
            for j, cn in enumerate(column_names):
                for x in zip(Y10_did_gb.mean()[cn].values - Y09_did_gb.mean()[cn].values,
                             Y10_did_gb.mean()[cn].values, Y09_did_gb.mean()[cn].values,
                             Y10_did_gb.sum()[cn].values, Y09_did_gb.sum()[cn].values,
                             Y10_did_gb.std()[cn].values, Y09_did_gb.std()[cn].values):
                    _data[-1] += x
            if i % 100 == 0:
                print '%d,' % i,
        #
        with open(driver_stats_fn, 'wt') as w_csvfile:
            writer = csv.writer(w_csvfile)
            for row in _data:
                writer.writerow(row)
        print ''
コード例 #8
0
def run():
    """Remove the four productivity database files, then rebuild.

    Every listed path is passed to ``remove_file`` before
    ``ap_productivity_economical_profit`` and
    ``ns_productivity_economical_profit`` are called, in that order.
    Their return values are not used here.
    """
    stale_paths = (
        ftd_gen_prod_db_for_ap,
        ftd_gen_prod_db_for_ns,
        ftd_ap_prod_eco_prof_db,
        ftd_ns_prod_eco_prof_db,
    )
    for stale_path in stale_paths:
        remove_file(stale_path)
    #
    ap_productivity_economical_profit()
    ns_productivity_economical_profit()