def run():
    """Write per-month summary statistics for the ap and ns stat files.

    For each (input CSV, output CSV) pair:

    1. the previous output file is removed;
    2. the input is loaded and, one statistic column at a time, rows whose
       value is 3 or more standard deviations away from that column's mean
       (or is missing) are dropped;
    3. the remaining rows are grouped by ('yy', 'mm');
    4. one output row per group is written, holding the group keys, the
       count of non-null 'did' values ('num-drivers') and, for every
       statistic column, its mean, sum and standard deviation.

    Returns nothing; the result is the CSV files written to disk.
    """
    # Assumes the first three CSV columns are the keys below; every column
    # after them is treated as a statistic.
    key_columns = ['yy', 'mm', 'did']
    for stat_both_fn, monthly_stats_fn in [
            (ftd_stat_ap_both_fn, ftd_monthly_stats_ap_fn),
            (ftd_stat_ns_both_fn, ftd_monthly_stats_ns_fn)]:
        remove_file(monthly_stats_fn)
        #
        df = pd.read_csv(stat_both_fn)
        column_names = df.columns.values[len(key_columns):]
        # remove outlier
        for cn in column_names:
            std = float(df[cn].std())
            if not std > 0:
                # A zero or NaN deviation turns every z-score into NaN, which
                # would silently discard all rows; such a column has no
                # outliers to remove, so leave the frame untouched.
                continue
            df = df[((df[cn] - df[cn].mean()) / std).abs() < 3]
        df_gb = df.groupby(['yy', 'mm'])
        # Aggregate once for all columns instead of once per column.
        gb_mean, gb_sum, gb_std = df_gb.mean(), df_gb.sum(), df_gb.std()
        _data = [['yy', 'mm', 'num-drivers']]
        _data.extend(
            list(v)
            for v in df_gb.count().reset_index()[['yy', 'mm', 'did']].values)
        for cn in column_names:
            _data[0] += ['%s-mean' % cn, '%s-sum' % cn, '%s-std' % cn]
            # Aggregated frames share the group order of count() above, so
            # position i here belongs to data row i + 1 (row 0 is the header).
            for i, x in enumerate(zip(gb_mean[cn].values,
                                      gb_sum[cn].values,
                                      gb_std[cn].values)):
                _data[i + 1] += x
        with open(monthly_stats_fn, 'wt') as w_csvfile:
            csv.writer(w_csvfile).writerows(_data)
def run():
    """Clear the Y09/Y10 outputs of every package entry, then reprocess.

    Each entry of ``_package`` is unpacked into four fields; the last two are
    passed to ``remove_file``.  Afterwards ``process_files`` is called with a
    'yymm' string for every month of years 09 and 10, except '0912' and
    '1010'.
    """
    for _first, _second, y09_path, y10_path in _package:
        remove_file(y09_path)
        remove_file(y10_path)
    #
    # NOTE(review): presumably there is no input for these two months -
    # confirm against the data set.
    skipped_months = ('0912', '1010')
    for year in xrange(9, 11):
        for month in xrange(1, 13):
            yymm = '%02d%02d' % (year, month)
            if yymm not in skipped_months:
                process_files(yymm)
def run():
    """Write per-driver Y09-vs-Y10 comparison statistics (ap and ns).

    For each (Y09 CSV, Y10 CSV, output CSV) triple:

    1. the previous output file is removed;
    2. both inputs are loaded and, one statistic column at a time, rows
       whose value is 3 or more standard deviations away from that column's
       mean (or is missing) are dropped from each frame independently;
    3. for every 'did' that remains in both frames one output row is
       written: the 'did' followed, per statistic column, by the difference
       of means (Y10 - Y09), both means, both sums and both standard
       deviations.

    The number of common drivers and a running progress counter are printed
    to stdout.  Returns nothing; the result is the CSV files on disk.
    """
    import sys  # local import: only needed for the progress output

    def drop_outliers(df, cn):
        """Return df without rows whose |z-score| in column cn is >= 3."""
        std = float(df[cn].std())
        if not std > 0:
            # A zero or NaN deviation turns every z-score into NaN, which
            # would silently discard all rows; nothing to remove here.
            return df
        return df[((df[cn] - df[cn].mean()) / std).abs() < 3]

    # Assumes the first three CSV columns are the keys below; every column
    # after them is treated as a statistic.
    key_columns = ['yy', 'mm', 'did']
    for Y09_stat_fn, Y10_stat_fn, driver_stats_fn in [
            (ftd_Y09_stat_ap_fn, ftd_Y10_stat_ap_fn, ftd_driver_stats_ap_fn),
            (ftd_Y09_stat_ns_fn, ftd_Y10_stat_ns_fn, ftd_driver_stats_ns_fn)]:
        remove_file(driver_stats_fn)
        Y09_df, Y10_df = pd.read_csv(Y09_stat_fn), pd.read_csv(Y10_stat_fn)
        # The statistic columns are taken from the Y09 file and applied to
        # both frames.
        column_names = Y09_df.columns.values[len(key_columns):]
        # remove outlier
        for cn in column_names:
            Y09_df = drop_outliers(Y09_df, cn)
            Y10_df = drop_outliers(Y10_df, cn)
        header = ['did']
        for cn in column_names:
            new_cn = cn.replace('-', '_')
            header += [
                'diff_%s' % new_cn,
                'Y10_%s_mean' % new_cn, 'Y09_%s_mean' % new_cn,
                'Y10_%s_sum' % new_cn, 'Y09_%s_sum' % new_cn,
                'Y10_%s_std' % new_cn, 'Y09_%s_std' % new_cn,
            ]
        both_year_ftd = set(Y09_df['did']).intersection(set(Y10_df['did']))
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print(len(both_year_ftd))
        # Aggregate every driver once per frame and look values up by 'did',
        # rather than re-filtering and re-aggregating both frames for each
        # driver and each column.
        Y09_gb, Y10_gb = Y09_df.groupby(['did']), Y10_df.groupby(['did'])
        Y09_mean, Y09_sum, Y09_std = Y09_gb.mean(), Y09_gb.sum(), Y09_gb.std()
        Y10_mean, Y10_sum, Y10_std = Y10_gb.mean(), Y10_gb.sum(), Y10_gb.std()
        _data = [header]
        for i, did in enumerate(both_year_ftd):
            row = [did]
            for cn in column_names:
                y10_mean = Y10_mean.at[did, cn]
                y09_mean = Y09_mean.at[did, cn]
                row += [
                    y10_mean - y09_mean,
                    y10_mean, y09_mean,
                    Y10_sum.at[did, cn], Y09_sum.at[did, cn],
                    Y10_std.at[did, cn], Y09_std.at[did, cn],
                ]
            _data.append(row)
            if i % 100 == 0:
                # Progress marker on one line, finished by print('') below.
                sys.stdout.write('%d, ' % i)
        #
        with open(driver_stats_fn, 'wt') as w_csvfile:
            csv.writer(w_csvfile).writerows(_data)
        print('')
def run():
    """Regenerate the three productivity pickle files.

    All three output paths are removed first; then each producer function is
    called in turn and its return value is handed to ``save_pickle_file``
    together with the matching path.
    """
    outputs = (ftd_general_prod_mb,
               ftd_ap_prod_eco_prof_mb,
               ftd_ns_prod_eco_prof_mb)
    for stale_path in outputs:
        remove_file(stale_path)
    #
    general_path, ap_path, ns_path = outputs
    save_pickle_file(general_path, general_productivity())
    save_pickle_file(ap_path, ap_productivity_economical_profit())
    save_pickle_file(ns_path, ns_productivity_economical_profit())
def run():
    """Write per-month summary statistics for the ap and ns stat files.

    For each (input CSV, output CSV) pair:

    1. the previous output file is removed;
    2. the input is loaded and, one statistic column at a time, rows whose
       value is 3 or more standard deviations away from that column's mean
       (or is missing) are dropped;
    3. the remaining rows are grouped by ('yy', 'mm');
    4. one output row per group is written, holding the group keys, the
       count of non-null 'did' values ('num-drivers') and, for every
       statistic column, its mean, sum and standard deviation.

    Returns nothing; the result is the CSV files written to disk.
    """
    # Assumes the first three CSV columns are the keys below; every column
    # after them is treated as a statistic.
    key_columns = ['yy', 'mm', 'did']
    for stat_both_fn, monthly_stats_fn in [
            (ftd_stat_ap_both_fn, ftd_monthly_stats_ap_fn),
            (ftd_stat_ns_both_fn, ftd_monthly_stats_ns_fn)]:
        remove_file(monthly_stats_fn)
        #
        df = pd.read_csv(stat_both_fn)
        column_names = df.columns.values[len(key_columns):]
        # remove outlier
        for cn in column_names:
            std = float(df[cn].std())
            if not std > 0:
                # A zero or NaN deviation turns every z-score into NaN, which
                # would silently discard all rows; such a column has no
                # outliers to remove, so leave the frame untouched.
                continue
            df = df[((df[cn] - df[cn].mean()) / std).abs() < 3]
        df_gb = df.groupby(['yy', 'mm'])
        # Aggregate once for all columns instead of once per column.
        gb_mean, gb_sum, gb_std = df_gb.mean(), df_gb.sum(), df_gb.std()
        _data = [['yy', 'mm', 'num-drivers']]
        _data.extend(
            list(v)
            for v in df_gb.count().reset_index()[['yy', 'mm', 'did']].values)
        for cn in column_names:
            _data[0] += ['%s-mean' % cn, '%s-sum' % cn, '%s-std' % cn]
            # Aggregated frames share the group order of count() above, so
            # position i here belongs to data row i + 1 (row 0 is the header).
            for i, x in enumerate(zip(gb_mean[cn].values,
                                      gb_sum[cn].values,
                                      gb_std[cn].values)):
                _data[i + 1] += x
        with open(monthly_stats_fn, 'wt') as w_csvfile:
            csv.writer(w_csvfile).writerows(_data)
def run():
    """Write per-driver Y09-vs-Y10 comparison statistics (ap and ns).

    For each (Y09 CSV, Y10 CSV, output CSV) triple:

    1. the previous output file is removed;
    2. both inputs are loaded and, one statistic column at a time, rows
       whose value is 3 or more standard deviations away from that column's
       mean (or is missing) are dropped from each frame independently;
    3. for every 'did' that remains in both frames one output row is
       written: the 'did' followed, per statistic column, by the difference
       of means (Y10 - Y09), both means, both sums and both standard
       deviations.

    The number of common drivers and a running progress counter are printed
    to stdout.  Returns nothing; the result is the CSV files on disk.
    """
    import sys  # local import: only needed for the progress output

    def drop_outliers(df, cn):
        """Return df without rows whose |z-score| in column cn is >= 3."""
        std = float(df[cn].std())
        if not std > 0:
            # A zero or NaN deviation turns every z-score into NaN, which
            # would silently discard all rows; nothing to remove here.
            return df
        return df[((df[cn] - df[cn].mean()) / std).abs() < 3]

    # Assumes the first three CSV columns are the keys below; every column
    # after them is treated as a statistic.
    key_columns = ['yy', 'mm', 'did']
    for Y09_stat_fn, Y10_stat_fn, driver_stats_fn in [
            (ftd_Y09_stat_ap_fn, ftd_Y10_stat_ap_fn, ftd_driver_stats_ap_fn),
            (ftd_Y09_stat_ns_fn, ftd_Y10_stat_ns_fn, ftd_driver_stats_ns_fn)]:
        remove_file(driver_stats_fn)
        Y09_df, Y10_df = pd.read_csv(Y09_stat_fn), pd.read_csv(Y10_stat_fn)
        # The statistic columns are taken from the Y09 file and applied to
        # both frames.
        column_names = Y09_df.columns.values[len(key_columns):]
        # remove outlier
        for cn in column_names:
            Y09_df = drop_outliers(Y09_df, cn)
            Y10_df = drop_outliers(Y10_df, cn)
        header = ['did']
        for cn in column_names:
            new_cn = cn.replace('-', '_')
            header += [
                'diff_%s' % new_cn,
                'Y10_%s_mean' % new_cn, 'Y09_%s_mean' % new_cn,
                'Y10_%s_sum' % new_cn, 'Y09_%s_sum' % new_cn,
                'Y10_%s_std' % new_cn, 'Y09_%s_std' % new_cn,
            ]
        both_year_ftd = set(Y09_df['did']).intersection(set(Y10_df['did']))
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print(len(both_year_ftd))
        # Aggregate every driver once per frame and look values up by 'did',
        # rather than re-filtering and re-aggregating both frames for each
        # driver and each column.
        Y09_gb, Y10_gb = Y09_df.groupby(['did']), Y10_df.groupby(['did'])
        Y09_mean, Y09_sum, Y09_std = Y09_gb.mean(), Y09_gb.sum(), Y09_gb.std()
        Y10_mean, Y10_sum, Y10_std = Y10_gb.mean(), Y10_gb.sum(), Y10_gb.std()
        _data = [header]
        for i, did in enumerate(both_year_ftd):
            row = [did]
            for cn in column_names:
                y10_mean = Y10_mean.at[did, cn]
                y09_mean = Y09_mean.at[did, cn]
                row += [
                    y10_mean - y09_mean,
                    y10_mean, y09_mean,
                    Y10_sum.at[did, cn], Y09_sum.at[did, cn],
                    Y10_std.at[did, cn], Y09_std.at[did, cn],
                ]
            _data.append(row)
            if i % 100 == 0:
                # Progress marker on one line, finished by print('') below.
                sys.stdout.write('%d, ' % i)
        #
        with open(driver_stats_fn, 'wt') as w_csvfile:
            csv.writer(w_csvfile).writerows(_data)
        print('')
def run():
    """Remove the four productivity db files, then rerun both computations.

    ``ap_productivity_economical_profit`` and
    ``ns_productivity_economical_profit`` are called for their side effects;
    their return values are ignored.
    """
    stale_outputs = (
        ftd_gen_prod_db_for_ap,
        ftd_gen_prod_db_for_ns,
        ftd_ap_prod_eco_prof_db,
        ftd_ns_prod_eco_prof_db,
    )
    for stale_path in stale_outputs:
        remove_file(stale_path)
    #
    ap_productivity_economical_profit()
    ns_productivity_economical_profit()