def main(idir):
    """Build the yearly ("month 00") aggregate CSV for every table.

    For each table name, every ``output_<table>_*.csv`` file that
    ``findFiles`` returns for *idir* is read (``;`` separated, ``'`` as quote
    character), except files whose path contains ``00.csv``.  The rows are
    summed per group over the columns returned by
    ``lookup(table_name, with_month=False)``, a constant ``month`` index level
    of ``"00"`` is appended, the index levels are reordered to
    ``lookup(table_name, with_month=True)``, ``add_helper_cols`` is applied and
    the result is written to ``<idir>/output_<table>_2013_00.csv``.

    A table for which no monthly file was read is reported and skipped rather
    than failing inside ``groupby`` on a frame that has no columns.

    Args:
        idir: Directory holding the monthly CSV files; the aggregate files
            are written into the same directory.
    """
    # Single-argument print(...) calls emit the same text on Python 2 and 3.
    print("Reading data...")

    # remove month from primary key and sum
    table_names = ["ymp", "ymr", "ymrp", "yms", "ymsp", "ymsr", "ymsrp"]
    for table_name in table_names:
        master_frame = pd.DataFrame()
        files_read = 0
        print(table_name)

        for path in findFiles(idir, 'output_%s_*.csv' % table_name):
            print("%s FILE" % (path,))
            if "00.csv" in path:
                # The month-00 file is the aggregate written below; reading
                # it back in would add a previous run's totals to the sum.
                print(" ** SKIPPING POTENTIAL AGGREGATION MONTH %s" % (path,))
                continue
            monthly = pd.read_csv(path, header=0, sep=";", quotechar="'")
            master_frame = pd.concat([master_frame, monthly])
            files_read += 1

        if not files_read:
            # Without any input the frame has no key columns to group on.
            print(" ** NO MONTHLY FILES FOR %s, SKIPPING" % (table_name,))
            continue

        pk_cols = lookup(table_name, with_month=False)
        print(master_frame.head())

        yearly = master_frame.groupby(pk_cols).sum()
        # Replace whatever the summed month column held with the marker
        # value, then move it into the index.
        yearly["month"] = "00"
        yearly = yearly.set_index("month", append=True)
        yearly = yearly.reorder_levels(lookup(table_name, with_month=True))
        yearly = add_helper_cols(table_name, yearly)

        # write out a zero month file
        # NOTE(review): the year in the file name is hard-coded while the
        # input glob is not restricted to one year -- confirm that idir only
        # ever contains 2013 data.
        output_path = os.path.join(idir, "output_%s_2013_00.csv" % table_name)
        yearly.to_csv(output_path, sep=";")
def make_table(ymbibip, table_name, output_values, odir, output_name,
               ignore_list=None, year=2013, month=-1):
    """Aggregate *ymbibip* for one table and write the result as CSV.

    The frame's index is reset, rows are summed per group over the columns
    returned by ``pk(table_name)``, ``add_helper_cols`` is applied and
    constant ``year`` / ``month`` columns are set.  The table is then written
    to ``<odir>/output_<table_name>_<output_name>.csv`` with ``;`` as the
    separator.

    Args:
        ymbibip: pandas DataFrame with the source rows.
        table_name: Table identifier.  It is passed to ``pk`` and
            ``add_helper_cols``; the letters ``r``, ``s`` and ``p`` in it
            select which extra id columns are exported.
        output_values: List of column names to write; not modified.
        odir: Output directory.
        output_name: Suffix used in the output file name.
        ignore_list: Unused by this function; accepted for call
            compatibility.  Defaults to ``None`` rather than a shared list.
        year: Value stored in the ``year`` column.
        month: Value stored in the ``month`` column.

    Returns:
        The aggregated DataFrame, including the ``year`` and ``month``
        columns.  In the CSV these two columns appear only if
        *output_values* names them, because the export is limited to the
        selected columns.
    """
    # Single-argument print(...) calls emit the same text on Python 2 and 3.
    print("%s table in progress..." % (table_name,))
    pk_cols = pk(table_name)
    print("table name %s pks= %s" % (table_name, pk_cols))

    ymbibip = ymbibip.reset_index()
    big_table = ymbibip.groupby(pk_cols).sum()

    print("ADDING HELPERS!")
    big_table = add_helper_cols(table_name, big_table)

    # Extra id columns exported per letter of the table name (presumably
    # created by add_helper_cols -- confirm against that helper).
    extra_columns = (
        ("r", ["bra_id_r1", "bra_id_r3", "cnae_id_r1"]),
        ("s", ["bra_id_s1", "bra_id_s3", "cnae_id_s1"]),
        ("p", ["hs_id2"]),
    )
    columns = output_values
    for letter, names in extra_columns:
        if letter in table_name:
            # Concatenate instead of extending so the caller's list stays
            # untouched.
            columns = columns + names

    big_table["year"] = year
    big_table["month"] = month

    print("Writing csv to disk...")
    output_path = os.path.join(
        odir, "output_%s_%s.csv" % (table_name, output_name))
    big_table.to_csv(output_path, sep=";", columns=columns)
    return big_table
def main(idir):
    """Build the yearly ("month 00") aggregate CSV for every table.

    For each table name, every ``output_<table>_*.csv`` file that
    ``findFiles`` returns for *idir* is read (``;`` separated, ``'`` as quote
    character), except files whose path contains ``00.csv``.  The rows are
    summed per group over the columns returned by
    ``lookup(table_name, with_month=False)``, a constant ``month`` index level
    of ``"00"`` is appended, the index levels are reordered to
    ``lookup(table_name, with_month=True)``, ``add_helper_cols`` is applied and
    the result is written to ``<idir>/output_<table>_2013_00.csv``.

    A table for which no monthly file was read is reported and skipped rather
    than failing inside ``groupby`` on a frame that has no columns.

    Args:
        idir: Directory holding the monthly CSV files; the aggregate files
            are written into the same directory.
    """
    # Single-argument print(...) calls emit the same text on Python 2 and 3.
    print("Reading data...")

    # remove month from primary key and sum
    table_names = ["ymp", "ymr", "ymrp", "yms", "ymsp", "ymsr", "ymsrp"]
    for table_name in table_names:
        master_frame = pd.DataFrame()
        files_read = 0
        print(table_name)

        for path in findFiles(idir, 'output_%s_*.csv' % table_name):
            print("%s FILE" % (path,))
            if "00.csv" in path:
                # The month-00 file is the aggregate written below; reading
                # it back in would add a previous run's totals to the sum.
                print(" ** SKIPPING POTENTIAL AGGREGATION MONTH %s" % (path,))
                continue
            monthly = pd.read_csv(path, header=0, sep=";", quotechar="'")
            master_frame = pd.concat([master_frame, monthly])
            files_read += 1

        if not files_read:
            # Without any input the frame has no key columns to group on.
            print(" ** NO MONTHLY FILES FOR %s, SKIPPING" % (table_name,))
            continue

        pk_cols = lookup(table_name, with_month=False)
        print(master_frame.head())

        yearly = master_frame.groupby(pk_cols).sum()
        # Replace whatever the summed month column held with the marker
        # value, then move it into the index.
        yearly["month"] = "00"
        yearly = yearly.set_index("month", append=True)
        yearly = yearly.reorder_levels(lookup(table_name, with_month=True))
        yearly = add_helper_cols(table_name, yearly)

        # write out a zero month file
        # NOTE(review): the year in the file name is hard-coded while the
        # input glob is not restricted to one year -- confirm that idir only
        # ever contains 2013 data.
        output_path = os.path.join(idir, "output_%s_2013_00.csv" % table_name)
        yearly.to_csv(output_path, sep=";")
def make_table(ymbibip, table_name, output_values, odir, output_name,
               ignore_list=None):
    """Aggregate *ymbibip* for one table and write the result as CSV.

    The frame's index is reset, rows are summed per group over the columns
    returned by ``pk(table_name)`` and ``add_helper_cols`` is applied.  The
    table is then written to
    ``<odir>/output_<table_name>_<output_name>.csv`` with ``;`` as the
    separator, limited to the selected columns.

    Args:
        ymbibip: pandas DataFrame with the source rows.
        table_name: Table identifier.  It is passed to ``pk`` and
            ``add_helper_cols``; the letters ``r``, ``s`` and ``p`` in it
            select which extra id columns are exported.
        output_values: List of column names to write; not modified.
        odir: Output directory.
        output_name: Suffix used in the output file name.
        ignore_list: Unused by this function; accepted for call
            compatibility.  Defaults to ``None`` rather than a shared list.

    Returns:
        The aggregated DataFrame (all columns, not only the exported ones).
    """
    # Single-argument print(...) calls emit the same text on Python 2 and 3.
    print("%s table in progress..." % (table_name,))
    pk_cols = pk(table_name)
    print("table name %s pks= %s" % (table_name, pk_cols))

    ymbibip = ymbibip.reset_index()
    big_table = ymbibip.groupby(pk_cols).sum()

    print("ADDING HELPERS!")
    big_table = add_helper_cols(table_name, big_table)

    # Extra id columns exported per letter of the table name (presumably
    # created by add_helper_cols -- confirm against that helper).
    extra_columns = (
        ("r", ["bra_id_r1", "bra_id_r3", "cnae_id_r1"]),
        ("s", ["bra_id_s1", "bra_id_s3", "cnae_id_s1"]),
        ("p", ["hs_id2"]),
    )
    columns = output_values
    for letter, names in extra_columns:
        if letter in table_name:
            # Concatenate instead of extending so the caller's list stays
            # untouched.
            columns = columns + names

    print("Writing csv to disk...")
    output_path = os.path.join(
        odir, "output_%s_%s.csv" % (table_name, output_name))
    big_table.to_csv(output_path, sep=";", columns=columns)
    return big_table
def make_table(ymbibip, table_name, output_values, odir, output_name,
               ignore_list=None):
    """Sum *ymbibip* over a table's primary key and export it as CSV.

    Steps: reset the frame's index, group by the columns returned by
    ``pk(table_name)`` and sum, run ``add_helper_cols`` on the result, then
    write the selected columns to
    ``<odir>/output_<table_name>_<output_name>.csv`` using ``;`` as the
    separator.

    Args:
        ymbibip: pandas DataFrame with the source rows.
        table_name: Table identifier handed to ``pk`` and
            ``add_helper_cols``.  Each of the letters ``r``, ``s`` and ``p``
            it contains adds a fixed set of id columns to the export.
        output_values: List of column names to write; left unmodified.
        odir: Directory the CSV is written to.
        output_name: Suffix used in the output file name.
        ignore_list: Not used by this function; kept so existing calls keep
            working.  Defaults to ``None`` rather than a shared list.

    Returns:
        The aggregated DataFrame, including columns that were not exported.
    """
    # Single-argument print(...) calls emit the same text on Python 2 and 3.
    print("%s table in progress..." % (table_name,))
    pk_cols = pk(table_name)
    print("table name %s pks= %s" % (table_name, pk_cols))

    big_table = ymbibip.reset_index().groupby(pk_cols).sum()

    print("ADDING HELPERS!")
    big_table = add_helper_cols(table_name, big_table)

    # Build a new list each time so the caller's output_values is never
    # modified.  The id columns are presumably produced by add_helper_cols
    # -- confirm against that helper.
    export_columns = output_values
    if "r" in table_name:
        export_columns = export_columns + [
            "bra_id_r1", "bra_id_r3", "cnae_id_r1"]
    if "s" in table_name:
        export_columns = export_columns + [
            "bra_id_s1", "bra_id_s3", "cnae_id_s1"]
    if "p" in table_name:
        export_columns = export_columns + ["hs_id2"]

    print("Writing csv to disk...")
    file_name = "output_%s_%s.csv" % (table_name, output_name)
    output_path = os.path.join(odir, file_name)
    big_table.to_csv(output_path, sep=";", columns=export_columns)
    return big_table
def make_table(ymbibip, table_name, output_values, odir, output_name,
               ignore_list=None, year=2013, month=-1):
    """Sum *ymbibip* over a table's primary key and export it as CSV.

    Steps: reset the frame's index, group by the columns returned by
    ``pk(table_name)`` and sum, run ``add_helper_cols`` on the result, set
    constant ``year`` / ``month`` columns, then write the selected columns to
    ``<odir>/output_<table_name>_<output_name>.csv`` using ``;`` as the
    separator.

    Args:
        ymbibip: pandas DataFrame with the source rows.
        table_name: Table identifier handed to ``pk`` and
            ``add_helper_cols``.  Each of the letters ``r``, ``s`` and ``p``
            it contains adds a fixed set of id columns to the export.
        output_values: List of column names to write; left unmodified.
        odir: Directory the CSV is written to.
        output_name: Suffix used in the output file name.
        ignore_list: Not used by this function; kept so existing calls keep
            working.  Defaults to ``None`` rather than a shared list.
        year: Value assigned to the ``year`` column.
        month: Value assigned to the ``month`` column.

    Returns:
        The aggregated DataFrame, including the ``year`` and ``month``
        columns.  Those two reach the CSV as columns only when
        *output_values* lists them, since the export is restricted to the
        selected columns.
    """
    # Single-argument print(...) calls emit the same text on Python 2 and 3.
    print("%s table in progress..." % (table_name,))
    pk_cols = pk(table_name)
    print("table name %s pks= %s" % (table_name, pk_cols))

    big_table = ymbibip.reset_index().groupby(pk_cols).sum()

    print("ADDING HELPERS!")
    big_table = add_helper_cols(table_name, big_table)

    # Build a new list each time so the caller's output_values is never
    # modified.  The id columns are presumably produced by add_helper_cols
    # -- confirm against that helper.
    export_columns = output_values
    if "r" in table_name:
        export_columns = export_columns + [
            "bra_id_r1", "bra_id_r3", "cnae_id_r1"]
    if "s" in table_name:
        export_columns = export_columns + [
            "bra_id_s1", "bra_id_s3", "cnae_id_s1"]
    if "p" in table_name:
        export_columns = export_columns + ["hs_id2"]

    big_table["year"] = year
    big_table["month"] = month

    print("Writing csv to disk...")
    file_name = "output_%s_%s.csv" % (table_name, output_name)
    output_path = os.path.join(odir, file_name)
    big_table.to_csv(output_path, sep=";", columns=export_columns)
    return big_table