def main(idir):
    """Build a yearly ("month 00") CSV per table from the monthly CSVs in ``idir``.

    For every table name, each file returned by
    ``findFiles(idir, 'output_<table>_*.csv')`` is read as a
    semicolon-separated CSV with single-quote quoting.  Files whose path
    contains ``"00.csv"`` are skipped so that a previously written yearly
    file is not summed into the new one.  The monthly rows are grouped by
    ``lookup(table_name, with_month=False)`` and summed, a constant
    ``"00"`` month is appended to the index, helper columns are added via
    ``add_helper_cols`` and the result is written to
    ``<idir>/output_<table>_2013_00.csv``.

    Args:
        idir: Directory that holds the monthly files and receives the
            yearly output files.
    """
    print("Reading data...")

    table_names = ["ymp", "ymr", "ymrp", "yms", "ymsp", "ymsr", "ymsrp"]
    for table_name in table_names:
        print(table_name)

        # Collect the frames and concatenate once; concatenating inside the
        # loop re-copies all previously read rows on every iteration.
        monthly_frames = []
        for f in findFiles(idir, 'output_%s_*.csv' % table_name):
            print("%s FILE" % (f,))
            if "00.csv" in f:
                print(" ** SKIPPING POTENTIAL AGGREGATION MONTH %s" % (f,))
                continue
            monthly_frames.append(
                pd.read_csv(f, header=0, sep=";", quotechar="'"))

        if not monthly_frames:
            # Nothing to aggregate: grouping an empty frame by the key
            # columns would fail, so report and move on to the next table.
            print(" ** NO MONTHLY FILES FOUND FOR %s, SKIPPING" % (table_name,))
            continue

        master_frame = pd.concat(monthly_frames)

        # presumably the primary-key column names without "month" -- confirm
        # against lookup()
        pk = lookup(table_name, with_month=False)
        print(master_frame.head())

        # remove month from primary key and sum
        yearly = master_frame.groupby(pk).sum()
        yearly["month"] = "00"
        yearly = yearly.set_index("month", append=True)
        yearly = yearly.reorder_levels(lookup(table_name, with_month=True))

        yearly = add_helper_cols(table_name, yearly)

        # write out a zero month file
        # NOTE(review): the year 2013 is hard-coded here while the input
        # pattern above matches any year -- confirm this is intended.
        output_path = os.path.join(idir, "output_%s_2013_00.csv" % table_name)
        yearly.to_csv(output_path, sep=";")
def make_table(ymbibip, table_name, output_values, odir, output_name, ignore_list=None, year=2013, month=-1):
    """Aggregate ``ymbibip`` to ``table_name``'s key columns and write it as CSV.

    The frame's index is reset, rows are grouped by ``pk(table_name)`` and
    summed, helper columns are added through ``add_helper_cols``, constant
    ``year`` and ``month`` columns are set, and the selected columns are
    written to ``<odir>/output_<table_name>_<output_name>.csv`` using ``;``
    as separator.

    Args:
        ymbibip: DataFrame holding the data to aggregate.
        table_name: Table identifier; the letters ``r``, ``s`` and ``p`` in
            it decide which helper columns are written in addition to
            ``output_values``.
        output_values: List of column names to write.
        odir: Output directory.
        output_name: Suffix used in the output file name.
        ignore_list: Unused; kept for interface compatibility.
        year: Value stored in the ``year`` column of the returned frame.
        month: Value stored in the ``month`` column of the returned frame.

    Returns:
        The aggregated DataFrame, including the helper, ``year`` and
        ``month`` columns.
    """
    print("%s table in progress..." % (table_name,))

    pk_cols = pk(table_name)
    print("table name %s pks= %s" % (table_name, pk_cols))
    ymbibip = ymbibip.reset_index()

    big_table = ymbibip.groupby(pk_cols).sum()

    print("ADDING HELPERS!")
    big_table = add_helper_cols(table_name, big_table)

    # Extra columns to export per table-name letter (presumably created by
    # add_helper_cols -- confirm).  `+` builds a new list each time, so the
    # caller's output_values is never modified.
    extra_columns = (
        ("r", ["bra_id_r1", "bra_id_r3", "cnae_id_r1"]),
        ("s", ["bra_id_s1", "bra_id_s3", "cnae_id_s1"]),
        ("p", ["hs_id2"]),
    )
    columns = output_values
    for marker, extras in extra_columns:
        if marker in table_name:
            columns = columns + extras

    # These two columns are only written when output_values names them.
    big_table["year"] = year
    big_table["month"] = month

    print("Writing csv to disk...")

    output_path = os.path.join(odir, "output_%s_%s.csv" % (table_name, output_name))
    big_table.to_csv(output_path, sep=";", columns=columns)
    return big_table
def main(idir):
    """Sum the monthly CSVs in ``idir`` into one yearly file per table.

    Each table's monthly files (``output_<table>_*.csv`` as returned by
    ``findFiles``) are read with ``;`` as separator and ``'`` as quote
    character.  Any file whose path contains ``"00.csv"`` is skipped, since
    that is the name pattern of the yearly file this function writes.  The
    rows are summed per ``lookup(table_name, with_month=False)`` group, the
    month index level is set to ``"00"``, helper columns are added with
    ``add_helper_cols`` and the frame is saved as
    ``<idir>/output_<table>_2013_00.csv``.

    Args:
        idir: Directory containing the monthly files; the yearly files are
            written to the same directory.
    """
    print("Reading data...")

    table_names = ["ymp", "ymr", "ymrp", "yms", "ymsp", "ymsr", "ymsrp"]
    for table_name in table_names:
        print(table_name)

        # Read everything first and concatenate a single time instead of
        # growing a DataFrame file by file (which copies it on every step).
        frames = []
        for f in findFiles(idir, 'output_%s_*.csv' % table_name):
            print("%s FILE" % (f,))
            if "00.csv" in f:
                print(" ** SKIPPING POTENTIAL AGGREGATION MONTH %s" % (f,))
                continue
            frames.append(pd.read_csv(f, header=0, sep=";", quotechar="'"))

        if not frames:
            # Without input the groupby below would fail on missing key
            # columns; skip this table and keep processing the others.
            print(" ** NO MONTHLY FILES FOUND FOR %s, SKIPPING" % (table_name,))
            continue

        master_frame = pd.concat(frames)

        # presumably the key columns excluding "month" -- verify in lookup()
        pk = lookup(table_name, with_month=False)
        print(master_frame.head())

        # remove month from primary key and sum
        yearly = master_frame.groupby(pk).sum()
        yearly["month"] = "00"
        yearly = yearly.set_index("month", append=True)
        yearly = yearly.reorder_levels(lookup(table_name, with_month=True))

        yearly = add_helper_cols(table_name, yearly)

        # write out a zero month file
        # NOTE(review): output year is fixed to 2013 although the input
        # pattern is not restricted to a year -- confirm.
        output_path = os.path.join(idir, "output_%s_2013_00.csv" % table_name)
        yearly.to_csv(output_path, sep=";")
def make_table(ymbibip, table_name, output_values, odir, output_name, ignore_list=None):
    """Aggregate ``ymbibip`` to ``table_name``'s key columns and write it as CSV.

    The frame's index is reset, rows are grouped by ``pk(table_name)`` and
    summed, helper columns are added through ``add_helper_cols``, and the
    selected columns are written to
    ``<odir>/output_<table_name>_<output_name>.csv`` using ``;`` as
    separator.

    Args:
        ymbibip: DataFrame holding the data to aggregate.
        table_name: Table identifier; the letters ``r``, ``s`` and ``p`` in
            it decide which helper columns are written in addition to
            ``output_values``.
        output_values: List of column names to write.
        odir: Output directory.
        output_name: Suffix used in the output file name.
        ignore_list: Unused; kept for interface compatibility.

    Returns:
        The aggregated DataFrame including the helper columns.
    """
    print("%s table in progress..." % (table_name,))

    pk_cols = pk(table_name)
    print("table name %s pks= %s" % (table_name, pk_cols))
    ymbibip = ymbibip.reset_index()

    big_table = ymbibip.groupby(pk_cols).sum()

    print("ADDING HELPERS!")
    big_table = add_helper_cols(table_name, big_table)

    # Extra columns to export per table-name letter (presumably created by
    # add_helper_cols -- confirm).  `+` builds a new list each time, so the
    # caller's output_values is never modified.
    extra_columns = (
        ("r", ["bra_id_r1", "bra_id_r3", "cnae_id_r1"]),
        ("s", ["bra_id_s1", "bra_id_s3", "cnae_id_s1"]),
        ("p", ["hs_id2"]),
    )
    columns = output_values
    for marker, extras in extra_columns:
        if marker in table_name:
            columns = columns + extras

    print("Writing csv to disk...")

    output_path = os.path.join(odir, "output_%s_%s.csv" % (table_name, output_name))
    big_table.to_csv(output_path, sep=";", columns=columns)
    return big_table
def make_table(ymbibip,
               table_name,
               output_values,
               odir,
               output_name,
               ignore_list=None):
    """Group ``ymbibip`` by the table's key columns, sum it and export a CSV.

    Steps: reset the index of ``ymbibip``, group by ``pk(table_name)`` and
    sum, pass the result through ``add_helper_cols``, then write the
    requested columns to ``<odir>/output_<table_name>_<output_name>.csv``
    with ``;`` as field separator.

    Args:
        ymbibip: Source DataFrame.
        table_name: Table identifier.  If it contains ``r``, ``s`` or ``p``
            the matching helper columns are exported as well.
        output_values: List of column names to export.
        odir: Directory the CSV is written to.
        output_name: Suffix for the CSV file name.
        ignore_list: Not used by this function; kept so existing callers
            keep working.

    Returns:
        The aggregated DataFrame with helper columns added.
    """
    print("%s table in progress..." % (table_name,))

    pk_cols = pk(table_name)
    print("table name %s pks= %s" % (table_name, pk_cols))
    ymbibip = ymbibip.reset_index()

    big_table = ymbibip.groupby(pk_cols).sum()

    print("ADDING HELPERS!")
    big_table = add_helper_cols(table_name, big_table)

    # Helper columns exported per table-name letter (presumably produced by
    # add_helper_cols -- confirm).  List concatenation creates a new list,
    # leaving the caller's output_values untouched.
    helper_columns = (
        ("r", ["bra_id_r1", "bra_id_r3", "cnae_id_r1"]),
        ("s", ["bra_id_s1", "bra_id_s3", "cnae_id_s1"]),
        ("p", ["hs_id2"]),
    )
    export_columns = output_values
    for letter, names in helper_columns:
        if letter in table_name:
            export_columns = export_columns + names

    print("Writing csv to disk...")

    output_path = os.path.join(odir,
                               "output_%s_%s.csv" % (table_name, output_name))
    big_table.to_csv(output_path, sep=";", columns=export_columns)
    return big_table
# Example #6
# 0
def make_table(ymbibip,
               table_name,
               output_values,
               odir,
               output_name,
               ignore_list=None,
               year=2013,
               month=-1):
    """Group ``ymbibip`` by the table's key columns, sum it and export a CSV.

    Steps: reset the index of ``ymbibip``, group by ``pk(table_name)`` and
    sum, pass the result through ``add_helper_cols``, set constant ``year``
    and ``month`` columns, then write the requested columns to
    ``<odir>/output_<table_name>_<output_name>.csv`` with ``;`` as field
    separator.

    Args:
        ymbibip: Source DataFrame.
        table_name: Table identifier.  If it contains ``r``, ``s`` or ``p``
            the matching helper columns are exported as well.
        output_values: List of column names to export.
        odir: Directory the CSV is written to.
        output_name: Suffix for the CSV file name.
        ignore_list: Not used by this function; kept so existing callers
            keep working.
        year: Constant stored in the ``year`` column.
        month: Constant stored in the ``month`` column.

    Returns:
        The aggregated DataFrame with helper, ``year`` and ``month``
        columns added.
    """
    print("%s table in progress..." % (table_name,))

    pk_cols = pk(table_name)
    print("table name %s pks= %s" % (table_name, pk_cols))
    ymbibip = ymbibip.reset_index()

    big_table = ymbibip.groupby(pk_cols).sum()

    print("ADDING HELPERS!")
    big_table = add_helper_cols(table_name, big_table)

    # Helper columns exported per table-name letter (presumably produced by
    # add_helper_cols -- confirm).  List concatenation creates a new list,
    # leaving the caller's output_values untouched.
    helper_columns = (
        ("r", ["bra_id_r1", "bra_id_r3", "cnae_id_r1"]),
        ("s", ["bra_id_s1", "bra_id_s3", "cnae_id_s1"]),
        ("p", ["hs_id2"]),
    )
    export_columns = output_values
    for letter, names in helper_columns:
        if letter in table_name:
            export_columns = export_columns + names

    # Exported only if output_values lists "year" / "month".
    big_table["year"] = year
    big_table["month"] = month

    print("Writing csv to disk...")

    output_path = os.path.join(odir,
                               "output_%s_%s.csv" % (table_name, output_name))
    big_table.to_csv(output_path, sep=";", columns=export_columns)
    return big_table