def main(fname, blpath, odir, year, month):
    """Apply EI rules 1 and 2 to one month of data and build the "srp" table.

    The prepared frames are cached in an HDF file named
    ``<year>_<month>_store_df.h5`` inside ``odir``. When that file exists both
    frames are read back from it; otherwise they are built from ``fname`` and
    ``blpath`` (via ``bl_prepare`` and ``setup``) and then saved there.

    Relies on names defined outside this function: ``cols``, ``converters``,
    ``bl_prepare``, ``setup``, ``rule1``, ``rule2``, ``make_table``,
    ``RECEIVER`` and ``SENDER``.

    Args:
        fname: Path of the semicolon-separated input CSV.
        blpath: Handed to ``bl_prepare`` together with the raw frame.
        odir: Directory holding the HDF cache; also passed to ``make_table``.
        year: Part of the cache/output name; forwarded to ``make_table``.
        month: Part of the cache/output name; forwarded to ``make_table``.
    """
    print("Applying EI Rules 1 and 2.")

    hdf_filepath = os.path.join(odir, "%s_%s_store_df.h5" % (year, month))
    print("LOOKING for HDF file at location  %s" % hdf_filepath)

    if os.path.exists(hdf_filepath):
        print("READING HDF")
        ei_df = pd.read_hdf(hdf_filepath, 'ei_df')
        bl_df = pd.read_hdf(hdf_filepath, 'bl_df')
    else:
        ei_df = pd.read_csv(fname, header=0, sep=";", converters=converters,
                            names=cols, quotechar="'", decimal=",")
        ei_df, bl_df = bl_prepare(ei_df, blpath)
        print("Doing setup...")
        ei_df, bl_df = setup(ei_df, bl_df)
        print("SAVING HDF to %s" % hdf_filepath)
        ei_df.to_hdf(hdf_filepath, 'ei_df')
        bl_df.to_hdf(hdf_filepath, 'bl_df')

    # rule1 is run for the receiver side first, then for the sender side.
    print("Entering rule 1...")
    ei_df = rule1(ei_df, bl_df, RECEIVER)
    ei_df = rule1(ei_df, bl_df, SENDER)
    print("Entering rule 2...")
    ei_df = rule2(ei_df)
    print(ei_df)

    # Every value column is listed exactly once so that none is selected
    # twice downstream.
    output_values = [
        "purchase_value", "remit_value", "transfer_value",
        "devolution_value", "icms_credit_value", "tax", "icms_tax",
        "transportation_cost", "year", "month",
    ]
    output_name = "%s_%s" % (year, month)
    print("Making tables...")
    make_table(ei_df, "srp", output_values, odir, output_name,
               year=year, month=month)
def main(fname, odir):
    """Build the "ymsr", "yms" and "ymr" tables from one EI CSV export.

    ``_check_hdf_cache(fname, odir)`` is consulted first and returns a frame
    plus a target path. When the frame is ``None`` the CSV is parsed, the tax
    and per-CFOP value columns are derived, and the result is stored at the
    target under ``HDF_CACHE``. The frame is then handed to ``make_table``
    once per table name.

    Args:
        fname: Path of the semicolon-separated CSV; its base name with
            ".csv" removed becomes the output name.
        odir: Passed to ``_check_hdf_cache`` and to ``make_table``.
    """
    print("Reading data frame...")

    column_names = [
        "ncm", "hs_id", "EconomicAtivity_ID_CNAE_Receiver_5d", "cnae_id_r",
        "EconomicAtivity_ID_CNAE_Sender_5d", "cnae_id_s", "CFOP_ID",
        "Receiver_foreign", "Sender_foreign", "bra_id_r", "bra_id_s", "year",
        "month", "transportation_cost", "ICMS_ST_Value", "ICMS_Value",
        "IPI_Value", "PIS_Value", "COFINS_Value", "II_Value", "product_value",
        "ISSQN_Value",
    ]

    field_converters = {
        "hs_id": update_hs_id,
        "bra_id_s": lookup_location,
        "bra_id_r": lookup_location,
        "cnae_id_r": lookup_cnae,
        "cnae_id_s": lookup_cnae,
    }

    ei_df, target = _check_hdf_cache(fname, odir)

    if ei_df is None:
        csv_options = dict(header=0, sep=";", converters=field_converters,
                           names=column_names, quotechar="'", decimal=",")
        ei_df = pd.read_csv(fname, **csv_options)

        print("Processing...")
        ei_df['icms_tax'] = ei_df.ICMS_ST_Value + ei_df.ICMS_Value
        ei_df['tax'] = (ei_df.icms_tax + ei_df.IPI_Value + ei_df.PIS_Value
                        + ei_df.COFINS_Value + ei_df.II_Value
                        + ei_df.ISSQN_Value)

        # Each value column starts at 0 and receives product_value only on
        # the rows whose CFOP_ID equals the paired constant.
        value_by_cfop = [
            ("purchase_value", PURCHASES),
            ("transfer_value", TRANSFERS),
            ("devolution_value", DEVOLUTIONS),
            ("icms_credit_value", CREDITS),
            ("remit_value", REMITS),
        ]
        for column, _ in value_by_cfop:
            ei_df[column] = 0
        for column, cfop in value_by_cfop:
            ei_df.loc[ei_df.CFOP_ID == cfop, column] = ei_df.product_value

        ei_df.to_hdf(target, HDF_CACHE, append=False)

    print("Aggregating...")
    # Not referenced again within this function.
    primary_key = ['year', 'month', 'bra_id_s', 'cnae_id_s',
                   'bra_id_r', 'cnae_id_r', 'hs_id']

    output_values = [
        "purchase_value", "transfer_value", "devolution_value",
        "icms_credit_value", "remit_value", "tax", "icms_tax",
        "transportation_cost",
    ]

    output_name = ntpath.basename(fname).replace(".csv", "")

    print("Making tables...")
    for table_name in ("ymsr", "yms", "ymr"):
        make_table(ei_df, table_name, output_values, odir, output_name)
def main(fname, odir):
    """Load the EI data (CSV or cached frame) and emit three tables.

    The frame comes from ``_check_hdf_cache(fname, odir)``; if that yields
    ``None`` the CSV at ``fname`` is parsed, ``icms_tax``, ``tax`` and the
    five per-CFOP value columns are added, and the frame is written to the
    returned target under ``HDF_CACHE``. Afterwards ``make_table`` is called
    for "ymsr", "yms" and "ymr".

    Args:
        fname: Path of the semicolon-separated CSV; its base name with
            ".csv" removed is used as the output name.
        odir: Forwarded to ``_check_hdf_cache`` and ``make_table``.
    """
    print("Reading data frame...")

    csv_columns = [
        "ncm",
        "hs_id",
        "EconomicAtivity_ID_CNAE_Receiver_5d",
        "cnae_id_r",
        "EconomicAtivity_ID_CNAE_Sender_5d",
        "cnae_id_s",
        "CFOP_ID",
        "Receiver_foreign",
        "Sender_foreign",
        "bra_id_r",
        "bra_id_s",
        "year",
        "month",
        "transportation_cost",
        "ICMS_ST_Value",
        "ICMS_Value",
        "IPI_Value",
        "PIS_Value",
        "COFINS_Value",
        "II_Value",
        "product_value",
        "ISSQN_Value",
    ]

    csv_converters = {"hs_id": update_hs_id,
                      "bra_id_s": lookup_location,
                      "bra_id_r": lookup_location,
                      "cnae_id_r": lookup_cnae,
                      "cnae_id_s": lookup_cnae}

    ei_df, target = _check_hdf_cache(fname, odir)

    if ei_df is None:
        ei_df = pd.read_csv(fname, header=0, sep=";",
                            converters=csv_converters, names=csv_columns,
                            quotechar="'", decimal=",")

        print("Processing...")
        ei_df['icms_tax'] = ei_df.ICMS_ST_Value + ei_df.ICMS_Value
        ei_df['tax'] = (
            ei_df.icms_tax + ei_df.IPI_Value + ei_df.PIS_Value +
            ei_df.COFINS_Value + ei_df.II_Value + ei_df.ISSQN_Value
        )

        # Parallel tuples: the i-th column receives product_value on rows
        # whose CFOP_ID equals the i-th constant, and stays 0 elsewhere.
        value_columns = ("purchase_value", "transfer_value",
                         "devolution_value", "icms_credit_value",
                         "remit_value")
        cfop_codes = (PURCHASES, TRANSFERS, DEVOLUTIONS, CREDITS, REMITS)

        for column in value_columns:
            ei_df[column] = 0
        for column, code in zip(value_columns, cfop_codes):
            matches = ei_df.CFOP_ID == code
            ei_df.loc[matches, column] = ei_df.product_value

        ei_df.to_hdf(target, HDF_CACHE, append=False)

    print("Aggregating...")
    # Not referenced again within this function.
    primary_key = ['year', 'month',
                   'bra_id_s', 'cnae_id_s',
                   'bra_id_r', 'cnae_id_r',
                   'hs_id']

    output_values = ["purchase_value", "transfer_value", "devolution_value",
                     "icms_credit_value", "remit_value", "tax", "icms_tax",
                     "transportation_cost"]

    base_name = ntpath.basename(fname)
    output_name = base_name.replace(".csv", "")

    print("Making tables...")
    for table in ["ymsr", "yms", "ymr"]:
        make_table(ei_df, table, output_values, odir, output_name)
def main(fname, blpath, odir):
    """Read the EI CSV, apply the blacklist and build the aggregate tables.

    Steps performed here:

    1. Parse ``fname`` and the blacklist file ``blpath``.
    2. Left-merge the blacklist on the sender pair (``bra_id_s``,
       ``cnae_id_s``) and then on the receiver pair (``bra_id_r``,
       ``cnae_id_r``); rows whose matched ``d_bl`` equals 1 get that side's
       CNAE id replaced by ``CNAE_BLACKLISTED``.
    3. Rows where both sides are blacklisted get ``hs_id = HS_BLACKLIST``.
    4. Derive ``icms_tax``, ``tax`` and one value column per CFOP class.
    5. Call ``make_table`` for seven table names.

    Args:
        fname: Path of the semicolon-separated EI CSV; its base name with
            ".csv" removed is used as the output name.
        blpath: Path of the semicolon-separated blacklist CSV.
        odir: Forwarded to ``make_table``.
    """
    print("Reading data frame HELLO...")

    cols = [
        "ncm", "hs_id", "EconomicAtivity_ID_CNAE_Receiver_5d", "cnae_id_r",
        "EconomicAtivity_ID_CNAE_Sender_5d", "cnae_id_s", "CFOP_ID",
        "Receiver_foreign", "Sender_foreign", "bra_id_r", "bra_id_s", "year",
        "month", "transportation_cost", "ICMS_ST_Value", "ICMS_Value",
        "IPI_Value", "PIS_Value", "COFINS_Value", "II_Value", "product_value",
        "ISSQN_Value",
    ]

    converters = {
        "hs_id": update_hs_id,
        "bra_id_s": lookup_location,
        "bra_id_r": lookup_location,
        "cnae_id_r": lookup_cnae,
        "cnae_id_s": lookup_cnae,
    }

    ei_df = pd.read_csv(fname, header=0, sep=";", converters=converters,
                        names=cols, quotechar="'", decimal=",")

    # -- Do blacklist filtering
    bl_cols = ["bra_id", "cnae_id", "num_est", "d_bl"]
    bl_converters = {"bra_id": lookup_location, "cnae_id": lookup_cnae}
    bl_df = pd.read_csv(blpath, header=0, sep=";", converters=bl_converters,
                        names=bl_cols, quotechar="'", decimal=",")

    # Sender side. Assignments go through .loc so they always write into
    # ei_df itself; chained indexing (frame.col[mask] = value) may write to
    # a temporary copy instead.
    ei_df = pd.merge(ei_df, bl_df, how='left',
                     left_on=['bra_id_s', 'cnae_id_s'],
                     right_on=['bra_id', 'cnae_id'])
    sender_hit = ei_df.d_bl == 1
    ei_df.loc[sender_hit, 'cnae_id_s'] = CNAE_BLACKLISTED
    print("Blacklisting %s sending transactions" % (
        ei_df.loc[sender_hit, 'cnae_id_s'].count()))

    # Receiver side: drop the sender merge's blacklist columns first so the
    # second merge can bring them in again under the same names.
    ei_df = ei_df.drop(labels=bl_cols, axis=1)
    ei_df = pd.merge(ei_df, bl_df, how='left',
                     left_on=['bra_id_r', 'cnae_id_r'],
                     right_on=['bra_id', 'cnae_id'])
    receiver_hit = ei_df.d_bl == 1
    print("Blacklisting %s receiving transactions" % (
        ei_df.loc[receiver_hit, 'cnae_id_r'].count()))
    ei_df.loc[receiver_hit, 'cnae_id_r'] = CNAE_BLACKLISTED
    # NOTE(review): the blacklist columns from this second merge stay on the
    # frame; confirm make_table is indifferent to them.

    # -- HS blacklist: only when both sender and receiver are blacklisted.
    both_blacklisted = ((ei_df.cnae_id_r == CNAE_BLACKLISTED)
                        & (ei_df.cnae_id_s == CNAE_BLACKLISTED))
    ei_df.loc[both_blacklisted, 'hs_id'] = HS_BLACKLIST
    print("Blacklisting %s products" % (
        ei_df.hs_id[ei_df.hs_id == HS_BLACKLIST].count()))

    # No rows are dropped below; each CFOP class gets its own value column.
    print("Processing...")
    ei_df['icms_tax'] = ei_df.ICMS_ST_Value + ei_df.ICMS_Value
    ei_df['tax'] = (ei_df.icms_tax + ei_df.IPI_Value + ei_df.PIS_Value
                    + ei_df.COFINS_Value + ei_df.II_Value
                    + ei_df.ISSQN_Value)

    # Vectorised form of "product_value if CFOP_ID == code else 0"; avoids a
    # Python-level call per row.
    value_by_cfop = [
        ("purchase_value", PURCHASES),
        ("transfer_value", TRANSFERS),
        ("devolution_value", DEVOLUTIONS),
        ("icms_credit_value", CREDITS),
        ("remit_value", REMITS),
    ]
    for column, cfop in value_by_cfop:
        ei_df[column] = ei_df.product_value.where(ei_df.CFOP_ID == cfop, 0)

    print("Aggregating...")
    output_values = [
        "purchase_value", "transfer_value", "devolution_value",
        "icms_credit_value", "remit_value", "tax", "icms_tax",
        "transportation_cost",
    ]

    output_name = ntpath.basename(fname).replace(".csv", "")

    print("Making tables...")
    table_names = ("ymsrp", "ymsr", "ymsp", "ymrp", "yms", "ymr", "ymp")
    for table_name in table_names:
        make_table(ei_df, table_name, output_values, odir, output_name)
def main(fname, blpath, odir):
    """Read the EI CSV, apply the blacklist and build the aggregate tables.

    Steps performed here:

    1. Parse ``fname`` and the blacklist file ``blpath``.
    2. Left-merge the blacklist on the sender pair (``bra_id_s``,
       ``cnae_id_s``) and then on the receiver pair (``bra_id_r``,
       ``cnae_id_r``); rows whose matched ``d_bl`` equals 1 get that side's
       CNAE id replaced by ``CNAE_BLACKLISTED``.
    3. Rows where both sides are blacklisted get ``hs_id = HS_BLACKLIST``.
    4. Derive ``icms_tax``, ``tax`` and one value column per CFOP class.
    5. Call ``make_table`` for seven table names.

    Args:
        fname: Path of the semicolon-separated EI CSV; its base name with
            ".csv" removed is used as the output name.
        blpath: Path of the semicolon-separated blacklist CSV.
        odir: Forwarded to ``make_table``.
    """
    print("Reading data frame HELLO...")

    cols = ["ncm", "hs_id",
            "EconomicAtivity_ID_CNAE_Receiver_5d",
            "cnae_id_r",
            "EconomicAtivity_ID_CNAE_Sender_5d",
            "cnae_id_s",
            "CFOP_ID",
            "Receiver_foreign",
            "Sender_foreign",
            "bra_id_r",
            "bra_id_s",
            "year",
            "month",
            "transportation_cost",
            "ICMS_ST_Value",
            "ICMS_Value",
            "IPI_Value",
            "PIS_Value",
            "COFINS_Value",
            "II_Value",
            "product_value",
            "ISSQN_Value"]

    converters = {"hs_id": update_hs_id,
                  "bra_id_s": lookup_location,
                  "bra_id_r": lookup_location,
                  "cnae_id_r": lookup_cnae,
                  "cnae_id_s": lookup_cnae}

    ei_df = pd.read_csv(fname, header=0, sep=";", converters=converters,
                        names=cols, quotechar="'", decimal=",")

    # -- Do blacklist filtering
    bl_cols = ["bra_id", "cnae_id", "num_est", "d_bl"]
    bl_converters = {"bra_id": lookup_location, "cnae_id": lookup_cnae}
    bl_df = pd.read_csv(blpath, header=0, sep=";", converters=bl_converters,
                        names=bl_cols, quotechar="'", decimal=",")

    # Sender side. Assignments go through .loc so they always write into
    # ei_df itself; chained indexing (frame.col[mask] = value) may write to
    # a temporary copy instead.
    ei_df = pd.merge(ei_df, bl_df, how='left',
                     left_on=['bra_id_s', 'cnae_id_s'],
                     right_on=['bra_id', 'cnae_id'])
    sender_mask = ei_df.d_bl == 1
    ei_df.loc[sender_mask, 'cnae_id_s'] = CNAE_BLACKLISTED
    print("Blacklisting %s sending transactions" % (
        ei_df.loc[sender_mask, 'cnae_id_s'].count()))

    # Receiver side: drop the sender merge's blacklist columns first so the
    # second merge can bring them in again under the same names.
    ei_df = ei_df.drop(labels=bl_cols, axis=1)
    ei_df = pd.merge(ei_df, bl_df, how='left',
                     left_on=['bra_id_r', 'cnae_id_r'],
                     right_on=['bra_id', 'cnae_id'])
    receiver_mask = ei_df.d_bl == 1
    print("Blacklisting %s receiving transactions" % (
        ei_df.loc[receiver_mask, 'cnae_id_r'].count()))
    ei_df.loc[receiver_mask, 'cnae_id_r'] = CNAE_BLACKLISTED
    # NOTE(review): the blacklist columns from this second merge stay on the
    # frame; confirm make_table is indifferent to them.

    # -- HS blacklist: only when both sender and receiver are blacklisted.
    hs_mask = ((ei_df.cnae_id_r == CNAE_BLACKLISTED)
               & (ei_df.cnae_id_s == CNAE_BLACKLISTED))
    ei_df.loc[hs_mask, 'hs_id'] = HS_BLACKLIST
    print("Blacklisting %s products" % (
        ei_df.hs_id[ei_df.hs_id == HS_BLACKLIST].count()))

    # No rows are dropped below; each CFOP class gets its own value column.
    print("Processing...")
    ei_df['icms_tax'] = ei_df.ICMS_ST_Value + ei_df.ICMS_Value
    ei_df['tax'] = (ei_df.icms_tax + ei_df.IPI_Value + ei_df.PIS_Value
                    + ei_df.COFINS_Value + ei_df.II_Value
                    + ei_df.ISSQN_Value)

    # Vectorised form of "product_value if CFOP_ID == code else 0"; avoids a
    # Python-level call per row.
    cfop_columns = [
        ("purchase_value", PURCHASES),
        ("transfer_value", TRANSFERS),
        ("devolution_value", DEVOLUTIONS),
        ("icms_credit_value", CREDITS),
        ("remit_value", REMITS),
    ]
    for column, cfop in cfop_columns:
        ei_df[column] = ei_df.product_value.where(ei_df.CFOP_ID == cfop, 0)

    print("Aggregating...")
    output_values = ["purchase_value", "transfer_value", "devolution_value",
                     "icms_credit_value", "remit_value", "tax", "icms_tax",
                     "transportation_cost"]

    output_name = ntpath.basename(fname).replace(".csv", "")

    print("Making tables...")
    for table in ("ymsrp", "ymsr", "ymsp", "ymrp", "yms", "ymr", "ymp"):
        make_table(ei_df, table, output_values, odir, output_name)