コード例 #1
0
ファイル: step_4_sent.py プロジェクト: radoraykov/datavivaetl
def clean_df_sent(dfSent):
    """Drop unused columns and normalise ``cbo``, ``munic`` and ``wage``.

    Steps, in order:

    1. Remove the ``wage_dec``, ``emp_id``, ``est_id`` and ``year`` columns.
    2. Call ``left_df(dfSent, 'cbo', 4, 'cbo4')`` and overwrite ``cbo`` with
       ``to_number`` applied to each row's ``cbo4`` value.
    3. Copy whichever column is first at that point into ``munic``, pass it
       through ``to_number`` and cast it to ``float64``.
    4. Pass ``wage`` through ``to_number`` and cast it to ``float64``.

    ``left_df`` and ``to_number`` are project helpers that are not defined in
    this function.

    :param dfSent: input DataFrame; must contain the columns named above.
    :return: the cleaned DataFrame.
    """
    def through_to_number(column):
        # Row-wise pass: each row's value in `column` is handed to to_number.
        return dfSent.apply(lambda row: to_number(row[column]), axis=1)

    # 'cnae' only appears as a commented-out entry in the original list,
    # so it is not dropped here.
    dfSent = dfSent.drop(['wage_dec', 'emp_id', 'est_id', 'year'], axis=1)

    # presumably stores the leftmost 4 characters of 'cbo' in 'cbo4' --
    # confirm against left_df
    dfSent = left_df(dfSent, 'cbo', 4, 'cbo4')
    dfSent['cbo'] = through_to_number('cbo4')

    # 'munic' starts as a copy of the first column (selected by position,
    # not by name) and is then converted in place.
    first_column = dfSent.columns[0]
    dfSent['munic'] = dfSent[first_column]
    dfSent['munic'] = through_to_number('munic').astype(np.float64)

    dfSent['wage'] = through_to_number('wage').astype(np.float64)

    return dfSent
コード例 #2
0
ファイル: testsender.py プロジェクト: radoraykov/datavivaetl
def openNFe(entrada,delimiter):
    '''
    Read an NFe CSV extract and keep only the sender CNAE, the sender
    municipality and the product value.

    The file is read through the project helper ``read_from_csv`` using the
    following column names, in this order:

    TransactedProduct_ID_NCM,TransactedProduct_ID_HS,EconomicAtivity_ID_CNAE_Receiver,
    EconomicAtivity_ID_CNAE_Sender,CFOP_ID,CFOP_Reclassification,CFOP_Flow,Receiver_Type,
    Sender_Type,Municipality_ID_Receiver,Municipality_ID_Sender,Year,Monthly,
    Receiver_Situation,Sender_Situation,Cost_Value,ICMS_ST_Value,ICMS_ST_RET_Value,ICMS_Value,
    IPI_Value,PIS_Value,COFINS_Value,II_Value,Product_Value,ISSQN_Value,Origin

    :param entrada: CSV source handed to ``read_from_csv``.
    :param delimiter: field delimiter handed to ``read_from_csv``.
    :return: the result of ``left_df`` applied to the three kept columns
        with ``'EconomicAtivity_ID_CNAE_Sender'`` and ``2``.
    '''

    # Column names must be unique and follow the layout listed above.  The
    # previous list repeated 'Sender_Type' and 'Sender_Situation' in the
    # slots that belong to 'Receiver_Type' and 'Receiver_Situation'.
    orig_cols = [
        'TransactedProduct_ID_NCM',
        'TransactedProduct_ID_HS',
        'EconomicAtivity_ID_CNAE_Receiver',
        'EconomicAtivity_ID_CNAE_Sender',
        'CFOP_ID',
        'CFOP_Reclassification',
        'CFOP_Flow',
        'Receiver_Type',
        'Sender_Type',
        'Municipality_ID_Receiver',
        'Municipality_ID_Sender',
        'Year',
        'Monthly',
        'Receiver_Situation',
        'Sender_Situation',
        'Cost_Value',
        'ICMS_ST_Value',
        'ICMS_ST_RET_Value',
        'ICMS_Value',
        'IPI_Value',
        'PIS_Value',
        'COFINS_Value',
        'II_Value',
        'Product_Value',
        'ISSQN_Value',
        'Origin',
    ]

    # The two identifier columns are read as strings; Product_Value goes
    # through the project's floatvert converter.
    converters = {
        "EconomicAtivity_ID_CNAE_Sender": str,
        "Municipality_ID_Sender": str,
        "Product_Value": floatvert,
    }

    dados = read_from_csv(entrada,delimiter=delimiter,cols=orig_cols,converters=converters,header=1)

    # Only these three columns are used downstream.
    dados = dados[['EconomicAtivity_ID_CNAE_Sender','Municipality_ID_Sender','Product_Value']]

    # presumably keeps the leftmost 2 characters of the sender CNAE --
    # confirm against left_df
    dados = left_df(dados,'EconomicAtivity_ID_CNAE_Sender',2)
    return dados
コード例 #3
0
ファイル: transform.py プロジェクト: radoraykov/datavivaetl
def transform(year):
    '''
    Load the export extract for ``year`` and run year-dependent HS
    correspondence lookups on it, printing the resulting frame.

    The extract is read from 'dados/exportacao/sent/<year>_extract.csv'
    through the project helper ``read_from_csv``, then passed through
    ``left_df(df, 'HS', 4)``.  Which correspondence tables are loaded
    depends on the year range (2000-2001, 2002-2006, 2007-2011, 2012-2014).

    :param year: year of the extract; compared against integer bounds.
    :return: nothing -- the function has no return statement.  A year that
        matches none of the ranges only loads the extract and calls left_df.

    NOTE(review): the ``print df`` statements are Python 2 syntax.
    NOTE(review): every ``df.apply(...)`` below is called without ``axis``;
    pandas then calls the function once per column, so ``x`` in the lambdas
    is a whole column rather than a single HS code -- confirm whether a
    per-value mapping of the 'HS' column was intended.
    '''

    cols = ['ANO','MES','HS','PAIS','UF','PORTO','MUNICIPIO','UNIDADE','QUANTIDADE','KGLIQUIDO','VALORFOB']

    source_file = 'dados/exportacao/sent/' + str(year) + str('_extract.csv')


    # Positional arguments 1 and "," are presumably the header setting and
    # the delimiter -- confirm against read_from_csv.
    df = read_from_csv(source_file, 1, ",", cols)
    # presumably keeps the leftmost 4 characters of 'HS' -- confirm against left_df
    df = left_df(df, 'HS', 4)

    if year == 2000 or year == 2001:

        # Correspondence table 1996x2002 (columns HS96 and HS02, both read as str).
        rdCols = ['HS96', 'HS02']
        rd = read_from_csv('docs/classificacao/HS/anos/1996x2002.csv', 2, ';', rdCols, converters={"HS96": str, "HS02": str})


        # Selects the HS02 entries of the rows whose HS96 equals str(x).
        f = lambda x: rd['HS02'][rd.HS96 == str(x)]
        df = df.apply(f)

        # Second step, labelled "convert to 2007" by the original author.
        rdCols2 = ['HS2007']
        rd2 = read_from_csv('docs/classificacao/HS/anos/2007.csv', 2, '|', rdCols2, converters={"HS2007": str})

        # NOTE(review): only the HS2007 column is requested for rd2, yet this
        # lambda reads rd2.HS96; the sibling branch below uses rd2.HS2007.
        # The lambda also returns a one-element list wrapping the comparison
        # result rather than looked-up values -- confirm intent.
        f2 = lambda x: [rd2.HS96 == str(x)]
        df = df.apply(f2)

        print df

    elif year > 2001 and year <= 2006:
        # Correspondence table 2002x2007 (columns HS02 and HS07; only HS07 is read as str).
        rdCols = ['HS02', 'HS07']
        rd = read_from_csv('docs/classificacao/HS/anos/2002x2007.csv', 1, ';', rdCols, converters={"HS07": str})

        # Selects the HS02 entries of the rows whose HS07 equals str(x).
        # NOTE(review): this is the opposite lookup direction from the
        # 2000-2001 branch (which matches on the older code) -- confirm.
        f = lambda x: rd['HS02'][rd.HS07 == str(x)]
        df = df.apply(f)

        # NOTE(review): same file as in the 2000-2001 branch, but read there
        # with arguments 2 and '|' instead of 1 and ';' -- confirm which is right.
        rdCols2 = ['HS2007']
        rd2 = read_from_csv('docs/classificacao/HS/anos/2007.csv', 1, ';', rdCols2, converters={"HS2007": str})

        # Returns a one-element list wrapping the comparison result.
        f2 = lambda x: [rd2.HS2007 == str(x)]
        df = df.apply(f2)

        print df

    elif year >= 2007 and year <= 2011:

        # Correspondence table 2012x2007 (columns HS2012 and HS2007; only HS2007 is read as str).
        rdCols = ['HS2012', 'HS2007']
        rd = read_from_csv('docs/classificacao/HS/anos/2012x2007.csv', 1, ';', rdCols, converters={"HS2007": str})

        # Selects the HS2012 entries of the rows whose HS2007 equals str(x).
        f = lambda x: rd['HS2012'][rd.HS2007 == str(x)]
        df = df.apply(f)

        print df

    elif year >= 2012 and year <= 2014:

        # Same table and lookup as the 2007-2011 branch.
        rdCols = ['HS2012', 'HS2007']
        rd = read_from_csv('docs/classificacao/HS/anos/2012x2007.csv', 1, ';', rdCols, converters={"HS2007": str})

        f = lambda x: rd['HS2012'][rd.HS2007 == str(x)]
        # NOTE(review): unlike the other branches, the result of apply is not
        # assigned back, so df is printed as it was after left_df -- confirm
        # whether that is deliberate.
        df.apply(f)

        print df