コード例 #1
0
ファイル: step_4_sent.py プロジェクト: radoraykov/datavivaetl
def sumColumns(f,columns):
    """Add up ``to_number(f[c])`` for every ``c`` in *columns*.

    f       -- any object indexable by the entries of *columns*.
    columns -- iterable of keys/labels to look up in *f*.

    Returns None when *columns* is empty.  While the running total is
    falsy (None, or a zero so far) it is simply replaced by the next
    converted value instead of being added to it.
    """
    running = None
    for key in columns:
        # The condition is checked first, then the chosen branch is evaluated.
        running = (running + to_number(f[key])) if running else to_number(f[key])
    return running
コード例 #2
0
ファイル: step_4_sent.py プロジェクト: radoraykov/datavivaetl
    def test_main(self):
        """Compare the 2009 higher-education CSV with the ``hedu_ybucd`` table.

        Reads the CSV, passes ``bra`` through ``city_fix`` and ``value``
        through ``to_number``, queries the summed ``enrolled`` figure for
        ``bra_id='4mg'``, and asserts that ``run_check`` returns 0.
        """
        # Sample input row: "Brasil;;;2.314;"
        idsites = ['bra', 'bra_sub1', 'bra_sub2', 'value']
        # Four named columns followed by the integer labels 5..39.
        cols = idsites + list(range(5, 40))

        sent = read_from_csv(
            "docs\\check\\educacaosuperior\\sinopse_da_educacao_superior_2009-1.1.csv",
            delimiter=";",
            cols=cols,
            usecols=cols,
        )
        # Drop the two sub-level columns, then keep rows with >= 2 non-NA values.
        sent = sent.drop(['bra_sub1', 'bra_sub2'], axis=1).dropna(thresh=2)

        state_map = getMapStates()

        def fix_city(raw_name):
            return city_fix(raw_name, state_map)

        sent['bra'] = sent["bra"].map(fix_city)
        sent['value'] = sent.apply(lambda row: to_number(row['value']), axis=1)
        sent['value'] = sent['value'].astype(np.float64)

        query = "SELECT bra_id as id,sum(enrolled) as value FROM hedu_ybucd where bra_id='4mg' and bra_id_len=3 AND d_id in ('A','B') and course_id_len=6 group by 1"
        expected = sql_to_df(query, db)
        expected['value'] = expected['value'].astype(np.float64)

        mismatch = run_check(expected, sent, 'bra', 0, 'value')

        self.assertEqual(mismatch, 0)
コード例 #3
0
ファイル: step_4_sent.py プロジェクト: radoraykov/datavivaetl
def clean_df_sent(dfSent):
    """Prepare the "sent" frame for comparison and return the result.

    Steps, in order:
      1. drop the ``wage_dec``, ``emp_id``, ``est_id`` and ``year`` columns;
      2. call ``left_df(dfSent, 'cbo', 4, 'cbo4')`` (presumably stores the
         leftmost 4 characters of ``cbo`` in ``cbo4`` -- confirm in left_df);
      3. overwrite ``cbo`` with ``to_number`` of ``cbo4``;
      4. copy the first column into ``munic``;
      5. run ``munic`` and ``wage`` through ``to_number`` and cast both to
         float64.
    """
    # 'cnae' is commented out of the drop list in the original code.
    dfSent = dfSent.drop(['wage_dec', 'emp_id', 'est_id', 'year'], axis=1)

    dfSent = left_df(dfSent, 'cbo', 4, 'cbo4')
    dfSent['cbo'] = dfSent.apply(lambda row: to_number(row['cbo4']), axis=1)

    # The first column is looked up only after the steps above have run.
    first_label = dfSent.columns[0]
    dfSent['munic'] = dfSent[first_label]

    for label in ('munic', 'wage'):
        dfSent[label] = dfSent.apply(lambda row: to_number(row[label]), axis=1)
        dfSent[label] = dfSent[label].astype(np.float64)

    return dfSent
コード例 #4
0
ファイル: step_4_sent.py プロジェクト: radoraykov/datavivaetl
def clean_df_dv(dfDV):
    """Convert the ``id`` column of *dfDV* to float64 and return the frame.

    The frame is printed first, then every row's ``id`` is passed through
    ``to_number`` and the column is cast to ``np.float64``.  *dfDV* is
    modified in place; the same object is returned.
    """
    # Call form of print: same output as the Python 2 statement for a single
    # argument, and it also parses under Python 3.
    print(dfDV)
    dfDV['id'] = dfDV.apply(lambda row: to_number(row['id']), axis=1)
    dfDV['id'] = dfDV['id'].astype(np.float64)
    return dfDV