def sumColumns(f, columns):
    """Add up the numeric values of the selected fields of ``f``.

    Every ``f[i]`` for ``i`` in ``columns`` is converted with ``to_number``
    and the converted values are added in iteration order.

    Args:
        f: Any object supporting ``f[i]`` lookups for the given keys.
        columns: Iterable of keys/indices to read from ``f``.

    Returns:
        The sum of the converted values, or ``None`` when ``columns`` is
        empty (no value was ever read).
    """
    total = None
    for i in columns:
        value = to_number(f[i])
        # Test the sentinel explicitly: a plain truth test would treat a
        # running total of 0 as "nothing accumulated yet" and would raise
        # for values whose truth value is ambiguous (e.g. array-likes).
        total = value if total is None else total + value
    return total
def test_main(self):
    """Check the CSV ``value`` figures against the database enrollment sum.

    Loads the 2009 higher-education synopsis CSV, normalises its ``bra`` and
    ``value`` columns, queries the summed ``enrolled`` figure for
    ``bra_id='4mg'`` and asserts that ``run_check`` returns 0 for the pair.
    """
    # Example input row (kept from the original comment): "Brasil;;;2.314;"
    # Four named columns followed by the positional labels 5..39.
    cols = ['bra', 'bra_sub1', 'bra_sub2', 'value'] + list(range(5, 40))
    dfSent = read_from_csv(
        "docs\\check\\educacaosuperior\\sinopse_da_educacao_superior_2009-1.1.csv",
        delimiter=";",
        cols=cols,
        usecols=cols,
    )
    dfSent = dfSent.drop(['bra_sub1', 'bra_sub2'], axis=1)
    # Keep only rows that have at least two non-missing values.
    dfSent = dfSent.dropna(thresh=2)

    mapStates = getMapStates()
    # Inline lambda instead of a local named ``format`` so the builtin is
    # not shadowed inside this test.
    dfSent['bra'] = dfSent["bra"].map(lambda name: city_fix(name, mapStates))
    dfSent['value'] = dfSent.apply(lambda f: to_number(f['value']), axis=1)
    dfSent['value'] = dfSent['value'].astype(np.float64)

    sql = "SELECT bra_id as id,sum(enrolled) as value FROM hedu_ybucd where bra_id='4mg' and bra_id_len=3 AND d_id in ('A','B') and course_id_len=6 group by 1"
    dfDV = sql_to_df(sql, db)
    dfDV['value'] = dfDV['value'].astype(np.float64)

    # NOTE(review): presumably run_check returns a count/amount of
    # mismatches between the two frames -- confirm against its definition.
    total = run_check(dfDV, dfSent, 'bra', 0, 'value')
    self.assertEqual(total, 0)
def clean_df_sent(dfSent):
    """Normalise the "sent" data frame and return it.

    In order: drops the ``wage_dec``, ``emp_id``, ``est_id`` and ``year``
    columns, passes the frame through ``left_df`` for ``cbo``/``cbo4``,
    stores the ``to_number`` conversion of ``cbo4`` in ``cbo``, copies the
    first remaining column into ``munic``, and converts ``munic`` and
    ``wage`` with ``to_number`` followed by a cast to ``np.float64``.
    """

    def as_float_column(frame, name):
        # Convert one column row by row, then cast it to float64 in place.
        frame[name] = frame.apply(lambda row: to_number(row[name]), axis=1)
        frame[name] = frame[name].astype(np.float64)

    # 'cnae' appears in the original comment but is not dropped.
    for unwanted in ('wage_dec', 'emp_id', 'est_id', 'year'):
        dfSent = dfSent.drop(unwanted, axis=1)

    # NOTE(review): presumably left_df stores the leftmost 4 characters of
    # 'cbo' in a new 'cbo4' column -- confirm against its definition.
    dfSent = left_df(dfSent, 'cbo', 4, 'cbo4')
    dfSent['cbo'] = dfSent.apply(lambda row: to_number(row['cbo4']), axis=1)

    # The first column left after the steps above is the source for 'munic'.
    first_column = dfSent.columns[0]
    dfSent['munic'] = dfSent[first_column]
    as_float_column(dfSent, 'munic')
    as_float_column(dfSent, 'wage')
    return dfSent
def clean_df_dv(dfDV):
    """Convert the ``id`` column of ``dfDV`` to ``np.float64`` and return it.

    The frame is printed first, then each row's ``id`` is passed through
    ``to_number`` and the column is cast to ``np.float64``. The column is
    reassigned on the frame that was passed in, and that same frame is
    returned.
    """
    # Debug dump of the incoming frame. The parenthesised form prints the
    # same text under the Python 2 print statement and is also valid
    # Python 3, unlike the bare ``print dfDV`` statement.
    print(dfDV)
    dfDV['id'] = dfDV.apply(lambda row: to_number(row['id']), axis=1)
    dfDV['id'] = dfDV['id'].astype(np.float64)
    return dfDV