Beispiel #1
0
 def test_Municipality(self,year,size,column):
     """Compare per-location export totals from the database with the sent CSV.

     For every id returned by the ``secex_ymb``/``attrs_bra`` query, the
     yearly export sum (``val``) is compared with the sum of CSV column 9
     for the rows whose grouping column equals that id.  Every missing
     group and every differing total is printed and counted; the test
     asserts that the count is zero.

     Args:
         year: Year filtered in the query and used in the CSV file name.
         size: Value matched against ``bra_id_len`` in the query.
         column: Positional index of the CSV column holding the location id.

     NOTE(review): the extra parameters mean a plain unittest runner cannot
     call this method directly -- presumably it is invoked by a wrapper
     defined elsewhere; confirm.
     """
     print("Entering in checkBRA")

     # `valimport` is selected but not used by this check.
     dfDV = sql_to_df("SELECT a.id_mdic as id,sum(export_val) as val,sum(import_val) as valimport FROM secex_ymb s,attrs_bra a where s.bra_id_len="+str(size)+" and  a.id=s.bra_id and s.month=0 and s.year="+str(year)+" group by 1;",db)

     # Raw string: same value as before, without relying on unrecognised
     # backslash escapes being passed through.
     dfSent = read_from_csv(r"dados\exportacao\sent\MDIC_"+str(year)+".csv",delimiter="|")
     value_column = dfSent.columns[9]
     dfGroup = dfSent.groupby(dfSent.columns[column])

     total = 0
     # The query groups by id, so each id appears once; walking the id/val
     # pairs avoids re-filtering the whole frame for every row.
     for mdic, valDV in zip(dfDV['id'], dfDV['val']):
         mdicid = to_int(mdic)
         if not mdicid:
             # to_int gave a falsy result (presumably a non-numeric or
             # zero id -- confirm against to_int); nothing to look up.
             continue

         # No export value in the database: nothing to compare.  Skipping
         # here keeps a previous row's CSV total from leaking into this one.
         if not valDV or str(valDV) == 'nan':
             continue

         try:
             valCSV = dfGroup.get_group(mdicid)[value_column].sum()
         except KeyError:
             # get_group raises KeyError when the id has no rows in the CSV.
             total += 1
             print("Not found in CSV a value for "+str(mdicid)+" - (original bra "+str(mdic)+")  Exports of value  "+ str(valDV)+ " in the year "+str(year))
             continue

         valCSV = to_int(valCSV)
         valDV = to_int(valDV)
         if valCSV and valDV and valDV != valCSV:
             total += 1
             print("ERROR in BRA ("+str(year)+"): "+str(mdic)+" / "+str(mdicid)+" - Value in CSV "+ str(valCSV)+ " <> Value in DV "+str(valDV) + " - Difference: "+str(valCSV - valDV))

     self.assertEqual(total, 0)
Beispiel #2
0
 def test_HS(self,year):
     """Compare per-product (HS) export totals from the database with the sent CSV.

     For every ``hs_id`` returned by the ``secex_ymp`` query, the id is
     zero-padded to six characters and its last four characters are
     converted with ``to_int``; that number is the key looked up in CSV
     column 10.  The yearly export sum (``val``) is compared with the sum
     of CSV column 9 for that key.  Every missing group and every
     differing total is printed and counted; the test asserts that the
     count is zero.

     Args:
         year: Year filtered in the query and used in the CSV file name.

     NOTE(review): the extra parameter means a plain unittest runner cannot
     call this method directly -- presumably it is invoked by a wrapper
     defined elsewhere; confirm.
     """
     print("Entering in checkHS")

     # `valimport` is selected but not used by this check.
     dfDV = sql_to_df("SELECT s.hs_id as id,sum(export_val) as val,sum(import_val) as valimport FROM secex_ymp s where s.hs_id_len=6 and s.month=0 and s.year="+str(year)+" group by 1;",db)

     # Raw string: same value as before, without relying on unrecognised
     # backslash escapes being passed through.
     dfSent = read_from_csv(r"dados\exportacao\sent\MDIC_"+str(year)+".csv",delimiter="|")
     value_column = dfSent.columns[9]
     dfGroup = dfSent.groupby(dfSent.columns[10])

     total = 0
     # The query groups by id, so each id appears once; walking the id/val
     # pairs avoids re-filtering the whole frame for every row.
     for hs, valDV in zip(dfDV['id'], dfDV['val']):
         hsid = str(hs).zfill(6)
         hsshort = hsid[2:6]
         hsint = to_int(hsshort)
         if not hsint:
             # to_int gave a falsy result for the 4-character code;
             # nothing to look up.
             continue

         # No export value in the database: nothing to compare.  Skipping
         # here keeps a previous row's CSV total from leaking into this one.
         if not valDV or str(valDV) == 'nan':
             continue

         try:
             # Builtin sum kept on purpose: unlike Series.sum() it does not
             # skip NaN cells, and the original relied on that behaviour.
             valCSV = sum(dfGroup.get_group(hsint)[value_column])
         except KeyError:
             # get_group raises KeyError when the code has no rows in the CSV.
             total += 1
             # str(hs): the id is not guaranteed to be a string here.
             print("Not found in CSV a value for "+str(hsint)+" / "+str(hsshort)+" (original hs "+str(hs)+") - Exports of value  "+ str(valDV)+ " in the year "+str(year))
             continue

         valCSV = to_int(valCSV)
         valDV = to_int(valDV)
         if valCSV and valDV and valDV != valCSV:
             total += 1
             print("ERROR in HS ("+str(year)+"): "+str(hsint)+" / "+str(hs)+" - Value in CSV "+ str(valCSV)+ " <> Value in DV "+str(valDV) + " - Difference: "+str(valCSV - valDV))

     self.assertEqual(total, 0)
Beispiel #3
0
    def test_main(self):
        """Check enrolment totals in ``hedu_ybucd`` against the 2009 synopsis CSV.

        Loads the semicolon-delimited synopsis file, normalises its ``bra``
        column with ``city_fix`` and its ``value`` column with ``to_number``,
        then hands it to ``run_check`` together with the enrolment sum
        queried for ``bra_id='4mg'``.  Asserts that ``run_check`` returns 0.
        """
        # Sample input row: "Brasil;;;2.314;"
        # Four named columns followed by the positional labels 5..39.
        column_labels = ['bra', 'bra_sub1', 'bra_sub2', 'value'] + list(range(5, 40))

        dfSent = read_from_csv(
            "docs\\check\\educacaosuperior\\sinopse_da_educacao_superior_2009-1.1.csv",
            delimiter=";",
            cols=column_labels,
            usecols=column_labels,
        )
        # The two sub-level name columns are not part of the comparison.
        dfSent = dfSent.drop(['bra_sub1', 'bra_sub2'], axis=1)
        # Keep only rows that have at least two non-NA cells.
        dfSent = dfSent.dropna(thresh=2)

        state_map = getMapStates()

        def fix_name(raw_name):
            return city_fix(raw_name, state_map)

        dfSent['bra'] = dfSent["bra"].map(fix_name)
        # Row-wise apply kept as in the original so to_number sees the same
        # cell values it always did.
        dfSent['value'] = dfSent.apply(lambda row: to_number(row['value']), axis=1)
        dfSent['value'] = dfSent['value'].astype(np.float64)

        query = "SELECT bra_id as id,sum(enrolled) as value FROM hedu_ybucd where bra_id='4mg' and bra_id_len=3 AND d_id in ('A','B') and course_id_len=6 group by 1"
        dfDV = sql_to_df(query, db)
        dfDV['value'] = dfDV['value'].astype(np.float64)

        mismatches = run_check(dfDV, dfSent, 'bra', 0, 'value')

        self.assertEqual(mismatches, 0)
Beispiel #4
0
 def test_Municipality(self,year,size):
     """Check per-municipality wage totals in ``rais_yb`` against the Rais CSV.

     Queries the wage sum per six-character ``id_ibge`` prefix for the
     given year and ``bra_id_len``, loads the semicolon-delimited
     Rais<year>.csv file, and asserts that ``run_check`` (mode ``'munic'``)
     returns 0.

     Args:
         year: Year filtered in the query and used in the CSV file name.
         size: Value matched against ``bra_id_len`` in the query.
     """
     print("Entering in checkBRA")

     query = (
         "SELECT left(a.id_ibge,6) as id,sum(wage) as val FROM rais_yb s,attrs_bra a where s.bra_id_len="
         + str(size)
         + " and  a.id=s.bra_id and year="
         + str(year)
         + " group by 1;"
     )
     from_db = sql_to_df(query, db)

     csv_path = r"dados\emprego\sent\Rais" + str(year) + ".csv"
     # `cols` is expected to be defined at module level.
     from_csv = read_from_csv(csv_path, delimiter=";", cols=cols)

     mismatches = run_check(from_db, from_csv, 'munic', year)
     self.assertEqual(mismatches, 0)
Beispiel #5
0
 def test_CNAE(self,year):
     """Check per-CNAE wage totals in ``rais_yi`` against the Rais CSV.

     Queries the wage sum per last-four-characters of ``cnae_id`` (rows
     with ``cnae_id_len=6``) for the given year, loads the
     semicolon-delimited Rais<year>.csv file, and asserts that
     ``run_check`` (mode ``'cnae'``) returns 0.

     Args:
         year: Year filtered in the query and used in the CSV file name.
     """
     print("Entering in checkCNAE")

     # NOTE(review): the original comment read "before right 4 and len 5",
     # presumably meaning an earlier revision filtered on cnae_id_len=5.
     query = (
         "SELECT right(s.cnae_id,4) as id,sum(wage) as val FROM rais_yi s where s.cnae_id_len=6 and year="
         + str(year)
         + " group by 1;"
     )
     from_db = sql_to_df(query, db)

     csv_path = r"dados\emprego\sent\Rais" + str(year) + ".csv"
     # `cols` is expected to be defined at module level.
     from_csv = read_from_csv(csv_path, delimiter=";", cols=cols)

     mismatches = run_check(from_db, from_csv, 'cnae', year)
     self.assertEqual(mismatches, 0)
Beispiel #6
0
 def test_CBO(self,year):
     """Check per-CBO wage totals in ``rais_yo`` against the Rais CSV.

     Queries the wage sum per ``cbo_id`` (rows with ``cbo_id_len=4``) for
     the given year, loads the semicolon-delimited Rais<year>.csv file,
     and asserts that ``run_check`` (mode ``'cbo'``) returns 0.

     Args:
         year: Year filtered in the query and used in the CSV file name.
     """
     print("Entering in checkCBO")

     query = (
         "SELECT s.cbo_id as id,sum(wage) as val FROM rais_yo s where s.cbo_id_len=4 and s.year="
         + str(year)
         + " group by 1;"
     )
     from_db = sql_to_df(query, db)

     csv_path = r"dados\emprego\sent\Rais" + str(year) + ".csv"
     # `cols` is expected to be defined at module level.
     from_csv = read_from_csv(csv_path, delimiter=";", cols=cols)

     mismatches = run_check(from_db, from_csv, 'cbo', year)
     self.assertEqual(mismatches, 0)