# Expenditure query form of the Sao Paulo state finance department (SIGEO).
_SIGEO_URL = "https://www.fazenda.sp.gov.br/SigeoLei131/Paginas/FlexConsDespesa.aspx"

# Common prefix of every form control name on the page.
_SIGEO_FIELD_PREFIX = "ctl00$ContentPlaceHolder1$"

# Dropdowns that are set to their empty-valued option before searching.
_SIGEO_DROPDOWNS = (
    "ddlOrgao",
    "ddlCategoria",
    "ddlUo",
    "ddlGrupo",
    "ddlUge",
    "ddlModalidade",
    "ddlFonteRecursos",
    "ddlElemento",
    "ddlFuncao",
    "ddlSubFuncao",
    "ddlPrograma",
    "ddlAcao",
    "ddlProgramaTrabalho",
)

# Number of "cblFase$<i>" checkboxes that get clicked (indices 0..4).
_SIGEO_PHASE_COUNT = 5

# MIME types Firefox must handle without showing a save/open dialog.
_SIGEO_MIME_TYPES = (
    "application/csv,application/excel,application/vnd.msexcel,"
    "application/vnd.ms-excel,text/anytext,text/comma-separated-values,"
    "text/csv,application/vnd.ms-excel,"
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,"
    "application/octet-stream"
)

# Monetary columns (names after normalization) converted to numbers.
_SIGEO_VALUE_COLUMNS = (
    "dotacao_inicial",
    "dotacao_atual",
    "empenhado",
    "liquidado",
    "pago",
    "pago_restos",
)


def _sigeo_data_dir(*parts):
    """Return a path below ``<root>/data``.

    ``<root>`` is the part of the current working directory that precedes
    the first occurrence of "notebooks" (the whole working directory when
    that text is absent). ``os.path.join`` guarantees a separator between
    the root and "data" in both cases.
    """
    root = os.getcwd().split("notebooks")[0]
    return os.path.join(root, "data", *parts)


def _sigeo_firefox_profile(download_dir):
    """Build a Firefox profile that silently saves downloads in *download_dir*."""
    preferences = {
        "browser.download.dir": download_dir,
        # 2 = use the custom directory given in browser.download.dir.
        "browser.download.folderList": 2,
        "browser.helperApps.neverAsk.saveToDisk": _SIGEO_MIME_TYPES,
        "browser.download.manager.showWhenStarting": False,
        "browser.helperApps.neverAsk.openFile": _SIGEO_MIME_TYPES,
        "browser.helperApps.alwaysAsk.force": False,
        "browser.download.manager.useWindow": False,
        "browser.download.manager.focusWhenStarting": False,
        "browser.download.manager.alertOnEXEOpen": False,
        "browser.download.manager.showAlertOnComplete": False,
        "browser.download.manager.closeWhenDone": True,
        "pdfjs.disabled": True,
    }
    profile = webdriver.FirefoxProfile()
    for name, value in preferences.items():
        profile.set_preference(name, value)
    return profile


def _sigeo_download_despesa(year, download_dir, driver_path, headless):
    """Fill in the SIGEO form for *year* and download the export.

    Args:
        year: Year to select in the form, as a string (e.g. ``"2020"``).
        download_dir: Directory where Firefox saves the file; created when
            missing.
        driver_path: Path of the geckodriver executable.
        headless: Whether Firefox runs without a visible window.

    Returns:
        Path of the file that appeared in *download_dir* during the run.

    Raises:
        RuntimeError: If no new file shows up in *download_dir*.
    """
    os.makedirs(download_dir, exist_ok=True)
    listing_pattern = os.path.join(download_dir, "*")
    files_before = set(glob.glob(listing_pattern))

    browser_options = Options()
    browser_options.headless = headless
    # NOTE: firefox_profile=/executable_path= and find_element_by_name are the
    # Selenium 3 style calls this file already relies on; they are kept as-is.
    firefox = webdriver.Firefox(
        options=browser_options,
        firefox_profile=_sigeo_firefox_profile(download_dir),
        executable_path=driver_path,
    )
    try:
        firefox.get(_SIGEO_URL)

        year_dropdown = Select(
            firefox.find_element_by_name(_SIGEO_FIELD_PREFIX + "ddlAno"))
        year_dropdown.select_by_value(year)

        # Click each phase checkbox, pausing after every click.
        for phase in range(_SIGEO_PHASE_COUNT):
            firefox.find_element_by_name(
                f"{_SIGEO_FIELD_PREFIX}cblFase${phase}").click()
            time.sleep(2)

        # Pick the empty-valued option in every dropdown, pausing after each.
        for dropdown in _SIGEO_DROPDOWNS:
            Select(
                firefox.find_element_by_name(_SIGEO_FIELD_PREFIX + dropdown)
            ).select_by_value("")
            time.sleep(5)

        firefox.find_element_by_name(
            _SIGEO_FIELD_PREFIX + "btnPesquisar").click()
        time.sleep(60)  # fixed wait after submitting the search
        firefox.find_element_by_name(
            _SIGEO_FIELD_PREFIX + "btnExcel").click()
        time.sleep(60)  # fixed wait after requesting the export
    finally:
        # Always close the browser, even when a form step fails, so that no
        # Firefox/geckodriver process is left behind.
        firefox.quit()

    new_files = [
        name
        for name in glob.glob(listing_pattern)
        # ".part" files are Firefox's temporary files for unfinished downloads.
        if name not in files_before and not name.endswith(".part")
    ]
    if not new_files:
        raise RuntimeError(
            f"no file was downloaded to {download_dir} for year {year}")
    return max(new_files, key=os.path.getmtime)


def _sigeo_clean_execucao(csv_path, today):
    """Load a downloaded export and return the cleaned DataFrame.

    Rows without a value in the "Órgão" column are dropped, a "date" column
    holding *today* is added, column names go through
    ``manipulate.normalize_cols``, the monetary columns are converted from
    text using "." as thousands separator and "," as decimal mark, and
    columns that are at least 98% null are removed.
    """
    df = pd.read_csv(csv_path, encoding="windows-1254")
    # .copy() makes the following column assignments act on an independent
    # frame instead of a slice of the one that was read.
    df = df[df["Órgão"].notnull()].copy()
    df["date"] = today
    df.columns = manipulate.normalize_cols(df.columns)

    for col in _SIGEO_VALUE_COLUMNS:
        # regex=False: "." must be removed literally, never as a pattern.
        as_text = (
            df[col]
            .str.replace(".", "", regex=False)
            .str.replace(",", ".", regex=False)
        )
        df[col] = pd.to_numeric(as_text, errors="coerce")

    return df.loc[:, df.isnull().mean() < 0.98]


def get_orcamento_executado_consolidado():
    """Download the SIGEO expenditure export for each year from 2011 to 2021.

    Every file is saved as
    ``data/orcamento_consolidado/execucao/orcamento_executado_<year>.csv``.
    The browser runs with a visible window.

    Raises:
        RuntimeError: If a year's download produces no file.
    """
    driver_path = GeckoDriverManager().install()
    download_dir = _sigeo_data_dir("orcamento_consolidado", "execucao")

    for year in (str(i) for i in range(2011, 2022)):
        print("execucao : ", year)
        downloaded = _sigeo_download_despesa(
            year, download_dir, driver_path, headless=False)
        # os.replace overwrites an existing target on every platform.
        os.replace(
            downloaded,
            os.path.join(download_dir, "orcamento_executado_{}.csv".format(year)),
        )


def get_orcamento_executado():
    """Download and clean the SIGEO expenditure export from 2020 to this year.

    For each year the raw download is stored in
    ``data/orcamento/<year>/executado/<today>_orcamento_<year>.csv``, cleaned,
    and written back as UTF-8 both to that file and to ``last_data.csv`` in
    the same directory. The ``executado``, ``receita/arrecadado`` and
    ``receita/previsto`` folders of the year are created when missing. The
    browser runs headless.

    Raises:
        RuntimeError: If a year's download produces no file.
    """
    # Read the clock once so the date stamp and the year range always agree.
    now = datetime.datetime.today()
    today = now.strftime("%Y-%m-%d")
    driver_path = GeckoDriverManager().install()

    for year in (str(i) for i in range(2020, now.year + 1)):
        print("execucao : ", year)

        path = _sigeo_data_dir("orcamento", year, "executado")
        for folder in (
            path,
            _sigeo_data_dir("orcamento", year, "receita", "arrecadado"),
            _sigeo_data_dir("orcamento", year, "receita", "previsto"),
        ):
            os.makedirs(folder, exist_ok=True)

        downloaded = _sigeo_download_despesa(
            year, path, driver_path, headless=True)
        csv_path = os.path.join(
            path, "{}_orcamento_{}.csv".format(today, year))
        os.replace(downloaded, csv_path)

        df = _sigeo_clean_execucao(csv_path, today)
        df.to_csv(
            os.path.join(path, "last_data.csv"), encoding="utf-8", index=False)
        df.to_csv(csv_path, encoding="utf-8", index=False)

        print("\n")