def test_read_sql_out_of_bound_date():
    """Smoke test: a DATE below pandas' Timestamp range must not raise."""
    # No assertion needed -- the test passes as long as read_sql completes.
    read_sql(
        query="select DATE('1000-01-01')",
        from_file=True,
        billing_project_id=TEST_PROJECT_ID,
    )
def test_read_sql_invalid_billing_project_id():
    """An unknown billing project must raise the dedicated project-ID error."""
    expected = r"You are using an invalid `billing_project_id`"
    with pytest.raises(BaseDosDadosInvalidProjectIDException, match=expected):
        read_sql(
            query="select * from `basedosdados.br_ibge_pib.municipio` limit 10",
            billing_project_id="inexistent_project_id",
            from_file=True,
        )
def test_read_sql_no_billing_project_id():
    """Omitting billing_project_id must raise with an explanatory message."""
    with pytest.raises(BaseDosDadosNoBillingProjectIDException) as err:
        read_sql(
            query="select * from `basedosdados.br_ibge_pib.municipio` limit 10",
        )
    message = str(err.value)
    assert "We are not sure which Google Cloud project should be billed." in message
def test_read_sql_syntax_error():
    """Malformed SQL must surface BigQuery's 400 syntax error."""
    bad_query = "invalid_statement * from `basedosdados.br_ibge_pib.municipio` limit 10"
    with pytest.raises(GenericGBQException) as err:
        read_sql(
            query=bad_query,
            billing_project_id=TEST_PROJECT_ID,
            from_file=True,
        )
    assert "Reason: 400 Syntax error" in str(err.value)
def test_read_sql_inexistent_table():
    """Querying a missing table must surface BigQuery's 404 table error."""
    missing_table = "select * from `basedosdados.br_ibge_pib.inexistent` limit 10"
    with pytest.raises(GenericGBQException) as err:
        read_sql(
            query=missing_table,
            billing_project_id=TEST_PROJECT_ID,
            from_file=True,
        )
    assert "Reason: 404 Not found: Table" in str(err.value)
def test_read_sql_inexistent_dataset():
    """Querying a missing dataset must surface BigQuery's 404 dataset error."""
    missing_dataset = (
        "select * from `basedosdados-dev.br_ibge_inexistent.municipio` limit 10"
    )
    with pytest.raises(GenericGBQException) as err:
        read_sql(
            query=missing_dataset,
            billing_project_id=TEST_PROJECT_ID,
            from_file=True,
        )
    assert "Reason: 404 Not found: Dataset" in str(err.value)
def test_read_sql():
    """read_sql without a billing project should still return a DataFrame."""
    result = read_sql(
        query="select * from `basedosdados.br_suporte.diretorio_municipios` limit 10",
    )
    assert isinstance(result, pd.DataFrame)
def test_read_sql_with_billing_project_id():
    """read_sql with an explicit billing project returns a DataFrame.

    Renamed from ``test_read_sql``: another test in this file already uses
    that name, and the duplicate definition shadowed one of the two so
    pytest only ever collected and ran a single test. Unique names make
    both tests run.
    """
    result = read_sql(
        query="select * from `basedosdados.br_ibge_pib.municipios` limit 10",
        billing_project_id=TEST_PROJECT_ID,
    )
    assert isinstance(result, pd.DataFrame)
def save_header_files(dataset_id, table_id, limit=20, billing_project_id="basedosdados"):
    """Upload a small sample of a table to storage as its header file.

    Fetches the first *limit* rows of ``basedosdados.<dataset_id>.<table_id>``,
    writes them to a local ``header.csv``, and uploads that file to the
    table's storage location in ``header`` mode, replacing any existing one.

    Args:
        dataset_id: BigQuery dataset the table belongs to.
        table_id: Table whose header sample is uploaded.
        limit: Number of sample rows to fetch (default 20, the previous
            hard-coded value).
        billing_project_id: GCP project billed for the query (default
            "basedosdados", the previous hard-coded value).
    """
    query = f"""
    SELECT * FROM `basedosdados.{dataset_id}.{table_id}` LIMIT {limit}
    """
    df = bd.read_sql(query, billing_project_id=billing_project_id, from_file=True)
    # Write locally first, then push the file to storage as the table header.
    df.to_csv("header.csv", index=False, encoding="utf-8")
    st = bd.Storage(dataset_id=dataset_id, table_id=table_id)
    st.upload("header.csv", mode="header", if_exists="replace")
def fetch_data(config):
    """Fetch data from Big Query with basedosdados package.

    Returns the query result as a DataFrame, or ``None`` when the query is
    missing or fails; in either case the outcome is recorded via
    ``store_skip`` / ``store_error`` and ``config["name"]`` is tagged.
    """
    if not config["query"].strip():
        config["name"] += " (QueryDoesNotExist Exception)"
        store_skip(config)
        # Bug fix: the original fell through and still executed the empty
        # query, so every skipped config was also recorded as an error.
        return None
    try:
        return bd.read_sql(
            query=config["query"].replace("\n", " "),
            billing_project_id="basedosdados-dev",  # change this value for local debugging
            from_file=True,  # comment this line for local debugging
        )
    except Exception:  # was a bare except: don't trap KeyboardInterrupt/SystemExit
        config["name"] += " (BaseDosDados Exception)"
        store_error(config)
        return None
when idade >= 55 and idade <= 59 then '55 a 59 anos' when idade >= 60 and idade <= 64 then '60 a 64 anos' when idade >= 65 and idade <= 69 then '65 a 69 anos' when idade >= 70 and idade <= 74 then '70 a 74 anos' when idade >= 75 and idade <= 79 then '75 a 79 anos' else '80+' END) as grupo, count(titulo_eleitoral) FROM `basedosdados.br_tse_eleicoes.candidatos` where idade is not null group by 1,2,3 ORDER BY ano DESC """ # utiliza a API para importa os dados com a query filiacao_tse = bd.read_sql(query, billing_project_id='basedosdados-dev') #define os anos das eleições gerais ano_geral = [1994,1998,2002,2006,2010,2014,2018] # laço para criação de dataframes de cada ano (filtra o dataset principal) # laço para criação de cada gráfico e salvamento for ano in ano_geral: exec('df_{} = filiacao_tse[filiacao_tse["ano"]==int(ano)]'.format(ano)) exec('df_{} = df_{}[["genero","grupo","n_candidatos"]]'.format(ano,ano)) exec('df_{} = pd.pivot_table(df_{}, values=["n_candidatos"], index=["grupo"],columns="genero")'.format(ano,ano)) exec('y_age = df_{}.index'.format(ano)) exec('x_M = df_{}[("n_candidatos", "masculino")]'.format(ano)) exec('x_F = df_{}[("n_candidatos", "feminino")] * -1'.format(ano)) # inicia a criação do gráfico
WHERE p.ano = 2021 and EXTRACT(MONTH FROM p.data_coleta) = 9 GROUP BY 1,2) SELECT p.ano, EXTRACT(MONTH from p.data_coleta) AS mes, p.produto, ROUND(AVG(p.preco_venda),3) AS preco_medio, ROUND((AVG(sub.indice_atual)/AVG(ip.indice))*AVG(p.preco_venda),3) AS preco_corrigido, FROM basedosdados.br_anp_precos_combustiveis.microdados p LEFT JOIN basedosdados.br_ibge_ipca.mes_brasil ip on p.ano=ip.ano and EXTRACT(MONTH from p.data_coleta)=ip.mes INNER JOIN sub ON p.produto=sub.produto WHERE ip.mes is not null GROUP BY 1,2,3 """ # utiliza a API para importa os dados com a query df = bd.read_sql(query, billing_project_id='basedosdados-dev') #Altera os formatos de data df = df.dropna(subset=['mes']) df['date'] = pd.to_datetime(df['date']).dt.to_period('m') df['date'] = df['date'].apply(lambda x: x.strftime('%Y-%m')) df['date'] = df['date'].replace('-', '_', regex=True) df[['ano', 'mes']] = df[['ano', 'mes']].astype('int64') df = df.sort_values(['ano', 'mes']) #cria uma lista de produtos produto = pd.DataFrame(df['produto'].drop_duplicates()).replace(' ', '_', regex=True) produto = produto['produto'].values.tolist()
tb.append(filepath, if_exists="replace") if __name__ == "__main__": # GET MUNICIPIOS FROM BD print("====== ", today, " ======") query = """ SELECT sigla_uf, id_municipio, id_municipio_6 FROM `basedosdados.br_bd_diretorios_brasil.municipio` """ municipios = bd.read_sql(query, billing_project_id="basedosdados-dev") print("\n") # deleta pasta if os.path.isdir(CLEAN_PATH): shutil.rmtree(CLEAN_PATH) if os.path.isdir(RAW_PATH): shutil.rmtree(RAW_PATH) print("Get download links") download_dict = get_download_links() print("\n") for tipo in list(download_dict.keys()): download_opt = get_filtered_download_dict(