def get_daily_brt_gps_data(context, gtfs_path):
    """Download the BRT GPS rows around the scheduled run date to a CSV file.

    The query selects rows whose `data` is the day before the run date, plus
    rows from two days before whose `hora` is between 20 and 23.

    Args:
        context: Execution context providing `resources.schedule_run_date`,
            `resources.bd_client` and `solid_config["query_table"]`.
        gtfs_path: Passed through unchanged into the returned mapping.

    Returns:
        dict: `{"gps_path": <downloaded csv path>, "gtfs_path": gtfs_path}`.
    """
    run_date = context.resources.schedule_run_date["date"]
    source_table = context.solid_config["query_table"]

    # Adjacent literals are joined by the parser into a single query string.
    query = (
        " with brt_daily as ("
        " SELECT codigo AS vehicle_id, timestamp_gps AS datetime,"
        " latitude, longitude, linha"
        f" FROM {source_table} as t"
        f' WHERE t.data = DATE_SUB(DATE("{run_date}"), INTERVAL 1 DAY)'
        f' OR (t.data = DATE_SUB(DATE("{run_date}"), INTERVAL 2 DAY)'
        " AND t.hora BETWEEN 20 AND 23) )"
        " SELECT * FROM brt_daily "
    )

    gps_path = f"tmp_data/brt_daily_{date_from_datetime(run_date)}.csv"
    download_options = {
        "savepath": gps_path,
        "query": query,
        "billing_project_id": context.resources.bd_client.project,
        "from_file": True,
        "index": False,
    }
    bd.download(**download_options)

    return {"gps_path": gps_path, "gtfs_path": gtfs_path}
def test_download_no_query_or_table():
    """`download` must raise when given neither a query nor a table."""
    incomplete_request = {"limit": 10}
    with pytest.raises(BaseDosDadosException):
        download(SAVEFILE, **incomplete_request)
def cli_download(
    ctx,
    dataset_id,
    table_id,
    savepath,
    query,
    query_project_id,
    billing_project_id,
    limit,
):
    """Download a table or query result and report the destination.

    Extra command-line items found in `ctx.args` are expected in the form
    `--name=value` and are forwarded to `download` as keyword arguments.

    Args:
        ctx: Click context; `ctx.args` holds the extra `--name=value` items.
        dataset_id: Forwarded to `download`.
        table_id: Forwarded to `download`.
        savepath: Destination path, also echoed in the success message.
        query: Forwarded to `download`.
        query_project_id: Forwarded to `download`.
        billing_project_id: Forwarded to `download`.
        limit: Forwarded to `download`.

    Raises:
        ValueError: If an extra item is not of the form `--name=value`.
    """
    pandas_kwargs = {}
    for item in ctx.args:
        # Strip only the leading "--" and split on the first "=" so that
        # values containing "--" or "=" (e.g. `--sep==`) are kept intact.
        option = item[2:] if item.startswith("--") else item
        name, separator, value = option.partition("=")
        if not separator or not name:
            raise ValueError(
                f"Extra option `{item}` must be in the form --name=value"
            )
        # NOTE(review): values are forwarded as strings (e.g. "False" stays
        # a non-empty string); confirm `download` handles that as intended.
        pandas_kwargs[name] = value

    download(
        savepath=savepath,
        dataset_id=dataset_id,
        table_id=table_id,
        query=query,
        query_project_id=query_project_id,
        billing_project_id=billing_project_id,
        limit=limit,
        **pandas_kwargs,
    )

    click.echo(
        click.style(
            f"Table was downloaded to `{savepath}`",
            fg="green",
        )
    )
def cli_download(
    ctx,
    savepath,
    query,
    dataset_id,
    table_id,
    query_project_id,
    billing_project_id,
    limit,
):
    """Run `bd.download` with the CLI options and echo the destination.

    All options except `ctx` are forwarded by keyword to `bd.download`;
    a green confirmation naming `savepath` is printed afterwards.
    """
    request = dict(
        savepath=savepath,
        dataset_id=dataset_id,
        table_id=table_id,
        query=query,
        query_project_id=query_project_id,
        billing_project_id=billing_project_id,
        limit=limit,
    )
    bd.download(**request)

    confirmation = click.style(
        f"Table was downloaded to `{savepath}`",
        fg="green",
    )
    click.echo(confirmation)
def test_download_large_file():
    """A query limited to ten million rows should still produce SAVEFILE."""
    large_query = (
        "select * from basedosdados.br_me_rais.microdados_vinculos limit 10000000"
    )
    download(
        SAVEFILE,
        query=large_query,
        billing_project_id=TEST_PROJECT_ID,
        from_file=True,
    )

    assert SAVEFILE.exists()
def test_download_by_query():
    """Downloading from a SQL query should create SAVEFILE."""
    sample_query = "select * from `basedosdados.br_ibge_pib.municipio` limit 10"
    download(
        SAVEFILE,
        query=sample_query,
        billing_project_id=TEST_PROJECT_ID,
        from_file=True,
    )

    assert SAVEFILE.exists()
def test_download_save_to_path():
    """Saving to the SAVEPATH directory should yield `municipios.csv` in it."""
    table_request = {
        "dataset_id": "br_ibge_pib",
        "table_id": "municipios",
        "billing_project_id": TEST_PROJECT_ID,
        "limit": 10,
    }
    download(SAVEPATH, **table_request)

    expected_file = SAVEPATH / "municipios.csv"
    assert expected_file.exists()
def test_download_by_table():
    """Downloading by dataset/table identifiers should create SAVEFILE."""
    table_request = {
        "dataset_id": "br_ibge_pib",
        "table_id": "municipio",
        "billing_project_id": TEST_PROJECT_ID,
        "limit": 10,
        "from_file": True,
    }
    download(SAVEFILE, **table_request)

    assert SAVEFILE.exists()
def test_download_pandas_kwargs():
    """`download` should accept extra keyword arguments such as sep/index."""
    table_request = {
        "dataset_id": "br_ibge_pib",
        "table_id": "municipios",
        "billing_project_id": TEST_PROJECT_ID,
        "limit": 10,
    }
    extra_kwargs = {"sep": "|", "index": False}
    download(SAVEFILE, **table_request, **extra_kwargs)

    assert SAVEFILE.exists()
def test_download_by_query():
    """Query downloads succeed with a billing project and raise without one."""
    sample_query = "select * from `basedosdados.br_ibge_pib.municipios` limit 10"

    download(
        SAVEFILE,
        query=sample_query,
        billing_project_id=TEST_PROJECT_ID,
    )
    assert SAVEFILE.exists()

    # No billing
    with pytest.raises(BaseDosDadosException):
        download(SAVEFILE, query=sample_query)
def query_data(context):
    """Download the rows for the scheduled run date into a `;`-separated CSV.

    Args:
        context: Execution context providing `solid_config` (`query_table`,
            `date_format`), `resources.bd_client`, `resources.schedule_run_date`
            and `log`.

    Returns:
        str: Path of the CSV written, `<run_date>/multas<run_date digits>.csv`.
    """
    project = context.resources.bd_client.project
    settings = context.solid_config

    # Adjacent literals are joined by the parser into a single log message.
    context.log.info(
        " ##### Solid Config:"
        f" query_table: {settings['query_table']}"
        f" date_format: {settings['date_format']}"
        " #### Resources:"
        f" bd_client.project: {project}"
        f" schedule_run_date: {context.resources.schedule_run_date} "
    )

    run_date = context.resources.schedule_run_date["date"]
    compact_date = run_date.replace("-", "")
    filename = f"{run_date}/multas{compact_date}.csv"

    context.log.info(
        f"Fetching data from {project}.{context.solid_config['query_table']}")

    # Exception: Methodology changed version to v1.1 after 2022-02-14,
    # only deployed on 2022-02-15.
    if run_date == "2022-02-15":
        date_filter = "data IN ('2022-02-15', '2022-02-14')"
    else:
        date_filter = f"data = '{run_date}'"
    source_table = context.solid_config["query_table"]
    query = f" SELECT * except(data) FROM {source_table} WHERE {date_filter} "

    context.log.info(f"Running query\n {query}")
    context.log.info(f"Downloading query results and saving as {filename}")

    download_options = {
        "savepath": filename,
        "query": query,
        "billing_project_id": project,
        "from_file": True,
        "index": False,
        "sep": ";",
    }
    bd.download(**download_options)

    return filename
def test_download_by_table():
    """Table downloads succeed with a billing project and raise without one."""
    table_request = {
        "dataset_id": "br_ibge_pib",
        "table_id": "municipios",
        "limit": 10,
    }

    download(SAVEFILE, billing_project_id=TEST_PROJECT_ID, **table_request)
    assert SAVEFILE.exists()

    # No billing
    with pytest.raises(BaseDosDadosException):
        download(SAVEFILE, **table_request)
def test_download():
    """Check file and directory destinations, and that a bare call raises."""
    sample_query = (
        "select * from `basedosdados.br_basedosdados_diretorios_brasil.municipios` limit 10"
    )

    # Explicit file destination.
    file_target = Path("tests/tmp_bases/test.csv")
    download(file_target, query=sample_query)
    assert file_target.exists()

    # Directory destination: the result is expected as `query_result.csv`.
    directory_target = Path("tests/tmp_bases/")
    download(directory_target, query=sample_query)
    assert (directory_target / "query_result.csv").exists()

    # Calling without any arguments must fail.
    with pytest.raises(Exception):
        download()
def create_or_append_table(context, csv_path, which_table, _df, date):
    """Create the table from `_df`, or merge `_df` into it, then publish.

    When the prod table cannot be found, `_df` is written to `csv_path` and
    the table is created from that file. Otherwise the current prod rows are
    downloaded (restricted to the day before `date` for the `realized_trips`
    and `unplanned` tables), combined with `_df` through `drop_overlap`,
    written to `csv_path` and appended. The table is published in both cases.

    Args:
        context: Execution context providing `resources.basedosdados_config`
            and `resources.bd_client`.
        csv_path: Path of the CSV file that is written and then uploaded.
        which_table: Table id; also selects the date filter of the query.
        _df: DataFrame holding the new rows.
        date: Reference date interpolated into the query and the temp filename.
    """
    destination = Table(
        dataset_id=context.resources.basedosdados_config["dataset_id"],
        table_id=which_table,
    )

    query = f"SELECT * FROM {destination.table_full_name['prod']} as t "
    if which_table == "realized_trips":
        query = (
            f"{query}WHERE EXTRACT(DATE FROM t.departure_datetime) = "
            f'DATE_SUB(DATE("{date}"), INTERVAL 1 DAY)'
        )
    elif which_table == "unplanned":
        query = (
            f"{query}WHERE DATE(t.dia) = "
            f'DATE_SUB(DATE("{date}"), INTERVAL 1 DAY)'
        )

    # A missing prod table is signalled by NotFound and means "create".
    try:
        existing = destination._get_table_obj("prod")
    except google.api_core.exceptions.NotFound:
        existing = None

    if not existing:
        _df.to_csv(csv_path, index=False)
        destination.create(
            csv_path,
            if_table_config_exists="pass",
            if_storage_data_exists="replace",
        )
    else:
        snapshot_path = f"tmp_data/{which_table}_{date}_from_bq.csv"
        bd.download(
            savepath=snapshot_path,
            query=query,
            billing_project_id=context.resources.bd_client.project,
            from_file=True,
            index=False,
        )
        stored_rows = pd.read_csv(snapshot_path)
        merged_rows = drop_overlap(stored_rows, _df)
        merged_rows.to_csv(csv_path, index=False)
        destination.append(csv_path, if_exists="replace")

    destination.publish(if_exists="replace")