Example #1
0
def get_daily_brt_gps_data(context, gtfs_path):
    """Download the BRT GPS rows for one scheduled run into a local CSV.

    The query keeps rows whose ``data`` equals the day before the run date,
    plus rows dated two days before the run date whose ``hora`` lies
    between 20 and 23.

    Args:
        context: Execution context. Reads
            ``resources.schedule_run_date["date"]``,
            ``solid_config["query_table"]`` and
            ``resources.bd_client.project`` (used as billing project).
        gtfs_path: Passed through unchanged in the returned dict.

    Returns:
        dict: ``{"gps_path": <path of the written CSV>, "gtfs_path": gtfs_path}``.
    """
    run_date = context.resources.schedule_run_date["date"]
    source_table = context.solid_config["query_table"]

    sql = f"""
    with brt_daily as (
    SELECT codigo AS vehicle_id, timestamp_gps AS datetime, latitude, longitude, linha
    FROM {source_table} as t
    WHERE t.data =  DATE_SUB(DATE("{run_date}"), INTERVAL 1 DAY)
    OR (t.data = DATE_SUB(DATE("{run_date}"), INTERVAL 2 DAY) AND t.hora BETWEEN 20 AND 23)
    )
    SELECT * FROM brt_daily
    """

    output_csv = f"tmp_data/brt_daily_{date_from_datetime(run_date)}.csv"

    bd.download(
        savepath=output_csv,
        query=sql,
        billing_project_id=context.resources.bd_client.project,
        from_file=True,
        index=False,
    )

    return dict(gps_path=output_csv, gtfs_path=gtfs_path)
Example #2
0
def test_download_no_query_or_table():
    """Calling ``download`` with only a save path and a limit must raise."""
    # Neither ``query`` nor ``dataset_id``/``table_id`` is supplied here.
    with pytest.raises(BaseDosDadosException):
        download(SAVEFILE, limit=10)
Example #3
0
def cli_download(
    ctx,
    dataset_id,
    table_id,
    savepath,
    query,
    query_project_id,
    billing_project_id,
    limit,
):
    """Download a table or query result and forward extra CLI args to ``download``.

    Every entry of ``ctx.args`` is expected in ``--key=value`` form and is
    passed to ``download`` as an additional keyword argument (values stay
    strings).

    Args:
        ctx: Context object exposing the unparsed extra arguments as ``ctx.args``.
        dataset_id: Forwarded to ``download``.
        table_id: Forwarded to ``download``.
        savepath: Destination path; also echoed in the success message.
        query: Forwarded to ``download``.
        query_project_id: Forwarded to ``download``.
        billing_project_id: Forwarded to ``download``.
        limit: Forwarded to ``download``.

    Raises:
        ValueError: If an extra argument does not contain ``=``.
    """
    pandas_kwargs = {}
    for item in ctx.args:
        # Split on the FIRST "=" only, so values such as "a=b" survive intact.
        key, separator, value = item.partition("=")
        if not separator:
            raise ValueError(
                f"Invalid extra argument `{item}`: expected the form --key=value"
            )
        # Strip only the leading "--" of the option name; a blanket
        # replace("--", "") would also eat dashes inside the value.
        if key.startswith("--"):
            key = key[2:]
        pandas_kwargs[key] = value

    download(
        savepath=savepath,
        dataset_id=dataset_id,
        table_id=table_id,
        query=query,
        query_project_id=query_project_id,
        billing_project_id=billing_project_id,
        limit=limit,
        **pandas_kwargs,
    )

    click.echo(
        click.style(
            f"Table was downloaded to `{savepath}`",
            fg="green",
        ))
Example #4
0
def cli_download(
    ctx,
    savepath,
    query,
    dataset_id,
    table_id,
    query_project_id,
    billing_project_id,
    limit,
):
    """Run ``bd.download`` with the given options and echo a success message.

    ``ctx`` is accepted but not used by this function.
    """
    download_options = dict(
        savepath=savepath,
        dataset_id=dataset_id,
        table_id=table_id,
        query=query,
        query_project_id=query_project_id,
        billing_project_id=billing_project_id,
        limit=limit,
    )
    bd.download(**download_options)

    success_message = click.style(
        f"Table was downloaded to `{savepath}`",
        fg="green",
    )
    click.echo(success_message)
Example #5
0
def test_download_large_file():
    """Download a query capped at 10,000,000 rows and check the file exists."""
    big_query = "select * from basedosdados.br_me_rais.microdados_vinculos limit 10000000"

    download(
        SAVEFILE,
        query=big_query,
        billing_project_id=TEST_PROJECT_ID,
        from_file=True,
    )

    assert SAVEFILE.exists()
Example #6
0
def test_download_by_query():
    """Download a 10-row query result with ``from_file=True`` and check the file."""
    sql = "select * from `basedosdados.br_ibge_pib.municipio` limit 10"

    download(SAVEFILE, query=sql, billing_project_id=TEST_PROJECT_ID, from_file=True)

    assert SAVEFILE.exists()
Example #7
0
def test_download_save_to_path():
    """Download a table into ``SAVEPATH`` and expect ``municipios.csv`` inside it."""
    table_request = dict(
        dataset_id="br_ibge_pib",
        table_id="municipios",
        billing_project_id=TEST_PROJECT_ID,
        limit=10,
    )

    download(SAVEPATH, **table_request)

    expected_file = SAVEPATH / "municipios.csv"
    assert expected_file.exists()
Example #8
0
def test_download_by_table():
    """Download 10 rows of a table by dataset/table id and check the file exists."""
    table_request = dict(
        dataset_id="br_ibge_pib",
        table_id="municipio",
        billing_project_id=TEST_PROJECT_ID,
        limit=10,
        from_file=True,
    )

    download(SAVEFILE, **table_request)

    assert SAVEFILE.exists()
Example #9
0
def test_download_pandas_kwargs():
    """Download a table while passing extra ``sep``/``index`` keyword arguments."""
    # Extra keyword arguments given on top of download's own options.
    extra_kwargs = {"sep": "|", "index": False}

    download(
        SAVEFILE,
        dataset_id="br_ibge_pib",
        table_id="municipios",
        billing_project_id=TEST_PROJECT_ID,
        limit=10,
        **extra_kwargs,
    )

    assert SAVEFILE.exists()
Example #10
0
def test_download_by_query():
    """Query download works with a billing project and raises without one."""
    sql = "select * from `basedosdados.br_ibge_pib.municipios` limit 10"

    # With billing: the file must be written.
    download(SAVEFILE, query=sql, billing_project_id=TEST_PROJECT_ID)
    assert SAVEFILE.exists()

    # No billing
    with pytest.raises(BaseDosDadosException):
        download(SAVEFILE, query=sql)
Example #11
0
def query_data(context):
    """Query the configured table for the run date and save the result as CSV.

    Logs the solid configuration and resources, builds a query filtered on
    the ``data`` column, and downloads the result through ``bd.download``
    with ``;`` as separator.

    Args:
        context: Execution context. Reads ``resources.bd_client.project``,
            ``resources.schedule_run_date`` and the ``query_table`` /
            ``date_format`` entries of ``solid_config``; logs via
            ``context.log``.

    Returns:
        str: The path handed to ``bd.download`` as ``savepath``, i.e.
        ``<run_date>/multas<run_date without dashes>.csv``.
    """
    project = context.resources.bd_client.project
    config = context.solid_config
    table = config['query_table']

    summary = f"""
    ##### Solid Config:
        query_table: {table}
        date_format: {config['date_format']}
    #### Resources:
        bd_client.project: {project}
        schedule_run_date: {context.resources.schedule_run_date}
    """
    context.log.info(summary)

    run_date = context.resources.schedule_run_date["date"]
    compact_date = run_date.replace('-', '')
    filename = f"{run_date}/multas{compact_date}.csv"

    context.log.info(f"Fetching data from {project}.{table}")

    # Exception: Methodology changed version to v1.1 after 2022-02-14,
    # only deployed on 2022-02-15.
    if run_date == "2022-02-15":
        date_condition = "data IN ('2022-02-15', '2022-02-14')"
    else:
        date_condition = f"data = '{run_date}'"

    # Single template for both cases; only the WHERE condition varies.
    query = f"""
            SELECT * except(data)
            FROM {table}
            WHERE {date_condition}
        """
    context.log.info(f"Running query\n {query}")

    context.log.info(f"Downloading query results and saving as {filename}")

    bd.download(
        savepath=filename,
        query=query,
        billing_project_id=project,
        from_file=True,
        index=False,
        sep=";",
    )

    return filename
Example #12
0
def test_download_by_table():
    """Table download works with a billing project and raises without one."""
    table_request = dict(
        dataset_id="br_ibge_pib",
        table_id="municipios",
        limit=10,
    )

    # With billing: the file must be written.
    download(SAVEFILE, billing_project_id=TEST_PROJECT_ID, **table_request)
    assert SAVEFILE.exists()

    # No billing
    with pytest.raises(BaseDosDadosException):
        download(SAVEFILE, **table_request)
Example #13
0
def test_download():
    """Exercise ``download`` with a file path, a directory path, and no arguments.

    A file path is expected to be written as given; a directory path is
    expected to receive ``query_result.csv``; calling with no arguments
    must raise.
    """
    sql = "select * from `basedosdados.br_basedosdados_diretorios_brasil.municipios` limit 10"

    # (path given to download, file expected to exist afterwards)
    cases = (
        (Path("tests/tmp_bases/test.csv"), Path("tests/tmp_bases/test.csv")),
        (Path("tests/tmp_bases/"), Path("tests/tmp_bases/") / "query_result.csv"),
    )
    for savepath, expected_file in cases:
        download(savepath, query=sql)
        assert expected_file.exists()

    with pytest.raises(Exception):
        download()
Example #14
0
def create_or_append_table(context, csv_path, which_table, _df, date):
    """Create the table from ``_df``, or merge ``_df`` into the existing one.

    If the ``prod`` table object cannot be fetched (``NotFound``), ``_df``
    is written to ``csv_path`` and the table is created and published.
    Otherwise the current rows are downloaded to ``tmp_data/``, combined
    with ``_df`` through ``drop_overlap``, written to ``csv_path`` and
    appended with ``if_exists="replace"``.

    For ``which_table`` equal to ``"realized_trips"`` or ``"unplanned"``
    the download query is restricted to the day before ``date``; for any
    other value it selects the whole table.

    Args:
        context: Execution context. Reads
            ``resources.basedosdados_config["dataset_id"]`` and
            ``resources.bd_client.project``.
        csv_path: Where the CSV handed to the table object is written.
        which_table: Table id; also selects the date filter.
        _df: DataFrame with the new rows.
        date: Reference date interpolated into the query and the temp file name.
    """
    table_obj = Table(
        dataset_id=context.resources.basedosdados_config["dataset_id"],
        table_id=which_table,
    )

    # Base query ends with a newline plus 12 spaces of padding.
    query = f"SELECT * FROM {table_obj.table_full_name['prod']} as t\n" + " " * 12

    # Pick the expression that yields the row's day for the known tables.
    if which_table == "realized_trips":
        day_expression = "EXTRACT(DATE FROM t.departure_datetime)"
    elif which_table == "unplanned":
        day_expression = "DATE(t.dia)"
    else:
        day_expression = None
    if day_expression is not None:
        query += f'WHERE {day_expression} = DATE_SUB(DATE("{date}"), INTERVAL 1 DAY)'

    try:
        existing = table_obj._get_table_obj("prod")
    except google.api_core.exceptions.NotFound:
        existing = None

    if not existing:
        # No usable table yet: create and publish it from the new data.
        _df.to_csv(csv_path, index=False)
        table_obj.create(
            csv_path,
            if_table_config_exists="pass",
            if_storage_data_exists="replace",
        )
        table_obj.publish(if_exists="replace")
        return

    downloaded_csv = f"tmp_data/{which_table}_{date}_from_bq.csv"
    bd.download(
        savepath=downloaded_csv,
        query=query,
        billing_project_id=context.resources.bd_client.project,
        from_file=True,
        index=False,
    )

    current_rows = pd.read_csv(downloaded_csv)
    merged = drop_overlap(current_rows, _df)
    merged.to_csv(csv_path, index=False)

    table_obj.append(csv_path, if_exists="replace")