Beispiel #1
0
def insert_rows():
    """Fetch the case timeline over HTTP and insert one Postgres row per entry.

    The endpoint, HTTP connection id, ``codigo`` query value and target table
    name are all read from the module-level ``table_variables`` mapping.

    Each element of the response's ``timeline`` list becomes one INSERT with
    twelve positional values: the entry's ``fecha``, the ten case counters
    taken from the ``data`` dict of the first element of ``regiones``, and a
    single timestamp shared by every row of this run.

    Raises:
        KeyError: if the response lacks ``timeline`` or a ``data`` dict lacks
            one of the expected counters.
        Whatever ``HttpHook.check_response`` / ``PostgresHook.run`` raise on
        HTTP or database failures.
    """
    # Taken once, before any I/O, so all rows of this run share the same value.
    insert_ts = datetime.utcnow()

    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    sql_insert = f"""INSERT INTO {table_variables['name']}
                     VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""

    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'],
                         method='GET')
    res = http_hook.run(endpoint=table_variables['endpoint'],
                        data={'codigo': table_variables['codigo']})
    http_hook.check_response(response=res)

    cases_df = pd.DataFrame(res.json()['timeline'])

    # Order matters: it must match the column order of the target table
    # (the INSERT statement does not name its columns).
    metric_columns = [
        'casosConfirmados', 'casosUci', 'casosFallecidos',
        'casosHospitalizados', 'casosRecuperados',
        'casosConfirmadosDiario', 'casosUciDiario', 'casosFallecidosDiario',
        'casosHospitalizadosDiario', 'casosRecuperadosDiario',
    ]

    for row in cases_df.itertuples(index=False):
        # Only the first region of each timeline entry is loaded.
        information = pd.Series(row.regiones[0]['data'])[metric_columns]

        # Select by label and convert once with tolist() instead of indexing
        # the string-labelled Series with integers (information[0], ...):
        # that positional fallback is deprecated in pandas. tolist() also
        # yields plain Python numbers rather than NumPy scalars, which the
        # database driver can adapt without NumPy-specific handling.
        metric_values = information.tolist()

        pg_hook.run(sql_insert,
                    parameters=(row.fecha, *metric_values, insert_ts))
Beispiel #2
0
def insert_rows():
    """Download the unemployment records and insert them into Postgres row by row.

    The endpoint, HTTP connection id, ``resource_id`` and target table name
    come from the module-level ``table_variables`` mapping. The records are
    read from ``result.records`` of the JSON response, reduced to nine
    columns, and the literal strings ``'NA'``, ``'-Inf'`` and ``'Inf'`` are
    replaced by NaN. Every record is then inserted with one timestamp that is
    shared by the whole run as the tenth value.
    """
    db_hook = PostgresHook(postgres_conn_id='postgres_default')
    insert_statement = f"""INSERT INTO {table_variables['name']}
                     VALUES (%s, %s, %s, %s, %s, %s ,%s, %s, %s, %s)"""

    api_hook = HttpHook(http_conn_id=table_variables['http_conn_id'],
                        method='GET')
    response = api_hook.run(
        endpoint=table_variables['endpoint'],
        # NOTE(review): the very large limit presumably requests every record
        # in a single call -- confirm against the API's paging behaviour.
        data={
            'resource_id': table_variables['resource_id'],
            'limit': '10000000',
        },
    )
    api_hook.check_response(response=response)

    # Column order must match the target table: the INSERT names no columns.
    wanted_columns = [
        '_id', 'Any', 'Mes', 'Codi_Districte', 'Nom_Districte', 'Codi_Barri',
        'Nom_Barri', 'Durada_atur', 'Nombre',
    ]
    placeholder_values = {'NA': np.nan, '-Inf': np.nan, 'Inf': np.nan}

    unemployment = pd.DataFrame(response.json()['result']['records'])
    unemployment = unemployment[wanted_columns].replace(placeholder_values)

    # Taken after the download so it reflects when the load actually starts.
    loaded_at = datetime.utcnow()

    for record in unemployment.itertuples(index=False):
        # Each record carries exactly the nine selected columns, in order.
        db_hook.run(insert_statement, parameters=(*record, loaded_at))
Beispiel #3
0
def insert_rows():
    """Fetch the indicator records over HTTP and write each one to Postgres.

    Connection ids aside, all settings (endpoint, ``resource_id``, table
    name) are looked up in the module-level ``table_variables`` mapping.

    Steps:
      1. GET the endpoint and read ``result.records`` from the JSON body.
      2. Keep nine columns and turn the strings ``'NA'``, ``'-Inf'`` and
         ``'Inf'`` into NaN.
      3. Run one INSERT per record, appending a run-wide timestamp as the
         tenth value.
    """
    postgres = PostgresHook(postgres_conn_id='postgres_default')
    insert_query = f"""INSERT INTO {table_variables['name']}
                     VALUES (%s, %s, %s, %s, %s, %s ,%s, %s, %s, %s)"""

    http = HttpHook(http_conn_id=table_variables['http_conn_id'],
                    method='GET')
    reply = http.run(
        endpoint=table_variables['endpoint'],
        # NOTE(review): limit looks like a "give me everything" value --
        # verify that the API does not cap or page the result.
        data={
            'resource_id': table_variables['resource_id'],
            'limit': '10000000',
        },
    )
    http.check_response(response=reply)

    payload = reply.json()
    indicator_rows = payload['result']['records']

    # The INSERT has no column list, so this order has to mirror the table.
    kept_columns = [
        '_id', 'Data_Indicador', 'Font', 'Frequencia_Indicador',
        'Nom_Indicador', 'Nom_Variable', 'Territori', 'Unitat', 'Valor',
    ]
    indicators = pd.DataFrame(indicator_rows)[kept_columns]
    indicators = indicators.replace(
        {'NA': np.nan, '-Inf': np.nan, 'Inf': np.nan})

    # One timestamp for the whole batch, captured once the data is ready.
    batch_ts = datetime.utcnow()

    for values in indicators.itertuples(index=False):
        # Nine column values followed by the batch timestamp.
        postgres.run(insert_query, parameters=tuple(values) + (batch_ts,))
Beispiel #4
0
def download_file(
    url: str, destination, data=None, headers=None, http_conn_id="http_default"
):
    """Stream the body of an HTTP GET request into a file using Airflow's HttpHook.

    Args:
        url: Endpoint passed to ``HttpHook.run``. It may only contain a
            scheme (``://``) when ``http_conn_id`` is falsy.
        destination: Path of the file to (over)write in binary mode.
        data: Forwarded unchanged to ``HttpHook.run``.
        headers: Forwarded unchanged to ``HttpHook.run``.
        http_conn_id: Airflow connection id handed to ``HttpHook``.

    Returns:
        The response object. It was used as a context manager here, so its
        ``with`` block has already been exited when it is returned.

    Raises:
        ValueError: if a connection id is given together with a URL that
            contains ``://``.
        Whatever ``HttpHook.check_response`` raises for a failed response.
    """
    if http_conn_id and "://" in url:
        raise ValueError("Use http_conn_id=None when downloading from FQDN-urls")

    # The hook's own response check is switched off so that the check can be
    # done below, while the streamed response is still open.
    request_options = {"check_response": False, "stream": True, "verify": True}

    hook = HttpHook(method="GET", http_conn_id=http_conn_id)
    response = hook.run(
        url, data=data, headers=headers, extra_options=request_options
    )

    with response:
        # Validate first: the destination is only opened (and truncated)
        # once the response has passed the check.
        hook.check_response(response)

        with open(destination, "wb") as target:
            # filter(None, ...) drops empty keep-alive chunks.
            body_chunks = filter(None, response.iter_content(chunk_size=8192))
            target.writelines(body_chunks)

    return response