def insert_rows():
    """Fetch the covid timeline from the configured API and insert one row per day.

    Reads connection/endpoint settings from the module-level ``table_variables``
    dict, GETs the timeline, and inserts (date, 10 case metrics, insert_ts) into
    the target table via PostgresHook.

    Raises whatever ``HttpHook.check_response`` raises on a bad HTTP status.
    """
    # Single timestamp for the whole load so every row of this run matches.
    insert_ts = datetime.utcnow()
    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    sql_insert = f"""INSERT INTO {table_variables['name']} VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'], method='GET')
    res = http_hook.run(endpoint=table_variables['endpoint'], data={'codigo': table_variables['codigo']})
    http_hook.check_response(response=res)
    cases_df = pd.DataFrame(res.json()['timeline'])
    # Hoisted out of the loop: the metric columns we persist, in table order.
    metric_columns = ['casosConfirmados', 'casosUci', 'casosFallecidos',
                      'casosHospitalizados', 'casosRecuperados',
                      'casosConfirmadosDiario', 'casosUciDiario',
                      'casosFallecidosDiario', 'casosHospitalizadosDiario',
                      'casosRecuperadosDiario']
    for row in cases_df.itertuples(index=False):
        # Assumes each timeline entry carries the national aggregate in
        # regiones[0] — TODO confirm against the API contract.
        information = pd.Series(row.regiones[0]['data'])[metric_columns]
        # BUGFIX: the original read information[0]..information[9], i.e.
        # integer-positional indexing on a label-indexed Series, which is
        # deprecated in pandas 2.x and removed in pandas 3.0. Unpacking the
        # label-selected values yields the same 10 values in the same order.
        pg_hook.run(sql_insert, parameters=(row.fecha, *information, insert_ts))
def insert_rows():
    """Load unemployment records from the open-data API into Postgres.

    Connection ids, endpoint and resource id come from the module-level
    ``table_variables`` dict. Each record becomes one INSERT with a shared
    load timestamp appended as the last column.
    """
    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'], method='GET')
    sql_insert = f"""INSERT INTO {table_variables['name']} VALUES (%s, %s, %s, %s, %s, %s ,%s, %s, %s, %s)"""
    request_params = {
        'resource_id': table_variables['resource_id'],
        'limit': '10000000'
    }
    res = http_hook.run(endpoint=table_variables['endpoint'], data=request_params)
    http_hook.check_response(response=res)
    # Keep only the columns the target table expects, in table order.
    wanted_columns = [
        '_id', 'Any', 'Mes', 'Codi_Districte', 'Nom_Districte', 'Codi_Barri',
        'Nom_Barri', 'Durada_atur', 'Nombre'
    ]
    unemployment_df = pd.DataFrame(res.json()['result']['records'])[wanted_columns]
    # Normalize the API's textual missing/infinite markers to SQL NULLs.
    unemployment_df.replace({
        'NA': np.nan,
        '-Inf': np.nan,
        'Inf': np.nan
    }, inplace=True)
    insert_ts = datetime.utcnow()
    for record in unemployment_df.itertuples(index=False):
        pg_hook.run(sql_insert, parameters=tuple(record) + (insert_ts,))
def insert_rows():
    """Load Barcelona covid indicator records from the open-data API into Postgres.

    Connection ids, endpoint and resource id come from the module-level
    ``table_variables`` dict. Each record becomes one INSERT with a shared
    load timestamp appended as the last column.
    """
    pg_hook = PostgresHook(postgres_conn_id='postgres_default')
    http_hook = HttpHook(http_conn_id=table_variables['http_conn_id'], method='GET')
    sql_insert = f"""INSERT INTO {table_variables['name']} VALUES (%s, %s, %s, %s, %s, %s ,%s, %s, %s, %s)"""
    request_params = {
        'resource_id': table_variables['resource_id'],
        'limit': '10000000'
    }
    res = http_hook.run(endpoint=table_variables['endpoint'], data=request_params)
    http_hook.check_response(response=res)
    # Keep only the columns the target table expects, in table order.
    wanted_columns = [
        '_id', 'Data_Indicador', 'Font', 'Frequencia_Indicador',
        'Nom_Indicador', 'Nom_Variable', 'Territori', 'Unitat', 'Valor'
    ]
    bcn_covid_df = pd.DataFrame(res.json()['result']['records'])[wanted_columns]
    # Normalize the API's textual missing/infinite markers to SQL NULLs.
    bcn_covid_df.replace({
        'NA': np.nan,
        '-Inf': np.nan,
        'Inf': np.nan
    }, inplace=True)
    insert_ts = datetime.utcnow()
    for record in bcn_covid_df.itertuples(index=False):
        pg_hook.run(sql_insert, parameters=tuple(record) + (insert_ts,))
def download_file(
    url: str, destination, data=None, headers=None, http_conn_id="http_default"
):
    """Download *url* to *destination* via Airflow's HttpHook, streaming in chunks.

    When ``http_conn_id`` is set, *url* must be a connection-relative endpoint
    (no scheme); pass ``http_conn_id=None`` to download from a fully-qualified
    URL instead. Returns the (consumed) response object.

    Raises ValueError on a scheme-qualified URL combined with a connection id,
    and whatever ``HttpHook.check_response`` raises on a bad HTTP status.
    """
    if http_conn_id and "://" in url:
        raise ValueError("Use http_conn_id=None when downloading from FQDN-urls")

    hook = HttpHook(method="GET", http_conn_id=http_conn_id)
    # check_response is deferred so we can inspect status ourselves while
    # streaming; stream=True keeps the body out of memory until iterated.
    response = hook.run(
        url,
        data=data,
        headers=headers,
        extra_options={"check_response": False, "stream": True, "verify": True},
    )
    # The with-block is required for stream=True: it guarantees the
    # connection is released even if check_response raises.
    with response:
        hook.check_response(response)
        with open(destination, "wb") as out:
            for chunk in response.iter_content(chunk_size=8192):
                # Empty chunks are keep-alives; skip them.
                if not chunk:
                    continue
                out.write(chunk)
    return response