def vaccinations(**kwargs):
    """Reload the ``vacinations_per_country`` table from the vaccinations CSV.

    Reads ``<AIRFLOW_HOME>/data/country_vaccinations.csv`` and, inside a
    single ``begin()`` block on the ``my_database`` connection, deletes the
    table's current rows and appends the rows read from the file.

    Does nothing when the ``AIRFLOW_HOME`` environment variable is unset or
    empty.

    Args:
        **kwargs: Accepted for the caller's convenience; not used here.
    """
    base_dir = os.getenv("AIRFLOW_HOME")
    if not base_dir:
        return

    engine = PostgresHook(
        postgres_conn_id='my_database',
        schema='business_intelligence',
    ).get_sqlalchemy_engine()

    # Counters are read as pandas' nullable integer type so that missing
    # cells do not force the columns to float.
    nullable_int_columns = (
        "total_vaccinations",
        "people_vaccinated",
        "people_fully_vaccinated",
        "daily_vaccinations",
        "daily_vaccinations_per_million",
    )
    csv_path = f'{base_dir}/data/country_vaccinations.csv'
    frame = pd.read_csv(
        csv_path,
        dtype=dict.fromkeys(nullable_int_columns, "Int64"),
        encoding='utf-8',
    )

    with engine.begin() as conn:
        conn.execute('DELETE FROM vacinations_per_country')
        frame.to_sql(
            'vacinations_per_country',
            con=conn,
            if_exists='append',
            index=False,
        )
def _reshape_confirmed(df):
    """Unpivot the wide confirmed-cases frame into one row per location/date.

    The frame must contain ``Province/State``, ``Country/Region``, ``Lat``
    and ``Long``; every column from the fifth onwards is treated as a date
    column whose header is formatted ``%m/%d/%y``.

    Returns:
        A frame with columns ``provincia``, ``country``, ``lat``, ``long``,
        ``dates`` and ``value``, ordered source row by source row.

    Raises:
        ValueError: If a date column header does not match ``%m/%d/%y``.
    """
    date_cols = list(df.columns[4:])
    province = df['Province/State']

    # The country label gets a "(province)" suffix only where a province is
    # present; rows without one keep the bare country name.
    country = df['Country/Region'].copy()
    has_province = province.notna()
    country.loc[has_province] = (
        country[has_province] + '(' + province[has_province] + ')'
    )

    long_df = (
        df.assign(country=country)
        .melt(
            id_vars=['Province/State', 'country', 'Lat', 'Long'],
            value_vars=date_cols,
            var_name='dates',
            value_name='value',
            ignore_index=False,
        )
        # melt stacks one date column after another; a stable sort on the
        # source row index restores row-by-row order.
        .sort_index(kind='mergesort')
        .reset_index(drop=True)
    )
    long_df['dates'] = pd.to_datetime(long_df['dates'], format='%m/%d/%y')
    long_df = long_df.rename(
        columns={'Province/State': 'provincia', 'Lat': 'lat', 'Long': 'long'}
    )
    return long_df[['provincia', 'country', 'lat', 'long', 'dates', 'value']]


def _append_global_totals(carga):
    """Return *carga* followed by one ``-Global-`` row per date.

    Each added row holds the sum of ``value`` for that date, an empty
    ``provincia`` and no coordinates.
    """
    # Only ``value`` is summed: the text columns and coordinates are replaced
    # with fixed values below, so aggregating them would be wasted work and
    # can fail on non-numeric data.
    resumen = carga.groupby('dates', as_index=False)['value'].sum()
    resumen['provincia'] = ''
    resumen['country'] = '-Global-'
    resumen['lat'] = None
    resumen['long'] = None
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([carga, resumen])


def etl_process(**kwargs):
    """Load the confirmed-cases CSV into the ``confirmed`` table.

    Reads ``FILE_NAME`` from the path of the ``FILE_CONNECTION_ID`` file
    connection, reshapes it to one row per location and date, adds per-date
    ``-Global-`` totals, and then, inside a single ``begin()`` block on the
    ``pgsql`` connection, truncates ``confirmed``, appends the reshaped rows
    and appends one row to ``log_carga`` recording the load time.

    Args:
        **kwargs: Accepted for the caller's convenience; not used here.
    """
    print(11)  # progress marker: start
    file_path = FSHook(conn_id=FILE_CONNECTION_ID).get_path()
    full_path = f'{file_path}/{FILE_NAME}'
    df = pd.read_csv(full_path, encoding="ISO-8859-1")
    print(22)  # progress marker: file read

    # Data transformation.
    carga = _append_global_totals(_reshape_confirmed(df))
    locallog = pd.DataFrame({'tipo': ['confirmed'], 'fecha': [datetime.now()]})

    psql_connection = PostgresHook('pgsql').get_sqlalchemy_engine()
    with psql_connection.begin() as connection:
        # NOTE(review): a plain SQL string passed to execute() is rejected by
        # SQLAlchemy 2.x; confirm the installed version or wrap it in text().
        connection.execute("truncate confirmed")
        carga.to_sql('confirmed', con=connection, if_exists='append',
                     index=False)
        locallog.to_sql('log_carga', con=connection, if_exists='append',
                        index=False)