import json
from datetime import datetime

from airflow.hooks.sqlite_hook import SqliteHook  # airflow.providers.sqlite.hooks.sqlite in Airflow 2.x


def save_data_into_db():
    # Load the Covid19 report previously written to data.json and insert it into SQLite.
    sqlite_hook = SqliteHook(sqlite_conn_id='playgroundDB')
    with open('data.json') as f:
        data = json.load(f)
    insert = """
        INSERT INTO Covid19Report (
            confirmed, recovered, hospitalized, deaths,
            new_confirmed, new_recovered, new_hospitalized, new_deaths,
            update_date, source, dev_by, sever_by)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
    """
    sqlite_hook.run(
        insert,
        parameters=(data['Confirmed'],
                    data['Recovered'],
                    data['Hospitalized'],
                    data['Deaths'],
                    data['NewConfirmed'],
                    data['NewRecovered'],
                    data['NewHospitalized'],
                    data['NewDeaths'],
                    # the feed reports the timestamp as 'dd/mm/YYYY HH:MM'
                    datetime.strptime(data['UpdateDate'], '%d/%m/%Y %H:%M'),
                    data['Source'],
                    data['DevBy'],
                    data['SeverBy']))
from airflow.hooks.http_hook import HttpHook
from airflow.hooks.sqlite_hook import SqliteHook


def get_weather(url_params):
    # The original passed conn_name_attr='sqlite_default ' (note the trailing
    # space); the connection id actually goes through sqlite_conn_id.
    db_hook = SqliteHook(sqlite_conn_id='sqlite_default')
    api_hook = HttpHook(http_conn_id='http_default', method='GET')
    # add_or_replace_parameters merges url_params into the endpoint's
    # query string (see the sketch below).
    url = add_or_replace_parameters('v1/history/daily', url_params)
    resp = api_hook.run(url)
    data = resp.json()['data']
    # Usually this doesn't matter, but for large responses dropping the
    # reference early frees the memory sooner.
    del resp
    weather_insert = """
        insert or ignore into weather
            (station_id, record_date, temperature, temperature_min,
             temperature_max, winddirection, windspeed, sunshine, pressure)
        values (?, ?, ?, ?, ?, ?, ?, ?, ?);
    """
    for day in data:
        db_hook.run(weather_insert,
                    parameters=(url_params['station'],
                                day['date'],
                                day['temperature'],
                                day['temperature_min'],
                                day['temperature_max'],
                                day['winddirection'],
                                day['windspeed'],
                                day['sunshine'],
                                day['pressure']))
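add_or_replace_parameters is not defined in the snippet above. A minimal sketch of what such a helper could look like, using only the standard library; the name comes from the snippet, but the exact merge semantics here are an assumption:

from urllib.parse import parse_qsl, urlencode, urlsplit, urlunsplit


def add_or_replace_parameters(url, params):
    # Hypothetical helper: merge `params` into the URL's query string,
    # overwriting any parameter that is already present.
    scheme, netloc, path, query, fragment = urlsplit(url)
    merged = dict(parse_qsl(query))
    merged.update({k: str(v) for k, v in params.items()})
    return urlunsplit((scheme, netloc, path, urlencode(merged), fragment))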
from airflow.hooks.sqlite_hook import SqliteHook


def process_weather(station, field_to_process):
    db_hook = SqliteHook(sqlite_conn_id='sqlite_default')
    # station and field_to_process are interpolated directly into the SQL;
    # acceptable here because both come from the pipeline itself, not users.
    weather_select = (f'select record_date, {field_to_process} '
                      f'from weather where station_id={station} '
                      f'order by record_date;')
    data = db_hook.get_pandas_df(weather_select)
    # Centred 3-day rolling mean of the selected field only; record_date is
    # excluded so newer pandas versions don't choke on the non-numeric column.
    average = data[[field_to_process]].rolling(
        3, center=True).mean().rename(columns={field_to_process: 'average'})
    data = data.merge(average, left_index=True, right_index=True)
    del average
    weather_update = """
        update weather set average = ?
        where station_id = ? and record_date = ?;
    """
    # DataFrame.apply is used as a hack to avoid DataFrame.itertuples(),
    # iteritems() and iterrows(), which can become a bottleneck once the
    # number of rows grows beyond a few thousand.
    data.apply(lambda row: db_hook.run(weather_update,
                                       parameters=(row['average'],
                                                   station,
                                                   row['record_date'])),
               axis=1)
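If the per-row db_hook.run() calls above ever become the real bottleneck, the updates can instead be batched through a single executemany() on the underlying DB-API connection, since SqliteHook.get_conn() returns a plain sqlite3 connection. A sketch under the same table layout; the function name and signature are illustrative, not part of the original code:

def update_averages_bulk(db_hook, station, data):
    # Hypothetical batched variant: one executemany() round-trip instead of
    # one db_hook.run() per row. itertuples() is fine here because the cost
    # shifts from per-row SQL calls to a single bulk statement.
    rows = [(row.average, station, row.record_date)
            for row in data.itertuples(index=False)]
    conn = db_hook.get_conn()
    try:
        conn.executemany(
            'update weather set average = ? '
            'where station_id = ? and record_date = ?;', rows)
        conn.commit()
    finally:
        conn.close()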
def create_covid19_report_table():
    sqlite_hook = SqliteHook(sqlite_conn_id='playgroundDB')
    create = """
        CREATE TABLE "Covid19Report" (
            "confirmed"        INTEGER,
            "recovered"        INTEGER,
            "hospitalized"     INTEGER,
            "deaths"           INTEGER,
            "new_confirmed"    INTEGER,
            "new_recovered"    INTEGER,
            "new_hospitalized" INTEGER,
            "new_deaths"       INTEGER,
            "update_date"      TEXT,
            "source"           TEXT,
            "dev_by"           TEXT,
            "sever_by"         TEXT);
    """
    sqlite_hook.run(create)
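For completeness, here is one way the two covid19 callables above could be wired together with PythonOperator. This is a sketch, not part of the original code: dag_id, schedule and start_date are illustrative, and the PythonOperator import path differs across Airflow versions.

from datetime import datetime

from airflow import DAG
from airflow.operators.python import PythonOperator  # airflow.operators.python_operator in Airflow 1.x

with DAG(dag_id='covid19_report',
         start_date=datetime(2021, 1, 1),
         schedule_interval='@daily',
         catchup=False) as dag:
    create_table = PythonOperator(task_id='create_covid19_report_table',
                                  python_callable=create_covid19_report_table)
    save_data = PythonOperator(task_id='save_data_into_db',
                               python_callable=save_data_into_db)
    # CREATE TABLE without IF NOT EXISTS fails on re-runs, so in practice
    # the table-creation task is run once rather than on every schedule.
    create_table >> save_data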
def execute(self, context):
    _log.info('Executing: ' + self.sql)
    hook = SqliteHook(sqlite_conn_id=self.sqlite_conn_id)
    hook.run(self.sql, parameters=self.parameters)
def execute(self, context):
    logging.info('Executing: ' + self.sql)
    hook = SqliteHook(sqlite_conn_id=self.sqlite_conn_id)
    hook.run(self.sql, parameters=self.parameters)
def execute(self, context):
    self.logger.info('Executing: %s', self.sql)
    hook = SqliteHook(sqlite_conn_id=self.sqlite_conn_id)
    hook.run(self.sql, parameters=self.parameters)
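The three execute() variants above are the same operator method across Airflow versions, differing only in logging style; all of them assume self.sql, self.sqlite_conn_id and self.parameters were set in the operator's constructor. A minimal reconstruction along the lines of Airflow's SqliteOperator, with the details to be treated as an approximation rather than the exact upstream source:

from airflow.models import BaseOperator


class SqliteOperator(BaseOperator):
    # `sql` is a templated field, so Jinja expressions in the statement
    # are rendered before execute() runs.
    template_fields = ('sql',)

    def __init__(self, sql, sqlite_conn_id='sqlite_default',
                 parameters=None, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.sql = sql
        self.sqlite_conn_id = sqlite_conn_id
        self.parameters = parameters or []

    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        hook = SqliteHook(sqlite_conn_id=self.sqlite_conn_id)
        hook.run(self.sql, parameters=self.parameters)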