def test_34_load_to_dwh_duplicate_insert(self):
    """Re-load the same transformed frame and check the table is unchanged.

    ``covid.daily_stats`` is read before and after calling ``load_to_dwh``
    with ``self._df_transformed``. Both reads must show a (253, 4) result
    whose newest row (ordered by ``rep_date`` descending) is the
    2020-09-30 record.

    NOTE(review): the pre-load check presumably relies on an earlier test
    having already populated the table -- confirm the intended run order.
    """
    query = "select * from covid.daily_stats ds order by ds.rep_date desc"
    expected_shape = (253, 4)
    expected_latest = [datetime.date(2020, 9, 30), 7262695, 206852, 2840688]

    # Snapshot before the duplicate load: newest record first, then shape.
    before = db_utils.get_dwh_result_as_df(self.conn, query, "")
    self.assertListEqual(list(before.iloc[0]), expected_latest)
    self.assertTupleEqual(before.shape, expected_shape)

    load_to_dwh(self._df_transformed, self.conn)

    # Snapshot after the duplicate load must match the one taken before it.
    after = db_utils.get_dwh_result_as_df(self.conn, query, "")
    self.assertTupleEqual(after.shape, expected_shape)
    self.assertListEqual(list(after.iloc[0]), expected_latest)
def test_30_load_to_dwh(self):
    """Load ``self._df_transformed`` and verify the resulting table contents.

    After ``load_to_dwh`` runs, ``covid.daily_stats`` (read newest-first by
    ``rep_date``) must have shape (253, 4), the columns ``rep_date``,
    ``cases``, ``deaths``, ``recovered`` in that order, and the 2020-09-30
    record as its first row.
    """
    load_to_dwh(self._df_transformed, self.conn)

    loaded = db_utils.get_dwh_result_as_df(
        self.conn,
        "select * from covid.daily_stats ds order by ds.rep_date desc",
        "",
    )

    expected_shape = (253, 4)
    expected_latest = [datetime.date(2020, 9, 30), 7262695, 206852, 2840688]
    expected_columns = ['rep_date', 'cases', 'deaths', 'recovered']

    self.assertTupleEqual(loaded.shape, expected_shape)
    self.assertListEqual(list(loaded.columns), expected_columns)
    self.assertListEqual(list(loaded.iloc[0]), expected_latest)
def test_32_load_to_dwh_empty_dataframe(self):
    """Pass an empty frame to ``load_to_dwh`` and check the table afterwards.

    The frame has the columns ``date``, ``cases``, ``deaths``, ``recovered``
    and no rows. After the call, ``covid.daily_stats`` must have shape
    (253, 4) with the 2020-09-30 record as its newest row.

    NOTE(review): the expected 253 rows presumably come from an earlier
    test's load -- confirm the intended run order. The frame uses ``date``
    while the table column is ``rep_date``; verify this matches the input
    schema ``load_to_dwh`` expects.
    """
    empty_frame = pd.DataFrame(columns=['date', 'cases', 'deaths', 'recovered'])
    load_to_dwh(empty_frame, self.conn)

    current = db_utils.get_dwh_result_as_df(
        self.conn,
        "select * from covid.daily_stats ds order by ds.rep_date desc",
        "",
    )

    expected_shape = (253, 4)
    expected_latest = [datetime.date(2020, 9, 30), 7262695, 206852, 2840688]

    self.assertTupleEqual(current.shape, expected_shape)
    self.assertListEqual(list(current.iloc[0]), expected_latest)
def test_42_load_incremental_duplicate_insert(self):
    """Re-run the incremental load and check the table is unchanged.

    ``covid.daily_stats`` must show shape (257, 4) with the 2020-10-04
    record newest, both before and after extracting, transforming and
    loading ``testdata/nyt_data_latest.csv`` together with
    ``self._test_covid_jh_data``.

    NOTE(review): the pre-load state presumably comes from an earlier
    incremental-load test -- confirm the intended run order.
    """
    query = "select * from covid.daily_stats ds order by ds.rep_date desc"
    expected_shape = (257, 4)
    expected_latest = [datetime.date(2020, 10, 4), 7444705, 209603, 2911699]

    def check_table_state():
        # Read the table newest-first and compare shape, then the top row.
        snapshot = db_utils.get_dwh_result_as_df(self.conn, query, "")
        self.assertTupleEqual(snapshot.shape, expected_shape)
        self.assertListEqual(list(snapshot.iloc[0]), expected_latest)

    check_table_state()

    # Run the full extract -> transform -> load path on the latest NYT file.
    latest_nyt_csv = os.path.join(sys.path[0], 'testdata/nyt_data_latest.csv')
    nyt_frame, jh_frame = extract_covid_data(latest_nyt_csv, self._test_covid_jh_data)
    transformed_latest = transform(nyt_frame, jh_frame)
    load_to_dwh(transformed_latest, self.conn)

    check_table_state()